Diffstat (limited to 'src/mesa')
-rw-r--r--  src/mesa/Makefile.am | 2
-rw-r--r--  src/mesa/Makefile.sources | 1
-rw-r--r--  src/mesa/drivers/common/driverfuncs.c | 4
-rw-r--r--  src/mesa/drivers/common/meta.c | 134
-rw-r--r--  src/mesa/drivers/common/meta.h | 14
-rw-r--r--  src/mesa/drivers/common/meta_blit.c | 8
-rw-r--r--  src/mesa/drivers/common/meta_copy_image.c | 4
-rw-r--r--  src/mesa/drivers/common/meta_generate_mipmap.c | 6
-rw-r--r--  src/mesa/drivers/common/meta_tex_subimage.c | 84
-rw-r--r--  src/mesa/drivers/dri/common/Android.mk | 14
-rw-r--r--  src/mesa/drivers/dri/common/Makefile.am | 8
-rw-r--r--  src/mesa/drivers/dri/common/SConscript | 6
-rw-r--r--  src/mesa/drivers/dri/common/dri_util.c | 22
-rw-r--r--  src/mesa/drivers/dri/common/dri_util.h | 5
-rw-r--r--  src/mesa/drivers/dri/common/drirc | 23
-rw-r--r--  src/mesa/drivers/dri/common/utils.c | 1
-rw-r--r--  src/mesa/drivers/dri/i915/i830_state.c | 20
-rw-r--r--  src/mesa/drivers/dri/i915/i915_context.c | 3
-rw-r--r--  src/mesa/drivers/dri/i915/i915_state.c | 2
-rw-r--r--  src/mesa/drivers/dri/i915/i915_texstate.c | 2
-rw-r--r--  src/mesa/drivers/dri/i915/intel_context.c | 1
-rw-r--r--  src/mesa/drivers/dri/i915/intel_context.h | 2
-rw-r--r--  src/mesa/drivers/dri/i915/intel_fbo.c | 2
-rw-r--r--  src/mesa/drivers/dri/i915/intel_mipmap_tree.c | 2
-rw-r--r--  src/mesa/drivers/dri/i915/intel_render.c | 2
-rw-r--r--  src/mesa/drivers/dri/i915/intel_tex_image.c | 2
-rw-r--r--  src/mesa/drivers/dri/i915/intel_tex_subimage.c | 2
-rw-r--r--  src/mesa/drivers/dri/i915/intel_tris.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/Makefile.sources | 5
-rw-r--r--  src/mesa/drivers/dri/i965/brw_binding_tables.c | 222
-rw-r--r--  src/mesa/drivers/dri/i965/brw_blorp.cpp | 8
-rw-r--r--  src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 22
-rw-r--r--  src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 9
-rw-r--r--  src/mesa/drivers/dri/i965/brw_cfg.cpp | 3
-rw-r--r--  src/mesa/drivers/dri/i965/brw_clear.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/brw_context.c | 28
-rw-r--r--  src/mesa/drivers/dri/i965/brw_context.h | 118
-rw-r--r--  src/mesa/drivers/dri/i965/brw_cs.cpp | 28
-rw-r--r--  src/mesa/drivers/dri/i965/brw_curbe.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/brw_defines.h | 126
-rw-r--r--  src/mesa/drivers/dri/i965/brw_device_info.c | 56
-rw-r--r--  src/mesa/drivers/dri/i965/brw_device_info.h | 2
-rw-r--r--  src/mesa/drivers/dri/i965/brw_draw.c | 121
-rw-r--r--  src/mesa/drivers/dri/i965/brw_draw.h | 3
-rw-r--r--  src/mesa/drivers/dri/i965/brw_draw_upload.c | 67
-rw-r--r--  src/mesa/drivers/dri/i965/brw_eu_emit.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs.cpp | 1810
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs.h | 80
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_builder.h | 96
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp | 1
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp | 2
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 13
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 25
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 86
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp | 38
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 617
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp | 8
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 63
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp | 33
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp | 12
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp | 1096
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_surface_builder.h | 89
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp | 2
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 1140
-rw-r--r--  src/mesa/drivers/dri/i965/brw_gs.c | 14
-rw-r--r--  src/mesa/drivers/dri/i965/brw_gs_surface_state.c | 25
-rw-r--r--  src/mesa/drivers/dri/i965/brw_inst.h | 16
-rw-r--r--  src/mesa/drivers/dri/i965/brw_ir_fs.h | 52
-rw-r--r--  src/mesa/drivers/dri/i965/brw_ir_vec4.h | 2
-rw-r--r--  src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 12
-rw-r--r--  src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c | 12
-rw-r--r--  src/mesa/drivers/dri/i965/brw_meta_updownsample.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/brw_misc_state.c | 55
-rw-r--r--  src/mesa/drivers/dri/i965/brw_nir.c | 127
-rw-r--r--  src/mesa/drivers/dri/i965/brw_nir.h | 10
-rw-r--r--  src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c | 69
-rw-r--r--  src/mesa/drivers/dri/i965/brw_performance_monitor.c | 14
-rw-r--r--  src/mesa/drivers/dri/i965/brw_pipe_control.c | 359
-rw-r--r--  src/mesa/drivers/dri/i965/brw_primitive_restart.c | 3
-rw-r--r--  src/mesa/drivers/dri/i965/brw_program.c | 18
-rw-r--r--  src/mesa/drivers/dri/i965/brw_queryobj.c | 15
-rw-r--r--  src/mesa/drivers/dri/i965/brw_reg.h | 8
-rw-r--r--  src/mesa/drivers/dri/i965/brw_sampler_state.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 4
-rw-r--r--  src/mesa/drivers/dri/i965/brw_sf_state.c | 11
-rw-r--r--  src/mesa/drivers/dri/i965/brw_shader.cpp | 130
-rw-r--r--  src/mesa/drivers/dri/i965/brw_shader.h | 5
-rw-r--r--  src/mesa/drivers/dri/i965/brw_state.h | 29
-rw-r--r--  src/mesa/drivers/dri/i965/brw_state_batch.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/brw_state_cache.c | 87
-rw-r--r--  src/mesa/drivers/dri/i965/brw_state_upload.c | 20
-rw-r--r--  src/mesa/drivers/dri/i965/brw_surface_formats.c | 109
-rw-r--r--  src/mesa/drivers/dri/i965/brw_tex_layout.c | 54
-rw-r--r--  src/mesa/drivers/dri/i965/brw_urb.c | 6
-rw-r--r--  src/mesa/drivers/dri/i965/brw_util.h | 4
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4.cpp | 107
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4.h | 105
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp | 1
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 1
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 10
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp | 118
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 191
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h | 11
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp | 41
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 1548
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp | 26
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 602
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_vp.cpp | 9
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp | 18
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vs.c | 37
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vs.h | 24
-rw-r--r--  src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 25
-rw-r--r--  src/mesa/drivers/dri/i965/brw_wm.c | 96
-rw-r--r--  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 251
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_blorp.cpp | 6
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_cc.c | 6
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_depth_state.c | 8
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 13
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_gs_visitor.h | 6
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_multisample_state.c | 11
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_queryobj.c | 6
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_sf_state.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_sol.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_surface_state.c | 3
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_urb.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/gen6_viewport_state.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/gen7_blorp.cpp | 6
-rw-r--r--  src/mesa/drivers/dri/i965/gen7_disable.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/gen7_gs_state.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/gen7_misc_state.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/gen7_sf_state.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/gen7_sol_state.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/gen7_urb.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/gen7_viewport_state.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/gen7_vs_state.c | 23
-rw-r--r--  src/mesa/drivers/dri/i965/gen7_wm_state.c | 18
-rw-r--r--  src/mesa/drivers/dri/i965/gen8_depth_state.c | 19
-rw-r--r--  src/mesa/drivers/dri/i965/gen8_disable.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/gen8_gs_state.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/gen8_ps_state.c | 12
-rw-r--r--  src/mesa/drivers/dri/i965/gen8_sf_state.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/gen8_surface_state.c | 11
-rw-r--r--  src/mesa/drivers/dri/i965/gen8_viewport_state.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/gen8_vs_state.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 420
-rw-r--r--  src/mesa/drivers/dri/i965/intel_batchbuffer.h | 90
-rw-r--r--  src/mesa/drivers/dri/i965/intel_blit.c | 356
-rw-r--r--  src/mesa/drivers/dri/i965/intel_blit.h | 28
-rw-r--r--  src/mesa/drivers/dri/i965/intel_buffer_objects.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/intel_copy_image.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/intel_debug.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/intel_extensions.c | 41
-rw-r--r--  src/mesa/drivers/dri/i965/intel_fbo.c | 18
-rw-r--r--  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 145
-rw-r--r--  src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 13
-rw-r--r--  src/mesa/drivers/dri/i965/intel_pixel_read.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/intel_reg.h | 19
-rw-r--r--  src/mesa/drivers/dri/i965/intel_screen.c | 70
-rw-r--r--  src/mesa/drivers/dri/i965/intel_screen.h | 7
-rw-r--r--  src/mesa/drivers/dri/i965/intel_syncobj.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/intel_tex.c | 2
-rw-r--r--  src/mesa/drivers/dri/i965/intel_tex_image.c | 34
-rw-r--r--  src/mesa/drivers/dri/i965/intel_tex_subimage.c | 4
-rw-r--r--  src/mesa/drivers/dri/i965/intel_tex_validate.c | 5
-rw-r--r--  src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp | 4
-rw-r--r--  src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp | 4
-rw-r--r--  src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp | 3
-rw-r--r--  src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp | 3
-rw-r--r--  src/mesa/drivers/dri/nouveau/Makefile.am | 4
-rw-r--r--  src/mesa/drivers/dri/nouveau/nouveau_swtnl_t.c | 1
-rw-r--r--  src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c | 7
-rw-r--r--  src/mesa/drivers/dri/nouveau/nv04_render.c | 1
-rw-r--r--  src/mesa/drivers/dri/r200/r200_blit.c | 141
-rw-r--r--  src/mesa/drivers/dri/r200/r200_context.c | 13
-rw-r--r--  src/mesa/drivers/dri/r200/r200_context.h | 5
-rw-r--r--  src/mesa/drivers/dri/r200/r200_state.c | 4
-rw-r--r--  src/mesa/drivers/dri/r200/r200_state_init.c | 5
-rw-r--r--  src/mesa/drivers/dri/r200/r200_tex.c | 18
-rw-r--r--  src/mesa/drivers/dri/r200/r200_tex.h | 64
-rw-r--r--  src/mesa/drivers/dri/r200/r200_texstate.c | 71
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_blit.c | 92
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_common.c | 20
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_common_context.c | 30
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_common_context.h | 19
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_context.c | 12
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_context.h | 1
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_fbo.c | 8
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c | 2
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_pixel_read.c | 2
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_screen.c | 44
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_state.c | 4
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_state_init.c | 3
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_swtcl.c | 2
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_tex.c | 6
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_tex.h | 35
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_texstate.c | 44
-rw-r--r--  src/mesa/drivers/dri/radeon/radeon_texture.c | 20
-rw-r--r--  src/mesa/drivers/dri/swrast/Makefile.am | 2
-rw-r--r--  src/mesa/drivers/osmesa/osmesa.c | 2
-rw-r--r--  src/mesa/main/api_validate.c | 88
-rw-r--r--  src/mesa/main/atifragshader.c | 30
-rw-r--r--  src/mesa/main/attrib.c | 2
-rw-r--r--  src/mesa/main/blend.c | 50
-rw-r--r--  src/mesa/main/blit.c | 34
-rw-r--r--  src/mesa/main/blit.h | 6
-rw-r--r--  src/mesa/main/bufferobj.c | 401
-rw-r--r--  src/mesa/main/buffers.c | 22
-rw-r--r--  src/mesa/main/clear.c | 20
-rw-r--r--  src/mesa/main/condrender.c | 4
-rw-r--r--  src/mesa/main/config.h | 16
-rw-r--r--  src/mesa/main/context.c | 80
-rw-r--r--  src/mesa/main/context.h | 20
-rw-r--r--  src/mesa/main/copyimage.c | 8
-rw-r--r--  src/mesa/main/dd.h | 21
-rw-r--r--  src/mesa/main/debug.c | 6
-rw-r--r--  src/mesa/main/depth.c | 2
-rw-r--r--  src/mesa/main/dlist.c | 14
-rw-r--r--  src/mesa/main/drawpix.c | 18
-rw-r--r--  src/mesa/main/enable.c | 16
-rw-r--r--  src/mesa/main/enums.h | 2
-rw-r--r--  src/mesa/main/errors.c | 4
-rw-r--r--  src/mesa/main/errors.h | 1
-rw-r--r--  src/mesa/main/extensions.c | 6
-rw-r--r--  src/mesa/main/fbobject.c | 81
-rw-r--r--  src/mesa/main/feedback.c | 2
-rw-r--r--  src/mesa/main/ffvertex_prog.c | 10
-rw-r--r--  src/mesa/main/fog.c | 2
-rwxr-xr-x  src/mesa/main/format_parser.py | 7
-rw-r--r--  src/mesa/main/format_utils.h | 9
-rw-r--r--  src/mesa/main/formatquery.c | 12
-rw-r--r--  src/mesa/main/formats.c | 23
-rw-r--r--  src/mesa/main/formats.h | 5
-rw-r--r--  src/mesa/main/framebuffer.c | 2
-rw-r--r--  src/mesa/main/genmipmap.c | 2
-rw-r--r--  src/mesa/main/get.c | 75
-rw-r--r--  src/mesa/main/get_hash_generator.py | 12
-rw-r--r--  src/mesa/main/get_hash_params.py | 117
-rw-r--r--  src/mesa/main/getstring.c | 4
-rw-r--r--  src/mesa/main/glformats.c | 66
-rw-r--r--  src/mesa/main/glformats.h | 3
-rw-r--r--  src/mesa/main/hint.c | 4
-rw-r--r--  src/mesa/main/imports.c | 4
-rw-r--r--  src/mesa/main/imports.h | 28
-rw-r--r--  src/mesa/main/light.c | 42
-rw-r--r--  src/mesa/main/lines.c | 4
-rw-r--r--  src/mesa/main/macros.h | 22
-rw-r--r--  src/mesa/main/matrix.c | 8
-rw-r--r--  src/mesa/main/mipmap.c | 9
-rw-r--r--  src/mesa/main/mtypes.h | 222
-rw-r--r--  src/mesa/main/multisample.c | 9
-rw-r--r--  src/mesa/main/objectlabel.c | 2
-rw-r--r--  src/mesa/main/pack.c | 18
-rw-r--r--  src/mesa/main/pipelineobj.c | 34
-rw-r--r--  src/mesa/main/pixel.c | 4
-rw-r--r--  src/mesa/main/pixeltransfer.c | 19
-rw-r--r--  src/mesa/main/points.c | 8
-rw-r--r--  src/mesa/main/polygon.c | 14
-rw-r--r--  src/mesa/main/program_resource.c | 189
-rw-r--r--  src/mesa/main/queryobj.c | 30
-rw-r--r--  src/mesa/main/readpix.c | 142
-rw-r--r--  src/mesa/main/readpix.h | 13
-rw-r--r--  src/mesa/main/samplerobj.c | 22
-rw-r--r--  src/mesa/main/shader_query.cpp | 348
-rw-r--r--  src/mesa/main/shaderapi.c | 732
-rw-r--r--  src/mesa/main/shaderapi.h | 48
-rw-r--r--  src/mesa/main/shaderimage.c | 2
-rw-r--r--  src/mesa/main/shaderobj.h | 105
-rw-r--r--  src/mesa/main/state.c | 54
-rw-r--r--  src/mesa/main/tests/dispatch_sanity.cpp | 27
-rw-r--r--  src/mesa/main/tests/enum_strings.cpp | 13
-rw-r--r--  src/mesa/main/texenv.c | 10
-rw-r--r--  src/mesa/main/texformat.c | 2
-rw-r--r--  src/mesa/main/texgen.c | 6
-rw-r--r--  src/mesa/main/texgetimage.c | 1191
-rw-r--r--  src/mesa/main/texgetimage.h | 40
-rw-r--r--  src/mesa/main/teximage.c | 417
-rw-r--r--  src/mesa/main/teximage.h | 9
-rw-r--r--  src/mesa/main/texobj.c | 10
-rw-r--r--  src/mesa/main/texparam.c | 30
-rw-r--r--  src/mesa/main/texstate.c | 54
-rw-r--r--  src/mesa/main/texstate.h | 2
-rw-r--r--  src/mesa/main/texstorage.c | 17
-rw-r--r--  src/mesa/main/texstore.c | 1
-rw-r--r--  src/mesa/main/textureview.c | 10
-rw-r--r--  src/mesa/main/uniform_query.cpp | 95
-rw-r--r--  src/mesa/main/uniforms.c | 28
-rw-r--r--  src/mesa/main/uniforms.h | 4
-rw-r--r--  src/mesa/main/varray.c | 6
-rw-r--r--  src/mesa/main/version.c | 2
-rw-r--r--  src/mesa/main/viewport.c | 18
-rw-r--r--  src/mesa/main/viewport.h | 2
-rw-r--r--  src/mesa/math/m_clip_tmp.h | 20
-rw-r--r--  src/mesa/math/m_matrix.c | 74
-rw-r--r--  src/mesa/math/m_matrix.h | 4
-rw-r--r--  src/mesa/math/m_norm_tmp.h | 2
-rw-r--r--  src/mesa/math/m_vector.h | 4
-rw-r--r--  src/mesa/program/ir_to_mesa.cpp | 9
-rw-r--r--  src/mesa/program/prog_execute.c | 34
-rw-r--r--  src/mesa/program/prog_opt_constant_fold.c | 2
-rw-r--r--  src/mesa/program/prog_print.c | 10
-rw-r--r--  src/mesa/program/program.c | 59
-rw-r--r--  src/mesa/program/program.h | 80
-rw-r--r--  src/mesa/program/program_parse_extra.c | 50
-rw-r--r--  src/mesa/state_tracker/st_atom.c | 10
-rw-r--r--  src/mesa/state_tracker/st_atom.h | 10
-rw-r--r--  src/mesa/state_tracker/st_atom_clip.c | 7
-rw-r--r--  src/mesa/state_tracker/st_atom_constbuf.c | 88
-rw-r--r--  src/mesa/state_tracker/st_atom_depth.c | 15
-rw-r--r--  src/mesa/state_tracker/st_atom_sampler.c | 25
-rw-r--r--  src/mesa/state_tracker/st_atom_shader.c | 101
-rw-r--r--  src/mesa/state_tracker/st_atom_tess.c | 62
-rw-r--r--  src/mesa/state_tracker/st_atom_texture.c | 163
-rw-r--r--  src/mesa/state_tracker/st_atom_viewport.c | 2
-rw-r--r--  src/mesa/state_tracker/st_cb_bitmap.c | 8
-rw-r--r--  src/mesa/state_tracker/st_cb_blit.c | 4
-rw-r--r--  src/mesa/state_tracker/st_cb_drawpixels.c | 47
-rw-r--r--  src/mesa/state_tracker/st_cb_fbo.c | 2
-rw-r--r--  src/mesa/state_tracker/st_cb_perfmon.h | 2
-rw-r--r--  src/mesa/state_tracker/st_cb_program.c | 58
-rw-r--r--  src/mesa/state_tracker/st_cb_rasterpos.c | 2
-rw-r--r--  src/mesa/state_tracker/st_cb_readpixels.c | 28
-rw-r--r--  src/mesa/state_tracker/st_cb_syncobj.c | 14
-rw-r--r--  src/mesa/state_tracker/st_cb_texture.c | 35
-rw-r--r--  src/mesa/state_tracker/st_cb_xformfb.c | 61
-rw-r--r--  src/mesa/state_tracker/st_cb_xformfb.h | 4
-rw-r--r--  src/mesa/state_tracker/st_context.c | 9
-rw-r--r--  src/mesa/state_tracker/st_context.h | 33
-rw-r--r--  src/mesa/state_tracker/st_draw.c | 6
-rw-r--r--  src/mesa/state_tracker/st_draw.h | 2
-rw-r--r--  src/mesa/state_tracker/st_draw_feedback.c | 1
-rw-r--r--  src/mesa/state_tracker/st_extensions.c | 66
-rw-r--r--  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 251
-rw-r--r--  src/mesa/state_tracker/st_manager.c | 1
-rw-r--r--  src/mesa/state_tracker/st_program.c | 495
-rw-r--r--  src/mesa/state_tracker/st_program.h | 122
-rw-r--r--  src/mesa/state_tracker/st_texture.c | 10
-rw-r--r--  src/mesa/swrast/s_aaline.c | 28
-rw-r--r--  src/mesa/swrast/s_aalinetemp.h | 4
-rw-r--r--  src/mesa/swrast/s_atifragshader.c | 4
-rw-r--r--  src/mesa/swrast/s_copypix.c | 22
-rw-r--r--  src/mesa/swrast/s_depth.c | 14
-rw-r--r--  src/mesa/swrast/s_drawpix.c | 12
-rw-r--r--  src/mesa/swrast/s_fragprog.c | 4
-rw-r--r--  src/mesa/swrast/s_lines.c | 4
-rw-r--r--  src/mesa/swrast/s_points.c | 10
-rw-r--r--  src/mesa/swrast/s_span.c | 10
-rw-r--r--  src/mesa/swrast/s_texcombine.c | 6
-rw-r--r--  src/mesa/swrast/s_texfilter.c | 64
-rw-r--r--  src/mesa/swrast/s_tritemp.h | 2
-rw-r--r--  src/mesa/swrast/s_zoom.c | 2
-rw-r--r--  src/mesa/swrast_setup/ss_tritmp.h | 4
-rw-r--r--  src/mesa/tnl/t_context.c | 2
-rw-r--r--  src/mesa/tnl/t_draw.c | 5
-rw-r--r--  src/mesa/tnl/t_rasterpos.c | 8
-rw-r--r--  src/mesa/tnl/t_vb_fog.c | 6
-rw-r--r--  src/mesa/tnl/t_vb_light.c | 22
-rw-r--r--  src/mesa/tnl/t_vb_lighttmp.h | 16
-rw-r--r--  src/mesa/tnl/t_vb_normals.c | 4
-rw-r--r--  src/mesa/tnl/t_vb_render.c | 2
-rw-r--r--  src/mesa/tnl/t_vertex_generic.c | 2
-rw-r--r--  src/mesa/tnl/t_vertex_sse.c | 2
-rw-r--r--  src/mesa/tnl/tnl.h | 3
-rw-r--r--  src/mesa/tnl_dd/t_dd_dmatmp.h | 2
-rw-r--r--  src/mesa/tnl_dd/t_dd_unfilled.h | 2
-rw-r--r--  src/mesa/vbo/vbo.h | 3
-rw-r--r--  src/mesa/vbo/vbo_context.c | 6
-rw-r--r--  src/mesa/vbo/vbo_exec_array.c | 82
-rw-r--r--  src/mesa/vbo/vbo_exec_draw.c | 2
-rw-r--r--  src/mesa/vbo/vbo_primitive_restart.c | 4
-rw-r--r--  src/mesa/vbo/vbo_rebase.c | 2
-rw-r--r--  src/mesa/vbo/vbo_save_draw.c | 2
-rw-r--r--  src/mesa/vbo/vbo_split_copy.c | 2
-rw-r--r--  src/mesa/vbo/vbo_split_inplace.c | 2
373 files changed, 14809 insertions, 6163 deletions
diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am
index 4ba5b2fac29..eb4a3da3c84 100644
--- a/src/mesa/Makefile.am
+++ b/src/mesa/Makefile.am
@@ -19,8 +19,6 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
SUBDIRS = . main/tests
if HAVE_X11_DRIVER
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 83f500fbf20..ed9848c5454 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -407,6 +407,7 @@ STATETRACKER_FILES = \
state_tracker/st_atom_shader.c \
state_tracker/st_atom_shader.h \
state_tracker/st_atom_stipple.c \
+ state_tracker/st_atom_tess.c \
state_tracker/st_atom_texture.c \
state_tracker/st_atom_viewport.c \
state_tracker/st_cache.h \
diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c
index 71c1a763912..6fe42b1775c 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -94,14 +94,14 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
driver->QuerySamplesForFormat = _mesa_query_samples_for_format;
driver->TexImage = _mesa_store_teximage;
driver->TexSubImage = _mesa_store_texsubimage;
- driver->GetTexImage = _mesa_meta_GetTexImage;
+ driver->GetTexSubImage = _mesa_meta_GetTexSubImage;
driver->ClearTexSubImage = _mesa_meta_ClearTexSubImage;
driver->CopyTexSubImage = _mesa_meta_CopyTexSubImage;
driver->GenerateMipmap = _mesa_meta_GenerateMipmap;
driver->TestProxyTexImage = _mesa_test_proxy_teximage;
driver->CompressedTexImage = _mesa_store_compressed_teximage;
driver->CompressedTexSubImage = _mesa_store_compressed_texsubimage;
- driver->GetCompressedTexImage = _mesa_GetCompressedTexImage_sw;
+ driver->GetCompressedTexSubImage = _mesa_GetCompressedTexSubImage_sw;
driver->BindTexture = NULL;
driver->NewTextureObject = _mesa_new_texture_object;
driver->DeleteTexture = _mesa_delete_texture_object;
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 214a68a9129..bde544ef490 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -728,7 +728,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
save->DepthNear = ctx->ViewportArray[0].Near;
save->DepthFar = ctx->ViewportArray[0].Far;
/* set depth range to default */
- _mesa_DepthRange(0.0, 1.0);
+ _mesa_set_depth_range(ctx, 0, 0.0, 1.0);
}
if (state & MESA_META_CLAMP_FRAGMENT_COLOR) {
@@ -945,6 +945,8 @@ _mesa_meta_end(struct gl_context *ctx)
if (state & MESA_META_SHADER) {
static const GLenum targets[] = {
GL_VERTEX_SHADER,
+ GL_TESS_CONTROL_SHADER,
+ GL_TESS_EVALUATION_SHADER,
GL_GEOMETRY_SHADER,
GL_FRAGMENT_SHADER,
};
@@ -1129,7 +1131,7 @@ _mesa_meta_end(struct gl_context *ctx)
_mesa_set_viewport(ctx, 0, save->ViewportX, save->ViewportY,
save->ViewportW, save->ViewportH);
}
- _mesa_DepthRange(save->DepthNear, save->DepthFar);
+ _mesa_set_depth_range(ctx, 0, save->DepthNear, save->DepthFar);
}
if (state & MESA_META_CLAMP_FRAGMENT_COLOR &&
@@ -2449,30 +2451,53 @@ _mesa_meta_Bitmap(struct gl_context *ctx,
/**
* Compute the texture coordinates for the four vertices of a quad for
- * drawing a 2D texture image or slice of a cube/3D texture.
+ * drawing a 2D texture image or slice of a cube/3D texture. The offset
+ * and width, height specify a sub-region of the 2D image.
+ *
* \param faceTarget GL_TEXTURE_1D/2D/3D or cube face name
* \param slice slice of a 1D/2D array texture or 3D texture
- * \param width width of the texture image
- * \param height height of the texture image
+ * \param xoffset X position of sub texture
+ * \param yoffset Y position of sub texture
+ * \param width width of the sub texture image
+ * \param height height of the sub texture image
+ * \param total_width total width of the texture image
+ * \param total_height total height of the texture image
+ * \param total_depth total depth of the texture image
* \param coords0/1/2/3 returns the computed texcoords
*/
void
_mesa_meta_setup_texture_coords(GLenum faceTarget,
GLint slice,
+ GLint xoffset,
+ GLint yoffset,
GLint width,
GLint height,
- GLint depth,
+ GLint total_width,
+ GLint total_height,
+ GLint total_depth,
GLfloat coords0[4],
GLfloat coords1[4],
GLfloat coords2[4],
GLfloat coords3[4])
{
- static const GLfloat st[4][2] = {
- {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
- };
+ float st[4][2];
GLuint i;
+ const float s0 = (float) xoffset / (float) total_width;
+ const float s1 = (float) (xoffset + width) / (float) total_width;
+ const float t0 = (float) yoffset / (float) total_height;
+ const float t1 = (float) (yoffset + height) / (float) total_height;
GLfloat r;
+ /* setup the reference texcoords */
+ st[0][0] = s0;
+ st[0][1] = t0;
+ st[1][0] = s1;
+ st[1][1] = t0;
+ st[2][0] = s1;
+ st[2][1] = t1;
+ st[3][0] = s0;
+ st[3][1] = t1;
+
if (faceTarget == GL_TEXTURE_CUBE_MAP_ARRAY)
faceTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + slice % 6;
@@ -2489,52 +2514,52 @@ _mesa_meta_setup_texture_coords(GLenum faceTarget,
case GL_TEXTURE_3D:
case GL_TEXTURE_2D_ARRAY:
if (faceTarget == GL_TEXTURE_3D) {
- assert(slice < depth);
- assert(depth >= 1);
- r = (slice + 0.5f) / depth;
+ assert(slice < total_depth);
+ assert(total_depth >= 1);
+ r = (slice + 0.5f) / total_depth;
}
else if (faceTarget == GL_TEXTURE_2D_ARRAY)
r = (float) slice;
else
r = 0.0F;
- coords0[0] = 0.0F; /* s */
- coords0[1] = 0.0F; /* t */
+ coords0[0] = st[0][0]; /* s */
+ coords0[1] = st[0][1]; /* t */
coords0[2] = r; /* r */
- coords1[0] = 1.0F;
- coords1[1] = 0.0F;
+ coords1[0] = st[1][0];
+ coords1[1] = st[1][1];
coords1[2] = r;
- coords2[0] = 1.0F;
- coords2[1] = 1.0F;
+ coords2[0] = st[2][0];
+ coords2[1] = st[2][1];
coords2[2] = r;
- coords3[0] = 0.0F;
- coords3[1] = 1.0F;
+ coords3[0] = st[3][0];
+ coords3[1] = st[3][1];
coords3[2] = r;
break;
case GL_TEXTURE_RECTANGLE_ARB:
- coords0[0] = 0.0F; /* s */
- coords0[1] = 0.0F; /* t */
+ coords0[0] = (float) xoffset; /* s */
+ coords0[1] = (float) yoffset; /* t */
coords0[2] = 0.0F; /* r */
- coords1[0] = (float) width;
- coords1[1] = 0.0F;
+ coords1[0] = (float) (xoffset + width);
+ coords1[1] = (float) yoffset;
coords1[2] = 0.0F;
- coords2[0] = (float) width;
- coords2[1] = (float) height;
+ coords2[0] = (float) (xoffset + width);
+ coords2[1] = (float) (yoffset + height);
coords2[2] = 0.0F;
- coords3[0] = 0.0F;
- coords3[1] = (float) height;
+ coords3[0] = (float) xoffset;
+ coords3[1] = (float) (yoffset + height);
coords3[2] = 0.0F;
break;
case GL_TEXTURE_1D_ARRAY:
- coords0[0] = 0.0F; /* s */
+ coords0[0] = st[0][0]; /* s */
coords0[1] = (float) slice; /* t */
coords0[2] = 0.0F; /* r */
- coords1[0] = 1.0f;
+ coords1[0] = st[1][0];
coords1[1] = (float) slice;
coords1[2] = 0.0F;
- coords2[0] = 1.0F;
+ coords2[0] = st[2][0];
coords2[1] = (float) slice;
coords2[2] = 0.0F;
- coords3[0] = 0.0F;
+ coords3[0] = st[3][0];
coords3[1] = (float) slice;
coords3[2] = 0.0F;
break;
@@ -2943,15 +2968,14 @@ static bool
decompress_texture_image(struct gl_context *ctx,
struct gl_texture_image *texImage,
GLuint slice,
+ GLint xoffset, GLint yoffset,
+ GLsizei width, GLsizei height,
GLenum destFormat, GLenum destType,
GLvoid *dest)
{
struct decompress_state *decompress = &ctx->Meta->Decompress;
struct decompress_fbo_state *decompress_fbo;
struct gl_texture_object *texObj = texImage->TexObject;
- const GLint width = texImage->Width;
- const GLint height = texImage->Height;
- const GLint depth = texImage->Height;
const GLenum target = texObj->Target;
GLenum rbFormat;
GLenum faceTarget;
@@ -3069,7 +3093,10 @@ decompress_texture_image(struct gl_context *ctx,
/* Silence valgrind warnings about reading uninitialized stack. */
memset(verts, 0, sizeof(verts));
- _mesa_meta_setup_texture_coords(faceTarget, slice, width, height, depth,
+ _mesa_meta_setup_texture_coords(faceTarget, slice,
+ xoffset, yoffset, width, height,
+ texImage->Width, texImage->Height,
+ texImage->Depth,
verts[0].tex,
verts[1].tex,
verts[2].tex,
@@ -3123,7 +3150,7 @@ decompress_texture_image(struct gl_context *ctx,
/* read pixels from renderbuffer */
{
GLenum baseTexFormat = texImage->_BaseFormat;
- GLenum destBaseFormat = _mesa_base_tex_format(ctx, destFormat);
+ GLenum destBaseFormat = _mesa_unpack_format_to_base_format(destFormat);
/* The pixel transfer state will be set to default values at this point
* (see MESA_META_PIXEL_TRANSFER) so pixel transfer ops are effectively
@@ -3132,19 +3159,13 @@ decompress_texture_image(struct gl_context *ctx,
* returned as red and two-channel texture values are returned as
* red/alpha.
*/
- if ((baseTexFormat == GL_LUMINANCE ||
- baseTexFormat == GL_LUMINANCE_ALPHA ||
- baseTexFormat == GL_INTENSITY) ||
+ if (_mesa_need_luminance_to_rgb_conversion(baseTexFormat,
+ destBaseFormat) ||
/* If we're reading back an RGB(A) texture (using glGetTexImage) as
* luminance then we need to return L=tex(R).
*/
- ((baseTexFormat == GL_RGBA ||
- baseTexFormat == GL_RGB ||
- baseTexFormat == GL_RG) &&
- (destBaseFormat == GL_LUMINANCE ||
- destBaseFormat == GL_LUMINANCE_ALPHA ||
- destBaseFormat == GL_LUMINANCE_INTEGER_EXT ||
- destBaseFormat == GL_LUMINANCE_ALPHA_INTEGER_EXT))) {
+ _mesa_need_rgb_to_luminance_conversion(baseTexFormat,
+ destBaseFormat)) {
/* Green and blue must be zero */
_mesa_PixelTransferf(GL_GREEN_SCALE, 0.0f);
_mesa_PixelTransferf(GL_BLUE_SCALE, 0.0f);
@@ -3171,15 +3192,17 @@ decompress_texture_image(struct gl_context *ctx,
* from core Mesa.
*/
void
-_mesa_meta_GetTexImage(struct gl_context *ctx,
- GLenum format, GLenum type, GLvoid *pixels,
- struct gl_texture_image *texImage)
+_mesa_meta_GetTexSubImage(struct gl_context *ctx,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLenum format, GLenum type, GLvoid *pixels,
+ struct gl_texture_image *texImage)
{
if (_mesa_is_format_compressed(texImage->TexFormat)) {
GLuint slice;
bool result = true;
- for (slice = 0; slice < texImage->Depth; slice++) {
+ for (slice = 0; slice < depth; slice++) {
void *dst;
if (texImage->TexObject->Target == GL_TEXTURE_2D_ARRAY
|| texImage->TexObject->Target == GL_TEXTURE_CUBE_MAP_ARRAY) {
@@ -3191,14 +3214,14 @@ _mesa_meta_GetTexImage(struct gl_context *ctx,
struct gl_pixelstore_attrib packing = ctx->Pack;
packing.SkipPixels = 0;
packing.SkipRows = 0;
- dst = _mesa_image_address3d(&packing, pixels, texImage->Width,
- texImage->Height, format, type,
- slice, 0, 0);
+ dst = _mesa_image_address3d(&packing, pixels, width, height,
+ format, type, slice, 0, 0);
}
else {
dst = pixels;
}
result = decompress_texture_image(ctx, texImage, slice,
+ xoffset, yoffset, width, height,
format, type, dst);
if (!result)
break;
@@ -3208,7 +3231,8 @@ _mesa_meta_GetTexImage(struct gl_context *ctx,
return;
}
- _mesa_GetTexImage_sw(ctx, format, type, pixels, texImage);
+ _mesa_GetTexSubImage_sw(ctx, xoffset, yoffset, zoffset,
+ width, height, depth, format, type, pixels, texImage);
}
diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
index e7d894df1d7..fe439153aa0 100644
--- a/src/mesa/drivers/common/meta.h
+++ b/src/mesa/drivers/common/meta.h
@@ -560,9 +560,11 @@ _mesa_meta_ClearTexSubImage(struct gl_context *ctx,
const GLvoid *clearValue);
extern void
-_mesa_meta_GetTexImage(struct gl_context *ctx,
- GLenum format, GLenum type, GLvoid *pixels,
- struct gl_texture_image *texImage);
+_mesa_meta_GetTexSubImage(struct gl_context *ctx,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLenum format, GLenum type, GLvoid *pixels,
+ struct gl_texture_image *texImage);
extern void
_mesa_meta_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
@@ -594,9 +596,13 @@ _mesa_meta_alloc_texture(struct temp_texture *tex,
void
_mesa_meta_setup_texture_coords(GLenum faceTarget,
GLint slice,
+ GLint xoffset,
+ GLint yoffset,
GLint width,
GLint height,
- GLint depth,
+ GLint total_width,
+ GLint total_height,
+ GLint total_depth,
GLfloat coords0[4],
GLfloat coords1[4],
GLfloat coords2[4],
diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c
index 9cace2b245a..71d18de87db 100644
--- a/src/mesa/drivers/common/meta_blit.c
+++ b/src/mesa/drivers/common/meta_blit.c
@@ -82,7 +82,7 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
y_scale = samples * 0.5;
/* We expect only power of 2 samples in source multisample buffer. */
- assert(samples > 0 && is_power_of_two(samples));
+ assert(samples > 0 && _mesa_is_pow_two(samples));
while (samples >> (shader_offset + 1)) {
shader_offset++;
}
@@ -263,7 +263,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
}
/* We expect only power of 2 samples in source multisample buffer. */
- assert(samples > 0 && is_power_of_two(samples));
+ assert(samples > 0 && _mesa_is_pow_two(samples));
while (samples >> (shader_offset + 1)) {
shader_offset++;
}
@@ -312,7 +312,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
break;
default:
_mesa_problem(ctx, "Unkown texture target %s\n",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
shader_index = BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_RESOLVE;
}
@@ -434,7 +434,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
* (so the floating point exponent just gets increased), rather than
* doing a naive sum and dividing.
*/
- assert(is_power_of_two(samples));
+ assert(_mesa_is_pow_two(samples));
/* Fetch each individual sample. */
sample_resolve = rzalloc_size(mem_ctx, 1);
for (i = 0; i < samples; i++) {
diff --git a/src/mesa/drivers/common/meta_copy_image.c b/src/mesa/drivers/common/meta_copy_image.c
index 1729766f78d..149ed18503c 100644
--- a/src/mesa/drivers/common/meta_copy_image.c
+++ b/src/mesa/drivers/common/meta_copy_image.c
@@ -138,8 +138,8 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
goto cleanup;
}
- /* We really only need to stash the bound framebuffers. */
- _mesa_meta_begin(ctx, 0);
+ /* We really only need to stash the bound framebuffers and scissor. */
+ _mesa_meta_begin(ctx, MESA_META_SCISSOR);
_mesa_GenFramebuffers(2, fbos);
_mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbos[0]);
diff --git a/src/mesa/drivers/common/meta_generate_mipmap.c b/src/mesa/drivers/common/meta_generate_mipmap.c
index c1b6d3c1f86..0655f052219 100644
--- a/src/mesa/drivers/common/meta_generate_mipmap.c
+++ b/src/mesa/drivers/common/meta_generate_mipmap.c
@@ -66,7 +66,7 @@ fallback_required(struct gl_context *ctx, GLenum target,
if (target == GL_TEXTURE_3D) {
_mesa_perf_debug(ctx, MESA_DEBUG_SEVERITY_HIGH,
"glGenerateMipmap() to %s target\n",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return true;
}
@@ -317,7 +317,9 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
/* Setup texture coordinates */
_mesa_meta_setup_texture_coords(faceTarget,
layer,
- 0, 0, 1, /* width, height never used here */
+ 0, 0, /* xoffset, yoffset */
+ srcWidth, srcHeight, /* img size */
+ srcWidth, srcHeight, srcDepth,
verts[0].tex,
verts[1].tex,
verts[2].tex,
diff --git a/src/mesa/drivers/common/meta_tex_subimage.c b/src/mesa/drivers/common/meta_tex_subimage.c
index d2474f52718..16d8f5d4747 100644
--- a/src/mesa/drivers/common/meta_tex_subimage.c
+++ b/src/mesa/drivers/common/meta_tex_subimage.c
@@ -25,8 +25,10 @@
* Jason Ekstrand <[email protected]>
*/
+#include "blend.h"
#include "bufferobj.h"
#include "buffers.h"
+#include "clear.h"
#include "fbobject.h"
#include "glformats.h"
#include "glheader.h"
@@ -248,6 +250,24 @@ fail:
return success;
}
+static bool
+need_signed_unsigned_int_conversion(mesa_format rbFormat,
+ GLenum format, GLenum type)
+{
+ const GLenum srcType = _mesa_get_format_datatype(rbFormat);
+ const bool is_dst_format_integer = _mesa_is_enum_format_integer(format);
+ return (srcType == GL_INT &&
+ is_dst_format_integer &&
+ (type == GL_UNSIGNED_INT ||
+ type == GL_UNSIGNED_SHORT ||
+ type == GL_UNSIGNED_BYTE)) ||
+ (srcType == GL_UNSIGNED_INT &&
+ is_dst_format_integer &&
+ (type == GL_INT ||
+ type == GL_SHORT ||
+ type == GL_BYTE));
+}
+
bool
_mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
struct gl_texture_image *tex_image,
@@ -260,8 +280,10 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
int full_height, image_height;
struct gl_texture_image *pbo_tex_image;
struct gl_renderbuffer *rb = NULL;
- GLenum status;
- bool success = false;
+ GLenum dstBaseFormat = _mesa_unpack_format_to_base_format(format);
+ GLenum status, src_base_format;
+ bool success = false, clear_channels_to_zero = false;
+ float save_clear_color[4];
int z;
if (!_mesa_is_bufferobj(packing->BufferObj))
@@ -273,13 +295,27 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
format == GL_COLOR_INDEX)
return false;
- if (ctx->_ImageTransferState)
- return false;
-
-
+ /* Don't use meta path for readpixels in below conditions. */
if (!tex_image) {
rb = ctx->ReadBuffer->_ColorReadBuffer;
- if (_mesa_need_rgb_to_luminance_conversion(rb->Format, format))
+
+ /* _mesa_get_readpixels_transfer_ops() includes the cases of read
+ * color clamping along with the ctx->_ImageTransferState.
+ */
+ if (_mesa_get_readpixels_transfer_ops(ctx, rb->Format, format,
+ type, GL_FALSE))
+ return false;
+
+ if (_mesa_need_rgb_to_luminance_conversion(rb->_BaseFormat,
+ dstBaseFormat))
+ return false;
+
+ /* This function rely on BlitFramebuffer to fill in the pixel data for
+ * ReadPixels. But, BlitFrameBuffer doesn't support signed to unsigned
+ * or unsigned to signed integer conversions. OpenGL spec expects an
+ * invalid operation in that case.
+ */
+ if (need_signed_unsigned_int_conversion(rb->Format, format, type))
return false;
}
@@ -300,6 +336,10 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
_mesa_meta_begin(ctx, ~(MESA_META_PIXEL_TRANSFER |
MESA_META_PIXEL_STORE));
+ /* GL_CLAMP_FRAGMENT_COLOR doesn't affect ReadPixels and GettexImage */
+ if (ctx->Extensions.ARB_color_buffer_float)
+ _mesa_ClampColor(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);
+
_mesa_GenFramebuffers(2, fbos);
if (tex_image && tex_image->TexObject->Target == GL_TEXTURE_1D_ARRAY) {
@@ -345,6 +385,27 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
GL_COLOR_BUFFER_BIT, GL_NEAREST))
goto fail;
+ src_base_format = tex_image ?
+ tex_image->_BaseFormat :
+ ctx->ReadBuffer->_ColorReadBuffer->_BaseFormat;
+
+ /* Depending on the base formats involved we might need to rebase some
+ * values. For example if we download from a Luminance format to RGBA
+ * format, we want G=0 and B=0.
+ */
+ clear_channels_to_zero =
+ _mesa_need_luminance_to_rgb_conversion(src_base_format,
+ pbo_tex_image->_BaseFormat);
+
+ if (clear_channels_to_zero) {
+ memcpy(save_clear_color, ctx->Color.ClearColor.f, 4 * sizeof(float));
+ /* Clear the Green, Blue channels. */
+ _mesa_ColorMask(GL_FALSE, GL_TRUE, GL_TRUE,
+ src_base_format != GL_LUMINANCE_ALPHA);
+ _mesa_ClearColor(0.0, 0.0, 0.0, 1.0);
+ _mesa_Clear(GL_COLOR_BUFFER_BIT);
+ }
+
for (z = 1; z < depth; z++) {
_mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
tex_image, zoffset + z);
@@ -357,6 +418,15 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
0, z * image_height,
width, z * image_height + height,
GL_COLOR_BUFFER_BIT, GL_NEAREST);
+ if (clear_channels_to_zero)
+ _mesa_Clear(GL_COLOR_BUFFER_BIT);
+ }
+
+ /* Unmask the color channels and restore the saved clear color values. */
+ if (clear_channels_to_zero) {
+ _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+ _mesa_ClearColor(save_clear_color[0], save_clear_color[1],
+ save_clear_color[2], save_clear_color[3]);
}
success = true;
diff --git a/src/mesa/drivers/dri/common/Android.mk b/src/mesa/drivers/dri/common/Android.mk
index 6986f5e8cb4..f1a733011b9 100644
--- a/src/mesa/drivers/dri/common/Android.mk
+++ b/src/mesa/drivers/dri/common/Android.mk
@@ -43,13 +43,6 @@ LOCAL_EXPORT_C_INCLUDE_DIRS := \
$(LOCAL_PATH) \
$(intermediates)
-# swrast only
-ifeq ($(MESA_GPU_DRIVERS),swrast)
-LOCAL_CFLAGS := -D__NOT_HAVE_DRM_H
-else
-LOCAL_SHARED_LIBRARIES := libdrm
-endif
-
LOCAL_SRC_FILES := \
$(DRI_COMMON_FILES) \
$(XMLCONFIG_FILES)
@@ -110,13 +103,6 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES
LOCAL_C_INCLUDES := \
$(MESA_DRI_C_INCLUDES)
-# swrast only
-ifeq ($(MESA_GPU_DRIVERS),swrast)
-LOCAL_CFLAGS := -D__NOT_HAVE_DRM_H
-else
-LOCAL_SHARED_LIBRARIES := libdrm
-endif
-
LOCAL_SRC_FILES := $(megadriver_stub_FILES)
include $(MESA_COMMON_MK)
diff --git a/src/mesa/drivers/dri/common/Makefile.am b/src/mesa/drivers/dri/common/Makefile.am
index ae19fcb3565..b307f10f56b 100644
--- a/src/mesa/drivers/dri/common/Makefile.am
+++ b/src/mesa/drivers/dri/common/Makefile.am
@@ -32,6 +32,7 @@ AM_CFLAGS = \
-I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
+ $(LIBDRM_CFLAGS) \
$(DEFINES) \
$(VISIBILITY_CFLAGS)
@@ -53,10 +54,3 @@ libdri_test_stubs_la_CFLAGS = $(AM_CFLAGS) -DNO_MAIN
libmegadriver_stub_la_SOURCES = $(megadriver_stub_FILES)
sysconf_DATA = drirc
-
-if DRICOMMON_NEED_LIBDRM
-AM_CFLAGS += $(LIBDRM_CFLAGS)
-libdricommon_la_LIBADD = $(LIBDRM_LIBS)
-else
-AM_CFLAGS += -D__NOT_HAVE_DRM_H
-endif
diff --git a/src/mesa/drivers/dri/common/SConscript b/src/mesa/drivers/dri/common/SConscript
index b402736db69..52d201f8913 100644
--- a/src/mesa/drivers/dri/common/SConscript
+++ b/src/mesa/drivers/dri/common/SConscript
@@ -32,11 +32,6 @@ drienv.AppendUnique(LIBS = [
'expat',
])
-# if HAVE_DRI2
-drienv.PkgUseModules('DRM')
-# else
-#env.Append(CPPDEFINES = ['__NOT_HAVE_DRM_H'])
-
sources = drienv.ParseSourceList('Makefile.sources', ['DRI_COMMON_FILES', 'XMLCONFIG_FILES' ])
dri_common = drienv.ConvenienceLibrary(
@@ -57,7 +52,6 @@ env.Append(CPPPATH = [
])
env.Append(CPPDEFINES = [
- '__NOT_HAVE_DRM_H',
'HAVE_DLADDR',
])
diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c
index e7ababe0b67..d35ac263a45 100644
--- a/src/mesa/drivers/dri/common/dri_util.c
+++ b/src/mesa/drivers/dri/common/dri_util.c
@@ -40,13 +40,9 @@
#include <stdbool.h>
-#ifndef __NOT_HAVE_DRM_H
-#include <xf86drm.h>
-#endif
#include "dri_util.h"
#include "utils.h"
#include "xmlpool.h"
-#include "../glsl/glsl_parser_extras.h"
#include "main/mtypes.h"
#include "main/version.h"
#include "main/errors.h"
@@ -138,18 +134,6 @@ driCreateNewScreen2(int scrn, int fd,
setupLoaderExtensions(psp, extensions);
-#ifndef __NOT_HAVE_DRM_H
- if (fd != -1) {
- drmVersionPtr version = drmGetVersion(fd);
- if (version) {
- psp->drm_version.major = version->version_major;
- psp->drm_version.minor = version->version_minor;
- psp->drm_version.patch = version->version_patchlevel;
- drmFreeVersion(version);
- }
- }
-#endif
-
psp->loaderPrivate = data;
psp->extensions = emptyExtensionList;
@@ -179,7 +163,9 @@ driCreateNewScreen2(int scrn, int fd,
}
}
- psp->api_mask = (1 << __DRI_API_OPENGL);
+ psp->api_mask = 0;
+ if (psp->max_gl_compat_version > 0)
+ psp->api_mask |= (1 << __DRI_API_OPENGL);
if (psp->max_gl_core_version > 0)
psp->api_mask |= (1 << __DRI_API_OPENGL_CORE);
if (psp->max_gl_es1_version > 0)
@@ -238,8 +224,6 @@ static void driDestroyScreen(__DRIscreen *psp)
* stream open to the X-server anymore.
*/
- _mesa_destroy_shader_compiler();
-
psp->driver->DestroyScreen(psp);
driDestroyOptionCache(&psp->optionCache);
diff --git a/src/mesa/drivers/dri/common/dri_util.h b/src/mesa/drivers/dri/common/dri_util.h
index 1138bf106de..6987f555e66 100644
--- a/src/mesa/drivers/dri/common/dri_util.h
+++ b/src/mesa/drivers/dri/common/dri_util.h
@@ -149,11 +149,6 @@ struct __DRIscreenRec {
int fd;
/**
- * DRM (kernel module) version information.
- */
- __DRIversion drm_version;
-
- /**
* Device-dependent private information (not stored in the SAREA).
*
* This pointer is never touched by the DRI layer.
diff --git a/src/mesa/drivers/dri/common/drirc b/src/mesa/drivers/dri/common/drirc
index 145e707a64c..97d961b6597 100644
--- a/src/mesa/drivers/dri/common/drirc
+++ b/src/mesa/drivers/dri/common/drirc
@@ -4,24 +4,15 @@
Application bugs worked around in this file:
============================================
+* Unigine Heaven 3.0 and older contain too many bugs and can't be supported
+ by drivers that want to be compliant.
+
* Various Unigine products don't use the #version and #extension GLSL
directives, meaning they only get GLSL 1.10 and no extensions for their
shaders.
Enabling all extensions for Unigine fixes most issues, but the GLSL version
is still 1.10.
-* Unigine Heaven 3.0 with ARB_texture_multisample uses a "ivec4 * vec4"
- expression, which is illegal in GLSL 1.10.
- Adding "#version 130" fixes this.
-
-* Unigine Heaven 3.0 with ARB_shader_bit_encoding uses the uint keyword, which
- is illegal in GLSL 1.10.
- Adding "#version 130" fixes this.
-
-* Unigine Heaven 3.0 with ARB_shader_bit_encoding uses a "uint & int"
- expression, which is illegal in any GLSL version.
- Disabling ARB_shader_bit_encoding fixes this.
-
* If ARB_sample_shading is supported, Unigine Heaven 4.0 and Valley 1.0 uses
an #extension directive in the middle of its shaders, which is illegal
in GLSL.
@@ -45,18 +36,10 @@ TODO: document the other workarounds.
</application>
<application name="Unigine Heaven (32-bit)" executable="heaven_x86">
- <option name="force_glsl_extensions_warn" value="true" />
- <option name="disable_blend_func_extended" value="true" />
- <option name="force_glsl_version" value="130" />
- <option name="disable_shader_bit_encoding" value="true" />
<option name="allow_glsl_extension_directive_midshader" value="true" />
</application>
<application name="Unigine Heaven (64-bit)" executable="heaven_x64">
- <option name="force_glsl_extensions_warn" value="true" />
- <option name="disable_blend_func_extended" value="true" />
- <option name="force_glsl_version" value="130" />
- <option name="disable_shader_bit_encoding" value="true" />
<option name="allow_glsl_extension_directive_midshader" value="true" />
</application>
diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c
index 70d34e8ce55..b51b263fe46 100644
--- a/src/mesa/drivers/dri/common/utils.c
+++ b/src/mesa/drivers/dri/common/utils.c
@@ -213,6 +213,7 @@ driCreateConfigs(mesa_format format,
masks = masks_table[0];
break;
case MESA_FORMAT_B8G8R8X8_UNORM:
+ case MESA_FORMAT_B8G8R8X8_SRGB:
masks = masks_table[1];
break;
case MESA_FORMAT_B8G8R8A8_UNORM:
diff --git a/src/mesa/drivers/dri/i915/i830_state.c b/src/mesa/drivers/dri/i915/i830_state.c
index ea54e2b25b1..906e942b020 100644
--- a/src/mesa/drivers/dri/i915/i830_state.c
+++ b/src/mesa/drivers/dri/i915/i830_state.c
@@ -57,7 +57,7 @@ i830StencilFuncSeparate(struct gl_context * ctx, GLenum face, GLenum func, GLint
mask = mask & 0xff;
DBG("%s : func: %s, ref : 0x%x, mask: 0x%x\n", __func__,
- _mesa_lookup_enum_by_nr(func), ref, mask);
+ _mesa_enum_to_string(func), ref, mask);
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
@@ -95,9 +95,9 @@ i830StencilOpSeparate(struct gl_context * ctx, GLenum face, GLenum fail, GLenum
int fop, dfop, dpop;
DBG("%s: fail : %s, zfail: %s, zpass : %s\n", __func__,
- _mesa_lookup_enum_by_nr(fail),
- _mesa_lookup_enum_by_nr(zfail),
- _mesa_lookup_enum_by_nr(zpass));
+ _mesa_enum_to_string(fail),
+ _mesa_enum_to_string(zfail),
+ _mesa_enum_to_string(zpass));
fop = 0;
dfop = 0;
@@ -389,8 +389,8 @@ static void
i830BlendEquationSeparate(struct gl_context * ctx, GLenum modeRGB, GLenum modeA)
{
DBG("%s -> %s, %s\n", __func__,
- _mesa_lookup_enum_by_nr(modeRGB),
- _mesa_lookup_enum_by_nr(modeA));
+ _mesa_enum_to_string(modeRGB),
+ _mesa_enum_to_string(modeA));
(void) modeRGB;
(void) modeA;
@@ -403,10 +403,10 @@ i830BlendFuncSeparate(struct gl_context * ctx, GLenum sfactorRGB,
GLenum dfactorRGB, GLenum sfactorA, GLenum dfactorA)
{
DBG("%s -> RGB(%s, %s) A(%s, %s)\n", __func__,
- _mesa_lookup_enum_by_nr(sfactorRGB),
- _mesa_lookup_enum_by_nr(dfactorRGB),
- _mesa_lookup_enum_by_nr(sfactorA),
- _mesa_lookup_enum_by_nr(dfactorA));
+ _mesa_enum_to_string(sfactorRGB),
+ _mesa_enum_to_string(dfactorRGB),
+ _mesa_enum_to_string(sfactorA),
+ _mesa_enum_to_string(dfactorA));
(void) sfactorRGB;
(void) dfactorRGB;
diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c
index 42ea54e087d..57b033c07ea 100644
--- a/src/mesa/drivers/dri/i915/i915_context.c
+++ b/src/mesa/drivers/dri/i915/i915_context.c
@@ -255,6 +255,8 @@ i915CreateContext(int api,
* FINISHME: vertex shaders?
*/
ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitCondCodes = true;
+ ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectSampler =
+ true;
struct gl_shader_compiler_options *const fs_options =
& ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT];
@@ -266,6 +268,7 @@ i915CreateContext(int api,
fs_options->EmitNoIndirectOutput = true;
fs_options->EmitNoIndirectUniform = true;
fs_options->EmitNoIndirectTemp = true;
+ fs_options->EmitNoIndirectSampler = true;
ctx->Const.MaxDrawBuffers = 1;
ctx->Const.QueryCounterBits.SamplesPassed = 0;
diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c
index 5f10b840b1a..4c83073e692 100644
--- a/src/mesa/drivers/dri/i915/i915_state.c
+++ b/src/mesa/drivers/dri/i915/i915_state.c
@@ -402,7 +402,7 @@ void
intelCalcViewport(struct gl_context * ctx)
{
struct intel_context *intel = intel_context(ctx);
- double scale[3], translate[3];
+ float scale[3], translate[3];
_mesa_get_viewport_xform(ctx, 0, scale, translate);
diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c
index aef5ff99eb2..f653f441ad8 100644
--- a/src/mesa/drivers/dri/i915/i915_texstate.c
+++ b/src/mesa/drivers/dri/i915/i915_texstate.c
@@ -342,7 +342,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
* Thus, I guess we need do this for other platforms as well.
*/
if (tObj->Target == GL_TEXTURE_CUBE_MAP_ARB &&
- !is_power_of_two(firstImage->Height))
+ !_mesa_is_pow_two(firstImage->Height))
return false;
state[I915_TEXREG_SS3] = ss3; /* SS3_NORMALIZED_COORDS */
diff --git a/src/mesa/drivers/dri/i915/intel_context.c b/src/mesa/drivers/dri/i915/intel_context.c
index 5618dcd8358..c780103228f 100644
--- a/src/mesa/drivers/dri/i915/intel_context.c
+++ b/src/mesa/drivers/dri/i915/intel_context.c
@@ -428,7 +428,6 @@ intelInitContext(struct intel_context *intel,
driContextPriv->driverPrivate = intel;
intel->driContext = driContextPriv;
- intel->driFd = sPriv->fd;
intel->gen = intelScreen->gen;
diff --git a/src/mesa/drivers/dri/i915/intel_context.h b/src/mesa/drivers/dri/i915/intel_context.h
index 350d35d9033..4ec4015d453 100644
--- a/src/mesa/drivers/dri/i915/intel_context.h
+++ b/src/mesa/drivers/dri/i915/intel_context.h
@@ -273,8 +273,6 @@ struct intel_context
bool use_early_z;
- int driFd;
-
__DRIcontext *driContext;
struct intel_screen *intelScreen;
diff --git a/src/mesa/drivers/dri/i915/intel_fbo.c b/src/mesa/drivers/dri/i915/intel_fbo.c
index a5d5c5832fb..67013666377 100644
--- a/src/mesa/drivers/dri/i915/intel_fbo.c
+++ b/src/mesa/drivers/dri/i915/intel_fbo.c
@@ -216,7 +216,7 @@ intel_alloc_private_renderbuffer_storage(struct gl_context * ctx, struct gl_rend
intel_miptree_release(&irb->mt);
DBG("%s: %s: %s (%dx%d)\n", __func__,
- _mesa_lookup_enum_by_nr(internalFormat),
+ _mesa_enum_to_string(internalFormat),
_mesa_get_format_name(rb->Format), width, height);
if (width == 0 || height == 0)
diff --git a/src/mesa/drivers/dri/i915/intel_mipmap_tree.c b/src/mesa/drivers/dri/i915/intel_mipmap_tree.c
index e56b9859377..1aa06c18f15 100644
--- a/src/mesa/drivers/dri/i915/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i915/intel_mipmap_tree.c
@@ -81,7 +81,7 @@ intel_miptree_create_layout(struct intel_context *intel,
return NULL;
DBG("%s target %s format %s level %d..%d <-- %p\n", __func__,
- _mesa_lookup_enum_by_nr(target),
+ _mesa_enum_to_string(target),
_mesa_get_format_name(format),
first_level, last_level, mt);
diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c
index 0b0d48e1663..5962dad7d11 100644
--- a/src/mesa/drivers/dri/i915/intel_render.c
+++ b/src/mesa/drivers/dri/i915/intel_render.c
@@ -113,7 +113,7 @@ static void
intelDmaPrimitive(struct intel_context *intel, GLenum prim)
{
if (0)
- fprintf(stderr, "%s %s\n", __func__, _mesa_lookup_enum_by_nr(prim));
+ fprintf(stderr, "%s %s\n", __func__, _mesa_enum_to_string(prim));
INTEL_FIREVERTICES(intel);
intel->vtbl.reduced_primitive_state(intel, reduced_prim[prim]);
intel_set_prim(intel, hw_prim[prim]);
diff --git a/src/mesa/drivers/dri/i915/intel_tex_image.c b/src/mesa/drivers/dri/i915/intel_tex_image.c
index 01de966a134..0a213e9f614 100644
--- a/src/mesa/drivers/dri/i915/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i915/intel_tex_image.c
@@ -189,7 +189,7 @@ intelTexImage(struct gl_context * ctx,
const struct gl_pixelstore_attrib *unpack)
{
DBG("%s target %s level %d %dx%dx%d\n", __func__,
- _mesa_lookup_enum_by_nr(texImage->TexObject->Target),
+ _mesa_enum_to_string(texImage->TexObject->Target),
texImage->Level, texImage->Width, texImage->Height, texImage->Depth);
/* Attempt to use the blitter for PBO image uploads.
diff --git a/src/mesa/drivers/dri/i915/intel_tex_subimage.c b/src/mesa/drivers/dri/i915/intel_tex_subimage.c
index 2e02d50f13f..f11ef2ea329 100644
--- a/src/mesa/drivers/dri/i915/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i915/intel_tex_subimage.c
@@ -72,7 +72,7 @@ intel_blit_texsubimage(struct gl_context * ctx,
DBG("BLT subimage %s target %s level %d offset %d,%d %dx%d\n",
__func__,
- _mesa_lookup_enum_by_nr(texImage->TexObject->Target),
+ _mesa_enum_to_string(texImage->TexObject->Target),
texImage->Level, xoffset, yoffset, width, height);
pixels = _mesa_validate_pbo_teximage(ctx, 2, width, height, 1,
diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c
index 144f0fc911a..ae62a800fb7 100644
--- a/src/mesa/drivers/dri/i915/intel_tris.c
+++ b/src/mesa/drivers/dri/i915/intel_tris.c
@@ -1134,7 +1134,7 @@ intelRasterPrimitive(struct gl_context * ctx, GLenum rprim, GLuint hwprim)
if (0)
fprintf(stderr, "%s %s %x\n", __func__,
- _mesa_lookup_enum_by_nr(rprim), hwprim);
+ _mesa_enum_to_string(rprim), hwprim);
intel->vtbl.reduced_primitive_state(intel, rprim);
@@ -1158,7 +1158,7 @@ intelRenderPrimitive(struct gl_context * ctx, GLenum prim)
ctx->Polygon.BackMode != GL_FILL);
if (0)
- fprintf(stderr, "%s %s\n", __func__, _mesa_lookup_enum_by_nr(prim));
+ fprintf(stderr, "%s %s\n", __func__, _mesa_enum_to_string(prim));
/* Let some clipping routines know which primitive they're dealing
* with.
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 981fe79b132..dfdad75329d 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -60,6 +60,8 @@ i965_FILES = \
brw_fs_register_coalesce.cpp \
brw_fs_saturate_propagation.cpp \
brw_fs_sel_peephole.cpp \
+ brw_fs_surface_builder.cpp \
+ brw_fs_surface_builder.h \
brw_fs_vector_splitting.cpp \
brw_fs_visitor.cpp \
brw_gs.c \
@@ -86,6 +88,7 @@ i965_FILES = \
brw_object_purgeable.c \
brw_packed_float.c \
brw_performance_monitor.c \
+ brw_pipe_control.c \
brw_primitive_restart.c \
brw_program.c \
brw_program.h \
@@ -122,6 +125,8 @@ i965_FILES = \
brw_vec4.h \
brw_vec4_live_variables.cpp \
brw_vec4_live_variables.h \
+ brw_vec4_nir.cpp \
+ brw_vec4_gs_nir.cpp \
brw_vec4_reg_allocate.cpp \
brw_vec4_visitor.cpp \
brw_vec4_vp.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c
index 98ff0ddcd58..b188fc7de57 100644
--- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
+++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
@@ -44,6 +44,41 @@
#include "brw_state.h"
#include "intel_batchbuffer.h"
+static const GLuint stage_to_bt_edit[] = {
+ [MESA_SHADER_VERTEX] = _3DSTATE_BINDING_TABLE_EDIT_VS,
+ [MESA_SHADER_GEOMETRY] = _3DSTATE_BINDING_TABLE_EDIT_GS,
+ [MESA_SHADER_FRAGMENT] = _3DSTATE_BINDING_TABLE_EDIT_PS,
+};
+
+static uint32_t
+reserve_hw_bt_space(struct brw_context *brw, unsigned bytes)
+{
+ /* From the Broadwell PRM, Volume 16, "Workarounds",
+ * WaStateBindingTableOverfetch:
+ * "HW over-fetches two cache lines of binding table indices. When
+ * using the resource streamer, SW needs to pad binding table pointer
+ * updates with an additional two cache lines."
+ *
+ * Cache lines are 64 bytes, so we subtract 128 bytes from the size of
+ * the binding table pool buffer.
+ */
+ if (brw->hw_bt_pool.next_offset + bytes >= brw->hw_bt_pool.bo->size - 128) {
+ gen7_reset_hw_bt_pool_offsets(brw);
+ }
+
+ uint32_t offset = brw->hw_bt_pool.next_offset;
+
+ /* From the Haswell PRM, Volume 2b: Command Reference: Instructions,
+ * 3DSTATE_BINDING_TABLE_POINTERS_xS:
+ *
+ * "If HW Binding Table is enabled, the offset is relative to the
+ * Binding Table Pool Base Address and the alignment is 64 bytes."
+ */
+ brw->hw_bt_pool.next_offset += ALIGN(bytes, 64);
+
+ return offset;
+}
+
/**
* Upload a shader stage's binding table as indirect state.
*
@@ -72,22 +107,41 @@ brw_upload_binding_table(struct brw_context *brw,
brw->shader_time.bo, 0, BRW_SURFACEFORMAT_RAW,
brw->shader_time.bo->size, 1, true);
}
+ /* When RS is enabled use hw-binding table uploads, otherwise fallback to
+ * software-uploads.
+ */
+ if (brw->use_resource_streamer) {
+ gen7_update_binding_table_from_array(brw, stage_state->stage,
+ stage_state->surf_offset,
+ prog_data->binding_table
+ .size_bytes / 4);
+ } else {
+ uint32_t *bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
+ prog_data->binding_table.size_bytes,
+ 32,
+ &stage_state->bind_bo_offset);
- uint32_t *bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
- prog_data->binding_table.size_bytes, 32,
- &stage_state->bind_bo_offset);
-
- /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
- memcpy(bind, stage_state->surf_offset,
- prog_data->binding_table.size_bytes);
+ /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
+ memcpy(bind, stage_state->surf_offset,
+ prog_data->binding_table.size_bytes);
+ }
}
brw->ctx.NewDriverState |= brw_new_binding_table;
if (brw->gen >= 7) {
+ if (brw->use_resource_streamer) {
+ stage_state->bind_bo_offset =
+ reserve_hw_bt_space(brw, prog_data->binding_table.size_bytes);
+ }
BEGIN_BATCH(2);
OUT_BATCH(packet_name << 16 | (2 - 2));
- OUT_BATCH(stage_state->bind_bo_offset);
+ /* Align SurfaceStateOffset[16:6] format to [15:5] PS Binding Table field
+ * when hw-generated binding table is enabled.
+ */
+ OUT_BATCH(brw->use_resource_streamer ?
+ (stage_state->bind_bo_offset >> 1) :
+ stage_state->bind_bo_offset);
ADVANCE_BATCH();
}
}
@@ -170,6 +224,158 @@ const struct brw_tracked_state brw_gs_binding_table = {
.emit = brw_gs_upload_binding_table,
};
+/**
+ * Edit a single entry in a hardware-generated binding table
+ */
+void
+gen7_edit_hw_binding_table_entry(struct brw_context *brw,
+ gl_shader_stage stage,
+ uint32_t index,
+ uint32_t surf_offset)
+{
+ assert(stage < ARRAY_SIZE(stage_to_bt_edit));
+ assert(stage_to_bt_edit[stage]);
+
+ uint32_t dw2 = SET_FIELD(index, BRW_BINDING_TABLE_INDEX) |
+ (brw->gen >= 8 ? GEN8_SURFACE_STATE_EDIT(surf_offset) :
+ HSW_SURFACE_STATE_EDIT(surf_offset));
+
+ BEGIN_BATCH(3);
+ OUT_BATCH(stage_to_bt_edit[stage] << 16 | (3 - 2));
+ OUT_BATCH(BRW_BINDING_TABLE_EDIT_TARGET_ALL);
+ OUT_BATCH(dw2);
+ ADVANCE_BATCH();
+}
+
+/**
+ * Upload a whole hardware binding table for the given stage.
+ *
+ * Takes an array of surface offsets and the number of binding table
+ * entries.
+ */
+void
+gen7_update_binding_table_from_array(struct brw_context *brw,
+ gl_shader_stage stage,
+ const uint32_t* binding_table,
+ int num_surfaces)
+{
+ uint32_t dw2 = 0;
+
+ assert(stage < ARRAY_SIZE(stage_to_bt_edit));
+ assert(stage_to_bt_edit[stage]);
+
+ BEGIN_BATCH(num_surfaces + 2);
+ OUT_BATCH(stage_to_bt_edit[stage] << 16 | num_surfaces);
+ OUT_BATCH(BRW_BINDING_TABLE_EDIT_TARGET_ALL);
+ for (int i = 0; i < num_surfaces; i++) {
+ dw2 = SET_FIELD(i, BRW_BINDING_TABLE_INDEX) |
+ (brw->gen >= 8 ? GEN8_SURFACE_STATE_EDIT(binding_table[i]) :
+ HSW_SURFACE_STATE_EDIT(binding_table[i]));
+ OUT_BATCH(dw2);
+ }
+ ADVANCE_BATCH();
+}
+
+/**
+ * Disable hardware binding table support, falling back to the
+ * older software-generated binding table mechanism.
+ */
+void
+gen7_disable_hw_binding_tables(struct brw_context *brw)
+{
+ if (!brw->use_resource_streamer)
+ return;
+ /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
+ * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
+ *
+ * "When switching between HW and SW binding table generation, SW must
+ * issue a state cache invalidate."
+ */
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+
+ int pkt_len = brw->gen >= 8 ? 4 : 3;
+
+ BEGIN_BATCH(pkt_len);
+ OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
+ if (brw->gen >= 8) {
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ } else {
+ OUT_BATCH(HSW_BT_POOL_ALLOC_MUST_BE_ONE);
+ OUT_BATCH(0);
+ }
+ ADVANCE_BATCH();
+}
+
+/**
+ * Enable hardware binding tables and set up the binding table pool.
+ */
+void
+gen7_enable_hw_binding_tables(struct brw_context *brw)
+{
+ if (!brw->use_resource_streamer)
+ return;
+
+ if (!brw->hw_bt_pool.bo) {
+ /* We use a single reusable buffer object for the lifetime of the
+ * context and size it for the maximum number of binding tables that
+ * can be programmed per batch:
+ *
+ * From the Haswell PRM, Volume 7: 3D Media GPGPU,
+ * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
+ * "A maximum of 16,383 Binding tables are allowed in any batch buffer"
+ */
+ static const int max_size = 16383 * 4;
+ brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt",
+ max_size, 64);
+ brw->hw_bt_pool.next_offset = 0;
+ }
+
+ /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
+ * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
+ *
+ * "When switching between HW and SW binding table generation, SW must
+ * issue a state cache invalidate."
+ */
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+
+ int pkt_len = brw->gen >= 8 ? 4 : 3;
+ uint32_t dw1 = BRW_HW_BINDING_TABLE_ENABLE;
+ if (brw->is_haswell) {
+ dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_POOL_MOCS) |
+ HSW_BT_POOL_ALLOC_MUST_BE_ONE;
+ } else if (brw->gen >= 8) {
+ dw1 |= BDW_MOCS_WB;
+ }
+
+ BEGIN_BATCH(pkt_len);
+ OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
+ if (brw->gen >= 8) {
+ OUT_RELOC64(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
+ OUT_BATCH(brw->hw_bt_pool.bo->size);
+ } else {
+ OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
+ OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
+ brw->hw_bt_pool.bo->size);
+ }
+ ADVANCE_BATCH();
+}
+
+void
+gen7_reset_hw_bt_pool_offsets(struct brw_context *brw)
+{
+ brw->hw_bt_pool.next_offset = 0;
+}
+
+const struct brw_tracked_state gen7_hw_binding_tables = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH,
+ },
+ .emit = gen7_enable_hw_binding_tables
+};
+
/** @} */
/**
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp
index b404869f0c7..eac1f005496 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
@@ -220,13 +220,13 @@ brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params)
* data with different formats, which blorp does for stencil and depth
* data.
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
retry:
intel_batchbuffer_require_space(brw, estimated_max_batch_usage, RENDER_RING);
intel_batchbuffer_save_state(brw);
drm_intel_bo *saved_bo = brw->batch.bo;
- uint32_t saved_used = brw->batch.used;
+ uint32_t saved_used = USED_BATCH(brw->batch);
uint32_t saved_state_batch_offset = brw->batch.state_batch_offset;
switch (brw->gen) {
@@ -245,7 +245,7 @@ retry:
* reserved enough space that a wrap will never happen.
*/
assert(brw->batch.bo == saved_bo);
- assert((brw->batch.used - saved_used) * 4 +
+ assert((USED_BATCH(brw->batch) - saved_used) * 4 +
(saved_state_batch_offset - brw->batch.state_batch_offset) <
estimated_max_batch_usage);
/* Shut up compiler warnings on release build */
@@ -283,7 +283,7 @@ retry:
/* Flush the sampler cache so any texturing from the destination is
* coherent.
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
}
brw_hiz_op_params::brw_hiz_op_params(struct intel_mipmap_tree *mt,
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 1561b593969..205c905b447 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1285,8 +1285,8 @@ brw_blorp_blit_program::translate_dst_to_src()
/* Round the float coordinates down to nearest integer */
emit_rndd(Xp_f, X_f);
emit_rndd(Yp_f, Y_f);
- emit_mul(X_f, Xp_f, brw_imm_f(1 / key->x_scale));
- emit_mul(Y_f, Yp_f, brw_imm_f(1 / key->y_scale));
+ emit_mul(X_f, Xp_f, brw_imm_f(1.0f / key->x_scale));
+ emit_mul(Y_f, Yp_f, brw_imm_f(1.0f / key->y_scale));
SWAP_XY_AND_XPYP();
} else if (!key->bilinear_filter) {
/* Round the float coordinates down to nearest integer by moving to
@@ -1442,7 +1442,7 @@ brw_blorp_blit_program::manual_blend_average(unsigned num_samples)
for (int j = 0; j < 4; ++j) {
emit_mul(offset(texture_data[0], 2*j),
offset(vec8(texture_data[0]), 2*j),
- brw_imm_f(1.0/num_samples));
+ brw_imm_f(1.0f / num_samples));
}
}
@@ -1475,9 +1475,9 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples)
/* Compute pixel coordinates */
emit_add(vec16(x_sample_coords), Xp_f,
- brw_imm_f((float)(i & 0x1) * (1.0 / key->x_scale)));
+ brw_imm_f((float)(i & 0x1) * (1.0f / key->x_scale)));
emit_add(vec16(y_sample_coords), Yp_f,
- brw_imm_f((float)((i >> 1) & 0x1) * (1.0 / key->y_scale)));
+ brw_imm_f((float)((i >> 1) & 0x1) * (1.0f / key->y_scale)));
emit_mov(vec16(X), x_sample_coords);
emit_mov(vec16(Y), y_sample_coords);
@@ -1789,7 +1789,7 @@ brw_blorp_coord_transform_params::setup(GLfloat src0, GLfloat src1,
* so 0.5 provides the necessary correction.
*/
multiplier = scale;
- offset = src0 + (-dst0 + 0.5) * scale;
+ offset = src0 + (-dst0 + 0.5f) * scale;
} else {
/* When mirroring X we need:
* src_x - src_x0 = dst_x1 - dst_x - 0.5
@@ -1797,7 +1797,7 @@ brw_blorp_coord_transform_params::setup(GLfloat src0, GLfloat src1,
* src_x = src_x0 + (dst_x1 -dst_x - 0.5) * scale
*/
multiplier = -scale;
- offset = src0 + (dst1 - 0.5) * scale;
+ offset = src0 + (dst1 - 0.5f) * scale;
}
}
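Illustrative only: a worked instance of the mirrored transform derived in the comments above. The concrete numbers are made up for the example, and scale is assumed to be (src1 - src0) / (dst1 - dst0) as computed earlier in setup().

#include <assert.h>
#include <math.h>

int main(void)
{
   /* A 1:1 mirrored blit of a 16-pixel-wide region: source [0, 16) to
    * destination [8, 24). */
   float src0 = 0.0f, src1 = 16.0f, dst0 = 8.0f, dst1 = 24.0f;
   float scale = (src1 - src0) / (dst1 - dst0);     /* 1.0 */

   /* Mirrored case from the comment above. */
   float multiplier = -scale;
   float offset = src0 + (dst1 - 0.5f) * scale;

   /* The first destination pixel center (x = 8.5) lands on source x = 15.0,
    * so nearest sampling fetches texel 15, the mirror of texel 0. */
   float src_x = offset + multiplier * 8.5f;
   assert(fabsf(src_x - 15.0f) < 1e-6f);
   return 0;
}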
@@ -1952,8 +1952,8 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
/* Scaling factors used for bilinear filtering in multisample scaled
* blits.
*/
- wm_prog_key.x_scale = 2.0;
- wm_prog_key.y_scale = src_mt->num_samples / 2.0;
+ wm_prog_key.x_scale = 2.0f;
+ wm_prog_key.y_scale = src_mt->num_samples / 2.0f;
if (filter == GL_LINEAR && src.num_samples <= 1 && dst.num_samples <= 1)
wm_prog_key.bilinear_filter = true;
@@ -2000,9 +2000,9 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
x1 = wm_push_consts.dst_x1 = roundf(dst_x1);
y1 = wm_push_consts.dst_y1 = roundf(dst_y1);
wm_push_consts.rect_grid_x1 = (minify(src_mt->logical_width0, src_level) *
- wm_prog_key.x_scale - 1.0);
+ wm_prog_key.x_scale - 1.0f);
wm_push_consts.rect_grid_y1 = (minify(src_mt->logical_height0, src_level) *
- wm_prog_key.y_scale - 1.0);
+ wm_prog_key.y_scale - 1.0f);
wm_push_consts.x_transform.setup(src_x0, src_x1, dst_x0, dst_x1, mirror_x);
wm_push_consts.y_transform.setup(src_y0, src_y1, dst_y0, dst_y1, mirror_y);
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
index 789520c7353..d458ad846bf 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
@@ -73,7 +73,7 @@ brw_blorp_eu_emitter::emit_kill_if_outside_rect(const struct brw_reg &x,
emit_cmp(BRW_CONDITIONAL_L, x, dst_x1)->predicate = BRW_PREDICATE_NORMAL;
emit_cmp(BRW_CONDITIONAL_L, y, dst_y1)->predicate = BRW_PREDICATE_NORMAL;
- fs_inst *inst = new (mem_ctx) fs_inst(BRW_OPCODE_AND, g1, f0, g1);
+ fs_inst *inst = new (mem_ctx) fs_inst(BRW_OPCODE_AND, 16, g1, f0, g1);
inst->force_writemask_all = true;
insts.push_tail(inst);
}
@@ -84,7 +84,7 @@ brw_blorp_eu_emitter::emit_texture_lookup(const struct brw_reg &dst,
unsigned base_mrf,
unsigned msg_length)
{
- fs_inst *inst = new (mem_ctx) fs_inst(op, dst, brw_message_reg(base_mrf),
+ fs_inst *inst = new (mem_ctx) fs_inst(op, 16, dst, brw_message_reg(base_mrf),
fs_reg(0u));
inst->base_mrf = base_mrf;
@@ -119,7 +119,8 @@ brw_blorp_eu_emitter::emit_combine(enum opcode combine_opcode,
{
assert(combine_opcode == BRW_OPCODE_ADD || combine_opcode == BRW_OPCODE_AVG);
- insts.push_tail(new (mem_ctx) fs_inst(combine_opcode, dst, src_1, src_2));
+ insts.push_tail(new (mem_ctx) fs_inst(combine_opcode, 16, dst,
+ src_1, src_2));
}
fs_inst *
@@ -127,7 +128,7 @@ brw_blorp_eu_emitter::emit_cmp(enum brw_conditional_mod op,
const struct brw_reg &x,
const struct brw_reg &y)
{
- fs_inst *cmp = new (mem_ctx) fs_inst(BRW_OPCODE_CMP,
+ fs_inst *cmp = new (mem_ctx) fs_inst(BRW_OPCODE_CMP, 16,
vec16(brw_null_reg()), x, y);
cmp->conditional_mod = op;
insts.push_tail(cmp);
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index f1f230e3751..91d53eff5a7 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -208,6 +208,7 @@ cfg_t::cfg_t(exec_list *instructions)
cur_else = cur;
next = new_block();
+ assert(cur_if != NULL);
cur_if->add_successor(mem_ctx, next);
set_next_block(&cur, next, ip);
@@ -274,6 +275,7 @@ cfg_t::cfg_t(exec_list *instructions)
inst->exec_node::remove();
cur->instructions.push_tail(inst);
+ assert(cur_do != NULL);
cur->add_successor(mem_ctx, cur_do);
next = new_block();
@@ -287,6 +289,7 @@ cfg_t::cfg_t(exec_list *instructions)
inst->exec_node::remove();
cur->instructions.push_tail(inst);
+ assert(cur_while != NULL);
cur->add_successor(mem_ctx, cur_while);
next = new_block();
diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
index 1d4ba3cac7e..f981388ef1a 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -184,7 +184,7 @@ brw_fast_clear_depth(struct gl_context *ctx)
* must be issued before the rectangle primitive used for the depth
* buffer clear operation.
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
if (fb->MaxNumLayers > 0) {
for (unsigned layer = 0; layer < depth_irb->layer_count; layer++) {
@@ -204,7 +204,7 @@ brw_fast_clear_depth(struct gl_context *ctx)
* by a PIPE_CONTROL command with DEPTH_STALL bit set and Then
* followed by Depth FLUSH'
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
}
/* Now, the HiZ buffer contains data that needs to be resolved to the depth
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index ebf12fab69e..328662da82e 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -506,6 +506,18 @@ brw_initialize_context_constants(struct brw_context *brw)
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO;
ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO;
ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;
+
+ ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms =
+ BRW_MAX_IMAGES;
+ ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms =
+ (brw->intelScreen->compiler->scalar_vs ? BRW_MAX_IMAGES : 0);
+ ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms =
+ BRW_MAX_IMAGES;
+ ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
+ ctx->Const.MaxCombinedImageUnitsAndFragmentOutputs =
+ MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
+ ctx->Const.MaxImageSamples = 0;
+ ctx->Const.MaxCombinedImageUniforms = 3 * BRW_MAX_IMAGES;
}
/* Gen6 converts quads to polygon in beginning of 3D pipeline,
@@ -716,6 +728,7 @@ brwCreateContext(gl_api api,
brw->is_baytrail = devinfo->is_baytrail;
brw->is_haswell = devinfo->is_haswell;
brw->is_cherryview = devinfo->is_cherryview;
+ brw->is_broxton = devinfo->is_broxton;
brw->has_llc = devinfo->has_llc;
brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
@@ -820,6 +833,12 @@ brwCreateContext(gl_api api,
}
}
+ if (brw_init_pipe_control(brw, devinfo)) {
+ *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
+ intelDestroyContext(driContextPriv);
+ return false;
+ }
+
brw_init_state(brw);
#endif
@@ -867,6 +886,10 @@ brwCreateContext(gl_api api,
brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
+ brw->use_resource_streamer = screen->has_resource_streamer &&
+ (brw_env_var_as_boolean("INTEL_USE_HW_BT", false) ||
+ brw_env_var_as_boolean("INTEL_USE_GATHER", false));
+
ctx->VertexProgram._MaintainTnlProgram = true;
ctx->FragmentProgram._MaintainTexEnvProgram = true;
@@ -935,6 +958,10 @@ intelDestroyContext(__DRIcontext * driContextPriv)
if (brw->wm.base.scratch_bo)
drm_intel_bo_unreference(brw->wm.base.scratch_bo);
+ gen7_reset_hw_bt_pool_offsets(brw);
+ drm_intel_bo_unreference(brw->hw_bt_pool.bo);
+ brw->hw_bt_pool.bo = NULL;
+
drm_intel_gem_context_destroy(brw->hw_ctx);
if (ctx->swrast_context) {
@@ -946,6 +973,7 @@ intelDestroyContext(__DRIcontext * driContextPriv)
if (ctx->swrast_context)
_swrast_DestroyContext(&brw->ctx);
+ brw_fini_pipe_control(brw);
intel_batchbuffer_free(brw);
drm_intel_bo_unreference(brw->throttle_batch[1]);
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 9e1f722df9e..1267a6f5a97 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -201,6 +201,7 @@ enum brw_state_id {
BRW_STATE_STATS_WM,
BRW_STATE_UNIFORM_BUFFER,
BRW_STATE_ATOMIC_BUFFER,
+ BRW_STATE_IMAGE_UNITS,
BRW_STATE_META_IN_PROGRESS,
BRW_STATE_INTERPOLATION_MAP,
BRW_STATE_PUSH_CONSTANT_ALLOCATION,
@@ -282,6 +283,7 @@ enum brw_state_id {
#define BRW_NEW_STATS_WM (1ull << BRW_STATE_STATS_WM)
#define BRW_NEW_UNIFORM_BUFFER (1ull << BRW_STATE_UNIFORM_BUFFER)
#define BRW_NEW_ATOMIC_BUFFER (1ull << BRW_STATE_ATOMIC_BUFFER)
+#define BRW_NEW_IMAGE_UNITS (1ull << BRW_STATE_IMAGE_UNITS)
#define BRW_NEW_META_IN_PROGRESS (1ull << BRW_STATE_META_IN_PROGRESS)
#define BRW_NEW_INTERPOLATION_MAP (1ull << BRW_STATE_INTERPOLATION_MAP)
#define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1ull << BRW_STATE_PUSH_CONSTANT_ALLOCATION)
@@ -367,6 +369,7 @@ struct brw_stage_prog_data {
GLuint nr_params; /**< number of float params/constants */
GLuint nr_pull_params;
+ unsigned nr_image_params;
unsigned curb_read_length;
unsigned total_scratch;
@@ -387,6 +390,59 @@ struct brw_stage_prog_data {
*/
const gl_constant_value **param;
const gl_constant_value **pull_param;
+
+ /**
+ * Image metadata passed to the shader as uniforms. This is deliberately
+ * ignored by brw_stage_prog_data_compare() because its contents don't have
+ * any influence on program compilation.
+ */
+ struct brw_image_param *image_param;
+};
+
+/*
+ * Image metadata structure as laid out in the shader parameter
+ * buffer. Entries have to be 16B-aligned for the vec4 back-end to be
+ * able to use them. That's okay because the padding and any unused
+ * entries [most of them except when we're doing untyped surface
+ * access] will be removed by the uniform packing pass.
+ */
+#define BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET 0
+#define BRW_IMAGE_PARAM_OFFSET_OFFSET 4
+#define BRW_IMAGE_PARAM_SIZE_OFFSET 8
+#define BRW_IMAGE_PARAM_STRIDE_OFFSET 12
+#define BRW_IMAGE_PARAM_TILING_OFFSET 16
+#define BRW_IMAGE_PARAM_SWIZZLING_OFFSET 20
+#define BRW_IMAGE_PARAM_SIZE 24
+
+struct brw_image_param {
+ /** Surface binding table index. */
+ uint32_t surface_idx;
+
+ /** Offset applied to the X and Y surface coordinates. */
+ uint32_t offset[2];
+
+ /** Surface X, Y and Z dimensions. */
+ uint32_t size[3];
+
+ /** X-stride in bytes, Y-stride in pixels, horizontal slice stride in
+ * pixels, vertical slice stride in pixels.
+ */
+ uint32_t stride[4];
+
+ /** Log2 of the tiling modulus in the X, Y and Z dimension. */
+ uint32_t tiling[3];
+
+ /**
+ * Right shift to apply for bit 6 address swizzling. Two different
+ * swizzles can be specified and will be applied one after the other. The
+ * resulting address will be:
+ *
+ * addr' = addr ^ ((1 << 6) & ((addr >> swizzling[0]) ^
+ * (addr >> swizzling[1])))
+ *
+ * Use \c 0xff if any of the swizzles is not required.
+ */
+ uint32_t swizzling[2];
};
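Illustrative only: a standalone sketch of the bit-6 address swizzle documented in the swizzling[] comment above. The shift amounts 3 and 4 are example values, not taken from this patch; a shift of 0xff marks a disabled swizzle in the metadata and is not exercised here.

#include <assert.h>
#include <stdint.h>

/* The documented formula: fold two shifted copies of the address into bit 6. */
static uint32_t
apply_bit6_swizzle(uint32_t addr, uint32_t shift0, uint32_t shift1)
{
   uint32_t bit6 = (1u << 6) & ((addr >> shift0) ^ (addr >> shift1));
   return addr ^ bit6;
}

int main(void)
{
   /* Shifts of 3 and 4 fold address bits 9 and 10 into bit 6. */
   assert(apply_bit6_swizzle(0x200, 3, 4) == 0x240); /* bit 9 set: bit 6 flips */
   assert(apply_bit6_swizzle(0x600, 3, 4) == 0x600); /* bits 9 and 10 cancel   */
   assert(apply_bit6_swizzle(0x0c0, 3, 4) == 0x0c0); /* bits below 9: no effect */
   return 0;
}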
/* Data about a particular attempt to compile a program. Note that
@@ -416,11 +472,13 @@ struct brw_wm_prog_data {
uint8_t computed_depth_mode;
+ bool early_fragment_tests;
bool no_8;
bool dual_src_blend;
bool uses_pos_offset;
bool uses_omask;
bool uses_kill;
+ bool pulls_bary;
uint32_t prog_offset_16;
/**
@@ -874,11 +932,12 @@ struct intel_batchbuffer {
drm_intel_bo *bo;
/** Last BO submitted to the hardware. Used for glFinish(). */
drm_intel_bo *last_bo;
- /** BO for post-sync nonzero writes for gen6 workaround. */
- drm_intel_bo *workaround_bo;
+#ifdef DEBUG
uint16_t emit, total;
- uint16_t used, reserved_space;
+#endif
+ uint16_t reserved_space;
+ uint32_t *map_next;
uint32_t *map;
uint32_t *cpu_map;
#define BATCH_SZ (8192*sizeof(uint32_t))
@@ -887,10 +946,8 @@ struct intel_batchbuffer {
enum brw_gpu_ring ring;
bool needs_sol_reset;
- uint8_t pipe_controls_since_last_cs_stall;
-
struct {
- uint16_t used;
+ uint32_t *map_next;
int reloc_count;
} saved;
};
@@ -1040,6 +1097,10 @@ struct brw_context
drm_intel_context *hw_ctx;
+ /** BO for post-sync nonzero writes for gen6 workaround. */
+ drm_intel_bo *workaround_bo;
+ uint8_t pipe_controls_since_last_cs_stall;
+
/**
* Set of drm_intel_bo * that have been rendered to within this batchbuffer
* and would need flushing before being used from another cache domain that
@@ -1123,6 +1184,7 @@ struct brw_context
bool is_baytrail;
bool is_haswell;
bool is_cherryview;
+ bool is_broxton;
bool has_hiz;
bool has_separate_stencil;
@@ -1135,6 +1197,7 @@ struct brw_context
bool has_pln;
bool no_simd8;
bool use_rep_send;
+ bool use_resource_streamer;
/**
* Some versions of Gen hardware don't do centroid interpolation correctly
@@ -1241,12 +1304,12 @@ struct brw_context
* Platform specific constants containing the maximum number of threads
* for each pipeline stage.
*/
- int max_vs_threads;
- int max_hs_threads;
- int max_ds_threads;
- int max_gs_threads;
- int max_wm_threads;
- int max_cs_threads;
+ unsigned max_vs_threads;
+ unsigned max_hs_threads;
+ unsigned max_ds_threads;
+ unsigned max_gs_threads;
+ unsigned max_wm_threads;
+ unsigned max_cs_threads;
/* BRW_NEW_URB_ALLOCATIONS:
*/
@@ -1398,6 +1461,12 @@ struct brw_context
struct brw_cs_prog_data *prog_data;
} cs;
+ /* RS hardware binding table */
+ struct {
+ drm_intel_bo *bo;
+ uint32_t next_offset;
+ } hw_bt_pool;
+
struct {
uint32_t state_offset;
uint32_t blend_state_offset;
@@ -1453,8 +1522,8 @@ struct brw_context
} perfmon;
int num_atoms[BRW_NUM_PIPELINES];
- const struct brw_tracked_state render_atoms[57];
- const struct brw_tracked_state compute_atoms[3];
+ const struct brw_tracked_state render_atoms[60];
+ const struct brw_tracked_state compute_atoms[4];
/* If (INTEL_DEBUG & DEBUG_BATCH) */
struct {
@@ -1732,11 +1801,17 @@ void brw_upload_abo_surfaces(struct brw_context *brw,
struct gl_shader_program *prog,
struct brw_stage_state *stage_state,
struct brw_stage_prog_data *prog_data);
+void brw_upload_image_surfaces(struct brw_context *brw,
+ struct gl_shader *shader,
+ struct brw_stage_state *stage_state,
+ struct brw_stage_prog_data *prog_data);
/* brw_surface_formats.c */
bool brw_render_target_supported(struct brw_context *brw,
struct gl_renderbuffer *rb);
uint32_t brw_depth_format(struct brw_context *brw, mesa_format format);
+mesa_format brw_lower_mesa_image_format(const struct brw_device_info *devinfo,
+ mesa_format format);
/* brw_performance_monitor.c */
void brw_init_performance_monitors(struct brw_context *brw);
@@ -2013,6 +2088,21 @@ bool
gen9_use_linear_1d_layout(const struct brw_context *brw,
const struct intel_mipmap_tree *mt);
+/* brw_pipe_control.c */
+int brw_init_pipe_control(struct brw_context *brw,
+ const struct brw_device_info *info);
+void brw_fini_pipe_control(struct brw_context *brw);
+
+void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags);
+void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
+ drm_intel_bo *bo, uint32_t offset,
+ uint32_t imm_lower, uint32_t imm_upper);
+void brw_emit_mi_flush(struct brw_context *brw);
+void brw_emit_post_sync_nonzero_flush(struct brw_context *brw);
+void brw_emit_depth_stall_flushes(struct brw_context *brw);
+void gen7_emit_vs_workaround_flush(struct brw_context *brw);
+void gen7_emit_cs_stall_flush(struct brw_context *brw);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp
index 42a082b57b6..6ce5779137e 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -82,7 +82,7 @@ brw_cs_emit(struct brw_context *brw,
prog_data->local_size[0] = cp->LocalSize[0];
prog_data->local_size[1] = cp->LocalSize[1];
prog_data->local_size[2] = cp->LocalSize[2];
- int local_workgroup_size =
+ unsigned local_workgroup_size =
cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];
cfg_t *cfg = NULL;
@@ -182,7 +182,8 @@ brw_codegen_cs_prog(struct brw_context *brw,
* prog_data associated with the compiled program, and which will be freed
* by the state cache.
*/
- int param_count = cs->num_uniform_components;
+ int param_count = cs->num_uniform_components +
+ cs->NumImages * BRW_IMAGE_PARAM_SIZE;
/* The backend also sometimes adds params for texture size. */
param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
@@ -190,7 +191,10 @@ brw_codegen_cs_prog(struct brw_context *brw,
rzalloc_array(NULL, const gl_constant_value *, param_count);
prog_data.base.pull_param =
rzalloc_array(NULL, const gl_constant_value *, param_count);
+ prog_data.base.image_param =
+ rzalloc_array(NULL, struct brw_image_param, cs->NumImages);
prog_data.base.nr_params = param_count;
+ prog_data.base.nr_image_params = cs->NumImages;
program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
&cp->program, prog, &program_size);
@@ -291,6 +295,17 @@ brw_cs_precompile(struct gl_context *ctx,
}
+static unsigned
+get_cs_thread_count(const struct brw_cs_prog_data *cs_prog_data)
+{
+ const unsigned simd_size = cs_prog_data->simd_size;
+ unsigned group_size = cs_prog_data->local_size[0] *
+ cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
+
+ return (group_size + simd_size - 1) / simd_size;
+}
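Illustrative only: the same ceiling division get_cs_thread_count() performs above, with a couple of worked cases.

#include <assert.h>

/* How many hardware threads cover one local workgroup at a given SIMD width. */
static unsigned
cs_thread_count(unsigned simd_size, unsigned x, unsigned y, unsigned z)
{
   unsigned group_size = x * y * z;
   return (group_size + simd_size - 1) / simd_size;
}

int main(void)
{
   assert(cs_thread_count(16, 8, 8, 1) == 4);  /* 64 invocations in SIMD16 */
   assert(cs_thread_count(8, 10, 1, 1) == 2);  /* 10 invocations need 2 SIMD8 threads */
   return 0;
}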
+
+
static void
brw_upload_cs_state(struct brw_context *brw)
{
@@ -316,6 +331,8 @@ brw_upload_cs_state(struct brw_context *brw)
prog_data->binding_table.size_bytes,
32, &stage_state->bind_bo_offset);
+ unsigned threads = get_cs_thread_count(cs_prog_data);
+
uint32_t dwords = brw->gen < 8 ? 8 : 9;
BEGIN_BATCH(dwords);
OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2));
@@ -365,6 +382,13 @@ brw_upload_cs_state(struct brw_context *brw)
desc[dw++] = 0;
desc[dw++] = 0;
desc[dw++] = stage_state->bind_bo_offset;
+ desc[dw++] = 0;
+ const uint32_t media_threads =
+ brw->gen >= 8 ?
+ SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :
+ SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT);
+ assert(threads <= brw->max_cs_threads);
+ desc[dw++] = media_threads;
BEGIN_BATCH(4);
OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2));
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index befd7a9538c..a149ce3ba12 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -176,7 +176,7 @@ void brw_upload_cs_urb_state(struct brw_context *brw)
ADVANCE_BATCH();
}
-static GLfloat fixed_plane[6][4] = {
+static const GLfloat fixed_plane[6][4] = {
{ 0, 0, -1, 1 },
{ 0, 0, 1, 1 },
{ 0, -1, 0, 1 },
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index c113d52a3d3..3bbaf977bc5 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -877,6 +877,21 @@ enum opcode {
* instructions.
*/
FS_OPCODE_FB_WRITE = 128,
+
+ /**
+ * Same as FS_OPCODE_FB_WRITE but expects its arguments separately as
+ * individual sources instead of as a single payload blob:
+ *
+ * Source 0: [required] Color 0.
+ * Source 1: [optional] Color 1 (for dual source blend messages).
+ * Source 2: [optional] Src0 Alpha.
+ * Source 3: [optional] Source Depth (passthrough from the thread payload).
+ * Source 4: [optional] Destination Depth (gl_FragDepth).
+ * Source 5: [optional] Sample Mask (gl_SampleMask).
+ * Source 6: [required] Number of color components (as a UD immediate).
+ */
+ FS_OPCODE_FB_WRITE_LOGICAL,
+
FS_OPCODE_BLORP_FB_WRITE,
FS_OPCODE_REP_FB_WRITE,
SHADER_OPCODE_RCP,
@@ -890,18 +905,49 @@ enum opcode {
SHADER_OPCODE_SIN,
SHADER_OPCODE_COS,
+ /**
+ * Texture sampling opcodes.
+ *
+ * LOGICAL opcodes are eventually translated to the matching non-LOGICAL
+ * opcode but instead of taking a single payload blob they expect their
+ * arguments separately as individual sources:
+ *
+ * Source 0: [optional] Texture coordinates.
+ * Source 1: [optional] Shadow comparator.
+ * Source 2: [optional] dPdx if the operation takes explicit derivatives,
+ * otherwise LOD value.
+ * Source 3: [optional] dPdy if the operation takes explicit derivatives.
+ * Source 4: [optional] Sample index.
+ * Source 5: [optional] MCS data.
+ * Source 6: [required] Texture sampler.
+ * Source 7: [optional] Texel offset.
+ * Source 8: [required] Number of coordinate components (as UD immediate).
+ * Source 9: [required] Number of derivative components (as UD immediate).
+ */
SHADER_OPCODE_TEX,
+ SHADER_OPCODE_TEX_LOGICAL,
SHADER_OPCODE_TXD,
+ SHADER_OPCODE_TXD_LOGICAL,
SHADER_OPCODE_TXF,
+ SHADER_OPCODE_TXF_LOGICAL,
SHADER_OPCODE_TXL,
+ SHADER_OPCODE_TXL_LOGICAL,
SHADER_OPCODE_TXS,
+ SHADER_OPCODE_TXS_LOGICAL,
FS_OPCODE_TXB,
+ FS_OPCODE_TXB_LOGICAL,
SHADER_OPCODE_TXF_CMS,
+ SHADER_OPCODE_TXF_CMS_LOGICAL,
SHADER_OPCODE_TXF_UMS,
+ SHADER_OPCODE_TXF_UMS_LOGICAL,
SHADER_OPCODE_TXF_MCS,
+ SHADER_OPCODE_TXF_MCS_LOGICAL,
SHADER_OPCODE_LOD,
+ SHADER_OPCODE_LOD_LOGICAL,
SHADER_OPCODE_TG4,
+ SHADER_OPCODE_TG4_LOGICAL,
SHADER_OPCODE_TG4_OFFSET,
+ SHADER_OPCODE_TG4_OFFSET_LOGICAL,
/**
* Combines multiple sources of size 1 into a larger virtual GRF.
@@ -919,13 +965,33 @@ enum opcode {
SHADER_OPCODE_SHADER_TIME_ADD,
+ /**
+ * Typed and untyped surface access opcodes.
+ *
+ * LOGICAL opcodes are eventually translated to the matching non-LOGICAL
+ * opcode but instead of taking a single payload blob they expect their
+ * arguments separately as individual sources:
+ *
+ * Source 0: [required] Surface coordinates.
+ * Source 1: [optional] Operation source.
+ * Source 2: [required] Surface index.
+ * Source 3: [required] Number of coordinate components (as UD immediate).
+ * Source 4: [required] Opcode-specific control immediate, same as source 2
+ * of the matching non-LOGICAL opcode.
+ */
SHADER_OPCODE_UNTYPED_ATOMIC,
+ SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
SHADER_OPCODE_UNTYPED_SURFACE_READ,
+ SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
+ SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL,
SHADER_OPCODE_TYPED_ATOMIC,
+ SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
SHADER_OPCODE_TYPED_SURFACE_READ,
+ SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL,
SHADER_OPCODE_TYPED_SURFACE_WRITE,
+ SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
SHADER_OPCODE_MEMORY_FENCE,
@@ -971,7 +1037,6 @@ enum opcode {
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
FS_OPCODE_DISCARD_JUMP,
- FS_OPCODE_SET_OMASK,
FS_OPCODE_SET_SAMPLE_ID,
FS_OPCODE_SET_SIMD4X2_OFFSET,
FS_OPCODE_PACK_HALF_2x16_SPLIT,
@@ -1151,6 +1216,11 @@ enum opcode {
* GLSL barrier()
*/
SHADER_OPCODE_BARRIER,
+
+ /**
+ * Calculate the high 32-bits of a 32x32 multiply.
+ */
+ SHADER_OPCODE_MULH,
};
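Illustrative only: a small host-side check of what the SHADER_OPCODE_MULH description above means, shown here for unsigned operands.

#include <assert.h>
#include <stdint.h>

/* The upper 32 bits of the full 64-bit product of two 32-bit operands. */
static uint32_t mulh_u32(uint32_t a, uint32_t b)
{
   return (uint32_t)(((uint64_t)a * b) >> 32);
}

int main(void)
{
   assert(mulh_u32(0xffffffffu, 0xffffffffu) == 0xfffffffeu);
   assert(mulh_u32(0x10000u, 0x10000u) == 1u);   /* 2^16 * 2^16 = 2^32 */
   return 0;
}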
enum brw_urb_write_flags {
@@ -1642,6 +1712,36 @@ enum brw_message_target {
#define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x7829 /* GEN7+ */
#define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782A /* GEN7+ */
+#define _3DSTATE_BINDING_TABLE_POOL_ALLOC 0x7919 /* GEN7.5+ */
+#define BRW_HW_BINDING_TABLE_ENABLE (1 << 11)
+#define GEN7_HW_BT_POOL_MOCS_SHIFT 7
+#define GEN7_HW_BT_POOL_MOCS_MASK INTEL_MASK(10, 7)
+#define GEN8_HW_BT_POOL_MOCS_SHIFT 0
+#define GEN8_HW_BT_POOL_MOCS_MASK INTEL_MASK(6, 0)
+/* Only required in HSW */
+#define HSW_BT_POOL_ALLOC_MUST_BE_ONE (3 << 5)
+
+#define _3DSTATE_BINDING_TABLE_EDIT_VS 0x7843 /* GEN7.5 */
+#define _3DSTATE_BINDING_TABLE_EDIT_GS 0x7844 /* GEN7.5 */
+#define _3DSTATE_BINDING_TABLE_EDIT_HS 0x7845 /* GEN7.5 */
+#define _3DSTATE_BINDING_TABLE_EDIT_DS 0x7846 /* GEN7.5 */
+#define _3DSTATE_BINDING_TABLE_EDIT_PS 0x7847 /* GEN7.5 */
+#define BRW_BINDING_TABLE_INDEX_SHIFT 16
+#define BRW_BINDING_TABLE_INDEX_MASK INTEL_MASK(23, 16)
+
+#define BRW_BINDING_TABLE_EDIT_TARGET_ALL 3
+#define BRW_BINDING_TABLE_EDIT_TARGET_CORE1 2
+#define BRW_BINDING_TABLE_EDIT_TARGET_CORE0 1
+/* In HSW, when editing binding table entries with surface state offsets,
+ * the surface state offset is a 16-bit value aligned to 32 bytes, but the
+ * Surface State Pointer field in DWord 2 occupies only bits [15:0].
+ * Right-shift surf_offset by 5 bits so that it does not disturb bit 16
+ * (which belongs to the binding table index field); otherwise the GPU
+ * would hang.
+ */
+#define HSW_SURFACE_STATE_EDIT(value) (value >> 5)
+/* Same as Haswell, but surface state offsets are now aligned to 64 bytes. */
+#define GEN8_SURFACE_STATE_EDIT(value) (value >> 6)
+
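Illustrative only: a quick check of the two edit macros above. The offset 0x1240 is an arbitrary example that happens to satisfy both the 32-byte and 64-byte alignment requirements.

#include <assert.h>
#include <stdint.h>

/* Mirrors the two macros above, with explicit parentheses. */
#define HSW_SS_EDIT(value)  ((value) >> 5)
#define GEN8_SS_EDIT(value) ((value) >> 6)

int main(void)
{
   /* A surface state block at byte offset 0x1240 packs into bits [15:0] of
    * DWord 2 without touching bit 16, which belongs to the binding table
    * index field. */
   assert(HSW_SS_EDIT(0x1240u) == 0x92 && (HSW_SS_EDIT(0x1240u) & ~0xffffu) == 0);
   assert(GEN8_SS_EDIT(0x1240u) == 0x49 && (GEN8_SS_EDIT(0x1240u) & ~0xffffu) == 0);
   return 0;
}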
#define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GEN6+ */
# define PS_SAMPLER_STATE_CHANGE (1 << 12)
# define GS_SAMPLER_STATE_CHANGE (1 << 9)
@@ -1757,6 +1857,7 @@ enum brw_message_target {
# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16)
+# define HSW_VS_UAV_ACCESS_ENABLE (1 << 12)
/* DW4 */
# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20
# define GEN6_VS_URB_READ_LENGTH_SHIFT 11
@@ -1782,6 +1883,7 @@ enum brw_message_target {
# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
# define GEN6_GS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
# define GEN6_GS_FLOATING_POINT_MODE_ALT (1 << 16)
+# define HSW_GS_UAV_ACCESS_ENABLE (1 << 12)
/* DW4 */
# define GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT 23
# define GEN7_GS_OUTPUT_TOPOLOGY_SHIFT 17
@@ -2147,6 +2249,7 @@ enum brw_pixel_shader_computed_depth_mode {
# define GEN8_PSX_SHADER_DISABLES_ALPHA_TO_COVERAGE (1 << 7)
# define GEN8_PSX_SHADER_IS_PER_SAMPLE (1 << 6)
# define GEN8_PSX_SHADER_COMPUTES_STENCIL (1 << 5)
+# define GEN9_PSX_SHADER_PULLS_BARY (1 << 3)
# define GEN8_PSX_SHADER_HAS_UAV (1 << 2)
# define GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK (1 << 1)
@@ -2283,6 +2386,9 @@ enum brw_wm_barycentric_interp_mode {
# define GEN7_WM_KILL_ENABLE (1 << 25)
# define GEN7_WM_COMPUTED_DEPTH_MODE_SHIFT 23
# define GEN7_WM_USES_SOURCE_DEPTH (1 << 20)
+# define GEN7_WM_EARLY_DS_CONTROL_NORMAL (0 << 21)
+# define GEN7_WM_EARLY_DS_CONTROL_PSEXEC (1 << 21)
+# define GEN7_WM_EARLY_DS_CONTROL_PREPS (2 << 21)
# define GEN7_WM_USES_SOURCE_W (1 << 19)
# define GEN7_WM_POSITION_ZW_PIXEL (0 << 17)
# define GEN7_WM_POSITION_ZW_CENTROID (2 << 17)
@@ -2307,6 +2413,7 @@ enum brw_wm_barycentric_interp_mode {
/* DW2 */
# define GEN7_WM_MSDISPMODE_PERSAMPLE (0 << 31)
# define GEN7_WM_MSDISPMODE_PERPIXEL (1 << 31)
+# define HSW_WM_UAV_ONLY (1 << 30)
#define _3DSTATE_PS 0x7820 /* GEN7+ */
/* DW1: kernel pointer */
@@ -2330,6 +2437,7 @@ enum brw_wm_barycentric_interp_mode {
# define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE (1 << 8)
# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
# define GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE (1 << 6)
+# define HSW_PS_UAV_ACCESS_ENABLE (1 << 5)
# define GEN7_PS_POSOFFSET_NONE (0 << 3)
# define GEN7_PS_POSOFFSET_CENTROID (2 << 3)
# define GEN7_PS_POSOFFSET_SAMPLE (3 << 3)
@@ -2493,12 +2601,13 @@ enum brw_wm_barycentric_interp_mode {
#define BDW_MOCS_WT 0x58
#define BDW_MOCS_PTE 0x18
-/* Skylake: MOCS is now an index into an array of 64 different configurable
- * cache settings. We still use only either write-back or write-through; and
- * rely on the documented default values.
+/* Skylake: MOCS is now an index into an array of 62 different caching
+ * configurations programmed by the kernel.
*/
-#define SKL_MOCS_WB (0b001001 << 1)
-#define SKL_MOCS_WT (0b000101 << 1)
+/* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
+#define SKL_MOCS_WB (2 << 1)
+/* TC=LLC/eLLC, LeCC=PTE, LRUM=3, L3CC=WB */
+#define SKL_MOCS_PTE (1 << 1)
#define MEDIA_VFE_STATE 0x7000
/* GEN7 DW2, GEN8+ DW3 */
@@ -2519,6 +2628,11 @@ enum brw_wm_barycentric_interp_mode {
# define MEDIA_VFE_STATE_CURBE_ALLOC_MASK INTEL_MASK(15, 0)
#define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002
+/* GEN7 DW5, GEN8+ DW6 */
+# define MEDIA_GPGPU_THREAD_COUNT_SHIFT 0
+# define MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(7, 0)
+# define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT 0
+# define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0)
#define MEDIA_STATE_FLUSH 0x7004
#define GPGPU_WALKER 0x7105
/* GEN8+ DW2 */
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c
index a07b86e60e2..16c125d07ee 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -170,7 +170,8 @@ static const struct brw_device_info brw_device_info_byt = {
#define HSW_FEATURES \
GEN7_FEATURES, \
.is_haswell = true, \
- .supports_simd16_3src = true
+ .supports_simd16_3src = true, \
+ .has_resource_streamer = true
static const struct brw_device_info brw_device_info_hsw_gt1 = {
HSW_FEATURES, .gt = 1,
@@ -229,6 +230,7 @@ static const struct brw_device_info brw_device_info_hsw_gt3 = {
#define GEN8_FEATURES \
.gen = 8, \
.has_hiz_and_separate_stencil = true, \
+ .has_resource_streamer = true, \
.must_use_separate_stencil = true, \
.has_llc = true, \
.has_pln = true, \
@@ -297,41 +299,62 @@ static const struct brw_device_info brw_device_info_chv = {
}
};
-/* Thread counts and URB limits are placeholders, and may not be accurate. */
#define GEN9_FEATURES \
.gen = 9, \
.has_hiz_and_separate_stencil = true, \
+ .has_resource_streamer = true, \
.must_use_separate_stencil = true, \
.has_llc = true, \
.has_pln = true, \
- .max_vs_threads = 280, \
- .max_gs_threads = 256, \
- .max_wm_threads = 408, \
+ .supports_simd16_3src = true, \
+ .max_vs_threads = 336, \
+ .max_gs_threads = 336, \
+ .max_hs_threads = 336, \
+ .max_ds_threads = 336, \
+ .max_wm_threads = 64 * 6, \
+ .max_cs_threads = 56, \
.urb = { \
- .size = 128, \
+ .size = 192, \
.min_vs_entries = 64, \
- .max_vs_entries = 1664, \
+ .max_vs_entries = 1856, \
+ .max_hs_entries = 672, \
+ .max_ds_entries = 1120, \
.max_gs_entries = 640, \
}
-static const struct brw_device_info brw_device_info_skl_early = {
- GEN9_FEATURES, .gt = 1,
- .supports_simd16_3src = false,
-};
-
static const struct brw_device_info brw_device_info_skl_gt1 = {
GEN9_FEATURES, .gt = 1,
- .supports_simd16_3src = true,
};
static const struct brw_device_info brw_device_info_skl_gt2 = {
GEN9_FEATURES, .gt = 2,
- .supports_simd16_3src = true,
};
static const struct brw_device_info brw_device_info_skl_gt3 = {
GEN9_FEATURES, .gt = 3,
- .supports_simd16_3src = true,
+};
+
+static const struct brw_device_info brw_device_info_bxt = {
+ GEN9_FEATURES,
+ .is_broxton = 1,
+ .gt = 1,
+ .has_llc = false,
+
+ /* XXX: These are preliminary thread counts and URB sizes. */
+ .max_vs_threads = 56,
+ .max_hs_threads = 56,
+ .max_ds_threads = 56,
+ .max_gs_threads = 56,
+ .max_wm_threads = 32,
+ .max_cs_threads = 28,
+ .urb = {
+ .size = 64,
+ .min_vs_entries = 34,
+ .max_vs_entries = 640,
+ .max_hs_entries = 80,
+ .max_ds_entries = 80,
+ .max_gs_entries = 256,
+ }
};
const struct brw_device_info *
@@ -348,9 +371,6 @@ brw_get_device_info(int devid, int revision)
return NULL;
}
- if (devinfo->gen == 9 && (revision == 2 || revision == 3 || revision == -1))
- return &brw_device_info_skl_early;
-
return devinfo;
}
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h b/src/mesa/drivers/dri/i965/brw_device_info.h
index 9192235fb0e..7bab5716b43 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.h
+++ b/src/mesa/drivers/dri/i965/brw_device_info.h
@@ -35,6 +35,7 @@ struct brw_device_info
bool is_baytrail;
bool is_haswell;
bool is_cherryview;
+ bool is_broxton;
bool has_hiz_and_separate_stencil;
bool must_use_separate_stencil;
@@ -45,6 +46,7 @@ struct brw_device_info
bool has_compr4;
bool has_surface_tile_offset;
bool supports_simd16_3src;
+ bool has_resource_streamer;
/**
* Quirks:
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index b91597a9f5d..e092ef4a7c6 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -104,13 +104,13 @@ get_hw_prim_for_gl_prim(int mode)
* programs be immune to the active primitive (ie. cope with all
* possibilities). That may not be realistic however.
*/
-static void brw_set_prim(struct brw_context *brw,
- const struct _mesa_prim *prim)
+static void
+brw_set_prim(struct brw_context *brw, const struct _mesa_prim *prim)
{
struct gl_context *ctx = &brw->ctx;
uint32_t hw_prim = get_hw_prim_for_gl_prim(prim->mode);
- DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));
+ DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode));
/* Slight optimization to avoid the GS program when not needed:
*/
@@ -138,15 +138,12 @@ static void brw_set_prim(struct brw_context *brw,
}
}
-static void gen6_set_prim(struct brw_context *brw,
- const struct _mesa_prim *prim)
+static void
+gen6_set_prim(struct brw_context *brw, const struct _mesa_prim *prim)
{
- uint32_t hw_prim;
-
- DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));
-
- hw_prim = get_hw_prim_for_gl_prim(prim->mode);
+ DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode));
+ const uint32_t hw_prim = get_hw_prim_for_gl_prim(prim->mode);
if (hw_prim != brw->primitive) {
brw->primitive = hw_prim;
brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE;
@@ -162,7 +159,8 @@ static void gen6_set_prim(struct brw_context *brw,
* quads so that those dangling vertices won't get drawn when we convert to
* trifans/tristrips.
*/
-static GLuint trim(GLenum prim, GLuint length)
+static GLuint
+trim(GLenum prim, GLuint length)
{
if (prim == GL_QUAD_STRIP)
return length > 3 ? (length - length % 2) : 0;
@@ -173,16 +171,16 @@ static GLuint trim(GLenum prim, GLuint length)
}
-static void brw_emit_prim(struct brw_context *brw,
- const struct _mesa_prim *prim,
- uint32_t hw_prim)
+static void
+brw_emit_prim(struct brw_context *brw,
+ const struct _mesa_prim *prim,
+ uint32_t hw_prim)
{
int verts_per_instance;
int vertex_access_type;
int indirect_flag;
- int predicate_enable;
- DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
+ DBG("PRIM: %s %d %d\n", _mesa_enum_to_string(prim->mode),
prim->start, prim->count);
int start_vertex_location = prim->start;
@@ -216,9 +214,8 @@ static void brw_emit_prim(struct brw_context *brw,
     * and missed flushes of the render cache as it heads to other parts of
     * the hardware besides the draw code.
*/
- if (brw->always_flush_cache) {
- intel_batchbuffer_emit_mi_flush(brw);
- }
+ if (brw->always_flush_cache)
+ brw_emit_mi_flush(brw);
/* If indirect, emit a bunch of loads from the indirect BO. */
if (prim->is_indirect) {
@@ -256,22 +253,20 @@ static void brw_emit_prim(struct brw_context *brw,
OUT_BATCH(0);
ADVANCE_BATCH();
}
- }
- else {
+ } else {
indirect_flag = 0;
}
+ BEGIN_BATCH(brw->gen >= 7 ? 7 : 6);
+
if (brw->gen >= 7) {
- if (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT)
- predicate_enable = GEN7_3DPRIM_PREDICATE_ENABLE;
- else
- predicate_enable = 0;
+ const int predicate_enable =
+ (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT)
+ ? GEN7_3DPRIM_PREDICATE_ENABLE : 0;
- BEGIN_BATCH(7);
OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | predicate_enable);
OUT_BATCH(hw_prim | vertex_access_type);
} else {
- BEGIN_BATCH(6);
OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
vertex_access_type);
@@ -283,14 +278,14 @@ static void brw_emit_prim(struct brw_context *brw,
OUT_BATCH(base_vertex_location);
ADVANCE_BATCH();
- if (brw->always_flush_cache) {
- intel_batchbuffer_emit_mi_flush(brw);
- }
+ if (brw->always_flush_cache)
+ brw_emit_mi_flush(brw);
}
-static void brw_merge_inputs( struct brw_context *brw,
- const struct gl_client_array *arrays[])
+static void
+brw_merge_inputs(struct brw_context *brw,
+ const struct gl_client_array *arrays[])
{
const struct gl_context *ctx = &brw->ctx;
GLuint i;
@@ -359,7 +354,8 @@ static void brw_merge_inputs( struct brw_context *brw,
* Also mark any render targets which will be textured as needing a render
* cache flush.
*/
-static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
+static void
+brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct gl_framebuffer *fb = ctx->DrawBuffer;
@@ -399,21 +395,22 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
/* May fail if out of video memory for texture or vbo upload, or on
* fallback conditions.
*/
-static void brw_try_draw_prims( struct gl_context *ctx,
- const struct gl_client_array *arrays[],
- const struct _mesa_prim *prims,
- GLuint nr_prims,
- const struct _mesa_index_buffer *ib,
- GLuint min_index,
- GLuint max_index,
- struct gl_buffer_object *indirect)
+static void
+brw_try_draw_prims(struct gl_context *ctx,
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prims,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLuint min_index,
+ GLuint max_index,
+ struct gl_buffer_object *indirect)
{
struct brw_context *brw = brw_context(ctx);
GLuint i;
bool fail_next = false;
if (ctx->NewState)
- _mesa_update_state( ctx );
+ _mesa_update_state(ctx);
/* Find the highest sampler unit used by each shader program. A bit-count
* won't work since ARB programs use the texture unit number as the sampler
@@ -433,7 +430,7 @@ static void brw_try_draw_prims( struct gl_context *ctx,
* software fallback will segfault if it attempts to access any
* texture level other than level 0.
*/
- brw_validate_textures( brw );
+ brw_validate_textures(brw);
intel_prepare_render(brw);
@@ -445,7 +442,7 @@ static void brw_try_draw_prims( struct gl_context *ctx,
/* Bind all inputs, derive varying and size information:
*/
- brw_merge_inputs( brw, arrays );
+ brw_merge_inputs(brw, arrays);
brw->ib.ib = ib;
brw->ctx.NewDriverState |= BRW_NEW_INDICES;
@@ -553,15 +550,17 @@ retry:
return;
}
-void brw_draw_prims( struct gl_context *ctx,
- const struct _mesa_prim *prims,
- GLuint nr_prims,
- const struct _mesa_index_buffer *ib,
- GLboolean index_bounds_valid,
- GLuint min_index,
- GLuint max_index,
- struct gl_transform_feedback_object *unused_tfb_object,
- struct gl_buffer_object *indirect )
+void
+brw_draw_prims(struct gl_context *ctx,
+ const struct _mesa_prim *prims,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
+ GLuint min_index,
+ GLuint max_index,
+ struct gl_transform_feedback_object *unused_tfb_object,
+ unsigned stream,
+ struct gl_buffer_object *indirect)
{
struct brw_context *brw = brw_context(ctx);
const struct gl_client_array **arrays = ctx->Array._DrawArrays;
@@ -582,11 +581,11 @@ void brw_draw_prims( struct gl_context *ctx,
*/
if (ctx->RenderMode != GL_RENDER) {
perf_debug("%s render mode not supported in hardware\n",
- _mesa_lookup_enum_by_nr(ctx->RenderMode));
+ _mesa_enum_to_string(ctx->RenderMode));
_swsetup_Wakeup(ctx);
_tnl_wakeup(ctx);
_tnl_draw_prims(ctx, prims, nr_prims, ib,
- index_bounds_valid, min_index, max_index, NULL, NULL);
+ index_bounds_valid, min_index, max_index, NULL, 0, NULL);
return;
}
@@ -604,26 +603,28 @@ void brw_draw_prims( struct gl_context *ctx,
* manage it. swrast doesn't support our featureset, so we can't fall back
* to it.
*/
- brw_try_draw_prims(ctx, arrays, prims, nr_prims, ib, min_index, max_index, indirect);
+ brw_try_draw_prims(ctx, arrays, prims, nr_prims, ib, min_index, max_index,
+ indirect);
}
-void brw_draw_init( struct brw_context *brw )
+void
+brw_draw_init(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct vbo_context *vbo = vbo_context(ctx);
- int i;
/* Register our drawing function:
*/
vbo->draw_prims = brw_draw_prims;
- for (i = 0; i < VERT_ATTRIB_MAX; i++)
+ for (int i = 0; i < VERT_ATTRIB_MAX; i++)
brw->vb.inputs[i].buffer = -1;
brw->vb.nr_buffers = 0;
brw->vb.nr_enabled = 0;
}
-void brw_draw_destroy( struct brw_context *brw )
+void
+brw_draw_destroy(struct brw_context *brw)
{
int i;
diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h
index fc83dcdd0bb..f994726f5b6 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.h
+++ b/src/mesa/drivers/dri/i965/brw_draw.h
@@ -34,7 +34,7 @@
struct brw_context;
-void brw_draw_prims( struct gl_context *ctx,
+void brw_draw_prims(struct gl_context *ctx,
const struct _mesa_prim *prims,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
@@ -42,6 +42,7 @@ void brw_draw_prims( struct gl_context *ctx,
GLuint min_index,
GLuint max_index,
struct gl_transform_feedback_object *unused_tfb_object,
+ unsigned stream,
struct gl_buffer_object *indirect );
void brw_draw_init( struct brw_context *brw );
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 320e40e1007..cbfd5855410 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -40,7 +40,7 @@
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
-static GLuint double_types[5] = {
+static const GLuint double_types[5] = {
0,
BRW_SURFACEFORMAT_R64_FLOAT,
BRW_SURFACEFORMAT_R64G64_FLOAT,
@@ -48,7 +48,7 @@ static GLuint double_types[5] = {
BRW_SURFACEFORMAT_R64G64B64A64_FLOAT
};
-static GLuint float_types[5] = {
+static const GLuint float_types[5] = {
0,
BRW_SURFACEFORMAT_R32_FLOAT,
BRW_SURFACEFORMAT_R32G32_FLOAT,
@@ -56,7 +56,7 @@ static GLuint float_types[5] = {
BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
};
-static GLuint half_float_types[5] = {
+static const GLuint half_float_types[5] = {
0,
BRW_SURFACEFORMAT_R16_FLOAT,
BRW_SURFACEFORMAT_R16G16_FLOAT,
@@ -64,7 +64,7 @@ static GLuint half_float_types[5] = {
BRW_SURFACEFORMAT_R16G16B16A16_FLOAT
};
-static GLuint fixed_point_types[5] = {
+static const GLuint fixed_point_types[5] = {
0,
BRW_SURFACEFORMAT_R32_SFIXED,
BRW_SURFACEFORMAT_R32G32_SFIXED,
@@ -72,7 +72,7 @@ static GLuint fixed_point_types[5] = {
BRW_SURFACEFORMAT_R32G32B32A32_SFIXED,
};
-static GLuint uint_types_direct[5] = {
+static const GLuint uint_types_direct[5] = {
0,
BRW_SURFACEFORMAT_R32_UINT,
BRW_SURFACEFORMAT_R32G32_UINT,
@@ -80,7 +80,7 @@ static GLuint uint_types_direct[5] = {
BRW_SURFACEFORMAT_R32G32B32A32_UINT
};
-static GLuint uint_types_norm[5] = {
+static const GLuint uint_types_norm[5] = {
0,
BRW_SURFACEFORMAT_R32_UNORM,
BRW_SURFACEFORMAT_R32G32_UNORM,
@@ -88,7 +88,7 @@ static GLuint uint_types_norm[5] = {
BRW_SURFACEFORMAT_R32G32B32A32_UNORM
};
-static GLuint uint_types_scale[5] = {
+static const GLuint uint_types_scale[5] = {
0,
BRW_SURFACEFORMAT_R32_USCALED,
BRW_SURFACEFORMAT_R32G32_USCALED,
@@ -96,7 +96,7 @@ static GLuint uint_types_scale[5] = {
BRW_SURFACEFORMAT_R32G32B32A32_USCALED
};
-static GLuint int_types_direct[5] = {
+static const GLuint int_types_direct[5] = {
0,
BRW_SURFACEFORMAT_R32_SINT,
BRW_SURFACEFORMAT_R32G32_SINT,
@@ -104,7 +104,7 @@ static GLuint int_types_direct[5] = {
BRW_SURFACEFORMAT_R32G32B32A32_SINT
};
-static GLuint int_types_norm[5] = {
+static const GLuint int_types_norm[5] = {
0,
BRW_SURFACEFORMAT_R32_SNORM,
BRW_SURFACEFORMAT_R32G32_SNORM,
@@ -112,7 +112,7 @@ static GLuint int_types_norm[5] = {
BRW_SURFACEFORMAT_R32G32B32A32_SNORM
};
-static GLuint int_types_scale[5] = {
+static const GLuint int_types_scale[5] = {
0,
BRW_SURFACEFORMAT_R32_SSCALED,
BRW_SURFACEFORMAT_R32G32_SSCALED,
@@ -120,7 +120,7 @@ static GLuint int_types_scale[5] = {
BRW_SURFACEFORMAT_R32G32B32A32_SSCALED
};
-static GLuint ushort_types_direct[5] = {
+static const GLuint ushort_types_direct[5] = {
0,
BRW_SURFACEFORMAT_R16_UINT,
BRW_SURFACEFORMAT_R16G16_UINT,
@@ -128,7 +128,7 @@ static GLuint ushort_types_direct[5] = {
BRW_SURFACEFORMAT_R16G16B16A16_UINT
};
-static GLuint ushort_types_norm[5] = {
+static const GLuint ushort_types_norm[5] = {
0,
BRW_SURFACEFORMAT_R16_UNORM,
BRW_SURFACEFORMAT_R16G16_UNORM,
@@ -136,7 +136,7 @@ static GLuint ushort_types_norm[5] = {
BRW_SURFACEFORMAT_R16G16B16A16_UNORM
};
-static GLuint ushort_types_scale[5] = {
+static const GLuint ushort_types_scale[5] = {
0,
BRW_SURFACEFORMAT_R16_USCALED,
BRW_SURFACEFORMAT_R16G16_USCALED,
@@ -144,7 +144,7 @@ static GLuint ushort_types_scale[5] = {
BRW_SURFACEFORMAT_R16G16B16A16_USCALED
};
-static GLuint short_types_direct[5] = {
+static const GLuint short_types_direct[5] = {
0,
BRW_SURFACEFORMAT_R16_SINT,
BRW_SURFACEFORMAT_R16G16_SINT,
@@ -152,7 +152,7 @@ static GLuint short_types_direct[5] = {
BRW_SURFACEFORMAT_R16G16B16A16_SINT
};
-static GLuint short_types_norm[5] = {
+static const GLuint short_types_norm[5] = {
0,
BRW_SURFACEFORMAT_R16_SNORM,
BRW_SURFACEFORMAT_R16G16_SNORM,
@@ -160,7 +160,7 @@ static GLuint short_types_norm[5] = {
BRW_SURFACEFORMAT_R16G16B16A16_SNORM
};
-static GLuint short_types_scale[5] = {
+static const GLuint short_types_scale[5] = {
0,
BRW_SURFACEFORMAT_R16_SSCALED,
BRW_SURFACEFORMAT_R16G16_SSCALED,
@@ -168,7 +168,7 @@ static GLuint short_types_scale[5] = {
BRW_SURFACEFORMAT_R16G16B16A16_SSCALED
};
-static GLuint ubyte_types_direct[5] = {
+static const GLuint ubyte_types_direct[5] = {
0,
BRW_SURFACEFORMAT_R8_UINT,
BRW_SURFACEFORMAT_R8G8_UINT,
@@ -176,7 +176,7 @@ static GLuint ubyte_types_direct[5] = {
BRW_SURFACEFORMAT_R8G8B8A8_UINT
};
-static GLuint ubyte_types_norm[5] = {
+static const GLuint ubyte_types_norm[5] = {
0,
BRW_SURFACEFORMAT_R8_UNORM,
BRW_SURFACEFORMAT_R8G8_UNORM,
@@ -184,7 +184,7 @@ static GLuint ubyte_types_norm[5] = {
BRW_SURFACEFORMAT_R8G8B8A8_UNORM
};
-static GLuint ubyte_types_scale[5] = {
+static const GLuint ubyte_types_scale[5] = {
0,
BRW_SURFACEFORMAT_R8_USCALED,
BRW_SURFACEFORMAT_R8G8_USCALED,
@@ -192,7 +192,7 @@ static GLuint ubyte_types_scale[5] = {
BRW_SURFACEFORMAT_R8G8B8A8_USCALED
};
-static GLuint byte_types_direct[5] = {
+static const GLuint byte_types_direct[5] = {
0,
BRW_SURFACEFORMAT_R8_SINT,
BRW_SURFACEFORMAT_R8G8_SINT,
@@ -200,7 +200,7 @@ static GLuint byte_types_direct[5] = {
BRW_SURFACEFORMAT_R8G8B8A8_SINT
};
-static GLuint byte_types_norm[5] = {
+static const GLuint byte_types_norm[5] = {
0,
BRW_SURFACEFORMAT_R8_SNORM,
BRW_SURFACEFORMAT_R8G8_SNORM,
@@ -208,7 +208,7 @@ static GLuint byte_types_norm[5] = {
BRW_SURFACEFORMAT_R8G8B8A8_SNORM
};
-static GLuint byte_types_scale[5] = {
+static const GLuint byte_types_scale[5] = {
0,
BRW_SURFACEFORMAT_R8_SSCALED,
BRW_SURFACEFORMAT_R8G8_SSCALED,
@@ -230,7 +230,7 @@ brw_get_vertex_surface_type(struct brw_context *brw,
if (unlikely(INTEL_DEBUG & DEBUG_VERTS))
fprintf(stderr, "type %s size %d normalized %d\n",
- _mesa_lookup_enum_by_nr(glarray->Type),
+ _mesa_enum_to_string(glarray->Type),
glarray->Size, glarray->Normalized);
if (glarray->Integer) {
@@ -604,14 +604,15 @@ brw_prepare_shader_draw_parameters(struct brw_context *brw)
/**
* Emit a VERTEX_BUFFER_STATE entry (part of 3DSTATE_VERTEX_BUFFERS).
*/
-static void
+static uint32_t *
emit_vertex_buffer_state(struct brw_context *brw,
unsigned buffer_nr,
drm_intel_bo *bo,
unsigned bo_ending_address,
unsigned bo_offset,
unsigned stride,
- unsigned step_rate)
+ unsigned step_rate,
+ uint32_t *__map)
{
struct gl_context *ctx = &brw->ctx;
uint32_t dw0;
@@ -643,9 +644,13 @@ emit_vertex_buffer_state(struct brw_context *brw,
OUT_BATCH(0);
}
OUT_BATCH(step_rate);
+
+ return __map;
}
+#define EMIT_VERTEX_BUFFER_STATE(...) __map = emit_vertex_buffer_state(__VA_ARGS__, __map)
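Illustrative only: a minimal model of the __map cursor-threading pattern introduced by emit_vertex_buffer_state() and the EMIT_VERTEX_BUFFER_STATE macro above. The emit_two_dwords helper is hypothetical; real driver code writes through the batchbuffer's OUT_BATCH machinery.

#include <assert.h>
#include <stdint.h>

/* The helper receives the current write pointer as its hidden last argument
 * and returns the advanced pointer, which the wrapper macro stores back into
 * __map. */
static uint32_t *
emit_two_dwords(uint32_t a, uint32_t b, uint32_t *__map)
{
   *__map++ = a;
   *__map++ = b;
   return __map;
}
#define EMIT_TWO_DWORDS(...) __map = emit_two_dwords(__VA_ARGS__, __map)

int main(void)
{
   uint32_t batch[4];
   uint32_t *__map = batch;
   EMIT_TWO_DWORDS(0x7820, 0);
   EMIT_TWO_DWORDS(1, 2);
   assert(__map == batch + 4 && batch[2] == 1 && batch[3] == 2);
   return 0;
}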
-static void brw_emit_vertices(struct brw_context *brw)
+static void
+brw_emit_vertices(struct brw_context *brw)
{
GLuint i;
@@ -704,14 +709,14 @@ static void brw_emit_vertices(struct brw_context *brw)
OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1));
for (i = 0; i < brw->vb.nr_buffers; i++) {
struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
- emit_vertex_buffer_state(brw, i, buffer->bo, buffer->bo->size - 1,
+ EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, buffer->bo->size - 1,
buffer->offset, buffer->stride,
buffer->step_rate);
}
if (brw->vs.prog_data->uses_vertexid) {
- emit_vertex_buffer_state(brw, brw->vb.nr_buffers,
+ EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers,
brw->draw.draw_params_bo,
brw->draw.draw_params_bo->size - 1,
brw->draw.draw_params_offset,
@@ -855,7 +860,8 @@ const struct brw_tracked_state brw_vertices = {
.emit = brw_emit_vertices,
};
-static void brw_upload_indices(struct brw_context *brw)
+static void
+brw_upload_indices(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
@@ -935,7 +941,8 @@ const struct brw_tracked_state brw_indices = {
.emit = brw_upload_indices,
};
-static void brw_emit_index_buffer(struct brw_context *brw)
+static void
+brw_emit_index_buffer(struct brw_context *brw)
{
const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
GLuint cut_index_setting;
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 0f536046f6f..4d397622fc1 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1584,8 +1584,8 @@ brw_ENDIF(struct brw_codegen *p)
}
if (devinfo->gen < 6) {
- brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
- brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, brw_imm_d(0x0));
} else if (devinfo->gen == 6) {
brw_set_dest(p, insn, brw_imm_w(0));
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 8984b4cb3ca..0e091ddc227 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -68,28 +68,6 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
assert(dst.file != IMM && dst.file != UNIFORM);
- /* If exec_size == 0, try to guess it from the registers. Since all
- * manner of things may use hardware registers, we first try to guess
- * based on GRF registers. If this fails, we will go ahead and take the
- * width from the destination register.
- */
- if (this->exec_size == 0) {
- if (dst.file == GRF) {
- this->exec_size = dst.width;
- } else {
- for (unsigned i = 0; i < sources; ++i) {
- if (src[i].file != GRF && src[i].file != ATTR)
- continue;
-
- if (this->exec_size <= 1)
- this->exec_size = src[i].width;
- assert(src[i].width == 1 || src[i].width == this->exec_size);
- }
- }
-
- if (this->exec_size == 0 && dst.file != BAD_FILE)
- this->exec_size = dst.width;
- }
assert(this->exec_size != 0);
this->conditional_mod = BRW_CONDITIONAL_NONE;
@@ -100,8 +78,8 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
case HW_REG:
case MRF:
case ATTR:
- this->regs_written =
- DIV_ROUND_UP(MAX2(dst.width * dst.stride, 1) * type_sz(dst.type), 32);
+ this->regs_written = DIV_ROUND_UP(dst.component_size(exec_size),
+ REG_SIZE);
break;
case BAD_FILE:
this->regs_written = 0;
@@ -126,9 +104,9 @@ fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size)
init(opcode, exec_size, reg_undef, NULL, 0);
}
-fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst)
+fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst)
{
- init(opcode, 0, dst, NULL, 0);
+ init(opcode, exec_size, dst, NULL, 0);
}
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
@@ -138,12 +116,6 @@ fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
init(opcode, exec_size, dst, src, 1);
}
-fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0)
-{
- const fs_reg src[1] = { src0 };
- init(opcode, 0, dst, src, 1);
-}
-
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
const fs_reg &src0, const fs_reg &src1)
{
@@ -151,13 +123,6 @@ fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
init(opcode, exec_size, dst, src, 2);
}
-fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
- const fs_reg &src1)
-{
- const fs_reg src[2] = { src0, src1 };
- init(opcode, 0, dst, src, 2);
-}
-
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
const fs_reg &src0, const fs_reg &src1, const fs_reg &src2)
{
@@ -165,19 +130,6 @@ fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
init(opcode, exec_size, dst, src, 3);
}
-fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
- const fs_reg &src1, const fs_reg &src2)
-{
- const fs_reg src[3] = { src0, src1, src2 };
- init(opcode, 0, dst, src, 3);
-}
-
-fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst,
- const fs_reg src[], unsigned sources)
-{
- init(opcode, 0, dst, src, sources);
-}
-
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
const fs_reg src[], unsigned sources)
{
@@ -236,7 +188,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
bld.ADD(vec4_offset, varying_offset, fs_reg(const_offset & ~3));
int scale = 1;
- if (devinfo->gen == 4 && dst.width == 8) {
+ if (devinfo->gen == 4 && bld.dispatch_width() == 8) {
/* Pre-gen5, we can either use a SIMD8 message that requires (header,
* u, v, r) as parameters, or we can just use the SIMD16 message
* consisting of (header, u). We choose the second, at the cost of a
@@ -251,10 +203,8 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
else
op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD;
- assert(dst.width % 8 == 0);
- int regs_written = 4 * (dst.width / 8) * scale;
- fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written),
- dst.type, dst.width);
+ int regs_written = 4 * (bld.dispatch_width() / 8) * scale;
+ fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written), dst.type);
fs_inst *inst = bld.emit(op, vec4_result, surf_index, vec4_offset);
inst->regs_written = regs_written;
@@ -264,10 +214,10 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
if (devinfo->gen == 4)
inst->mlen = 3;
else
- inst->mlen = 1 + dispatch_width / 8;
+ inst->mlen = 1 + bld.dispatch_width() / 8;
}
- bld.MOV(dst, offset(vec4_result, (const_offset & 3) * scale));
+ bld.MOV(dst, offset(vec4_result, bld, (const_offset & 3) * scale));
}
/**
@@ -358,10 +308,14 @@ fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const
for (int i = 0; i < this->sources; i++) {
reg.type = this->src[i].type;
- reg.width = this->src[i].width;
if (!this->src[i].equals(reg))
return false;
- reg = ::offset(reg, 1);
+
+ if (i < this->header_size) {
+ reg.reg_offset += 1;
+ } else {
+ reg.reg_offset += this->exec_size / 8;
+ }
}
return true;
@@ -408,8 +362,8 @@ fs_reg::fs_reg(float f)
init();
this->file = IMM;
this->type = BRW_REGISTER_TYPE_F;
+ this->stride = 0;
this->fixed_hw_reg.dw1.f = f;
- this->width = 1;
}
/** Immediate value constructor. */
@@ -418,8 +372,8 @@ fs_reg::fs_reg(int32_t i)
init();
this->file = IMM;
this->type = BRW_REGISTER_TYPE_D;
+ this->stride = 0;
this->fixed_hw_reg.dw1.d = i;
- this->width = 1;
}
/** Immediate value constructor. */
@@ -428,8 +382,8 @@ fs_reg::fs_reg(uint32_t u)
init();
this->file = IMM;
this->type = BRW_REGISTER_TYPE_UD;
+ this->stride = 0;
this->fixed_hw_reg.dw1.ud = u;
- this->width = 1;
}
/** Vector float immediate value constructor. */
@@ -460,7 +414,6 @@ fs_reg::fs_reg(struct brw_reg fixed_hw_reg)
this->file = HW_REG;
this->fixed_hw_reg = fixed_hw_reg;
this->type = fixed_hw_reg.type;
- this->width = 1 << fixed_hw_reg.width;
}
bool
@@ -475,7 +428,6 @@ fs_reg::equals(const fs_reg &r) const
abs == r.abs &&
!reladdr && !r.reladdr &&
memcmp(&fixed_hw_reg, &r.fixed_hw_reg, sizeof(fixed_hw_reg)) == 0 &&
- width == r.width &&
stride == r.stride);
}
@@ -494,6 +446,15 @@ fs_reg::is_contiguous() const
return stride == 1;
}
+unsigned
+fs_reg::component_size(unsigned width) const
+{
+ const unsigned stride = (file != HW_REG ? this->stride :
+ fixed_hw_reg.hstride == 0 ? 0 :
+ 1 << (fixed_hw_reg.hstride - 1));
+ return MAX2(width * stride, 1) * type_sz(type);
+}
+
int
fs_visitor::type_size(const struct glsl_type *type)
{
@@ -520,7 +481,10 @@ fs_visitor::type_size(const struct glsl_type *type)
return 0;
case GLSL_TYPE_ATOMIC_UINT:
return 0;
+ case GLSL_TYPE_SUBROUTINE:
+ return 1;
case GLSL_TYPE_IMAGE:
+ return BRW_IMAGE_PARAM_SIZE;
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
@@ -548,12 +512,12 @@ fs_visitor::get_timestamp(const fs_builder &bld)
0),
BRW_REGISTER_TYPE_UD));
- fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 4);
+ fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
/* We want to read the 3 fields we care about even if it's not enabled in
* the dispatch.
*/
- bld.exec_all().MOV(dst, ts);
+ bld.group(4, 0).exec_all().MOV(dst, ts);
/* The caller wants the low 32 bits of the timestamp. Since it's running
* at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds,
@@ -598,19 +562,21 @@ fs_visitor::emit_shader_time_end()
fs_reg start = shader_start_time;
start.negate = true;
- fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 1);
+ fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
diff.set_smear(0);
- ibld.ADD(diff, start, shader_end_time);
+
+ const fs_builder cbld = ibld.group(1, 0);
+ cbld.ADD(diff, start, shader_end_time);
/* If there were no instructions between the two timestamp gets, the diff
* is 2 cycles. Remove that overhead, so I can forget about that when
* trying to determine the time taken for single instructions.
*/
- ibld.ADD(diff, diff, fs_reg(-2u));
- SHADER_TIME_ADD(ibld, 0, diff);
- SHADER_TIME_ADD(ibld, 1, fs_reg(1u));
+ cbld.ADD(diff, diff, fs_reg(-2u));
+ SHADER_TIME_ADD(cbld, 0, diff);
+ SHADER_TIME_ADD(cbld, 1, fs_reg(1u));
ibld.emit(BRW_OPCODE_ELSE);
- SHADER_TIME_ADD(ibld, 2, fs_reg(1u));
+ SHADER_TIME_ADD(cbld, 2, fs_reg(1u));
ibld.emit(BRW_OPCODE_ENDIF);
}
@@ -695,50 +661,160 @@ bool
fs_inst::is_partial_write() const
{
return ((this->predicate && this->opcode != BRW_OPCODE_SEL) ||
- (this->dst.width * type_sz(this->dst.type)) < 32 ||
+ (this->exec_size * type_sz(this->dst.type)) < 32 ||
!this->dst.is_contiguous());
}
+unsigned
+fs_inst::components_read(unsigned i) const
+{
+ switch (opcode) {
+ case FS_OPCODE_LINTERP:
+ if (i == 0)
+ return 2;
+ else
+ return 1;
+
+ case FS_OPCODE_PIXEL_X:
+ case FS_OPCODE_PIXEL_Y:
+ assert(i == 0);
+ return 2;
+
+ case FS_OPCODE_FB_WRITE_LOGICAL:
+ assert(src[6].file == IMM);
+ /* First/second FB write color. */
+ if (i < 2)
+ return src[6].fixed_hw_reg.dw1.ud;
+ else
+ return 1;
+
+ case SHADER_OPCODE_TEX_LOGICAL:
+ case SHADER_OPCODE_TXD_LOGICAL:
+ case SHADER_OPCODE_TXF_LOGICAL:
+ case SHADER_OPCODE_TXL_LOGICAL:
+ case SHADER_OPCODE_TXS_LOGICAL:
+ case FS_OPCODE_TXB_LOGICAL:
+ case SHADER_OPCODE_TXF_CMS_LOGICAL:
+ case SHADER_OPCODE_TXF_UMS_LOGICAL:
+ case SHADER_OPCODE_TXF_MCS_LOGICAL:
+ case SHADER_OPCODE_LOD_LOGICAL:
+ case SHADER_OPCODE_TG4_LOGICAL:
+ case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
+ assert(src[8].file == IMM && src[9].file == IMM);
+ /* Texture coordinates. */
+ if (i == 0)
+ return src[8].fixed_hw_reg.dw1.ud;
+ /* Texture derivatives. */
+ else if ((i == 2 || i == 3) && opcode == SHADER_OPCODE_TXD_LOGICAL)
+ return src[9].fixed_hw_reg.dw1.ud;
+ /* Texture offset. */
+ else if (i == 7)
+ return 2;
+ else
+ return 1;
+
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
+ case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
+ assert(src[3].file == IMM);
+ /* Surface coordinates. */
+ if (i == 0)
+ return src[3].fixed_hw_reg.dw1.ud;
+ /* Surface operation source (ignored for reads). */
+ else if (i == 1)
+ return 0;
+ else
+ return 1;
+
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
+ assert(src[3].file == IMM &&
+ src[4].file == IMM);
+ /* Surface coordinates. */
+ if (i == 0)
+ return src[3].fixed_hw_reg.dw1.ud;
+ /* Surface operation source. */
+ else if (i == 1)
+ return src[4].fixed_hw_reg.dw1.ud;
+ else
+ return 1;
+
+ case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+ case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: {
+ assert(src[3].file == IMM &&
+ src[4].file == IMM);
+ const unsigned op = src[4].fixed_hw_reg.dw1.ud;
+ /* Surface coordinates. */
+ if (i == 0)
+ return src[3].fixed_hw_reg.dw1.ud;
+ /* Surface operation source. */
+ else if (i == 1 && op == BRW_AOP_CMPWR)
+ return 2;
+ else if (i == 1 && (op == BRW_AOP_INC || op == BRW_AOP_DEC ||
+ op == BRW_AOP_PREDEC))
+ return 0;
+ else
+ return 1;
+ }
+
+ default:
+ return 1;
+ }
+}
+
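A standalone sketch (plain C, not part of the patch) of how the new components_read() and component_size() helpers combine in the reworked regs_read() below: a source's register footprint is the number of logical components it supplies, times the per-component byte size at the instruction's execution width, rounded up to whole GRFs. The concrete numbers are assumed for illustration only, a SIMD16 logical texture instruction whose coordinate source carries three float components.

   /* Illustration only: mirrors the regs_read() arithmetic under assumed
    * values, it is not driver code. */
   #include <stdio.h>

   #define REG_SIZE 32                       /* one GRF in bytes */
   #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

   int main(void)
   {
      const unsigned exec_size = 16;          /* SIMD16 */
      const unsigned stride = 1, type_size = 4;

      /* component_size(exec_size) for an ordinary GRF source */
      const unsigned component_size = exec_size * stride * type_size; /* 64 B */

      /* components_read(): e.g. a three-component texture coordinate */
      const unsigned components = 3;

      printf("regs_read = %u\n",
             DIV_ROUND_UP(components * component_size, REG_SIZE));   /* 6 */
      return 0;
   }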
int
fs_inst::regs_read(int arg) const
{
- if (is_tex() && arg == 0 && src[0].file == GRF) {
- return mlen;
- } else if (opcode == FS_OPCODE_FB_WRITE && arg == 0) {
- return mlen;
- } else if (opcode == SHADER_OPCODE_URB_WRITE_SIMD8 && arg == 0) {
- return mlen;
- } else if (opcode == SHADER_OPCODE_UNTYPED_ATOMIC && arg == 0) {
- return mlen;
- } else if (opcode == SHADER_OPCODE_UNTYPED_SURFACE_READ && arg == 0) {
- return mlen;
- } else if (opcode == SHADER_OPCODE_UNTYPED_SURFACE_WRITE && arg == 0) {
- return mlen;
- } else if (opcode == SHADER_OPCODE_TYPED_ATOMIC && arg == 0) {
- return mlen;
- } else if (opcode == SHADER_OPCODE_TYPED_SURFACE_READ && arg == 0) {
- return mlen;
- } else if (opcode == SHADER_OPCODE_TYPED_SURFACE_WRITE && arg == 0) {
- return mlen;
- } else if (opcode == FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET && arg == 0) {
- return mlen;
- } else if (opcode == FS_OPCODE_LINTERP && arg == 0) {
- return exec_size / 4;
+ switch (opcode) {
+ case FS_OPCODE_FB_WRITE:
+ case SHADER_OPCODE_URB_WRITE_SIMD8:
+ case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+ case SHADER_OPCODE_TYPED_ATOMIC:
+ case SHADER_OPCODE_TYPED_SURFACE_READ:
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE:
+ case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
+ if (arg == 0)
+ return mlen;
+ break;
+
+ case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
+ /* The payload is actually stored in src1 */
+ if (arg == 1)
+ return mlen;
+ break;
+
+ case FS_OPCODE_LINTERP:
+ if (arg == 1)
+ return 1;
+ break;
+
+ case SHADER_OPCODE_LOAD_PAYLOAD:
+ if (arg < this->header_size)
+ return 1;
+ break;
+
+ case CS_OPCODE_CS_TERMINATE:
+ return 1;
+
+ default:
+ if (is_tex() && arg == 0 && src[0].file == GRF)
+ return mlen;
+ break;
}
switch (src[arg].file) {
case BAD_FILE:
+ return 0;
case UNIFORM:
case IMM:
return 1;
case GRF:
+ case ATTR:
case HW_REG:
- if (src[arg].stride == 0) {
- return 1;
- } else {
- int size = src[arg].width * src[arg].stride * type_sz(src[arg].type);
- return (size + 31) / 32;
- }
+ return DIV_ROUND_UP(components_read(arg) *
+ src[arg].component_size(exec_size),
+ REG_SIZE);
case MRF:
unreachable("MRF registers are not allowed as sources");
default:
@@ -832,7 +908,7 @@ fs_visitor::vgrf(const glsl_type *const type)
{
int reg_width = dispatch_width / 8;
return fs_reg(GRF, alloc.allocate(type_size(type) * reg_width),
- brw_type_for_base_type(type), dispatch_width);
+ brw_type_for_base_type(type));
}
/** Fixed HW reg constructor. */
@@ -842,14 +918,7 @@ fs_reg::fs_reg(enum register_file file, int reg)
this->file = file;
this->reg = reg;
this->type = BRW_REGISTER_TYPE_F;
-
- switch (file) {
- case UNIFORM:
- this->width = 1;
- break;
- default:
- this->width = 8;
- }
+ this->stride = (file == UNIFORM ? 0 : 1);
}
/** Fixed HW reg constructor. */
@@ -859,25 +928,7 @@ fs_reg::fs_reg(enum register_file file, int reg, enum brw_reg_type type)
this->file = file;
this->reg = reg;
this->type = type;
-
- switch (file) {
- case UNIFORM:
- this->width = 1;
- break;
- default:
- this->width = 8;
- }
-}
-
-/** Fixed HW reg constructor. */
-fs_reg::fs_reg(enum register_file file, int reg, enum brw_reg_type type,
- uint8_t width)
-{
- init();
- this->file = file;
- this->reg = reg;
- this->type = type;
- this->width = width;
+ this->stride = (file == UNIFORM ? 0 : 1);
}
/* For SIMD16, we need to follow from the uniform setup of SIMD8 dispatch.
@@ -892,6 +943,18 @@ fs_visitor::import_uniforms(fs_visitor *v)
this->param_size = v->param_size;
}
+void
+fs_visitor::setup_vector_uniform_values(const gl_constant_value *values, unsigned n)
+{
+ static const gl_constant_value zero = { 0 };
+
+ for (unsigned i = 0; i < n; ++i)
+ stage_prog_data->param[uniforms++] = &values[i];
+
+ for (unsigned i = n; i < 4; ++i)
+ stage_prog_data->param[uniforms++] = &zero;
+}
+
fs_reg *
fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
bool origin_upper_left)
@@ -908,23 +971,23 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
} else {
bld.ADD(wpos, this->pixel_x, fs_reg(0.5f));
}
- wpos = offset(wpos, 1);
+ wpos = offset(wpos, bld, 1);
/* gl_FragCoord.y */
if (!flip && pixel_center_integer) {
bld.MOV(wpos, this->pixel_y);
} else {
fs_reg pixel_y = this->pixel_y;
- float offset = (pixel_center_integer ? 0.0 : 0.5);
+ float offset = (pixel_center_integer ? 0.0f : 0.5f);
if (flip) {
pixel_y.negate = true;
- offset += key->drawable_height - 1.0;
+ offset += key->drawable_height - 1.0f;
}
bld.ADD(wpos, pixel_y, fs_reg(offset));
}
- wpos = offset(wpos, 1);
+ wpos = offset(wpos, bld, 1);
/* gl_FragCoord.z */
if (devinfo->gen >= 6) {
@@ -934,7 +997,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
interp_reg(VARYING_SLOT_POS, 2));
}
- wpos = offset(wpos, 1);
+ wpos = offset(wpos, bld, 1);
/* gl_FragCoord.w: Already set up in emit_interpolation */
bld.MOV(wpos, this->wpos_w);
@@ -1017,7 +1080,7 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name,
/* If there's no incoming setup data for this slot, don't
* emit interpolation for it.
*/
- attr = offset(attr, type->vector_elements);
+ attr = offset(attr, bld, type->vector_elements);
location++;
continue;
}
@@ -1032,7 +1095,7 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name,
interp = suboffset(interp, 3);
interp.type = attr.type;
bld.emit(FS_OPCODE_CINTERP, attr, fs_reg(interp));
- attr = offset(attr, 1);
+ attr = offset(attr, bld, 1);
}
} else {
/* Smooth/noperspective interpolation case. */
@@ -1070,7 +1133,7 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name,
if (devinfo->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) {
bld.MUL(attr, attr, this->pixel_w);
}
- attr = offset(attr, 1);
+ attr = offset(attr, bld, 1);
}
}
@@ -1178,7 +1241,7 @@ fs_visitor::emit_samplepos_setup()
}
/* Compute gl_SamplePosition.x */
compute_sample_position(pos, int_sample_x);
- pos = offset(pos, 1);
+ pos = offset(pos, abld, 1);
if (dispatch_width == 8) {
abld.MOV(int_sample_y, fs_reg(suboffset(sample_pos_reg, 1)));
} else {
@@ -1250,15 +1313,16 @@ fs_visitor::emit_sampleid_setup()
return reg;
}
-void
-fs_visitor::resolve_source_modifiers(fs_reg *src)
+fs_reg
+fs_visitor::resolve_source_modifiers(const fs_reg &src)
{
- if (!src->abs && !src->negate)
- return;
+ if (!src.abs && !src.negate)
+ return src;
- fs_reg temp = bld.vgrf(src->type);
- bld.MOV(temp, *src);
- *src = temp;
+ fs_reg temp = bld.vgrf(src.type);
+ bld.MOV(temp, src);
+
+ return temp;
}
void
@@ -1318,6 +1382,7 @@ fs_visitor::assign_curb_setup()
constant_nr / 8,
constant_nr % 8);
+ assert(inst->src[i].stride == 0);
inst->src[i].file = HW_REG;
inst->src[i].fixed_hw_reg = byte_offset(
retype(brw_reg, inst->src[i].type),
@@ -1867,11 +1932,12 @@ fs_visitor::demote_pull_constants()
continue;
/* Set up the annotation tracking for new generated instructions. */
- const fs_builder ibld = bld.annotate(inst->annotation, inst->ir)
- .at(block, inst);
+ const fs_builder ibld(this, block, inst);
fs_reg surf_index(stage_prog_data->binding_table.pull_constants_start);
fs_reg dst = vgrf(glsl_type::float_type);
+ assert(inst->src[i].stride == 0);
+
/* Generate a pull load into dst. */
if (inst->src[i].reladdr) {
VARYING_PULL_CONSTANT_LOAD(ibld, dst,
@@ -1879,9 +1945,11 @@ fs_visitor::demote_pull_constants()
*inst->src[i].reladdr,
pull_index);
inst->src[i].reladdr = NULL;
+ inst->src[i].stride = 1;
} else {
+ const fs_builder ubld = ibld.exec_all().group(8, 0);
fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
- ibld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+ ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
dst, surf_index, offset);
inst->src[i].set_smear(pull_index & 3);
}
@@ -1890,7 +1958,6 @@ fs_visitor::demote_pull_constants()
inst->src[i].file = GRF;
inst->src[i].reg = dst.reg;
inst->src[i].reg_offset = 0;
- inst->src[i].width = dispatch_width;
}
}
invalidate_live_intervals();
@@ -2158,11 +2225,11 @@ fs_visitor::opt_zero_samples()
* "Parameter 0 is required except for the sampleinfo message, which
* has no parameter 0"
*/
- while (inst->mlen > inst->header_size + dispatch_width / 8 &&
+ while (inst->mlen > inst->header_size + inst->exec_size / 8 &&
load_payload->src[(inst->mlen - inst->header_size) /
- (dispatch_width / 8) +
+ (inst->exec_size / 8) +
inst->header_size - 1].is_zero()) {
- inst->mlen -= dispatch_width / 8;
+ inst->mlen -= inst->exec_size / 8;
progress = true;
}
}
@@ -2199,7 +2266,8 @@ fs_visitor::opt_sampler_eot()
return false;
/* Look for a texturing instruction immediately before the final FB_WRITE. */
- fs_inst *fb_write = (fs_inst *) cfg->blocks[cfg->num_blocks - 1]->end();
+ bblock_t *block = cfg->blocks[cfg->num_blocks - 1];
+ fs_inst *fb_write = (fs_inst *)block->end();
assert(fb_write->eot);
assert(fb_write->opcode == FS_OPCODE_FB_WRITE);
@@ -2230,9 +2298,11 @@ fs_visitor::opt_sampler_eot()
assert(!tex_inst->eot); /* We can't get here twice */
assert((tex_inst->offset & (0xff << 24)) == 0);
+ const fs_builder ibld(this, block, tex_inst);
+
tex_inst->offset |= fb_write->target << 24;
tex_inst->eot = true;
- tex_inst->dst = bld.null_reg_ud();
+ tex_inst->dst = ibld.null_reg_ud();
fb_write->remove(cfg->blocks[cfg->num_blocks - 1]);
/* If a header is present, marking the eot is sufficient. Otherwise, we need
@@ -2244,8 +2314,8 @@ fs_visitor::opt_sampler_eot()
if (tex_inst->header_size != 0)
return true;
- fs_reg send_header = bld.vgrf(BRW_REGISTER_TYPE_F,
- load_payload->sources + 1);
+ fs_reg send_header = ibld.vgrf(BRW_REGISTER_TYPE_F,
+ load_payload->sources + 1);
fs_reg *new_sources =
ralloc_array(mem_ctx, fs_reg, load_payload->sources + 1);
@@ -2307,12 +2377,12 @@ fs_visitor::opt_register_renaming()
if (depth == 0 &&
inst->dst.file == GRF &&
- alloc.sizes[inst->dst.reg] == inst->dst.width / 8 &&
+ alloc.sizes[inst->dst.reg] == inst->exec_size / 8 &&
!inst->is_partial_write()) {
if (remap[dst] == -1) {
remap[dst] = dst;
} else {
- remap[dst] = alloc.allocate(inst->dst.width / 8);
+ remap[dst] = alloc.allocate(inst->exec_size / 8);
inst->dst.reg = remap[dst];
progress = true;
}
@@ -2443,7 +2513,7 @@ fs_visitor::compute_to_mrf()
/* Things returning more than one register would need us to
* understand coalescing out more than one MOV at a time.
*/
- if (scan_inst->regs_written > scan_inst->dst.width / 8)
+ if (scan_inst->regs_written > scan_inst->exec_size / 8)
break;
/* SEND instructions can't have MRF as a destination. */
@@ -2780,7 +2850,8 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
if (block->start() == scan_inst) {
for (int i = 0; i < write_len; i++) {
if (needs_dep[i])
- DEP_RESOLVE_MOV(bld.at(block, inst), first_write_grf + i);
+ DEP_RESOLVE_MOV(fs_builder(this, block, inst),
+ first_write_grf + i);
}
return;
}
@@ -2796,7 +2867,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
if (reg >= first_write_grf &&
reg < first_write_grf + write_len &&
needs_dep[reg - first_write_grf]) {
- DEP_RESOLVE_MOV(bld.at(block, inst), reg);
+ DEP_RESOLVE_MOV(fs_builder(this, block, inst), reg);
needs_dep[reg - first_write_grf] = false;
if (scan_inst->exec_size == 16)
needs_dep[reg - first_write_grf + 1] = false;
@@ -2843,7 +2914,8 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins
if (block->end() == scan_inst) {
for (int i = 0; i < write_len; i++) {
if (needs_dep[i])
- DEP_RESOLVE_MOV(bld.at(block, scan_inst), first_write_grf + i);
+ DEP_RESOLVE_MOV(fs_builder(this, block, scan_inst),
+ first_write_grf + i);
}
return;
}
@@ -2858,7 +2930,8 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins
scan_inst->dst.reg >= first_write_grf &&
scan_inst->dst.reg < first_write_grf + write_len &&
needs_dep[scan_inst->dst.reg - first_write_grf]) {
- DEP_RESOLVE_MOV(bld.at(block, scan_inst), scan_inst->dst.reg);
+ DEP_RESOLVE_MOV(fs_builder(this, block, scan_inst),
+ scan_inst->dst.reg);
needs_dep[scan_inst->dst.reg - first_write_grf] = false;
}
@@ -2928,14 +3001,18 @@ fs_visitor::lower_uniform_pull_constant_loads()
assert(const_offset_reg.file == IMM &&
const_offset_reg.type == BRW_REGISTER_TYPE_UD);
const_offset_reg.fixed_hw_reg.dw1.ud /= 4;
- fs_reg payload = fs_reg(GRF, alloc.allocate(1));
- /* We have to use a message header on Skylake to get SIMD4x2 mode.
- * Reserve space for the register.
- */
+ fs_reg payload, offset;
if (devinfo->gen >= 9) {
- payload.reg_offset++;
- alloc.sizes[payload.reg] = 2;
+ /* We have to use a message header on Skylake to get SIMD4x2
+ * mode. Reserve space for the register.
+ */
+ offset = payload = fs_reg(GRF, alloc.allocate(2));
+ offset.reg_offset++;
+ inst->mlen = 2;
+ } else {
+ offset = payload = fs_reg(GRF, alloc.allocate(1));
+ inst->mlen = 1;
}
/* This is actually going to be a MOV, but since only the first dword
@@ -2944,7 +3021,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
* by live variable analysis, or register allocation will explode.
*/
fs_inst *setup = new(mem_ctx) fs_inst(FS_OPCODE_SET_SIMD4X2_OFFSET,
- 8, payload, const_offset_reg);
+ 8, offset, const_offset_reg);
setup->force_writemask_all = true;
setup->ir = inst->ir;
@@ -2957,6 +3034,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
*/
inst->opcode = FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7;
inst->src[1] = payload;
+ inst->base_mrf = -1;
invalidate_live_intervals();
} else {
@@ -2982,28 +3060,24 @@ fs_visitor::lower_load_payload()
assert(inst->dst.file == MRF || inst->dst.file == GRF);
assert(inst->saturate == false);
-
- const fs_builder ibld = bld.group(inst->exec_size, inst->force_sechalf)
- .exec_all(inst->force_writemask_all)
- .at(block, inst);
fs_reg dst = inst->dst;
/* Get rid of COMPR4. We'll add it back in if we need it */
if (dst.file == MRF)
dst.reg = dst.reg & ~BRW_MRF_COMPR4;
- dst.width = 8;
+ const fs_builder ibld(this, block, inst);
+ const fs_builder hbld = ibld.exec_all().group(8, 0);
+
for (uint8_t i = 0; i < inst->header_size; i++) {
if (inst->src[i].file != BAD_FILE) {
fs_reg mov_dst = retype(dst, BRW_REGISTER_TYPE_UD);
fs_reg mov_src = retype(inst->src[i], BRW_REGISTER_TYPE_UD);
- mov_src.width = 8;
- ibld.exec_all().MOV(mov_dst, mov_src);
+ hbld.MOV(mov_dst, mov_src);
}
- dst = offset(dst, 1);
+ dst = offset(dst, hbld, 1);
}
- dst.width = inst->exec_size;
if (inst->dst.file == MRF && (inst->dst.reg & BRW_MRF_COMPR4) &&
inst->exec_size > 8) {
/* In this case, the payload portion of the LOAD_PAYLOAD isn't
@@ -3033,9 +3107,9 @@ fs_visitor::lower_load_payload()
} else {
/* Platform doesn't have COMPR4. We have to fake it */
fs_reg mov_dst = retype(dst, inst->src[i].type);
- mov_dst.width = 8;
ibld.half(0).MOV(mov_dst, half(inst->src[i], 0));
- ibld.half(1).MOV(offset(mov_dst, 4), half(inst->src[i], 1));
+ mov_dst.reg += 4;
+ ibld.half(1).MOV(mov_dst, half(inst->src[i], 1));
}
}
@@ -3060,7 +3134,7 @@ fs_visitor::lower_load_payload()
for (uint8_t i = inst->header_size; i < inst->sources; i++) {
if (inst->src[i].file != BAD_FILE)
ibld.MOV(retype(dst, inst->src[i].type), inst->src[i]);
- dst = offset(dst, 1);
+ dst = offset(dst, ibld, 1);
}
inst->remove(block);
@@ -3078,158 +3152,989 @@ fs_visitor::lower_integer_multiplication()
{
bool progress = false;
- /* Gen8's MUL instruction can do a 32-bit x 32-bit -> 32-bit operation
- * directly, but Cherryview cannot.
- */
- if (devinfo->gen >= 8 && !devinfo->is_cherryview)
- return false;
-
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
- if (inst->opcode != BRW_OPCODE_MUL ||
- inst->dst.is_accumulator() ||
- (inst->dst.type != BRW_REGISTER_TYPE_D &&
- inst->dst.type != BRW_REGISTER_TYPE_UD))
- continue;
+ const fs_builder ibld(this, block, inst);
- const fs_builder ibld = bld.at(block, inst);
+ if (inst->opcode == BRW_OPCODE_MUL) {
+ if (inst->dst.is_accumulator() ||
+ (inst->dst.type != BRW_REGISTER_TYPE_D &&
+ inst->dst.type != BRW_REGISTER_TYPE_UD))
+ continue;
- /* The MUL instruction isn't commutative. On Gen <= 6, only the low
- * 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of
- * src1 are used.
- *
- * If multiplying by an immediate value that fits in 16-bits, do a
- * single MUL instruction with that value in the proper location.
- */
- if (inst->src[1].file == IMM &&
- inst->src[1].fixed_hw_reg.dw1.ud < (1 << 16)) {
- if (devinfo->gen < 7) {
- fs_reg imm(GRF, alloc.allocate(dispatch_width / 8),
- inst->dst.type, dispatch_width);
- ibld.MOV(imm, inst->src[1]);
- ibld.MUL(inst->dst, imm, inst->src[0]);
- } else {
- ibld.MUL(inst->dst, inst->src[0], inst->src[1]);
- }
- } else {
- /* Gen < 8 (and some Gen8+ low-power parts like Cherryview) cannot
- * do 32-bit integer multiplication in one instruction, but instead
- * must do a sequence (which actually calculates a 64-bit result):
- *
- * mul(8) acc0<1>D g3<8,8,1>D g4<8,8,1>D
- * mach(8) null g3<8,8,1>D g4<8,8,1>D
- * mov(8) g2<1>D acc0<8,8,1>D
- *
- * But on Gen > 6, the ability to use second accumulator register
- * (acc1) for non-float data types was removed, preventing a simple
- * implementation in SIMD16. A 16-channel result can be calculated by
- * executing the three instructions twice in SIMD8, once with quarter
- * control of 1Q for the first eight channels and again with 2Q for
- * the second eight channels.
- *
- * Which accumulator register is implicitly accessed (by AccWrEnable
- * for instance) is determined by the quarter control. Unfortunately
- * Ivybridge (and presumably Baytrail) has a hardware bug in which an
- * implicit accumulator access by an instruction with 2Q will access
- * acc1 regardless of whether the data type is usable in acc1.
- *
- * Specifically, the 2Q mach(8) writes acc1 which does not exist for
- * integer data types.
- *
- * Since we only want the low 32-bits of the result, we can do two
- * 32-bit x 16-bit multiplies (like the mul and mach are doing), and
- * adjust the high result and add them (like the mach is doing):
- *
- * mul(8) g7<1>D g3<8,8,1>D g4.0<8,8,1>UW
- * mul(8) g8<1>D g3<8,8,1>D g4.1<8,8,1>UW
- * shl(8) g9<1>D g8<8,8,1>D 16D
- * add(8) g2<1>D g7<8,8,1>D g8<8,8,1>D
- *
- * We avoid the shl instruction by realizing that we only want to add
- * the low 16-bits of the "high" result to the high 16-bits of the
- * "low" result and using proper regioning on the add:
- *
- * mul(8) g7<1>D g3<8,8,1>D g4.0<16,8,2>UW
- * mul(8) g8<1>D g3<8,8,1>D g4.1<16,8,2>UW
- * add(8) g7.1<2>UW g7.1<16,8,2>UW g8<16,8,2>UW
- *
- * Since it does not use the (single) accumulator register, we can
- * schedule multi-component multiplications much better.
+ /* Gen8's MUL instruction can do a 32-bit x 32-bit -> 32-bit
+ * operation directly, but CHV/BXT cannot.
*/
+ if (devinfo->gen >= 8 &&
+ !devinfo->is_cherryview && !devinfo->is_broxton)
+ continue;
- if (inst->conditional_mod && inst->dst.is_null()) {
- inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
- inst->dst.type, dispatch_width);
- }
- fs_reg low = inst->dst;
- fs_reg high(GRF, alloc.allocate(dispatch_width / 8),
- inst->dst.type, dispatch_width);
+ if (inst->src[1].file == IMM &&
+ inst->src[1].fixed_hw_reg.dw1.ud < (1 << 16)) {
+ /* The MUL instruction isn't commutative. On Gen <= 6, only the low
+ * 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of
+ * src1 are used.
+ *
+ * If multiplying by an immediate value that fits in 16-bits, do a
+ * single MUL instruction with that value in the proper location.
+ */
+ if (devinfo->gen < 7) {
+ fs_reg imm(GRF, alloc.allocate(dispatch_width / 8),
+ inst->dst.type);
+ ibld.MOV(imm, inst->src[1]);
+ ibld.MUL(inst->dst, imm, inst->src[0]);
+ } else {
+ ibld.MUL(inst->dst, inst->src[0], inst->src[1]);
+ }
+ } else {
+ /* Gen < 8 (and some Gen8+ low-power parts like Cherryview) cannot
+ * do 32-bit integer multiplication in one instruction, but instead
+ * must do a sequence (which actually calculates a 64-bit result):
+ *
+ * mul(8) acc0<1>D g3<8,8,1>D g4<8,8,1>D
+ * mach(8) null g3<8,8,1>D g4<8,8,1>D
+ * mov(8) g2<1>D acc0<8,8,1>D
+ *
+ * But on Gen > 6, the ability to use second accumulator register
+ * (acc1) for non-float data types was removed, preventing a simple
+ * implementation in SIMD16. A 16-channel result can be calculated by
+ * executing the three instructions twice in SIMD8, once with quarter
+ * control of 1Q for the first eight channels and again with 2Q for
+ * the second eight channels.
+ *
+ * Which accumulator register is implicitly accessed (by AccWrEnable
+ * for instance) is determined by the quarter control. Unfortunately
+ * Ivybridge (and presumably Baytrail) has a hardware bug in which an
+ * implicit accumulator access by an instruction with 2Q will access
+ * acc1 regardless of whether the data type is usable in acc1.
+ *
+ * Specifically, the 2Q mach(8) writes acc1 which does not exist for
+ * integer data types.
+ *
+ * Since we only want the low 32-bits of the result, we can do two
+ * 32-bit x 16-bit multiplies (like the mul and mach are doing), and
+ * adjust the high result and add them (like the mach is doing):
+ *
+ * mul(8) g7<1>D g3<8,8,1>D g4.0<8,8,1>UW
+ * mul(8) g8<1>D g3<8,8,1>D g4.1<8,8,1>UW
+ * shl(8) g9<1>D g8<8,8,1>D 16D
+ * add(8) g2<1>D g7<8,8,1>D g8<8,8,1>D
+ *
+ * We avoid the shl instruction by realizing that we only want to add
+ * the low 16-bits of the "high" result to the high 16-bits of the
+ * "low" result and using proper regioning on the add:
+ *
+ * mul(8) g7<1>D g3<8,8,1>D g4.0<16,8,2>UW
+ * mul(8) g8<1>D g3<8,8,1>D g4.1<16,8,2>UW
+ * add(8) g7.1<2>UW g7.1<16,8,2>UW g8<16,8,2>UW
+ *
+ * Since it does not use the (single) accumulator register, we can
+ * schedule multi-component multiplications much better.
+ */
- if (devinfo->gen >= 7) {
- fs_reg src1_0_w = inst->src[1];
- fs_reg src1_1_w = inst->src[1];
+ if (inst->conditional_mod && inst->dst.is_null()) {
+ inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
+ inst->dst.type);
+ }
+ fs_reg low = inst->dst;
+ fs_reg high(GRF, alloc.allocate(dispatch_width / 8),
+ inst->dst.type);
+
+ if (devinfo->gen >= 7) {
+ fs_reg src1_0_w = inst->src[1];
+ fs_reg src1_1_w = inst->src[1];
- if (inst->src[1].file == IMM) {
- src1_0_w.fixed_hw_reg.dw1.ud &= 0xffff;
- src1_1_w.fixed_hw_reg.dw1.ud >>= 16;
+ if (inst->src[1].file == IMM) {
+ src1_0_w.fixed_hw_reg.dw1.ud &= 0xffff;
+ src1_1_w.fixed_hw_reg.dw1.ud >>= 16;
+ } else {
+ src1_0_w.type = BRW_REGISTER_TYPE_UW;
+ if (src1_0_w.stride != 0) {
+ assert(src1_0_w.stride == 1);
+ src1_0_w.stride = 2;
+ }
+
+ src1_1_w.type = BRW_REGISTER_TYPE_UW;
+ if (src1_1_w.stride != 0) {
+ assert(src1_1_w.stride == 1);
+ src1_1_w.stride = 2;
+ }
+ src1_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW);
+ }
+ ibld.MUL(low, inst->src[0], src1_0_w);
+ ibld.MUL(high, inst->src[0], src1_1_w);
} else {
- src1_0_w.type = BRW_REGISTER_TYPE_UW;
- if (src1_0_w.stride != 0) {
- assert(src1_0_w.stride == 1);
- src1_0_w.stride = 2;
+ fs_reg src0_0_w = inst->src[0];
+ fs_reg src0_1_w = inst->src[0];
+
+ src0_0_w.type = BRW_REGISTER_TYPE_UW;
+ if (src0_0_w.stride != 0) {
+ assert(src0_0_w.stride == 1);
+ src0_0_w.stride = 2;
}
- src1_1_w.type = BRW_REGISTER_TYPE_UW;
- if (src1_1_w.stride != 0) {
- assert(src1_1_w.stride == 1);
- src1_1_w.stride = 2;
+ src0_1_w.type = BRW_REGISTER_TYPE_UW;
+ if (src0_1_w.stride != 0) {
+ assert(src0_1_w.stride == 1);
+ src0_1_w.stride = 2;
}
- src1_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW);
- }
- ibld.MUL(low, inst->src[0], src1_0_w);
- ibld.MUL(high, inst->src[0], src1_1_w);
- } else {
- fs_reg src0_0_w = inst->src[0];
- fs_reg src0_1_w = inst->src[0];
+ src0_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW);
- src0_0_w.type = BRW_REGISTER_TYPE_UW;
- if (src0_0_w.stride != 0) {
- assert(src0_0_w.stride == 1);
- src0_0_w.stride = 2;
+ ibld.MUL(low, src0_0_w, inst->src[1]);
+ ibld.MUL(high, src0_1_w, inst->src[1]);
}
- src0_1_w.type = BRW_REGISTER_TYPE_UW;
- if (src0_1_w.stride != 0) {
- assert(src0_1_w.stride == 1);
- src0_1_w.stride = 2;
+ fs_reg dst = inst->dst;
+ dst.type = BRW_REGISTER_TYPE_UW;
+ dst.subreg_offset = 2;
+ dst.stride = 2;
+
+ high.type = BRW_REGISTER_TYPE_UW;
+ high.stride = 2;
+
+ low.type = BRW_REGISTER_TYPE_UW;
+ low.subreg_offset = 2;
+ low.stride = 2;
+
+ ibld.ADD(dst, low, high);
+
+ if (inst->conditional_mod) {
+ fs_reg null(retype(ibld.null_reg_f(), inst->dst.type));
+ set_condmod(inst->conditional_mod,
+ ibld.MOV(null, inst->dst));
}
- src0_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW);
+ }
- ibld.MUL(low, src0_0_w, inst->src[1]);
- ibld.MUL(high, src0_1_w, inst->src[1]);
+ } else if (inst->opcode == SHADER_OPCODE_MULH) {
+ /* Should have been lowered to 8-wide. */
+ assert(inst->exec_size <= 8);
+ const fs_reg acc = retype(brw_acc_reg(inst->exec_size),
+ inst->dst.type);
+ fs_inst *mul = ibld.MUL(acc, inst->src[0], inst->src[1]);
+ fs_inst *mach = ibld.MACH(inst->dst, inst->src[0], inst->src[1]);
+
+ if (devinfo->gen >= 8) {
+ /* Until Gen8, integer multiplies read 32-bits from one source,
+ * and 16-bits from the other, relying on the MACH instruction
+ * to generate the high bits of the result.
+ *
+ * On Gen8, the multiply instruction does a full 32x32-bit
+ * multiply, but in order to do a 64-bit multiply we can simulate
+ * the previous behavior and then use a MACH instruction.
+ *
+ * FINISHME: Don't use source modifiers on src1.
+ */
+ assert(mul->src[1].type == BRW_REGISTER_TYPE_D ||
+ mul->src[1].type == BRW_REGISTER_TYPE_UD);
+ mul->src[1].type = (type_is_signed(mul->src[1].type) ?
+ BRW_REGISTER_TYPE_W : BRW_REGISTER_TYPE_UW);
+ mul->src[1].stride *= 2;
+
+ } else if (devinfo->gen == 7 && !devinfo->is_haswell &&
+ inst->force_sechalf) {
+ /* Among other things the quarter control bits influence which
+ * accumulator register is used by the hardware for instructions
+ * that access the accumulator implicitly (e.g. MACH). A
+ * second-half instruction would normally map to acc1, which
+ * doesn't exist on Gen7 and up (the hardware does emulate it for
+ * floating-point instructions *only* by taking advantage of the
+ * extra precision of acc0 not normally used for floating point
+ * arithmetic).
+ *
+ * HSW and up are careful enough not to try to access an
+ * accumulator register that doesn't exist, but on earlier Gen7
+ * hardware we need to make sure that the quarter control bits are
+ * zero to avoid non-deterministic behaviour and emit an extra MOV
+ * to get the result masked correctly according to the current
+ * channel enables.
+ */
+ mach->force_sechalf = false;
+ mach->force_writemask_all = true;
+ mach->dst = ibld.vgrf(inst->dst.type);
+ ibld.MOV(inst->dst, mach->dst);
}
+ } else {
+ continue;
+ }
+
+ inst->remove(block);
+ progress = true;
+ }
+
+ if (progress)
+ invalidate_live_intervals();
+
+ return progress;
+}
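The comment in the MUL branch above leans on an arithmetic identity that is easy to check on the host: the low 32 bits of a 32x32 multiply can be assembled from two 32x16 partial products, where only the low 16 bits of the "high" product contribute, landing in the upper half of the "low" product. A minimal sketch in plain C (an illustration, not driver code):

   #include <assert.h>
   #include <stdint.h>

   static uint32_t mul_lowered(uint32_t a, uint32_t b)
   {
      uint32_t low  = a * (b & 0xffff);   /* mul g7, g3, g4.0<16,8,2>UW */
      uint32_t high = a * (b >> 16);      /* mul g8, g3, g4.1<16,8,2>UW */

      /* add g7.1<2>UW, g7.1<16,8,2>UW, g8<16,8,2>UW: only the low 16 bits
       * of "high" end up in the upper 16 bits of "low". */
      return low + (high << 16);
   }

   int main(void)
   {
      assert(mul_lowered(0xdeadbeef, 0x12345678) == 0xdeadbeefu * 0x12345678u);
      assert(mul_lowered(3, 7) == 21);
      return 0;
   }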
+
+static void
+setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key,
+ fs_reg *dst, fs_reg color, unsigned components)
+{
+ if (key->clamp_fragment_color) {
+ fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
+ assert(color.type == BRW_REGISTER_TYPE_F);
+
+ for (unsigned i = 0; i < components; i++)
+ set_saturate(true,
+ bld.MOV(offset(tmp, bld, i), offset(color, bld, i)));
+
+ color = tmp;
+ }
+
+ for (unsigned i = 0; i < components; i++)
+ dst[i] = offset(color, bld, i);
+}
+
+static void
+lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
+ const brw_wm_prog_data *prog_data,
+ const brw_wm_prog_key *key,
+ const fs_visitor::thread_payload &payload)
+{
+ assert(inst->src[6].file == IMM);
+ const brw_device_info *devinfo = bld.shader->devinfo;
+ const fs_reg &color0 = inst->src[0];
+ const fs_reg &color1 = inst->src[1];
+ const fs_reg &src0_alpha = inst->src[2];
+ const fs_reg &src_depth = inst->src[3];
+ const fs_reg &dst_depth = inst->src[4];
+ fs_reg sample_mask = inst->src[5];
+ const unsigned components = inst->src[6].fixed_hw_reg.dw1.ud;
+
+ /* We can potentially have a message length of up to 15, so we have to set
+ * base_mrf to either 0 or 1 in order to fit in m0..m15.
+ */
+ fs_reg sources[15];
+ int header_size = 2, payload_header_size;
+ unsigned length = 0;
+
+ /* From the Sandy Bridge PRM, volume 4, page 198:
+ *
+ * "Dispatched Pixel Enables. One bit per pixel indicating
+ * which pixels were originally enabled when the thread was
+ * dispatched. This field is only required for the end-of-
+ * thread message and on all dual-source messages."
+ */
+ if (devinfo->gen >= 6 &&
+ (devinfo->is_haswell || devinfo->gen >= 8 || !prog_data->uses_kill) &&
+ color1.file == BAD_FILE &&
+ key->nr_color_regions == 1) {
+ header_size = 0;
+ }
+
+ if (header_size != 0) {
+ assert(header_size == 2);
+ /* Allocate 2 registers for a header */
+ length += 2;
+ }
+
+ if (payload.aa_dest_stencil_reg) {
+ sources[length] = fs_reg(GRF, bld.shader->alloc.allocate(1));
+ bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha")
+ .MOV(sources[length],
+ fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)));
+ length++;
+ }
+
+ if (prog_data->uses_omask) {
+ sources[length] = fs_reg(GRF, bld.shader->alloc.allocate(1),
+ BRW_REGISTER_TYPE_UD);
+
+ /* Hand over gl_SampleMask. Only the lower 16 bits of each channel are
+ * relevant. Since it's unsigned single words, one vgrf is always
+ * 16-wide, but only the lower or upper 8 channels will be used by the
+ * hardware when doing a SIMD8 write, depending on whether we have
+ * selected the subspans for the first or second half respectively.
+ */
+ assert(sample_mask.file != BAD_FILE && type_sz(sample_mask.type) == 4);
+ sample_mask.type = BRW_REGISTER_TYPE_UW;
+ sample_mask.stride *= 2;
+
+ bld.exec_all().annotate("FB write oMask")
+ .MOV(half(retype(sources[length], BRW_REGISTER_TYPE_UW),
+ inst->force_sechalf),
+ sample_mask);
+ length++;
+ }
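The gl_SampleMask handling above relies on a regioning trick: retyping a dword-per-channel register to UW with a stride of 2 reads exactly the low 16 bits of every channel. A hypothetical host-side illustration of the same selection (plain C, little-endian assumed, not driver code):

   #include <stdint.h>
   #include <stdio.h>

   int main(void)
   {
      union {
         uint32_t ud[8];   /* one 32-bit gl_SampleMask value per channel */
         uint16_t uw[16];  /* the same storage viewed as 16-bit words */
      } mask = { .ud = { 0x0001000f, 0xffff0003, 5, 4, 3, 2, 1, 0 } };

      /* A UW region with stride 2 picks the low word of every dword,
       * i.e. the low 16 bits of each channel. */
      for (int ch = 0; ch < 8; ch++)
         printf("channel %d -> 0x%04x\n", ch, mask.uw[2 * ch]);
      return 0;
   }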
+
+ payload_header_size = length;
+
+ if (src0_alpha.file != BAD_FILE) {
+ /* FIXME: This is being passed at the wrong location in the payload and
+ * doesn't work when gl_SampleMask and MRTs are used simultaneously.
+ * It's supposed to be immediately before oMask but there seems to be no
+ * reasonable way to pass them in the correct order because LOAD_PAYLOAD
+ * requires header sources to form a contiguous segment at the beginning
+ * of the message and src0_alpha has per-channel semantics.
+ */
+ setup_color_payload(bld, key, &sources[length], src0_alpha, 1);
+ length++;
+ }
+
+ setup_color_payload(bld, key, &sources[length], color0, components);
+ length += 4;
+
+ if (color1.file != BAD_FILE) {
+ setup_color_payload(bld, key, &sources[length], color1, components);
+ length += 4;
+ }
+
+ if (src_depth.file != BAD_FILE) {
+ sources[length] = src_depth;
+ length++;
+ }
+
+ if (dst_depth.file != BAD_FILE) {
+ sources[length] = dst_depth;
+ length++;
+ }
+
+ fs_inst *load;
+ if (devinfo->gen >= 7) {
+ /* Send from the GRF */
+ fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F);
+ load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size);
+ payload.reg = bld.shader->alloc.allocate(load->regs_written);
+ load->dst = payload;
+
+ inst->src[0] = payload;
+ inst->resize_sources(1);
+ inst->base_mrf = -1;
+ } else {
+ /* Send from the MRF */
+ load = bld.LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F),
+ sources, length, payload_header_size);
+
+ /* On pre-SNB, we have to interlace the color values. LOAD_PAYLOAD
+ * will do this for us if we just give it a COMPR4 destination.
+ */
+ if (devinfo->gen < 6 && bld.dispatch_width() == 16)
+ load->dst.reg |= BRW_MRF_COMPR4;
+
+ inst->resize_sources(0);
+ inst->base_mrf = 1;
+ }
+
+ inst->opcode = FS_OPCODE_FB_WRITE;
+ inst->mlen = load->regs_written;
+ inst->header_size = header_size;
+}
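As a sanity check on the bookkeeping above, a worked example under assumed conditions: SIMD16, a single color attachment, no dual-source blending, no oMask, no source alpha and no depth sources, on hardware that can omit the two-register header. The payload is then just the four color components, each taking two registers at 16-wide, which is the familiar eight-register FB write message. A minimal sketch of that arithmetic (mirroring what the LOAD_PAYLOAD's regs_written works out to here; not driver code):

   #include <stdio.h>

   int main(void)
   {
      const unsigned dispatch_width   = 16;  /* SIMD16 */
      const unsigned header_size      = 0;   /* single RT, header omitted */
      const unsigned color_components = 4;   /* RGBA from color0 only */

      /* Each per-channel payload component occupies dispatch_width / 8 GRFs. */
      const unsigned mlen = header_size +
                            color_components * (dispatch_width / 8);

      printf("FB write mlen = %u\n", mlen);  /* 8 */
      return 0;
   }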
+
+static void
+lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op,
+ const fs_reg &coordinate,
+ const fs_reg &shadow_c,
+ const fs_reg &lod, const fs_reg &lod2,
+ const fs_reg &sampler,
+ unsigned coord_components,
+ unsigned grad_components)
+{
+ const bool has_lod = (op == SHADER_OPCODE_TXL || op == FS_OPCODE_TXB ||
+ op == SHADER_OPCODE_TXF || op == SHADER_OPCODE_TXS);
+ fs_reg msg_begin(MRF, 1, BRW_REGISTER_TYPE_F);
+ fs_reg msg_end = msg_begin;
+
+ /* g0 header. */
+ msg_end = offset(msg_end, bld.group(8, 0), 1);
+
+ for (unsigned i = 0; i < coord_components; i++)
+ bld.MOV(retype(offset(msg_end, bld, i), coordinate.type),
+ offset(coordinate, bld, i));
+
+ msg_end = offset(msg_end, bld, coord_components);
+
+ /* Messages other than SAMPLE and RESINFO in SIMD16 and TXD in SIMD8
+ * require all three components to be present and zero if they are unused.
+ */
+ if (coord_components > 0 &&
+ (has_lod || shadow_c.file != BAD_FILE ||
+ (op == SHADER_OPCODE_TEX && bld.dispatch_width() == 8))) {
+ for (unsigned i = coord_components; i < 3; i++)
+ bld.MOV(offset(msg_end, bld, i), fs_reg(0.0f));
+
+ msg_end = offset(msg_end, bld, 3 - coord_components);
+ }
+
+ if (op == SHADER_OPCODE_TXD) {
+ /* TXD unsupported in SIMD16 mode. */
+ assert(bld.dispatch_width() == 8);
+
+ /* the slots for u and v are always present, but r is optional */
+ if (coord_components < 2)
+ msg_end = offset(msg_end, bld, 2 - coord_components);
+
+ /* P = u, v, r
+ * dPdx = dudx, dvdx, drdx
+ * dPdy = dudy, dvdy, drdy
+ *
+ * 1-arg: Does not exist.
+ *
+ * 2-arg: dudx dvdx dudy dvdy
+ * dPdx.x dPdx.y dPdy.x dPdy.y
+ * m4 m5 m6 m7
+ *
+ * 3-arg: dudx dvdx drdx dudy dvdy drdy
+ * dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z
+ * m5 m6 m7 m8 m9 m10
+ */
+ for (unsigned i = 0; i < grad_components; i++)
+ bld.MOV(offset(msg_end, bld, i), offset(lod, bld, i));
+
+ msg_end = offset(msg_end, bld, MAX2(grad_components, 2));
+
+ for (unsigned i = 0; i < grad_components; i++)
+ bld.MOV(offset(msg_end, bld, i), offset(lod2, bld, i));
+
+ msg_end = offset(msg_end, bld, MAX2(grad_components, 2));
+ }
+
+ if (has_lod) {
+ /* Bias/LOD with shadow comparator is unsupported in SIMD16 -- *Without*
+ * shadow comparator (including RESINFO) it's unsupported in SIMD8 mode.
+ */
+ assert(shadow_c.file != BAD_FILE ? bld.dispatch_width() == 8 :
+ bld.dispatch_width() == 16);
+
+ const brw_reg_type type =
+ (op == SHADER_OPCODE_TXF || op == SHADER_OPCODE_TXS ?
+ BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F);
+ bld.MOV(retype(msg_end, type), lod);
+ msg_end = offset(msg_end, bld, 1);
+ }
+
+ if (shadow_c.file != BAD_FILE) {
+ if (op == SHADER_OPCODE_TEX && bld.dispatch_width() == 8) {
+ /* There's no plain shadow compare message, so we use shadow
+ * compare with a bias of 0.0.
+ */
+ bld.MOV(msg_end, fs_reg(0.0f));
+ msg_end = offset(msg_end, bld, 1);
+ }
+
+ bld.MOV(msg_end, shadow_c);
+ msg_end = offset(msg_end, bld, 1);
+ }
+
+ inst->opcode = op;
+ inst->src[0] = reg_undef;
+ inst->src[1] = sampler;
+ inst->resize_sources(2);
+ inst->base_mrf = msg_begin.reg;
+ inst->mlen = msg_end.reg - msg_begin.reg;
+ inst->header_size = 1;
+}
+
+static void
+lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op,
+ fs_reg coordinate,
+ const fs_reg &shadow_c,
+ fs_reg lod, fs_reg lod2,
+ const fs_reg &sample_index,
+ const fs_reg &sampler,
+ const fs_reg &offset_value,
+ unsigned coord_components,
+ unsigned grad_components)
+{
+ fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F);
+ fs_reg msg_coords = message;
+ unsigned header_size = 0;
+
+ if (offset_value.file != BAD_FILE) {
+ /* The offsets set up by the visitor are in the m1 header, so we can't
+ * go headerless.
+ */
+ header_size = 1;
+ message.reg--;
+ }
+
+ for (unsigned i = 0; i < coord_components; i++) {
+ bld.MOV(retype(offset(msg_coords, bld, i), coordinate.type), coordinate);
+ coordinate = offset(coordinate, bld, 1);
+ }
+ fs_reg msg_end = offset(msg_coords, bld, coord_components);
+ fs_reg msg_lod = offset(msg_coords, bld, 4);
+
+ if (shadow_c.file != BAD_FILE) {
+ fs_reg msg_shadow = msg_lod;
+ bld.MOV(msg_shadow, shadow_c);
+ msg_lod = offset(msg_shadow, bld, 1);
+ msg_end = msg_lod;
+ }
+
+ switch (op) {
+ case SHADER_OPCODE_TXL:
+ case FS_OPCODE_TXB:
+ bld.MOV(msg_lod, lod);
+ msg_end = offset(msg_lod, bld, 1);
+ break;
+ case SHADER_OPCODE_TXD:
+ /**
+ * P = u, v, r
+ * dPdx = dudx, dvdx, drdx
+ * dPdy = dudy, dvdy, drdy
+ *
+ * Load up these values:
+ * - dudx dudy dvdx dvdy drdx drdy
+ * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z
+ */
+ msg_end = msg_lod;
+ for (unsigned i = 0; i < grad_components; i++) {
+ bld.MOV(msg_end, lod);
+ lod = offset(lod, bld, 1);
+ msg_end = offset(msg_end, bld, 1);
+
+ bld.MOV(msg_end, lod2);
+ lod2 = offset(lod2, bld, 1);
+ msg_end = offset(msg_end, bld, 1);
+ }
+ break;
+ case SHADER_OPCODE_TXS:
+ msg_lod = retype(msg_end, BRW_REGISTER_TYPE_UD);
+ bld.MOV(msg_lod, lod);
+ msg_end = offset(msg_lod, bld, 1);
+ break;
+ case SHADER_OPCODE_TXF:
+ msg_lod = offset(msg_coords, bld, 3);
+ bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod);
+ msg_end = offset(msg_lod, bld, 1);
+ break;
+ case SHADER_OPCODE_TXF_CMS:
+ msg_lod = offset(msg_coords, bld, 3);
+ /* lod */
+ bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u));
+ /* sample index */
+ bld.MOV(retype(offset(msg_lod, bld, 1), BRW_REGISTER_TYPE_UD), sample_index);
+ msg_end = offset(msg_lod, bld, 2);
+ break;
+ default:
+ break;
+ }
+
+ inst->opcode = op;
+ inst->src[0] = reg_undef;
+ inst->src[1] = sampler;
+ inst->resize_sources(2);
+ inst->base_mrf = message.reg;
+ inst->mlen = msg_end.reg - message.reg;
+ inst->header_size = header_size;
+
+ /* Message length > MAX_SAMPLER_MESSAGE_SIZE disallowed by hardware. */
+ assert(inst->mlen <= MAX_SAMPLER_MESSAGE_SIZE);
+}
- fs_reg dst = inst->dst;
- dst.type = BRW_REGISTER_TYPE_UW;
- dst.subreg_offset = 2;
- dst.stride = 2;
+static bool
+is_high_sampler(const struct brw_device_info *devinfo, const fs_reg &sampler)
+{
+ if (devinfo->gen < 8 && !devinfo->is_haswell)
+ return false;
+
+ return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16;
+}
- high.type = BRW_REGISTER_TYPE_UW;
- high.stride = 2;
+static void
+lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
+ fs_reg coordinate,
+ const fs_reg &shadow_c,
+ fs_reg lod, fs_reg lod2,
+ const fs_reg &sample_index,
+ const fs_reg &mcs, const fs_reg &sampler,
+ fs_reg offset_value,
+ unsigned coord_components,
+ unsigned grad_components)
+{
+ const brw_device_info *devinfo = bld.shader->devinfo;
+ int reg_width = bld.dispatch_width() / 8;
+ unsigned header_size = 0, length = 0;
+ fs_reg sources[MAX_SAMPLER_MESSAGE_SIZE];
+ for (unsigned i = 0; i < ARRAY_SIZE(sources); i++)
+ sources[i] = bld.vgrf(BRW_REGISTER_TYPE_F);
+
+ if (op == SHADER_OPCODE_TG4 || op == SHADER_OPCODE_TG4_OFFSET ||
+ offset_value.file != BAD_FILE ||
+ is_high_sampler(devinfo, sampler)) {
+ /* For general texture offsets (no txf workaround), we need a header to
+ * put them in. Note that we're only reserving space for it in the
+ * message payload as it will be initialized implicitly by the
+ * generator.
+ *
+ * TG4 needs to place its channel select in the header, for interaction
+ * with ARB_texture_swizzle. The sampler index is only 4-bits, so for
+ * larger sampler numbers we need to offset the Sampler State Pointer in
+ * the header.
+ */
+ header_size = 1;
+ sources[0] = fs_reg();
+ length++;
+ }
- low.type = BRW_REGISTER_TYPE_UW;
- low.subreg_offset = 2;
- low.stride = 2;
+ if (shadow_c.file != BAD_FILE) {
+ bld.MOV(sources[length], shadow_c);
+ length++;
+ }
- ibld.ADD(dst, low, high);
+ bool coordinate_done = false;
- if (inst->conditional_mod) {
- fs_reg null(retype(ibld.null_reg_f(), inst->dst.type));
- set_condmod(inst->conditional_mod,
- ibld.MOV(null, inst->dst));
+ /* The sampler can only meaningfully compute LOD for fragment shader
+ * messages. For all other stages, we change the opcode to TXL and
+ * hardcode the LOD to 0.
+ */
+ if (bld.shader->stage != MESA_SHADER_FRAGMENT &&
+ op == SHADER_OPCODE_TEX) {
+ op = SHADER_OPCODE_TXL;
+ lod = fs_reg(0.0f);
+ }
+
+ /* Set up the LOD info */
+ switch (op) {
+ case FS_OPCODE_TXB:
+ case SHADER_OPCODE_TXL:
+ bld.MOV(sources[length], lod);
+ length++;
+ break;
+ case SHADER_OPCODE_TXD:
+ /* TXD should have been lowered in SIMD16 mode. */
+ assert(bld.dispatch_width() == 8);
+
+ /* Load dPdx and the coordinate together:
+ * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
+ */
+ for (unsigned i = 0; i < coord_components; i++) {
+ bld.MOV(sources[length], coordinate);
+ coordinate = offset(coordinate, bld, 1);
+ length++;
+
+ /* For cube map array, the coordinate is (u,v,r,ai) but there are
+ * only derivatives for (u, v, r).
+ */
+ if (i < grad_components) {
+ bld.MOV(sources[length], lod);
+ lod = offset(lod, bld, 1);
+ length++;
+
+ bld.MOV(sources[length], lod2);
+ lod2 = offset(lod2, bld, 1);
+ length++;
}
}
- inst->remove(block);
+ coordinate_done = true;
+ break;
+ case SHADER_OPCODE_TXS:
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod);
+ length++;
+ break;
+ case SHADER_OPCODE_TXF:
+ /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r.
+ * On Gen9 they are u, v, lod, r
+ */
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
+ coordinate = offset(coordinate, bld, 1);
+ length++;
+
+ if (devinfo->gen >= 9) {
+ if (coord_components >= 2) {
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
+ coordinate = offset(coordinate, bld, 1);
+ }
+ length++;
+ }
+
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod);
+ length++;
+
+ for (unsigned i = devinfo->gen >= 9 ? 2 : 1; i < coord_components; i++) {
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
+ coordinate = offset(coordinate, bld, 1);
+ length++;
+ }
+
+ coordinate_done = true;
+ break;
+ case SHADER_OPCODE_TXF_CMS:
+ case SHADER_OPCODE_TXF_UMS:
+ case SHADER_OPCODE_TXF_MCS:
+ if (op == SHADER_OPCODE_TXF_UMS || op == SHADER_OPCODE_TXF_CMS) {
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index);
+ length++;
+ }
+
+ if (op == SHADER_OPCODE_TXF_CMS) {
+ /* Data from the multisample control surface. */
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs);
+ length++;
+ }
+
+ /* There is no offsetting for this message; just copy in the integer
+ * texture coordinates.
+ */
+ for (unsigned i = 0; i < coord_components; i++) {
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
+ coordinate = offset(coordinate, bld, 1);
+ length++;
+ }
+
+ coordinate_done = true;
+ break;
+ case SHADER_OPCODE_TG4_OFFSET:
+ /* gather4_po_c should have been lowered in SIMD16 mode. */
+ assert(bld.dispatch_width() == 8 || shadow_c.file == BAD_FILE);
+
+ /* More crazy intermixing */
+ for (unsigned i = 0; i < 2; i++) { /* u, v */
+ bld.MOV(sources[length], coordinate);
+ coordinate = offset(coordinate, bld, 1);
+ length++;
+ }
+
+ for (unsigned i = 0; i < 2; i++) { /* offu, offv */
+ bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value);
+ offset_value = offset(offset_value, bld, 1);
+ length++;
+ }
+
+ if (coord_components == 3) { /* r if present */
+ bld.MOV(sources[length], coordinate);
+ coordinate = offset(coordinate, bld, 1);
+ length++;
+ }
+
+ coordinate_done = true;
+ break;
+ default:
+ break;
+ }
+
+ /* Set up the coordinate (except for cases where it was done above) */
+ if (!coordinate_done) {
+ for (unsigned i = 0; i < coord_components; i++) {
+ bld.MOV(sources[length], coordinate);
+ coordinate = offset(coordinate, bld, 1);
+ length++;
+ }
+ }
+
+ int mlen;
+ if (reg_width == 2)
+ mlen = length * reg_width - header_size;
+ else
+ mlen = length * reg_width;
+
+ const fs_reg src_payload = fs_reg(GRF, bld.shader->alloc.allocate(mlen),
+ BRW_REGISTER_TYPE_F);
+ bld.LOAD_PAYLOAD(src_payload, sources, length, header_size);
+
+ /* Generate the SEND. */
+ inst->opcode = op;
+ inst->src[0] = src_payload;
+ inst->src[1] = sampler;
+ inst->resize_sources(2);
+ inst->base_mrf = -1;
+ inst->mlen = mlen;
+ inst->header_size = header_size;
+
+ /* Message length > MAX_SAMPLER_MESSAGE_SIZE disallowed by hardware. */
+ assert(inst->mlen <= MAX_SAMPLER_MESSAGE_SIZE);
+}
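A worked example of the mlen computation at the end of the Gen7+ path, with assumed parameters: a SIMD16 TG4 (gather4) with a two-component coordinate always carries a header, so length = 1 (header) + 2 (u, v) = 3; every payload element occupies two registers at 16-wide except the header, which only takes one, hence the '- header_size' correction. A small sketch (illustration only, not driver code):

   #include <stdio.h>

   int main(void)
   {
      const unsigned dispatch_width = 16;           /* SIMD16 */
      const unsigned reg_width      = dispatch_width / 8;
      const unsigned header_size    = 1;            /* TG4 needs a header */
      const unsigned length         = header_size + 2 /* u, v */;

      const unsigned mlen = (reg_width == 2 ?
                             length * reg_width - header_size :
                             length * reg_width);

      printf("gather4 SIMD16 mlen = %u\n", mlen);   /* 5 */
      return 0;
   }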
+
+static void
+lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
+{
+ const brw_device_info *devinfo = bld.shader->devinfo;
+ const fs_reg &coordinate = inst->src[0];
+ const fs_reg &shadow_c = inst->src[1];
+ const fs_reg &lod = inst->src[2];
+ const fs_reg &lod2 = inst->src[3];
+ const fs_reg &sample_index = inst->src[4];
+ const fs_reg &mcs = inst->src[5];
+ const fs_reg &sampler = inst->src[6];
+ const fs_reg &offset_value = inst->src[7];
+ assert(inst->src[8].file == IMM && inst->src[9].file == IMM);
+ const unsigned coord_components = inst->src[8].fixed_hw_reg.dw1.ud;
+ const unsigned grad_components = inst->src[9].fixed_hw_reg.dw1.ud;
+
+ if (devinfo->gen >= 7) {
+ lower_sampler_logical_send_gen7(bld, inst, op, coordinate,
+ shadow_c, lod, lod2, sample_index,
+ mcs, sampler, offset_value,
+ coord_components, grad_components);
+ } else if (devinfo->gen >= 5) {
+ lower_sampler_logical_send_gen5(bld, inst, op, coordinate,
+ shadow_c, lod, lod2, sample_index,
+ sampler, offset_value,
+ coord_components, grad_components);
+ } else {
+ lower_sampler_logical_send_gen4(bld, inst, op, coordinate,
+ shadow_c, lod, lod2, sampler,
+ coord_components, grad_components);
+ }
+}
+
+/**
+ * Initialize the header present in some typed and untyped surface
+ * messages.
+ */
+static fs_reg
+emit_surface_header(const fs_builder &bld, const fs_reg &sample_mask)
+{
+ fs_builder ubld = bld.exec_all().group(8, 0);
+ const fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+ ubld.MOV(dst, fs_reg(0));
+ ubld.MOV(component(dst, 7), sample_mask);
+ return dst;
+}
+
+static void
+lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
+ const fs_reg &sample_mask)
+{
+ /* Get the logical send arguments. */
+ const fs_reg &addr = inst->src[0];
+ const fs_reg &src = inst->src[1];
+ const fs_reg &surface = inst->src[2];
+ const UNUSED fs_reg &dims = inst->src[3];
+ const fs_reg &arg = inst->src[4];
+
+ /* Calculate the total number of components of the payload. */
+ const unsigned addr_sz = inst->components_read(0);
+ const unsigned src_sz = inst->components_read(1);
+ const unsigned header_sz = (sample_mask.file == BAD_FILE ? 0 : 1);
+ const unsigned sz = header_sz + addr_sz + src_sz;
+
+ /* Allocate space for the payload. */
+ fs_reg *const components = new fs_reg[sz];
+ const fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
+ unsigned n = 0;
+
+ /* Construct the payload. */
+ if (header_sz)
+ components[n++] = emit_surface_header(bld, sample_mask);
+
+ for (unsigned i = 0; i < addr_sz; i++)
+ components[n++] = offset(addr, bld, i);
+
+ for (unsigned i = 0; i < src_sz; i++)
+ components[n++] = offset(src, bld, i);
+
+ bld.LOAD_PAYLOAD(payload, components, sz, header_sz);
+
+ /* Update the original instruction. */
+ inst->opcode = op;
+ inst->mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8;
+ inst->header_size = header_sz;
+
+ inst->src[0] = payload;
+ inst->src[1] = surface;
+ inst->src[2] = arg;
+ inst->resize_sources(3);
+
+ delete[] components;
+}
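The surface-message payload size falls straight out of components_read(). With assumed values, a SIMD8 untyped atomic performing BRW_AOP_CMPWR on a single surface coordinate has addr_sz = 1, src_sz = 2 (compare value plus swap value) and a one-register sample-mask header, so mlen = 1 + (1 + 2) * 8 / 8 = 4. A minimal sketch of the same arithmetic (not driver code):

   #include <stdio.h>

   int main(void)
   {
      const unsigned exec_size = 8;   /* SIMD8 */
      const unsigned header_sz = 1;   /* sample-mask header present */
      const unsigned addr_sz   = 1;   /* one surface coordinate */
      const unsigned src_sz    = 2;   /* CMPWR: compare + swap operands */

      printf("untyped atomic mlen = %u\n",
             header_sz + (addr_sz + src_sz) * exec_size / 8);   /* 4 */
      return 0;
   }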
+
+bool
+fs_visitor::lower_logical_sends()
+{
+ bool progress = false;
+
+ foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+ const fs_builder ibld(this, block, inst);
+
+ switch (inst->opcode) {
+ case FS_OPCODE_FB_WRITE_LOGICAL:
+ assert(stage == MESA_SHADER_FRAGMENT);
+ lower_fb_write_logical_send(ibld, inst,
+ (const brw_wm_prog_data *)prog_data,
+ (const brw_wm_prog_key *)key,
+ payload);
+ break;
+
+ case SHADER_OPCODE_TEX_LOGICAL:
+ lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TEX);
+ break;
+
+ case SHADER_OPCODE_TXD_LOGICAL:
+ lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXD);
+ break;
+
+ case SHADER_OPCODE_TXF_LOGICAL:
+ lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF);
+ break;
+
+ case SHADER_OPCODE_TXL_LOGICAL:
+ lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXL);
+ break;
+
+ case SHADER_OPCODE_TXS_LOGICAL:
+ lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXS);
+ break;
+
+ case FS_OPCODE_TXB_LOGICAL:
+ lower_sampler_logical_send(ibld, inst, FS_OPCODE_TXB);
+ break;
+
+ case SHADER_OPCODE_TXF_CMS_LOGICAL:
+ lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS);
+ break;
+
+ case SHADER_OPCODE_TXF_UMS_LOGICAL:
+ lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_UMS);
+ break;
+
+ case SHADER_OPCODE_TXF_MCS_LOGICAL:
+ lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_MCS);
+ break;
+
+ case SHADER_OPCODE_LOD_LOGICAL:
+ lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_LOD);
+ break;
+
+ case SHADER_OPCODE_TG4_LOGICAL:
+ lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4);
+ break;
+
+ case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
+ lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TG4_OFFSET);
+ break;
+
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
+ lower_surface_logical_send(ibld, inst,
+ SHADER_OPCODE_UNTYPED_SURFACE_READ,
+ fs_reg(0xffff));
+ break;
+
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
+ lower_surface_logical_send(ibld, inst,
+ SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
+ ibld.sample_mask_reg());
+ break;
+
+ case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+ lower_surface_logical_send(ibld, inst,
+ SHADER_OPCODE_UNTYPED_ATOMIC,
+ ibld.sample_mask_reg());
+ break;
+
+ case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
+ lower_surface_logical_send(ibld, inst,
+ SHADER_OPCODE_TYPED_SURFACE_READ,
+ fs_reg(0xffff));
+ break;
+
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
+ lower_surface_logical_send(ibld, inst,
+ SHADER_OPCODE_TYPED_SURFACE_WRITE,
+ ibld.sample_mask_reg());
+ break;
+
+ case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
+ lower_surface_logical_send(ibld, inst,
+ SHADER_OPCODE_TYPED_ATOMIC,
+ ibld.sample_mask_reg());
+ break;
+
+ default:
+ continue;
+ }
+
progress = true;
}
@@ -3239,6 +4144,265 @@ fs_visitor::lower_integer_multiplication()
return progress;
}
+/**
+ * Get the closest native SIMD width supported by the hardware for instruction
+ * \p inst. The instruction will be left untouched by
+ * fs_visitor::lower_simd_width() if the returned value is equal to the
+ * original execution size.
+ */
+static unsigned
+get_lowered_simd_width(const struct brw_device_info *devinfo,
+ const fs_inst *inst)
+{
+ switch (inst->opcode) {
+ case BRW_OPCODE_MOV:
+ case BRW_OPCODE_SEL:
+ case BRW_OPCODE_NOT:
+ case BRW_OPCODE_AND:
+ case BRW_OPCODE_OR:
+ case BRW_OPCODE_XOR:
+ case BRW_OPCODE_SHR:
+ case BRW_OPCODE_SHL:
+ case BRW_OPCODE_ASR:
+ case BRW_OPCODE_CMP:
+ case BRW_OPCODE_CMPN:
+ case BRW_OPCODE_CSEL:
+ case BRW_OPCODE_F32TO16:
+ case BRW_OPCODE_F16TO32:
+ case BRW_OPCODE_BFREV:
+ case BRW_OPCODE_BFE:
+ case BRW_OPCODE_BFI1:
+ case BRW_OPCODE_BFI2:
+ case BRW_OPCODE_ADD:
+ case BRW_OPCODE_MUL:
+ case BRW_OPCODE_AVG:
+ case BRW_OPCODE_FRC:
+ case BRW_OPCODE_RNDU:
+ case BRW_OPCODE_RNDD:
+ case BRW_OPCODE_RNDE:
+ case BRW_OPCODE_RNDZ:
+ case BRW_OPCODE_LZD:
+ case BRW_OPCODE_FBH:
+ case BRW_OPCODE_FBL:
+ case BRW_OPCODE_CBIT:
+ case BRW_OPCODE_SAD2:
+ case BRW_OPCODE_MAD:
+ case BRW_OPCODE_LRP:
+ case SHADER_OPCODE_RCP:
+ case SHADER_OPCODE_RSQ:
+ case SHADER_OPCODE_SQRT:
+ case SHADER_OPCODE_EXP2:
+ case SHADER_OPCODE_LOG2:
+ case SHADER_OPCODE_POW:
+ case SHADER_OPCODE_INT_QUOTIENT:
+ case SHADER_OPCODE_INT_REMAINDER:
+ case SHADER_OPCODE_SIN:
+ case SHADER_OPCODE_COS: {
+ /* According to the PRMs:
+ * "A. In Direct Addressing mode, a source cannot span more than 2
+ * adjacent GRF registers.
+ * B. A destination cannot span more than 2 adjacent GRF registers."
+ *
+ * Look for the source or destination with the largest register region,
+ * which is the one that is going to limit the overall execution size of
+ * the instruction due to this rule.
+ */
+ unsigned reg_count = inst->regs_written;
+
+ for (unsigned i = 0; i < inst->sources; i++)
+ reg_count = MAX2(reg_count, (unsigned)inst->regs_read(i));
+
+ /* Calculate the maximum execution size of the instruction based on the
+ * factor by which it goes over the hardware limit of 2 GRFs.
+ */
+ return inst->exec_size / DIV_ROUND_UP(reg_count, 2);
+ }
+ case SHADER_OPCODE_MULH:
+ /* MULH is lowered to the MUL/MACH sequence using the accumulator, which
+ * is 8-wide on Gen7+.
+ */
+ return (devinfo->gen >= 7 ? 8 : inst->exec_size);
+
+ case FS_OPCODE_FB_WRITE_LOGICAL:
+ /* Gen6 doesn't support SIMD16 depth writes but we cannot handle them
+ * here.
+ */
+ assert(devinfo->gen != 6 || inst->src[3].file == BAD_FILE ||
+ inst->exec_size == 8);
+ /* Dual-source FB writes are unsupported in SIMD16 mode. */
+ return (inst->src[1].file != BAD_FILE ? 8 : inst->exec_size);
+
+ case SHADER_OPCODE_TXD_LOGICAL:
+ /* TXD is unsupported in SIMD16 mode. */
+ return 8;
+
+ case SHADER_OPCODE_TG4_OFFSET_LOGICAL: {
+ /* gather4_po_c is unsupported in SIMD16 mode. */
+ const fs_reg &shadow_c = inst->src[1];
+ return (shadow_c.file != BAD_FILE ? 8 : inst->exec_size);
+ }
+ case SHADER_OPCODE_TXL_LOGICAL:
+ case FS_OPCODE_TXB_LOGICAL: {
+ /* Gen4 doesn't have SIMD8 non-shadow-compare bias/LOD instructions, and
+ * Gen4-6 can't support TXL and TXB with shadow comparison in SIMD16
+ * mode because the message exceeds the maximum length of 11.
+ */
+ const fs_reg &shadow_c = inst->src[1];
+ if (devinfo->gen == 4 && shadow_c.file == BAD_FILE)
+ return 16;
+ else if (devinfo->gen < 7 && shadow_c.file != BAD_FILE)
+ return 8;
+ else
+ return inst->exec_size;
+ }
+ case SHADER_OPCODE_TXF_LOGICAL:
+ case SHADER_OPCODE_TXS_LOGICAL:
+ /* Gen4 doesn't have SIMD8 variants for the RESINFO and LD-with-LOD
+ * messages. Use SIMD16 instead.
+ */
+ if (devinfo->gen == 4)
+ return 16;
+ else
+ return inst->exec_size;
+
+ case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
+ case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
+ return 8;
+
+ default:
+ return inst->exec_size;
+ }
+}
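The 2-GRF region rule used in the first group of cases reduces to integer arithmetic on the widest register region. A stand-alone sketch, assuming reg_count is the largest number of GRFs spanned by the destination or any source (the helper name is illustrative only):

static unsigned
max_width_for_region(unsigned exec_size, unsigned reg_count)
{
   /* DIV_ROUND_UP(reg_count, 2) is the factor by which the widest region
    * exceeds the 2-GRF limit; shrink the execution size by that factor.
    */
   return exec_size / ((reg_count + 1) / 2);
}

For example, a SIMD16 instruction whose widest operand spans 4 GRFs is limited to 16 / 2 = 8 channels, while one that stays within 2 GRFs keeps its original width.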
+
+/**
+ * The \p rows array of registers represents a \p num_rows by \p num_columns
+ * matrix in row-major order, write it in column-major order into the register
+ * passed as destination. \p stride gives the separation between matrix
+ * elements in the input in fs_builder::dispatch_width() units.
+ */
+static void
+emit_transpose(const fs_builder &bld,
+ const fs_reg &dst, const fs_reg *rows,
+ unsigned num_rows, unsigned num_columns, unsigned stride)
+{
+ fs_reg *const components = new fs_reg[num_rows * num_columns];
+
+ for (unsigned i = 0; i < num_columns; ++i) {
+ for (unsigned j = 0; j < num_rows; ++j)
+ components[num_rows * i + j] = offset(rows[j], bld, stride * i);
+ }
+
+ bld.LOAD_PAYLOAD(dst, components, num_rows * num_columns, 0);
+
+ delete[] components;
+}
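The same index mapping on plain memory, as a hypothetical stand-alone helper: element i of row j in the row-major input lands at column-major position num_rows * i + j of the output.

static void
transpose_copy(int *dst, const int *const *rows,
               unsigned num_rows, unsigned num_columns, unsigned stride)
{
   for (unsigned i = 0; i < num_columns; i++)
      for (unsigned j = 0; j < num_rows; j++)
         dst[num_rows * i + j] = rows[j][stride * i];
}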
+
+bool
+fs_visitor::lower_simd_width()
+{
+ bool progress = false;
+
+ foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+ const unsigned lower_width = get_lowered_simd_width(devinfo, inst);
+
+ if (lower_width != inst->exec_size) {
+ /* Builder matching the original instruction. We may also need to
+ * emit an instruction of width larger than the original, so set the
+ * execution size of the builder to the larger of the two for now to
+ * be sure that both cases can be handled.
+ */
+ const fs_builder ibld = bld.at(block, inst)
+ .exec_all(inst->force_writemask_all)
+ .group(MAX2(inst->exec_size, lower_width),
+ inst->force_sechalf);
+
+ /* Split the copies into chunks of the execution width of either the
+ * original or the lowered instruction, whichever is lower.
+ */
+ const unsigned copy_width = MIN2(lower_width, inst->exec_size);
+ const unsigned n = inst->exec_size / copy_width;
+ const unsigned dst_size = inst->regs_written * REG_SIZE /
+ inst->dst.component_size(inst->exec_size);
+ fs_reg dsts[4];
+
+ assert(n > 0 && n <= ARRAY_SIZE(dsts) &&
+ !inst->writes_accumulator && !inst->mlen);
+
+ for (unsigned i = 0; i < n; i++) {
+ /* Emit a copy of the original instruction with the lowered width.
+ * If the EOT flag was set, throw it away except for the last
+ * instruction to avoid killing the thread prematurely.
+ */
+ fs_inst split_inst = *inst;
+ split_inst.exec_size = lower_width;
+ split_inst.eot = inst->eot && i == n - 1;
+
+ /* Select the correct channel enables for the i-th group, then
+ * transform the sources and destination and emit the lowered
+ * instruction.
+ */
+ const fs_builder lbld = ibld.group(lower_width, i);
+
+ for (unsigned j = 0; j < inst->sources; j++) {
+ if (inst->src[j].file != BAD_FILE &&
+ !is_uniform(inst->src[j])) {
+ /* Get the i-th copy_width-wide chunk of the source. */
+ const fs_reg src = horiz_offset(inst->src[j], copy_width * i);
+ const unsigned src_size = inst->components_read(j);
+
+ /* Use a trivial transposition to copy one out of every n
+ * copy_width-wide components of the register into a
+ * temporary passed as source to the lowered instruction.
+ */
+ split_inst.src[j] = lbld.vgrf(inst->src[j].type, src_size);
+ emit_transpose(lbld.group(copy_width, 0),
+ split_inst.src[j], &src, 1, src_size, n);
+ }
+ }
+
+ if (inst->regs_written) {
+ /* Allocate enough space to hold the result of the lowered
+ * instruction and fix up the number of registers written.
+ */
+ split_inst.dst = dsts[i] =
+ lbld.vgrf(inst->dst.type, dst_size);
+ split_inst.regs_written =
+ DIV_ROUND_UP(inst->regs_written * lower_width,
+ inst->exec_size);
+ }
+
+ lbld.emit(split_inst);
+ }
+
+ if (inst->regs_written) {
+ /* Distance between useful channels in the temporaries, skipping
+ * garbage if the lowered instruction is wider than the original.
+ */
+ const unsigned m = lower_width / copy_width;
+
+ /* Interleave the components of the result from the lowered
+ * instructions. We need to set exec_all() when copying more than
+ * one half per component, because LOAD_PAYLOAD (in terms of which
+ * emit_transpose is implemented) can only use the same channel
+ * enable signals for all of its non-header sources.
+ */
+ emit_transpose(ibld.exec_all(inst->exec_size > copy_width)
+ .group(copy_width, 0),
+ inst->dst, dsts, n, dst_size, m);
+ }
+
+ inst->remove(block);
+ progress = true;
+ }
+ }
+
+ if (progress)
+ invalidate_live_intervals();
+
+ return progress;
+}
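The split parameters the pass works with follow directly from the original and lowered widths. A small sketch with illustrative names (not from the tree), assuming the lowered width may be narrower or wider than the original as handled above:

struct simd_split {
   unsigned copy_width; /* channels copied per chunk */
   unsigned n;          /* number of lowered copies emitted */
};

static struct simd_split
split_params(unsigned exec_size, unsigned lower_width)
{
   const unsigned copy_width = lower_width < exec_size ? lower_width
                                                       : exec_size;
   const struct simd_split s = { copy_width, exec_size / copy_width };
   return s;
}

For example, a SIMD16 TXD lowered to SIMD8 gives copy_width = 8 and n = 2; the two SIMD8 results are then interleaved back into the SIMD16 destination by emit_transpose().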
+
void
fs_visitor::dump_instructions()
{
@@ -3316,9 +4480,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
switch (inst->dst.file) {
case GRF:
fprintf(file, "vgrf%d", inst->dst.reg);
- if (inst->dst.width != dispatch_width)
- fprintf(file, "@%d", inst->dst.width);
- if (alloc.sizes[inst->dst.reg] != inst->dst.width / 8 ||
+ if (alloc.sizes[inst->dst.reg] != inst->regs_written ||
inst->dst.subreg_offset)
fprintf(file, "+%d.%d",
inst->dst.reg_offset, inst->dst.subreg_offset);
@@ -3376,9 +4538,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
switch (inst->src[i].file) {
case GRF:
fprintf(file, "vgrf%d", inst->src[i].reg);
- if (inst->src[i].width != dispatch_width)
- fprintf(file, "@%d", inst->src[i].width);
- if (alloc.sizes[inst->src[i].reg] != inst->src[i].width / 8 ||
+ if (alloc.sizes[inst->src[i].reg] != (unsigned)inst->regs_read(i) ||
inst->src[i].subreg_offset)
fprintf(file, "+%d.%d", inst->src[i].reg_offset,
inst->src[i].subreg_offset);
@@ -3655,9 +4815,11 @@ fs_visitor::optimize()
* Ideally optimization passes wouldn't be part of the visitor so they
* wouldn't have access to bld at all, but they do, so just in case some
* pass forgets to ask for a location explicitly set it to NULL here to
- * make it trip.
+ * make it trip. The dispatch width is initialized to a bogus value to
+ * make sure that optimizations set the execution controls explicitly to
+ * match the code they are manipulating instead of relying on the defaults.
*/
- bld = bld.at(NULL, NULL);
+ bld = fs_builder(this, 64);
split_virtual_grfs();
@@ -3690,9 +4852,13 @@ fs_visitor::optimize()
backend_shader::dump_instructions(filename);
}
- bool progress;
+ bool progress = false;
int iteration = 0;
int pass_num = 0;
+
+ OPT(lower_simd_width);
+ OPT(lower_logical_sends);
+
do {
progress = false;
pass_num = 0;
@@ -3837,7 +5003,9 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes)
if (failed)
return false;
- emit_urb_writes(clip_planes);
+ compute_clip_distance(clip_planes);
+
+ emit_urb_writes();
if (shader_time_index >= 0)
emit_shader_time_end();
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 243baf688de..975183e990d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -62,6 +62,27 @@ namespace brw {
class fs_live_variables;
}
+static inline fs_reg
+offset(fs_reg reg, const brw::fs_builder& bld, unsigned delta)
+{
+ switch (reg.file) {
+ case BAD_FILE:
+ break;
+ case GRF:
+ case MRF:
+ case HW_REG:
+ case ATTR:
+ return byte_offset(reg,
+ delta * reg.component_size(bld.dispatch_width()));
+ case UNIFORM:
+ reg.reg_offset += delta;
+ break;
+ case IMM:
+ assert(delta == 0);
+ }
+ return reg;
+}
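For the register files handled by byte_offset() the computation is just the per-component size scaled by delta. A hypothetical sketch of that arithmetic, assuming a packed (stride 1) region whose per-component size is the dispatch width times the type size in bytes:

static unsigned
grf_offset_bytes(unsigned delta, unsigned width, unsigned type_sz)
{
   /* Each logical component of a packed region covers width * type_sz
    * bytes.
    */
   return delta * width * type_sz;
}

For example, component 2 of a packed SIMD16 float register starts 2 * 16 * 4 = 128 bytes, i.e. four 32-byte GRFs, into the allocation.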
+
/**
* The fragment shader front-end.
*
@@ -161,7 +182,9 @@ public:
void no16(const char *msg);
void lower_uniform_pull_constant_loads();
bool lower_load_payload();
+ bool lower_logical_sends();
bool lower_integer_multiplication();
+ bool lower_simd_width();
bool opt_combine_constants();
void emit_dummy_fs();
@@ -185,27 +208,6 @@ public:
void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
fs_reg rescale_texcoord(fs_reg coordinate, int coord_components,
bool is_rect, uint32_t sampler, int texunit);
- fs_inst *emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
- fs_reg coordinate, int coord_components,
- fs_reg shadow_comp,
- fs_reg lod, fs_reg lod2, int grad_components,
- uint32_t sampler);
- fs_inst *emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst,
- fs_reg coordinate, int vector_elements,
- fs_reg shadow_c, fs_reg lod,
- uint32_t sampler);
- fs_inst *emit_texture_gen5(ir_texture_opcode op, fs_reg dst,
- fs_reg coordinate, int coord_components,
- fs_reg shadow_comp,
- fs_reg lod, fs_reg lod2, int grad_components,
- fs_reg sample_index, uint32_t sampler,
- bool has_offset);
- fs_inst *emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
- fs_reg coordinate, int coord_components,
- fs_reg shadow_comp,
- fs_reg lod, fs_reg lod2, int grad_components,
- fs_reg sample_index, fs_reg mcs, fs_reg sampler,
- fs_reg offset_value);
void emit_texture(ir_texture_opcode op,
const glsl_type *dest_type,
fs_reg coordinate, int components,
@@ -220,9 +222,10 @@ public:
uint32_t sampler,
fs_reg sampler_reg,
int texunit);
- fs_reg emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler);
+ fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
+ const fs_reg &sampler);
void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
- void resolve_source_modifiers(fs_reg *src);
+ fs_reg resolve_source_modifiers(const fs_reg &src);
void emit_discard_jump();
bool try_replace_with_sel();
bool opt_peephole_sel();
@@ -249,6 +252,10 @@ public:
void nir_emit_block(nir_block *block);
void nir_emit_instr(nir_instr *instr);
void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr);
+ void nir_emit_load_const(const brw::fs_builder &bld,
+ nir_load_const_instr *instr);
+ void nir_emit_undef(const brw::fs_builder &bld,
+ nir_ssa_undef_instr *instr);
void nir_emit_intrinsic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
void nir_emit_texture(const brw::fs_builder &bld,
@@ -257,21 +264,19 @@ public:
nir_jump_instr *instr);
fs_reg get_nir_src(nir_src src);
fs_reg get_nir_dest(nir_dest dest);
+ fs_reg get_nir_image_deref(const nir_deref_var *deref);
void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
unsigned wr_mask);
bool optimize_frontfacing_ternary(nir_alu_instr *instr,
const fs_reg &result);
- void setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
- unsigned exec_size, bool use_2nd_half);
void emit_alpha_test();
fs_inst *emit_single_fb_write(const brw::fs_builder &bld,
fs_reg color1, fs_reg color2,
- fs_reg src0_alpha, unsigned components,
- unsigned exec_size, bool use_2nd_half = false);
+ fs_reg src0_alpha, unsigned components);
void emit_fb_writes();
- void emit_urb_writes(gl_clip_plane *clip_planes);
+ void emit_urb_writes();
void emit_cs_terminate();
void emit_barrier();
@@ -282,16 +287,13 @@ public:
int shader_time_subindex,
fs_reg value);
- void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
- fs_reg dst, fs_reg offset, fs_reg src0,
- fs_reg src1);
-
- void emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
- fs_reg offset);
-
fs_reg get_timestamp(const brw::fs_builder &bld);
struct brw_reg interp_reg(int location, int channel);
+
+ virtual void setup_vector_uniform_values(const gl_constant_value *values,
+ unsigned n);
+
int implied_mrf_writes(fs_inst *inst);
virtual void dump_instructions();
@@ -345,7 +347,7 @@ public:
unsigned max_grf;
fs_reg *nir_locals;
- fs_reg *nir_globals;
+ fs_reg *nir_ssa_values;
fs_reg nir_inputs;
fs_reg nir_outputs;
fs_reg *nir_system_values;
@@ -359,7 +361,7 @@ public:
fs_reg result;
/** Register numbers for thread payload fields. */
- struct {
+ struct thread_payload {
uint8_t source_depth_reg;
uint8_t source_w_reg;
uint8_t aa_dest_stencil_reg;
@@ -468,10 +470,6 @@ private:
struct brw_reg msg_data,
unsigned msg_type);
- void generate_set_omask(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg sample_mask);
-
void generate_set_sample_id(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index 58ac5980da5..34545eaa0fb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -64,6 +64,22 @@ namespace brw {
}
/**
+ * Construct an fs_builder that inserts instructions into \p shader
+ * before instruction \p inst in basic block \p block. The default
+ * execution controls and debug annotation are initialized from the
+ * instruction passed as argument.
+ */
+ fs_builder(backend_shader *shader, bblock_t *block, fs_inst *inst) :
+ shader(shader), block(block), cursor(inst),
+ _dispatch_width(inst->exec_size),
+ _group(inst->force_sechalf ? 8 : 0),
+ force_writemask_all(inst->force_writemask_all)
+ {
+ annotation.str = inst->annotation;
+ annotation.ir = inst->ir;
+ }
+
+ /**
* Construct an fs_builder that inserts instructions before \p cursor in
* basic block \p block, inheriting other code generation parameters
* from this.
@@ -99,8 +115,8 @@ namespace brw {
fs_builder
group(unsigned n, unsigned i) const
{
- assert(n <= dispatch_width() &&
- i < dispatch_width() / n);
+ assert(force_writemask_all ||
+ (n <= dispatch_width() && i < dispatch_width() / n));
fs_builder bld = *this;
bld._dispatch_width = n;
bld._group += i * n;
@@ -160,10 +176,15 @@ namespace brw {
dst_reg
vgrf(enum brw_reg_type type, unsigned n = 1) const
{
- return dst_reg(GRF, shader->alloc.allocate(
- DIV_ROUND_UP(n * type_sz(type) * dispatch_width(),
- REG_SIZE)),
- type, dispatch_width());
+ assert(dispatch_width() <= 32);
+
+ if (n > 0)
+ return dst_reg(GRF, shader->alloc.allocate(
+ DIV_ROUND_UP(n * type_sz(type) * dispatch_width(),
+ REG_SIZE)),
+ type);
+ else
+ return retype(null_reg_ud(), type);
}
/**
@@ -235,7 +256,7 @@ namespace brw {
instruction *
emit(enum opcode opcode, const dst_reg &dst) const
{
- return emit(instruction(opcode, dst));
+ return emit(instruction(opcode, dispatch_width(), dst));
}
/**
@@ -253,11 +274,11 @@ namespace brw {
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
return fix_math_instruction(
- emit(instruction(opcode, dst.width, dst,
+ emit(instruction(opcode, dispatch_width(), dst,
fix_math_operand(src0))));
default:
- return emit(instruction(opcode, dst.width, dst, src0));
+ return emit(instruction(opcode, dispatch_width(), dst, src0));
}
}
@@ -273,12 +294,12 @@ namespace brw {
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
return fix_math_instruction(
- emit(instruction(opcode, dst.width, dst,
+ emit(instruction(opcode, dispatch_width(), dst,
fix_math_operand(src0),
fix_math_operand(src1))));
default:
- return emit(instruction(opcode, dst.width, dst, src0, src1));
+ return emit(instruction(opcode, dispatch_width(), dst, src0, src1));
}
}
@@ -295,22 +316,35 @@ namespace brw {
case BRW_OPCODE_BFI2:
case BRW_OPCODE_MAD:
case BRW_OPCODE_LRP:
- return emit(instruction(opcode, dst.width, dst,
+ return emit(instruction(opcode, dispatch_width(), dst,
fix_3src_operand(src0),
fix_3src_operand(src1),
fix_3src_operand(src2)));
default:
- return emit(instruction(opcode, dst.width, dst, src0, src1, src2));
+ return emit(instruction(opcode, dispatch_width(), dst,
+ src0, src1, src2));
}
}
/**
+ * Create and insert an instruction with a variable number of sources
+ * into the program.
+ */
+ instruction *
+ emit(enum opcode opcode, const dst_reg &dst, const src_reg srcs[],
+ unsigned n) const
+ {
+ return emit(instruction(opcode, dispatch_width(), dst, srcs, n));
+ }
+
+ /**
* Insert a preallocated instruction into the program.
*/
instruction *
emit(instruction *inst) const
{
+ assert(inst->exec_size <= 32);
assert(inst->exec_size == dispatch_width() ||
force_writemask_all);
assert(_group == 0 || _group == 8);
@@ -349,17 +383,19 @@ namespace brw {
}
/**
- * Copy any live channel from \p src to the first channel of \p dst.
+ * Copy any live channel from \p src to the first channel of the result.
*/
- void
- emit_uniformize(const dst_reg &dst, const src_reg &src) const
+ src_reg
+ emit_uniformize(const src_reg &src) const
{
const fs_builder ubld = exec_all();
- const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD);
+ const dst_reg chan_index = component(vgrf(BRW_REGISTER_TYPE_UD), 0);
+ const dst_reg dst = component(vgrf(src.type), 0);
+
+ ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
+ ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index);
- ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, component(chan_index, 0));
- ubld.emit(SHADER_OPCODE_BROADCAST, component(dst, 0),
- src, component(chan_index, 0));
+ return src_reg(dst);
}
/**
@@ -515,20 +551,10 @@ namespace brw {
LOAD_PAYLOAD(const dst_reg &dst, const src_reg *src,
unsigned sources, unsigned header_size) const
{
- assert(dst.width % 8 == 0);
- instruction *inst = emit(instruction(SHADER_OPCODE_LOAD_PAYLOAD,
- dst.width, dst, src, sources));
+ instruction *inst = emit(SHADER_OPCODE_LOAD_PAYLOAD, dst, src, sources);
inst->header_size = header_size;
-
- for (unsigned i = 0; i < header_size; i++)
- assert(src[i].file != GRF ||
- src[i].width * type_sz(src[i].type) == 32);
- inst->regs_written = header_size;
-
- for (unsigned i = header_size; i < sources; ++i)
- assert(src[i].file != GRF ||
- src[i].width == dst.width);
- inst->regs_written += (sources - header_size) * (dst.width / 8);
+ inst->regs_written = header_size +
+ (sources - header_size) * (dispatch_width() / 8);
return inst;
}
@@ -626,8 +652,8 @@ namespace brw {
inst->resize_sources(1);
inst->src[0] = src0;
- at(block, inst).MOV(fs_reg(MRF, inst->base_mrf + 1, src1.type,
- dispatch_width()), src1);
+ at(block, inst).MOV(fs_reg(MRF, inst->base_mrf + 1, src1.type),
+ src1);
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index d0f61222e5a..a8883a35ef2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -243,6 +243,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
case ir_unop_find_msb:
case ir_unop_find_lsb:
case ir_unop_saturate:
+ case ir_unop_subroutine_to_int:
for (i = 0; i < vector_elements; i++) {
ir_rvalue *op0 = get_element(op_var[0], i);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
index 0af5a915c9f..c182232285e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
@@ -277,7 +277,7 @@ fs_visitor::opt_combine_constants()
*/
exec_node *n = (imm->inst ? imm->inst :
imm->block->last_non_control_flow_inst()->next);
- const fs_builder ibld = bld.at(imm->block, n).exec_all();
+ const fs_builder ibld = bld.at(imm->block, n).exec_all().group(1, 0);
ibld.MOV(reg, fs_reg(imm->val));
imm->reg = reg.reg;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index c92aae4b1d6..5445ad55670 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -339,6 +339,14 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
if (entry->src.stride * inst->src[arg].stride > 4)
return false;
+ /* Bail if the instruction type is larger than the execution type of the
+ * copy, which implies that each channel is reading multiple channels of the
+ * destination of the copy, and simply replacing the sources would give a
+ * program with different semantics.
+ */
+ if (type_sz(entry->dst.type) < type_sz(inst->src[arg].type))
+ return false;
+
/* Bail if the result of composing both strides cannot be expressed
* as another stride. This avoids, for example, trying to transform
* this:
@@ -388,17 +396,14 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
switch (entry->src.file) {
case UNIFORM:
- assert(entry->src.width == 1);
case BAD_FILE:
case HW_REG:
- inst->src[arg].width = entry->src.width;
inst->src[arg].reg_offset = entry->src.reg_offset;
inst->src[arg].subreg_offset = entry->src.subreg_offset;
break;
case ATTR:
case GRF:
{
- assert(entry->src.width % inst->src[arg].width == 0);
/* In this case, we'll just leave the width alone. The source
* register could have different widths depending on how it is
* being used. For instance, if only half of the register was
@@ -529,6 +534,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
case BRW_OPCODE_MACH:
case BRW_OPCODE_MUL:
+ case SHADER_OPCODE_MULH:
case BRW_OPCODE_ADD:
case BRW_OPCODE_OR:
case BRW_OPCODE_AND:
@@ -715,7 +721,6 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
entry->dst = inst->dst;
entry->dst.reg_offset = offset;
- entry->dst.width = effective_width;
entry->src = inst->src[i];
entry->regs_written = regs_written;
entry->opcode = inst->opcode;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 70f0217b93d..c7628dcc2f4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -61,6 +61,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
case BRW_OPCODE_CMPN:
case BRW_OPCODE_ADD:
case BRW_OPCODE_MUL:
+ case SHADER_OPCODE_MULH:
case BRW_OPCODE_FRC:
case BRW_OPCODE_RNDU:
case BRW_OPCODE_RNDD:
@@ -179,9 +180,7 @@ static void
create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
{
int written = inst->regs_written;
- int dst_width = inst->dst.width / 8;
- const fs_builder ubld = bld.group(inst->exec_size, inst->force_sechalf)
- .exec_all(inst->force_writemask_all);
+ int dst_width = inst->exec_size / 8;
fs_inst *copy;
if (written > dst_width) {
@@ -200,16 +199,15 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
payload = ralloc_array(bld.shader->mem_ctx, fs_reg, sources);
for (int i = 0; i < header_size; i++) {
payload[i] = src;
- payload[i].width = 8;
src.reg_offset++;
}
for (int i = header_size; i < sources; i++) {
payload[i] = src;
- src = offset(src, 1);
+ src = offset(src, bld, 1);
}
- copy = ubld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size);
+ copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size);
} else {
- copy = ubld.MOV(inst->dst, src);
+ copy = bld.MOV(inst->dst, src);
copy->src[0].negate = negate;
}
assert(copy->regs_written == written);
@@ -259,15 +257,14 @@ fs_visitor::opt_cse_local(bblock_t *block)
*/
bool no_existing_temp = entry->tmp.file == BAD_FILE;
if (no_existing_temp && !entry->generator->dst.is_null()) {
+ const fs_builder ibld = fs_builder(this, block, entry->generator)
+ .at(block, entry->generator->next);
int written = entry->generator->regs_written;
- assert((written * 8) % entry->generator->dst.width == 0);
entry->tmp = fs_reg(GRF, alloc.allocate(written),
- entry->generator->dst.type,
- entry->generator->dst.width);
+ entry->generator->dst.type);
- create_copy_instr(bld.at(block, entry->generator->next),
- entry->generator, entry->tmp, false);
+ create_copy_instr(ibld, entry->generator, entry->tmp, false);
entry->generator->dst = entry->tmp;
}
@@ -275,10 +272,10 @@ fs_visitor::opt_cse_local(bblock_t *block)
/* dest <- temp */
if (!inst->dst.is_null()) {
assert(inst->regs_written == entry->generator->regs_written);
- assert(inst->dst.width == entry->generator->dst.width);
assert(inst->dst.type == entry->tmp.type);
+ const fs_builder ibld(this, block, inst);
- create_copy_instr(bld.at(block, inst), inst, entry->tmp, negate);
+ create_copy_instr(ibld, inst, entry->tmp, negate);
}
/* Set our iterator so that next time through the loop inst->next
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 2ed0bac6fd9..c86ca043b63 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -48,7 +48,7 @@ static uint32_t brw_file_from_reg(fs_reg *reg)
}
static struct brw_reg
-brw_reg_from_fs_reg(fs_reg *reg)
+brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg)
{
struct brw_reg brw_reg;
@@ -57,10 +57,10 @@ brw_reg_from_fs_reg(fs_reg *reg)
case MRF:
if (reg->stride == 0) {
brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, 0);
- } else if (reg->width < 8) {
+ } else if (inst->exec_size < 8) {
brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
- brw_reg = stride(brw_reg, reg->width * reg->stride,
- reg->width, reg->stride);
+ brw_reg = stride(brw_reg, inst->exec_size * reg->stride,
+ inst->exec_size, reg->stride);
} else {
/* From the Haswell PRM:
*
@@ -79,6 +79,10 @@ brw_reg_from_fs_reg(fs_reg *reg)
brw_reg = byte_offset(brw_reg, reg->subreg_offset);
break;
case IMM:
+ assert(reg->stride == ((reg->type == BRW_REGISTER_TYPE_V ||
+ reg->type == BRW_REGISTER_TYPE_UV ||
+ reg->type == BRW_REGISTER_TYPE_VF) ? 1 : 0));
+
switch (reg->type) {
case BRW_REGISTER_TYPE_F:
brw_reg = brw_imm_f(reg->fixed_hw_reg.dw1.f);
@@ -217,11 +221,11 @@ fs_generator::fire_fb_write(fs_inst *inst,
if (inst->opcode == FS_OPCODE_REP_FB_WRITE)
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
else if (prog_data->dual_src_blend) {
- if (dispatch_width == 8 || !inst->eot)
+ if (!inst->force_sechalf)
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
else
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23;
- } else if (dispatch_width == 16)
+ } else if (inst->exec_size == 16)
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
else
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
@@ -414,7 +418,7 @@ fs_generator::generate_blorp_fb_write(fs_inst *inst)
brw_fb_WRITE(p,
16 /* dispatch_width */,
brw_message_reg(inst->base_mrf),
- brw_reg_from_fs_reg(&inst->src[0]),
+ brw_reg_from_fs_reg(inst, &inst->src[0]),
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE,
inst->target,
inst->mlen,
@@ -651,7 +655,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
/* Note that G45 and older determines shadow compare and dispatch width
* from message length for most messages.
*/
- if (dispatch_width == 8) {
+ if (inst->exec_size == 8) {
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
if (inst->shadow_compare) {
assert(inst->mlen == 6);
@@ -670,7 +674,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
break;
case FS_OPCODE_TXB:
if (inst->shadow_compare) {
- assert(dispatch_width == 8);
+ assert(inst->exec_size == 8);
assert(inst->mlen == 6);
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
} else {
@@ -681,7 +685,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
break;
case SHADER_OPCODE_TXL:
if (inst->shadow_compare) {
- assert(dispatch_width == 8);
+ assert(inst->exec_size == 8);
assert(inst->mlen == 6);
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
} else {
@@ -692,7 +696,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
break;
case SHADER_OPCODE_TXD:
/* There is no sample_d_c message; comparisons are done manually */
- assert(dispatch_width == 8);
+ assert(inst->exec_size == 8);
assert(inst->mlen == 7 || inst->mlen == 10);
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
break;
@@ -1054,7 +1058,6 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
struct brw_reg index,
struct brw_reg offset)
{
- assert(inst->mlen == 0);
assert(index.type == BRW_REGISTER_TYPE_UD);
assert(offset.file == BRW_GENERAL_REGISTER_FILE);
@@ -1069,12 +1072,10 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
struct brw_reg src = offset;
bool header_present = false;
- int mlen = 1;
if (devinfo->gen >= 9) {
/* Skylake requires a message header in order to use SIMD4x2 mode. */
- src = retype(brw_vec4_grf(offset.nr - 1, 0), BRW_REGISTER_TYPE_UD);
- mlen = 2;
+ src = retype(brw_vec4_grf(offset.nr, 0), BRW_REGISTER_TYPE_UD);
header_present = true;
brw_push_insn_state(p);
@@ -1105,7 +1106,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
0, /* LD message ignores sampler unit */
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
1, /* rlen */
- mlen,
+ inst->mlen,
header_present,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
0);
@@ -1135,7 +1136,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
0, /* LD message ignores sampler unit */
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
1, /* rlen */
- mlen,
+ inst->mlen,
header_present,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
0);
@@ -1363,37 +1364,6 @@ fs_generator::generate_set_simd4x2_offset(fs_inst *inst,
brw_pop_insn_state(p);
}
-/* Sets vstride=16, width=8, hstride=2 or vstride=0, width=1, hstride=0
- * (when mask is passed as a uniform) of register mask before moving it
- * to register dst.
- */
-void
-fs_generator::generate_set_omask(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg mask)
-{
- bool stride_8_8_1 =
- (mask.vstride == BRW_VERTICAL_STRIDE_8 &&
- mask.width == BRW_WIDTH_8 &&
- mask.hstride == BRW_HORIZONTAL_STRIDE_1);
-
- bool stride_0_1_0 = has_scalar_region(mask);
-
- assert(stride_8_8_1 || stride_0_1_0);
- assert(dst.type == BRW_REGISTER_TYPE_UW);
-
- brw_push_insn_state(p);
- brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
- brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-
- if (stride_8_8_1) {
- brw_MOV(p, dst, retype(stride(mask, 16, 8, 2), dst.type));
- } else if (stride_0_1_0) {
- brw_MOV(p, dst, retype(mask, dst.type));
- }
- brw_pop_insn_state(p);
-}
-
/* Sets vstride=1, width=4, hstride=0 of register src1 during
* the ADD instruction.
*/
@@ -1563,7 +1533,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset);
for (unsigned int i = 0; i < inst->sources; i++) {
- src[i] = brw_reg_from_fs_reg(&inst->src[i]);
+ src[i] = brw_reg_from_fs_reg(inst, &inst->src[i]);
/* The accumulator result appears to get used for the
* conditional modifier generation. When negating a UD
@@ -1575,7 +1545,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
inst->src[i].type != BRW_REGISTER_TYPE_UD ||
!inst->src[i].negate);
}
- dst = brw_reg_from_fs_reg(&inst->dst);
+ dst = brw_reg_from_fs_reg(inst, &inst->dst);
brw_set_default_predicate_control(p, inst->predicate);
brw_set_default_predicate_inverse(p, inst->predicate_inverse);
@@ -1604,7 +1574,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
/* If the instruction writes to more than one register, it needs to
* be a "compressed" instruction on Gen <= 5.
*/
- if (inst->exec_size * inst->dst.stride * type_sz(inst->dst.type) > 32)
+ if (inst->dst.component_size(inst->exec_size) > REG_SIZE)
brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
else
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
@@ -1872,7 +1842,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
break;
case BRW_OPCODE_DO:
- brw_DO(p, BRW_EXECUTE_8);
+ brw_DO(p, dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
break;
case BRW_OPCODE_BREAK:
@@ -2019,19 +1989,15 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
break;
case SHADER_OPCODE_UNTYPED_ATOMIC:
- assert(src[1].file == BRW_IMMEDIATE_VALUE &&
- src[2].file == BRW_IMMEDIATE_VALUE);
+ assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_atomic(p, dst, src[0], src[1], src[2].dw1.ud,
inst->mlen, !inst->dst.is_null());
- brw_mark_surface_used(prog_data, src[1].dw1.ud);
break;
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
- assert(src[1].file == BRW_IMMEDIATE_VALUE &&
- src[2].file == BRW_IMMEDIATE_VALUE);
+ assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_read(p, dst, src[0], src[1],
inst->mlen, src[2].dw1.ud);
- brw_mark_surface_used(prog_data, src[1].dw1.ud);
break;
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
@@ -2073,10 +2039,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
brw_broadcast(p, dst, src[0], src[1]);
break;
- case FS_OPCODE_SET_OMASK:
- generate_set_omask(inst, dst, src[0]);
- break;
-
case FS_OPCODE_SET_SAMPLE_ID:
generate_set_sample_id(inst, dst, src[0], src[1]);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index 502161d5128..19aec92fad1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -204,27 +204,9 @@ fs_live_variables::compute_live_variables()
while (cont) {
cont = false;
- foreach_block (block, cfg) {
+ foreach_block_reverse (block, cfg) {
struct block_data *bd = &block_data[block->num];
- /* Update livein */
- for (int i = 0; i < bitset_words; i++) {
- BITSET_WORD new_livein = (bd->use[i] |
- (bd->liveout[i] &
- ~bd->def[i]));
- if (new_livein & ~bd->livein[i]) {
- bd->livein[i] |= new_livein;
- cont = true;
- }
- }
- BITSET_WORD new_livein = (bd->flag_use[0] |
- (bd->flag_liveout[0] &
- ~bd->flag_def[0]));
- if (new_livein & ~bd->flag_livein[0]) {
- bd->flag_livein[0] |= new_livein;
- cont = true;
- }
-
/* Update liveout */
foreach_list_typed(bblock_link, child_link, link, &block->children) {
struct block_data *child_bd = &block_data[child_link->block->num];
@@ -244,6 +226,24 @@ fs_live_variables::compute_live_variables()
cont = true;
}
}
+
+ /* Update livein */
+ for (int i = 0; i < bitset_words; i++) {
+ BITSET_WORD new_livein = (bd->use[i] |
+ (bd->liveout[i] &
+ ~bd->def[i]));
+ if (new_livein & ~bd->livein[i]) {
+ bd->livein[i] |= new_livein;
+ cont = true;
+ }
+ }
+ BITSET_WORD new_livein = (bd->flag_use[0] |
+ (bd->flag_liveout[0] &
+ ~bd->flag_def[0]));
+ if (new_livein & ~bd->flag_livein[0]) {
+ bd->flag_livein[0] |= new_livein;
+ cont = true;
+ }
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 4d98b048433..93a36cc03bf 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -24,8 +24,10 @@
#include "glsl/ir.h"
#include "glsl/ir_optimization.h"
#include "glsl/nir/glsl_to_nir.h"
+#include "main/shaderimage.h"
#include "program/prog_to_nir.h"
#include "brw_fs.h"
+#include "brw_fs_surface_builder.h"
#include "brw_nir.h"
using namespace brw;
@@ -38,31 +40,11 @@ fs_visitor::emit_nir_code()
/* emit the arrays used for inputs and outputs - load/store intrinsics will
* be converted to reads/writes of these arrays
*/
-
- if (nir->num_inputs > 0) {
- nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_inputs);
- nir_setup_inputs(nir);
- }
-
- if (nir->num_outputs > 0) {
- nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs);
- nir_setup_outputs(nir);
- }
-
- if (nir->num_uniforms > 0) {
- nir_setup_uniforms(nir);
- }
-
+ nir_setup_inputs(nir);
+ nir_setup_outputs(nir);
+ nir_setup_uniforms(nir);
nir_emit_system_values(nir);
- nir_globals = ralloc_array(mem_ctx, fs_reg, nir->reg_alloc);
- foreach_list_typed(nir_register, reg, node, &nir->registers) {
- unsigned array_elems =
- reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
- unsigned size = array_elems * reg->num_components;
- nir_globals[reg->index] = bld.vgrf(BRW_REGISTER_TYPE_F, size);
- }
-
/* get the main function and emit it */
nir_foreach_overload(nir, overload) {
assert(strcmp(overload->function->name, "main") == 0);
@@ -74,9 +56,11 @@ fs_visitor::emit_nir_code()
void
fs_visitor::nir_setup_inputs(nir_shader *shader)
{
+ nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, shader->num_inputs);
+
foreach_list_typed(nir_variable, var, node, &shader->inputs) {
enum brw_reg_type type = brw_type_for_base_type(var->type);
- fs_reg input = offset(nir_inputs, var->data.driver_location);
+ fs_reg input = offset(nir_inputs, bld, var->data.driver_location);
fs_reg reg;
switch (stage) {
@@ -91,25 +75,35 @@ fs_visitor::nir_setup_inputs(nir_shader *shader)
* So, we need to copy from fs_reg(ATTR, var->location) to
* offset(nir_inputs, var->data.driver_location).
*/
- unsigned components = var->type->without_array()->components();
+ const glsl_type *const t = var->type->without_array();
+ const unsigned components = t->components();
+ const unsigned cols = t->matrix_columns;
+ const unsigned elts = t->vector_elements;
unsigned array_length = var->type->is_array() ? var->type->length : 1;
for (unsigned i = 0; i < array_length; i++) {
- for (unsigned j = 0; j < components; j++) {
- bld.MOV(retype(offset(input, components * i + j), type),
- offset(fs_reg(ATTR, var->data.location + i, type), j));
+ for (unsigned j = 0; j < cols; j++) {
+ for (unsigned k = 0; k < elts; k++) {
+ bld.MOV(offset(retype(input, type), bld,
+ components * i + elts * j + k),
+ offset(fs_reg(ATTR, var->data.location + i, type),
+ bld, 4 * j + k));
+ }
}
}
break;
}
case MESA_SHADER_GEOMETRY:
case MESA_SHADER_COMPUTE:
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
unreachable("fs_visitor not used for these stages yet.");
break;
case MESA_SHADER_FRAGMENT:
if (var->data.location == VARYING_SLOT_POS) {
reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer,
var->data.origin_upper_left);
- emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, input, reg), 0xF);
+ emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
+ input, reg), 0xF);
} else {
emit_general_interpolation(input, var->name, var->type,
(glsl_interp_qualifier) var->data.interpolation,
@@ -126,45 +120,54 @@ fs_visitor::nir_setup_outputs(nir_shader *shader)
{
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
+ nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, shader->num_outputs);
+
foreach_list_typed(nir_variable, var, node, &shader->outputs) {
- fs_reg reg = offset(nir_outputs, var->data.driver_location);
+ fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
int vector_elements =
var->type->is_array() ? var->type->fields.array->vector_elements
: var->type->vector_elements;
- if (stage == MESA_SHADER_VERTEX) {
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
for (int i = 0; i < ALIGN(type_size(var->type), 4) / 4; i++) {
int output = var->data.location + i;
- this->outputs[output] = offset(reg, 4 * i);
+ this->outputs[output] = offset(reg, bld, 4 * i);
this->output_components[output] = vector_elements;
}
- } else if (var->data.index > 0) {
- assert(var->data.location == FRAG_RESULT_DATA0);
- assert(var->data.index == 1);
- this->dual_src_output = reg;
- this->do_dual_src = true;
- } else if (var->data.location == FRAG_RESULT_COLOR) {
- /* Writing gl_FragColor outputs to all color regions. */
- for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
- this->outputs[i] = reg;
- this->output_components[i] = 4;
- }
- } else if (var->data.location == FRAG_RESULT_DEPTH) {
- this->frag_depth = reg;
- } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
- this->sample_mask = reg;
- } else {
- /* gl_FragData or a user-defined FS output */
- assert(var->data.location >= FRAG_RESULT_DATA0 &&
- var->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);
-
- /* General color output. */
- for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
- int output = var->data.location - FRAG_RESULT_DATA0 + i;
- this->outputs[output] = offset(reg, vector_elements * i);
- this->output_components[output] = vector_elements;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ if (var->data.index > 0) {
+ assert(var->data.location == FRAG_RESULT_DATA0);
+ assert(var->data.index == 1);
+ this->dual_src_output = reg;
+ this->do_dual_src = true;
+ } else if (var->data.location == FRAG_RESULT_COLOR) {
+ /* Writing gl_FragColor outputs to all color regions. */
+ for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
+ this->outputs[i] = reg;
+ this->output_components[i] = 4;
+ }
+ } else if (var->data.location == FRAG_RESULT_DEPTH) {
+ this->frag_depth = reg;
+ } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
+ this->sample_mask = reg;
+ } else {
+ /* gl_FragData or a user-defined FS output */
+ assert(var->data.location >= FRAG_RESULT_DATA0 &&
+ var->data.location < FRAG_RESULT_DATA0+BRW_MAX_DRAW_BUFFERS);
+
+ /* General color output. */
+ for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
+ int output = var->data.location - FRAG_RESULT_DATA0 + i;
+ this->outputs[output] = offset(reg, bld, vector_elements * i);
+ this->output_components[output] = vector_elements;
+ }
}
+ break;
+ default:
+ unreachable("unhandled shader stage");
}
}
}
@@ -172,18 +175,20 @@ fs_visitor::nir_setup_outputs(nir_shader *shader)
void
fs_visitor::nir_setup_uniforms(nir_shader *shader)
{
- uniforms = shader->num_uniforms;
num_direct_uniforms = shader->num_direct_uniforms;
+ if (dispatch_width != 8)
+ return;
+
/* We split the uniform register file in half. The first half is
* entirely direct uniforms. The second half is indirect.
*/
- param_size[0] = num_direct_uniforms;
+ if (num_direct_uniforms > 0)
+ param_size[0] = num_direct_uniforms;
if (shader->num_uniforms > num_direct_uniforms)
param_size[num_direct_uniforms] = shader->num_uniforms - num_direct_uniforms;
- if (dispatch_width != 8)
- return;
+ uniforms = shader->num_uniforms;
if (shader_prog) {
foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
@@ -233,17 +238,26 @@ fs_visitor::nir_setup_uniform(nir_variable *var)
continue;
}
- unsigned slots = storage->type->component_slots();
- if (storage->array_elements)
- slots *= storage->array_elements;
+ if (storage->type->is_image()) {
+ /* Images don't get a valid location assigned by nir_lower_io()
+ * because their size is driver-specific, so we need to allocate
+ * space for them here at the end of the parameter array.
+ */
+ var->data.driver_location = uniforms;
+ param_size[uniforms] =
+ BRW_IMAGE_PARAM_SIZE * MAX2(storage->array_elements, 1);
- for (unsigned i = 0; i < slots; i++) {
- stage_prog_data->param[index++] = &storage->storage[i];
+ setup_image_uniform_values(storage);
+ } else {
+ unsigned slots = storage->type->component_slots();
+ if (storage->array_elements)
+ slots *= storage->array_elements;
+
+ for (unsigned i = 0; i < slots; i++) {
+ stage_prog_data->param[index++] = &storage->storage[i];
+ }
}
}
-
- /* Make sure we actually initialized the right amount of stuff here. */
- assert(var->data.driver_location + var->type->component_slots() == index);
}
void
@@ -366,6 +380,9 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl)
nir_locals[reg->index] = bld.vgrf(BRW_REGISTER_TYPE_F, size);
}
+ nir_ssa_values = reralloc(mem_ctx, nir_ssa_values, fs_reg,
+ impl->ssa_alloc);
+
nir_emit_cf_list(&impl->body);
}
@@ -413,18 +430,12 @@ fs_visitor::nir_emit_if(nir_if *if_stmt)
bld.emit(BRW_OPCODE_ENDIF);
- if (!try_replace_with_sel() && devinfo->gen < 6) {
- no16("Can't support (non-uniform) control flow on SIMD16\n");
- }
+ try_replace_with_sel();
}
void
fs_visitor::nir_emit_loop(nir_loop *loop)
{
- if (devinfo->gen < 6) {
- no16("Can't support (non-uniform) control flow on SIMD16\n");
- }
-
bld.emit(BRW_OPCODE_DO);
nir_emit_cf_list(&loop->body);
@@ -459,9 +470,11 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
break;
case nir_instr_type_load_const:
- /* We can hit these, but we do nothing now and use them as
- * immediates later.
- */
+ nir_emit_load_const(abld, nir_instr_as_load_const(instr));
+ break;
+
+ case nir_instr_type_ssa_undef:
+ nir_emit_undef(abld, nir_instr_as_ssa_undef(instr));
break;
case nir_instr_type_jump:
@@ -473,39 +486,16 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
}
}
-static brw_reg_type
-brw_type_for_nir_type(nir_alu_type type)
-{
- switch (type) {
- case nir_type_unsigned:
- return BRW_REGISTER_TYPE_UD;
- case nir_type_bool:
- case nir_type_int:
- return BRW_REGISTER_TYPE_D;
- case nir_type_float:
- return BRW_REGISTER_TYPE_F;
- default:
- unreachable("unknown type");
- }
-
- return BRW_REGISTER_TYPE_F;
-}
-
bool
fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
const fs_reg &result)
{
- if (instr->src[0].src.is_ssa ||
- !instr->src[0].src.reg.reg ||
- !instr->src[0].src.reg.reg->parent_instr)
- return false;
-
- if (instr->src[0].src.reg.reg->parent_instr->type !=
- nir_instr_type_intrinsic)
+ if (!instr->src[0].src.is_ssa ||
+ instr->src[0].src.ssa->parent_instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *src0 =
- nir_instr_as_intrinsic(instr->src[0].src.reg.reg->parent_instr);
+ nir_instr_as_intrinsic(instr->src[0].src.ssa->parent_instr);
if (src0->intrinsic != nir_intrinsic_load_front_face)
return false;
@@ -618,11 +608,11 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
continue;
if (instr->op == nir_op_imov || instr->op == nir_op_fmov) {
- inst = bld.MOV(offset(temp, i),
- offset(op[0], instr->src[0].swizzle[i]));
+ inst = bld.MOV(offset(temp, bld, i),
+ offset(op[0], bld, instr->src[0].swizzle[i]));
} else {
- inst = bld.MOV(offset(temp, i),
- offset(op[i], instr->src[i].swizzle[0]));
+ inst = bld.MOV(offset(temp, bld, i),
+ offset(op[i], bld, instr->src[i].swizzle[0]));
}
inst->saturate = instr->dest.saturate;
}
@@ -636,7 +626,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
if (!(instr->dest.write_mask & (1 << i)))
continue;
- bld.MOV(offset(result, i), offset(temp, i));
+ bld.MOV(offset(result, bld, i), offset(temp, bld, i));
}
}
return;
@@ -657,12 +647,12 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
assert(_mesa_bitcount(instr->dest.write_mask) == 1);
channel = ffs(instr->dest.write_mask) - 1;
- result = offset(result, channel);
+ result = offset(result, bld, channel);
}
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
assert(nir_op_infos[instr->op].input_sizes[i] < 2);
- op[i] = offset(op[i], instr->src[i].swizzle[channel]);
+ op[i] = offset(op[i], bld, instr->src[i].swizzle[channel]);
}
switch (instr->op) {
@@ -788,67 +778,20 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
break;
case nir_op_imul_high:
- case nir_op_umul_high: {
- if (devinfo->gen >= 7)
- no16("SIMD16 explicit accumulator operands unsupported\n");
-
- struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type);
-
- fs_inst *mul = bld.MUL(acc, op[0], op[1]);
- bld.MACH(result, op[0], op[1]);
-
- /* Until Gen8, integer multiplies read 32-bits from one source, and
- * 16-bits from the other, and relying on the MACH instruction to
- * generate the high bits of the result.
- *
- * On Gen8, the multiply instruction does a full 32x32-bit multiply,
- * but in order to do a 64x64-bit multiply we have to simulate the
- * previous behavior and then use a MACH instruction.
- *
- * FINISHME: Don't use source modifiers on src1.
- */
- if (devinfo->gen >= 8) {
- assert(mul->src[1].type == BRW_REGISTER_TYPE_D ||
- mul->src[1].type == BRW_REGISTER_TYPE_UD);
- if (mul->src[1].type == BRW_REGISTER_TYPE_D) {
- mul->src[1].type = BRW_REGISTER_TYPE_W;
- mul->src[1].stride = 2;
- } else {
- mul->src[1].type = BRW_REGISTER_TYPE_UW;
- mul->src[1].stride = 2;
- }
- }
+ case nir_op_umul_high:
+ bld.emit(SHADER_OPCODE_MULH, result, op[0], op[1]);
break;
- }
case nir_op_idiv:
case nir_op_udiv:
bld.emit(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1]);
break;
- case nir_op_uadd_carry: {
- if (devinfo->gen >= 7)
- no16("SIMD16 explicit accumulator operands unsupported\n");
+ case nir_op_uadd_carry:
+ unreachable("Should have been lowered by carry_to_arith().");
- struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
- BRW_REGISTER_TYPE_UD);
-
- bld.ADDC(bld.null_reg_ud(), op[0], op[1]);
- bld.MOV(result, fs_reg(acc));
- break;
- }
-
- case nir_op_usub_borrow: {
- if (devinfo->gen >= 7)
- no16("SIMD16 explicit accumulator operands unsupported\n");
-
- struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
- BRW_REGISTER_TYPE_UD);
-
- bld.SUBB(bld.null_reg_ud(), op[0], op[1]);
- bld.MOV(result, fs_reg(acc));
- break;
- }
+ case nir_op_usub_borrow:
+ unreachable("Should have been lowered by borrow_to_arith().");
case nir_op_umod:
bld.emit(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]);
@@ -878,28 +821,28 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
case nir_op_inot:
if (devinfo->gen >= 8) {
- resolve_source_modifiers(&op[0]);
+ op[0] = resolve_source_modifiers(op[0]);
}
bld.NOT(result, op[0]);
break;
case nir_op_ixor:
if (devinfo->gen >= 8) {
- resolve_source_modifiers(&op[0]);
- resolve_source_modifiers(&op[1]);
+ op[0] = resolve_source_modifiers(op[0]);
+ op[1] = resolve_source_modifiers(op[1]);
}
bld.XOR(result, op[0], op[1]);
break;
case nir_op_ior:
if (devinfo->gen >= 8) {
- resolve_source_modifiers(&op[0]);
- resolve_source_modifiers(&op[1]);
+ op[0] = resolve_source_modifiers(op[0]);
+ op[1] = resolve_source_modifiers(op[1]);
}
bld.OR(result, op[0], op[1]);
break;
case nir_op_iand:
if (devinfo->gen >= 8) {
- resolve_source_modifiers(&op[0]);
- resolve_source_modifiers(&op[1]);
+ op[0] = resolve_source_modifiers(op[0]);
+ op[1] = resolve_source_modifiers(op[1]);
}
bld.AND(result, op[0], op[1]);
break;
@@ -959,10 +902,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
break;
case nir_op_b2i:
- bld.AND(result, op[0], fs_reg(1));
- break;
case nir_op_b2f:
- bld.AND(retype(result, BRW_REGISTER_TYPE_UD), op[0], fs_reg(0x3f800000u));
+ bld.MOV(result, negate(op[0]));
break;
case nir_op_f2b:
@@ -1146,17 +1087,36 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
}
}
+void
+fs_visitor::nir_emit_load_const(const fs_builder &bld,
+ nir_load_const_instr *instr)
+{
+ fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, instr->def.num_components);
+
+ for (unsigned i = 0; i < instr->def.num_components; i++)
+ bld.MOV(offset(reg, bld, i), fs_reg(instr->value.i[i]));
+
+ nir_ssa_values[instr->def.index] = reg;
+}
+
+void
+fs_visitor::nir_emit_undef(const fs_builder &bld, nir_ssa_undef_instr *instr)
+{
+ nir_ssa_values[instr->def.index] = bld.vgrf(BRW_REGISTER_TYPE_D,
+ instr->def.num_components);
+}
+
static fs_reg
fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg,
unsigned base_offset, nir_src *indirect)
{
fs_reg reg;
- if (nir_reg->is_global)
- reg = v->nir_globals[nir_reg->index];
- else
- reg = v->nir_locals[nir_reg->index];
- reg = offset(reg, base_offset * nir_reg->num_components);
+ assert(!nir_reg->is_global);
+
+ reg = v->nir_locals[nir_reg->index];
+
+ reg = offset(reg, v->bld, base_offset * nir_reg->num_components);
if (indirect) {
int multiplier = nir_reg->num_components * (v->dispatch_width / 8);
@@ -1171,34 +1131,77 @@ fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg,
fs_reg
fs_visitor::get_nir_src(nir_src src)
{
+ fs_reg reg;
if (src.is_ssa) {
- assert(src.ssa->parent_instr->type == nir_instr_type_load_const);
- nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
- fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, src.ssa->num_components);
-
- for (unsigned i = 0; i < src.ssa->num_components; ++i)
- bld.MOV(offset(reg, i), fs_reg(load->value.i[i]));
-
- return reg;
+ reg = nir_ssa_values[src.ssa->index];
} else {
- fs_reg reg = fs_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset,
- src.reg.indirect);
-
- /* to avoid floating-point denorm flushing problems, set the type by
- * default to D - instructions that need floating point semantics will set
- * this to F if they need to
- */
- return retype(reg, BRW_REGISTER_TYPE_D);
+ reg = fs_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset,
+ src.reg.indirect);
}
+
+ /* to avoid floating-point denorm flushing problems, set the type by
+ * default to D - instructions that need floating point semantics will set
+ * this to F if they need to
+ */
+ return retype(reg, BRW_REGISTER_TYPE_D);
}
fs_reg
fs_visitor::get_nir_dest(nir_dest dest)
{
+ if (dest.is_ssa) {
+ nir_ssa_values[dest.ssa.index] = bld.vgrf(BRW_REGISTER_TYPE_F,
+ dest.ssa.num_components);
+ return nir_ssa_values[dest.ssa.index];
+ }
+
return fs_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
dest.reg.indirect);
}
+fs_reg
+fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
+{
+ fs_reg image(UNIFORM, deref->var->data.driver_location,
+ BRW_REGISTER_TYPE_UD);
+
+ if (deref->deref.child) {
+ const nir_deref_array *deref_array =
+ nir_deref_as_array(deref->deref.child);
+ assert(deref->deref.child->deref_type == nir_deref_type_array &&
+ deref_array->deref.child == NULL);
+ const unsigned size = glsl_get_length(deref->var->type);
+ const unsigned base = MIN2(deref_array->base_offset, size - 1);
+
+ image = offset(image, bld, base * BRW_IMAGE_PARAM_SIZE);
+
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+ fs_reg *tmp = new(mem_ctx) fs_reg(vgrf(glsl_type::int_type));
+
+ if (devinfo->gen == 7 && !devinfo->is_haswell) {
+ /* IVB hangs when trying to access an invalid surface index with
+ * the dataport. According to the spec "if the index used to
+ * select an individual element is negative or greater than or
+ * equal to the size of the array, the results of the operation
+ * are undefined but may not lead to termination" -- which is one
+ * of the possible outcomes of the hang. Clamp the index to
+ * prevent access outside of the array bounds.
+ */
+ bld.emit_minmax(*tmp, retype(get_nir_src(deref_array->indirect),
+ BRW_REGISTER_TYPE_UD),
+ fs_reg(size - base - 1), BRW_CONDITIONAL_L);
+ } else {
+ bld.MOV(*tmp, get_nir_src(deref_array->indirect));
+ }
+
+ bld.MUL(*tmp, *tmp, fs_reg(BRW_IMAGE_PARAM_SIZE));
+ image.reladdr = tmp;
+ }
+ }
+
+ return image;
+}
+
void
fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst,
unsigned wr_mask)
@@ -1208,15 +1211,64 @@ fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst,
continue;
fs_inst *new_inst = new(mem_ctx) fs_inst(inst);
- new_inst->dst = offset(new_inst->dst, i);
+ new_inst->dst = offset(new_inst->dst, bld, i);
for (unsigned j = 0; j < new_inst->sources; j++)
if (new_inst->src[j].file == GRF)
- new_inst->src[j] = offset(new_inst->src[j], i);
+ new_inst->src[j] = offset(new_inst->src[j], bld, i);
bld.emit(new_inst);
}
}
+/**
+ * Get the matching channel register datatype for an image intrinsic of the
+ * specified GLSL image type.
+ */
+static brw_reg_type
+get_image_base_type(const glsl_type *type)
+{
+ switch ((glsl_base_type)type->sampler_type) {
+ case GLSL_TYPE_UINT:
+ return BRW_REGISTER_TYPE_UD;
+ case GLSL_TYPE_INT:
+ return BRW_REGISTER_TYPE_D;
+ case GLSL_TYPE_FLOAT:
+ return BRW_REGISTER_TYPE_F;
+ default:
+ unreachable("Not reached.");
+ }
+}
+
+/**
+ * Get the appropriate atomic op for an image atomic intrinsic.
+ */
+static unsigned
+get_image_atomic_op(nir_intrinsic_op op, const glsl_type *type)
+{
+ switch (op) {
+ case nir_intrinsic_image_atomic_add:
+ return BRW_AOP_ADD;
+ case nir_intrinsic_image_atomic_min:
+ return (get_image_base_type(type) == BRW_REGISTER_TYPE_D ?
+ BRW_AOP_IMIN : BRW_AOP_UMIN);
+ case nir_intrinsic_image_atomic_max:
+ return (get_image_base_type(type) == BRW_REGISTER_TYPE_D ?
+ BRW_AOP_IMAX : BRW_AOP_UMAX);
+ case nir_intrinsic_image_atomic_and:
+ return BRW_AOP_AND;
+ case nir_intrinsic_image_atomic_or:
+ return BRW_AOP_OR;
+ case nir_intrinsic_image_atomic_xor:
+ return BRW_AOP_XOR;
+ case nir_intrinsic_image_atomic_exchange:
+ return BRW_AOP_MOV;
+ case nir_intrinsic_image_atomic_comp_swap:
+ return BRW_AOP_CMPWR;
+ default:
+ unreachable("Not reachable.");
+ }
+}
+
void
fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
{
@@ -1255,25 +1307,102 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_atomic_counter_inc:
case nir_intrinsic_atomic_counter_dec:
case nir_intrinsic_atomic_counter_read: {
- unsigned surf_index = prog_data->binding_table.abo_start +
- (unsigned) instr->const_index[0];
- fs_reg offset = fs_reg(get_nir_src(instr->src[0]));
+ using namespace surface_access;
+ /* Get the arguments of the atomic intrinsic. */
+ const fs_reg offset = get_nir_src(instr->src[0]);
+ const unsigned surface = (stage_prog_data->binding_table.abo_start +
+ instr->const_index[0]);
+ fs_reg tmp;
+
+ /* Emit a surface read or atomic op. */
switch (instr->intrinsic) {
- case nir_intrinsic_atomic_counter_inc:
- emit_untyped_atomic(BRW_AOP_INC, surf_index, dest, offset,
- fs_reg(), fs_reg());
- break;
- case nir_intrinsic_atomic_counter_dec:
- emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dest, offset,
- fs_reg(), fs_reg());
- break;
- case nir_intrinsic_atomic_counter_read:
- emit_untyped_surface_read(surf_index, dest, offset);
- break;
- default:
- unreachable("Unreachable");
+ case nir_intrinsic_atomic_counter_read:
+ tmp = emit_untyped_read(bld, fs_reg(surface), offset, 1, 1);
+ break;
+
+ case nir_intrinsic_atomic_counter_inc:
+ tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(),
+ fs_reg(), 1, 1, BRW_AOP_INC);
+ break;
+
+ case nir_intrinsic_atomic_counter_dec:
+ tmp = emit_untyped_atomic(bld, fs_reg(surface), offset, fs_reg(),
+ fs_reg(), 1, 1, BRW_AOP_PREDEC);
+ break;
+
+ default:
+ unreachable("Unreachable");
}
+
+ /* Assign the result. */
+ bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD), tmp);
+
+ /* Mark the surface as used. */
+ brw_mark_surface_used(stage_prog_data, surface);
+ break;
+ }
+
+ case nir_intrinsic_image_load:
+ case nir_intrinsic_image_store:
+ case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_image_atomic_min:
+ case nir_intrinsic_image_atomic_max:
+ case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_image_atomic_comp_swap: {
+ using namespace image_access;
+
+ /* Get the referenced image variable and type. */
+ const nir_variable *var = instr->variables[0]->var;
+ const glsl_type *type = var->type->without_array();
+ const brw_reg_type base_type = get_image_base_type(type);
+
+ /* Get some metadata from the image intrinsic. */
+ const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
+ const unsigned arr_dims = type->sampler_array ? 1 : 0;
+ const unsigned surf_dims = type->coordinate_components() - arr_dims;
+ const mesa_format format =
+ (var->data.image.write_only ? MESA_FORMAT_NONE :
+ _mesa_get_shader_image_format(var->data.image.format));
+
+ /* Get the arguments of the image intrinsic. */
+ const fs_reg image = get_nir_image_deref(instr->variables[0]);
+ const fs_reg addr = retype(get_nir_src(instr->src[0]),
+ BRW_REGISTER_TYPE_UD);
+ const fs_reg src0 = (info->num_srcs >= 3 ?
+ retype(get_nir_src(instr->src[2]), base_type) :
+ fs_reg());
+ const fs_reg src1 = (info->num_srcs >= 4 ?
+ retype(get_nir_src(instr->src[3]), base_type) :
+ fs_reg());
+ fs_reg tmp;
+
+ /* Emit an image load, store or atomic op. */
+ if (instr->intrinsic == nir_intrinsic_image_load)
+ tmp = emit_image_load(bld, image, addr, surf_dims, arr_dims, format);
+
+ else if (instr->intrinsic == nir_intrinsic_image_store)
+ emit_image_store(bld, image, addr, src0, surf_dims, arr_dims, format);
+
+ else
+ tmp = emit_image_atomic(bld, image, addr, src0, src1,
+ surf_dims, arr_dims, info->dest_components,
+ get_image_atomic_op(instr->intrinsic, type));
+
+ /* Assign the result. */
+ for (unsigned c = 0; c < info->dest_components; ++c)
+ bld.MOV(offset(retype(dest, base_type), bld, c),
+ offset(tmp, bld, c));
+ break;
+ }
+
+ case nir_intrinsic_memory_barrier: {
+ const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 16 / dispatch_width);
+ bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp)
+ ->regs_written = 2;
break;
}
@@ -1322,7 +1451,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
assert(sample_pos.file != BAD_FILE);
dest.type = sample_pos.type;
bld.MOV(dest, sample_pos);
- bld.MOV(offset(dest, 1), offset(sample_pos, 1));
+ bld.MOV(offset(dest, bld, 1), offset(sample_pos, bld, 1));
break;
}
@@ -1349,13 +1478,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
}
for (unsigned j = 0; j < instr->num_components; j++) {
- fs_reg src = offset(retype(uniform_reg, dest.type), index);
+ fs_reg src = offset(retype(uniform_reg, dest.type), bld, index);
if (has_indirect)
src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
index++;
bld.MOV(dest, src);
- dest = offset(dest, 1);
+ dest = offset(dest, bld, 1);
}
break;
}
@@ -1387,7 +1516,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
surf_index = vgrf(glsl_type::uint_type);
bld.ADD(surf_index, get_nir_src(instr->src[0]),
fs_reg(stage_prog_data->binding_table.ubo_start));
- bld.emit_uniformize(surf_index, surf_index);
+ surf_index = bld.emit_uniformize(surf_index);
/* Assume this may touch any UBO. It would be nice to provide
* a tighter bound, but the array information is already lowered away.
@@ -1406,7 +1535,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
unsigned vec4_offset = instr->const_index[1] / 4;
for (int i = 0; i < instr->num_components; i++)
- VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, i), surf_index,
+ VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index,
base_offset, vec4_offset + i);
} else {
fs_reg packed_consts = vgrf(glsl_type::float_type);
@@ -1425,7 +1554,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
assert(packed_consts.subreg_offset < 32);
bld.MOV(dest, packed_consts);
- dest = offset(dest, 1);
+ dest = offset(dest, bld, 1);
}
}
break;
@@ -1437,14 +1566,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_load_input: {
unsigned index = 0;
for (unsigned j = 0; j < instr->num_components; j++) {
- fs_reg src = offset(retype(nir_inputs, dest.type),
+ fs_reg src = offset(retype(nir_inputs, dest.type), bld,
instr->const_index[0] + index);
if (has_indirect)
src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
index++;
bld.MOV(dest, src);
- dest = offset(dest, 1);
+ dest = offset(dest, bld, 1);
}
break;
}
@@ -1470,11 +1599,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_interp_var_at_centroid:
case nir_intrinsic_interp_var_at_sample:
case nir_intrinsic_interp_var_at_offset: {
- /* in SIMD16 mode, the pixel interpolator returns coords interleaved
- * 8 channels at a time, same as the barycentric coords presented in
- * the FS payload. this requires a bit of extra work to support.
- */
- no16("interpolate_at_* not yet supported in SIMD16 mode.");
+ assert(stage == MESA_SHADER_FRAGMENT);
+
+ ((struct brw_wm_prog_data *) prog_data)->pulls_bary = true;
fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
@@ -1517,7 +1644,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
BRW_REGISTER_TYPE_F);
for (int i = 0; i < 2; i++) {
fs_reg temp = vgrf(glsl_type::float_type);
- bld.MUL(temp, offset(offset_src, i), fs_reg(16.0f));
+ bld.MUL(temp, offset(offset_src, bld, i), fs_reg(16.0f));
fs_reg itemp = vgrf(glsl_type::int_type);
bld.MOV(itemp, temp); /* float to int */
@@ -1537,10 +1664,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
* FRAGMENT_INTERPOLATION_OFFSET_BITS"
*/
set_condmod(BRW_CONDITIONAL_L,
- bld.SEL(offset(src, i), itemp, fs_reg(7)));
+ bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
}
- mlen = 2;
+ mlen = 2 * dispatch_width / 8;
inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src,
fs_reg(0u));
}
@@ -1552,7 +1679,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
}
inst->mlen = mlen;
- inst->regs_written = 2; /* 2 floats per slot returned */
+ /* 2 floats per slot returned */
+ inst->regs_written = 2 * dispatch_width / 8;
inst->pi_noperspective = instr->variables[0]->var->data.interpolation ==
INTERP_QUALIFIER_NOPERSPECTIVE;
@@ -1561,7 +1689,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
src.type = dest.type;
bld.emit(FS_OPCODE_LINTERP, dest, dst_xy, src);
- dest = offset(dest, 1);
+ dest = offset(dest, bld, 1);
}
break;
}
@@ -1573,13 +1701,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
fs_reg src = get_nir_src(instr->src[0]);
unsigned index = 0;
for (unsigned j = 0; j < instr->num_components; j++) {
- fs_reg new_dest = offset(retype(nir_outputs, src.type),
+ fs_reg new_dest = offset(retype(nir_outputs, src.type), bld,
instr->const_index[0] + index);
if (has_indirect)
src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1]));
index++;
bld.MOV(new_dest, src);
- src = offset(src, 1);
+ src = offset(src, bld, 1);
}
break;
}
@@ -1689,7 +1817,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
/* Emit code to evaluate the actual indexing expression */
sampler_reg = vgrf(glsl_type::uint_type);
bld.ADD(sampler_reg, src, fs_reg(sampler));
- bld.emit_uniformize(sampler_reg, sampler_reg);
+ sampler_reg = bld.emit_uniformize(sampler_reg);
break;
}
@@ -1715,20 +1843,8 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
}
}
- enum glsl_base_type dest_base_type;
- switch (instr->dest_type) {
- case nir_type_float:
- dest_base_type = GLSL_TYPE_FLOAT;
- break;
- case nir_type_int:
- dest_base_type = GLSL_TYPE_INT;
- break;
- case nir_type_unsigned:
- dest_base_type = GLSL_TYPE_UINT;
- break;
- default:
- unreachable("bad type");
- }
+ enum glsl_base_type dest_base_type =
+ brw_glsl_base_type_for_nir_type (instr->dest_type);
const glsl_type *dest_type =
glsl_type::get_instance(dest_base_type, nir_tex_instr_dest_size(instr),
@@ -1758,7 +1874,8 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
fs_reg dest = get_nir_dest(instr->dest);
dest.type = this->result.type;
unsigned num_components = nir_tex_instr_dest_size(instr);
- emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, dest, this->result),
+ emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
+ dest, this->result),
(1 << num_components) - 1);
}
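
A note on the nir_op_b2i/nir_op_b2f hunk above: collapsing both cases into a single MOV of the negated source only works because NIR booleans are represented here as 0 / ~0 (0 or -1 as a signed integer), so negation yields exactly 0 or 1 and the integer-to-float MOV then produces 0.0 or 1.0. A minimal standalone sketch of that arithmetic (plain C, illustrative only — not part of this patch):

/* Booleans as 0 / ~0: a single negate-and-move gives the b2i/b2f results. */
#include <assert.h>
#include <stdint.h>

int main(void)
{
   const int32_t b_false = 0, b_true = ~0;

   assert(-b_false == 0 && -b_true == 1);             /* b2i */
   assert((float)-b_false == 0.0f &&
          (float)-b_true == 1.0f);                    /* b2f */
   return 0;
}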
diff --git a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp
index d92d4bbd81d..b75f40ba5a1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp
@@ -24,6 +24,8 @@
#include "brw_fs.h"
#include "brw_cfg.h"
+using namespace brw;
+
/** @file brw_fs_peephole_predicated_break.cpp
*
* Loops are often structured as
@@ -85,9 +87,9 @@ fs_visitor::opt_peephole_predicated_break()
* instruction to set the flag register.
*/
if (devinfo->gen == 6 && if_inst->conditional_mod) {
- bld.at(if_block, if_inst)
- .CMP(bld.null_reg_d(), if_inst->src[0], if_inst->src[1],
- if_inst->conditional_mod);
+ const fs_builder ibld(this, if_block, if_inst);
+ ibld.CMP(ibld.null_reg_d(), if_inst->src[0], if_inst->src[1],
+ if_inst->conditional_mod);
jump_inst->predicate = BRW_PREDICATE_NORMAL;
} else {
jump_inst->predicate = if_inst->predicate;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 364fc4a5ad2..b70895ec2ff 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -73,11 +73,20 @@ fs_visitor::assign_regs_trivial()
}
static void
-brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width)
+brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width)
{
const struct brw_device_info *devinfo = compiler->devinfo;
int base_reg_count = BRW_MAX_GRF;
- int index = reg_width - 1;
+ int index = (dispatch_width / 8) - 1;
+
+ if (dispatch_width > 8 && devinfo->gen >= 7) {
+ /* For IVB+, we don't need the PLN hacks or the even-reg alignment in
+ * SIMD16. Therefore, we can use the exact same register sets for
+ * SIMD16 as we do for SIMD8 and we don't need to recalculate them.
+ */
+ compiler->fs_reg_sets[index] = compiler->fs_reg_sets[0];
+ return;
+ }
/* The registers used to make up almost all values handled in the compiler
* are a scalar value occupying a single register (or 2 registers in the
@@ -121,7 +130,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width)
/* Compute the total number of registers across all classes. */
int ra_reg_count = 0;
for (int i = 0; i < class_count; i++) {
- if (devinfo->gen <= 5 && reg_width == 2) {
+ if (devinfo->gen <= 5 && dispatch_width == 16) {
/* From the G45 PRM:
*
* In order to reduce the hardware complexity, the following
@@ -168,7 +177,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width)
int pairs_reg_count = 0;
for (int i = 0; i < class_count; i++) {
int class_reg_count;
- if (devinfo->gen <= 5 && reg_width == 2) {
+ if (devinfo->gen <= 5 && dispatch_width == 16) {
class_reg_count = (base_reg_count - (class_sizes[i] - 1)) / 2;
/* See comment below. The only difference here is that we are
@@ -214,7 +223,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width)
pairs_reg_count = class_reg_count;
}
- if (devinfo->gen <= 5 && reg_width == 2) {
+ if (devinfo->gen <= 5 && dispatch_width == 16) {
for (int j = 0; j < class_reg_count; j++) {
ra_class_add_reg(regs, classes[i], reg);
@@ -249,7 +258,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width)
/* Add a special class for aligned pairs, which we'll put delta_xy
* in on Gen <= 6 so that we can do PLN.
*/
- if (devinfo->has_pln && reg_width == 1 && devinfo->gen <= 6) {
+ if (devinfo->has_pln && dispatch_width == 8 && devinfo->gen <= 6) {
aligned_pairs_class = ra_alloc_reg_class(regs);
for (int i = 0; i < pairs_reg_count; i++) {
@@ -287,8 +296,8 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width)
void
brw_fs_alloc_reg_sets(struct brw_compiler *compiler)
{
- brw_alloc_reg_set(compiler, 1);
- brw_alloc_reg_set(compiler, 2);
+ brw_alloc_reg_set(compiler, 8);
+ brw_alloc_reg_set(compiler, 16);
}
static int
@@ -341,7 +350,9 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
int loop_end_ip = 0;
int payload_last_use_ip[payload_node_count];
- memset(payload_last_use_ip, 0, sizeof(payload_last_use_ip));
+ for (int i = 0; i < payload_node_count; i++)
+ payload_last_use_ip[i] = -1;
+
int ip = 0;
foreach_block_and_inst(block, fs_inst, inst, cfg) {
switch (inst->opcode) {
@@ -380,32 +391,15 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
if (node_nr >= payload_node_count)
continue;
- payload_last_use_ip[node_nr] = use_ip;
+ for (int j = 0; j < inst->regs_read(i); j++) {
+ payload_last_use_ip[node_nr + j] = use_ip;
+ assert(node_nr + j < payload_node_count);
+ }
}
}
/* Special case instructions which have extra implied registers used. */
switch (inst->opcode) {
- case FS_OPCODE_LINTERP:
- /* On gen6+ in SIMD16, there are 4 adjacent registers used by
- * PLN's sourcing of the deltas, while we list only the first one
- * in the arguments. Pre-gen6, the deltas are computed in normal
- * VGRFs.
- */
- if (devinfo->gen >= 6) {
- int delta_x_arg = 0;
- if (inst->src[delta_x_arg].file == HW_REG &&
- inst->src[delta_x_arg].fixed_hw_reg.file ==
- BRW_GENERAL_REGISTER_FILE) {
- for (int i = 1; i < 4; ++i) {
- int node = inst->src[delta_x_arg].fixed_hw_reg.nr + i;
- assert(node < payload_node_count);
- payload_last_use_ip[node] = use_ip;
- }
- }
- }
- break;
-
case CS_OPCODE_CS_TERMINATE:
payload_last_use_ip[0] = use_ip;
break;
@@ -428,6 +422,9 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
}
for (int i = 0; i < payload_node_count; i++) {
+ if (payload_last_use_ip[i] == -1)
+ continue;
+
/* Mark the payload node as interfering with any virtual grf that is
* live between the start of the program and our last use of the payload
* node.
@@ -706,10 +703,8 @@ fs_visitor::emit_unspill(bblock_t *block, fs_inst *inst, fs_reg dst,
uint32_t spill_offset, int count)
{
int reg_size = 1;
- if (dispatch_width == 16 && count % 2 == 0) {
+ if (dispatch_width == 16 && count % 2 == 0)
reg_size = 2;
- dst.width = 16;
- }
const fs_builder ibld = bld.annotate(inst->annotation, inst->ir)
.group(reg_size * 8, 0)
@@ -752,7 +747,7 @@ fs_visitor::emit_spill(bblock_t *block, fs_inst *inst, fs_reg src,
for (int i = 0; i < count / reg_size; i++) {
fs_inst *spill_inst =
- ibld.emit(SHADER_OPCODE_GEN4_SCRATCH_WRITE, bld.null_reg_f(), src);
+ ibld.emit(SHADER_OPCODE_GEN4_SCRATCH_WRITE, ibld.null_reg_f(), src);
src.reg_offset += reg_size;
spill_inst->offset = spill_offset + i * reg_size * REG_SIZE;
spill_inst->mlen = 1 + reg_size; /* header, value */
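
The setup_payload_interference change above replaces the memset-to-zero of payload_last_use_ip[] with explicit -1 initialization, so payload registers that are never read can now be skipped outright when adding interference instead of being treated as if they were last used at instruction 0. A rough illustration of the sentinel pattern (plain C, illustrative only — not driver code):

#include <stdio.h>

int main(void)
{
   enum { PAYLOAD_NODES = 4 };
   int payload_last_use_ip[PAYLOAD_NODES];

   for (int i = 0; i < PAYLOAD_NODES; i++)
      payload_last_use_ip[i] = -1;      /* -1 means "never used" */

   payload_last_use_ip[2] = 7;          /* e.g. last read at ip 7 */

   for (int i = 0; i < PAYLOAD_NODES; i++) {
      if (payload_last_use_ip[i] == -1)
         continue;                      /* unused: adds no interference */
      printf("node %d interferes with values live before ip %d\n",
             i, payload_last_use_ip[i]);
   }
   return 0;
}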
diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
index 2ad7079bdf8..72e873857ce 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
@@ -167,7 +167,6 @@ fs_visitor::register_coalesce()
src_size = alloc.sizes[inst->src[0].reg];
assert(src_size <= MAX_VGRF_SIZE);
- assert(inst->src[0].width % 8 == 0);
channels_remaining = src_size;
memset(mov, 0, sizeof(mov));
@@ -196,7 +195,7 @@ fs_visitor::register_coalesce()
continue;
}
reg_to_offset[offset] = inst->dst.reg_offset;
- if (inst->src[0].width == 16)
+ if (inst->regs_written > 1)
reg_to_offset[offset + 1] = inst->dst.reg_offset + 1;
mov[offset] = inst;
channels_remaining -= inst->regs_written;
@@ -229,7 +228,6 @@ fs_visitor::register_coalesce()
continue;
progress = true;
- bool was_load_payload = inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD;
for (int i = 0; i < src_size; i++) {
if (mov[i]) {
@@ -243,22 +241,19 @@ fs_visitor::register_coalesce()
}
foreach_block_and_inst(block, fs_inst, scan_inst, cfg) {
- for (int i = 0; i < src_size; i++) {
- if (mov[i] || was_load_payload) {
- if (scan_inst->dst.file == GRF &&
- scan_inst->dst.reg == reg_from &&
- scan_inst->dst.reg_offset == i) {
- scan_inst->dst.reg = reg_to;
- scan_inst->dst.reg_offset = reg_to_offset[i];
- }
- for (int j = 0; j < scan_inst->sources; j++) {
- if (scan_inst->src[j].file == GRF &&
- scan_inst->src[j].reg == reg_from &&
- scan_inst->src[j].reg_offset == i) {
- scan_inst->src[j].reg = reg_to;
- scan_inst->src[j].reg_offset = reg_to_offset[i];
- }
- }
+ if (scan_inst->dst.file == GRF &&
+ scan_inst->dst.reg == reg_from) {
+ scan_inst->dst.reg = reg_to;
+ scan_inst->dst.reg_offset =
+ reg_to_offset[scan_inst->dst.reg_offset];
+ }
+
+ for (int j = 0; j < scan_inst->sources; j++) {
+ if (scan_inst->src[j].file == GRF &&
+ scan_inst->src[j].reg == reg_from) {
+ scan_inst->src[j].reg = reg_to;
+ scan_inst->src[j].reg_offset =
+ reg_to_offset[scan_inst->src[j].reg_offset];
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
index 8660ec08b8f..d190d8eb6b4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
@@ -174,6 +174,9 @@ fs_visitor::opt_peephole_sel()
/* Check that the MOVs are the right form. */
if (!then_mov[i]->dst.equals(else_mov[i]->dst) ||
+ then_mov[i]->exec_size != else_mov[i]->exec_size ||
+ then_mov[i]->force_sechalf != else_mov[i]->force_sechalf ||
+ then_mov[i]->force_writemask_all != else_mov[i]->force_writemask_all ||
then_mov[i]->is_partial_write() ||
else_mov[i]->is_partial_write() ||
then_mov[i]->conditional_mod != BRW_CONDITIONAL_NONE ||
@@ -192,14 +195,17 @@ fs_visitor::opt_peephole_sel()
if (movs == 0)
continue;
- const fs_builder ibld = bld.at(block, if_inst);
-
/* Emit a CMP if our IF used the embedded comparison */
- if (devinfo->gen == 6 && if_inst->conditional_mod)
+ if (devinfo->gen == 6 && if_inst->conditional_mod) {
+ const fs_builder ibld(this, block, if_inst);
ibld.CMP(ibld.null_reg_d(), if_inst->src[0], if_inst->src[1],
if_inst->conditional_mod);
+ }
for (int i = 0; i < movs; i++) {
+ const fs_builder ibld = fs_builder(this, then_block, then_mov[i])
+ .at(block, if_inst);
+
if (then_mov[i]->src[0].equals(else_mov[i]->src[0])) {
ibld.MOV(then_mov[i]->dst, then_mov[i]->src[0]);
} else {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
new file mode 100644
index 00000000000..50e0acd05f5
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
@@ -0,0 +1,1096 @@
+/*
+ * Copyright © 2013-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_fs_surface_builder.h"
+#include "brw_fs.h"
+
+using namespace brw;
+
+namespace brw {
+ namespace surface_access {
+ namespace {
+ /**
+ * Generate a logical send opcode for a surface message and return
+ * the result.
+ */
+ fs_reg
+ emit_send(const fs_builder &bld, enum opcode opcode,
+ const fs_reg &addr, const fs_reg &src, const fs_reg &surface,
+ unsigned dims, unsigned arg, unsigned rsize,
+ brw_predicate pred = BRW_PREDICATE_NONE)
+ {
+ /* Reduce the dynamically uniform surface index to a single
+ * scalar.
+ */
+ const fs_reg usurface = bld.emit_uniformize(surface);
+ const fs_reg srcs[] = {
+ addr, src, usurface, fs_reg(dims), fs_reg(arg)
+ };
+ const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, rsize);
+ fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
+
+ inst->regs_written = rsize * bld.dispatch_width() / 8;
+ inst->predicate = pred;
+ return dst;
+ }
+ }
+
+ /**
+ * Emit an untyped surface read opcode. \p dims determines the number
+ * of components of the address and \p size the number of components of
+ * the returned value.
+ */
+ fs_reg
+ emit_untyped_read(const fs_builder &bld,
+ const fs_reg &surface, const fs_reg &addr,
+ unsigned dims, unsigned size,
+ brw_predicate pred)
+ {
+ return emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
+ addr, fs_reg(), surface, dims, size, size, pred);
+ }
+
+ /**
+ * Emit an untyped surface write opcode. \p dims determines the number
+ * of components of the address and \p size the number of components of
+ * the argument.
+ */
+ void
+ emit_untyped_write(const fs_builder &bld, const fs_reg &surface,
+ const fs_reg &addr, const fs_reg &src,
+ unsigned dims, unsigned size,
+ brw_predicate pred)
+ {
+ emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL,
+ addr, src, surface, dims, size, 0, pred);
+ }
+
+ /**
+ * Emit an untyped surface atomic opcode. \p dims determines the number
+ * of components of the address and \p rsize the number of components of
+ * the returned value (either zero or one).
+ */
+ fs_reg
+ emit_untyped_atomic(const fs_builder &bld,
+ const fs_reg &surface, const fs_reg &addr,
+ const fs_reg &src0, const fs_reg &src1,
+ unsigned dims, unsigned rsize, unsigned op,
+ brw_predicate pred)
+ {
+ /* FINISHME: Factor out this frequently recurring pattern into a
+ * helper function.
+ */
+ const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
+ const fs_reg srcs[] = { src0, src1 };
+ const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, n);
+ bld.LOAD_PAYLOAD(tmp, srcs, n, 0);
+
+ return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
+ addr, tmp, surface, dims, op, rsize, pred);
+ }
+
+ /**
+ * Emit a typed surface read opcode. \p dims determines the number of
+ * components of the address and \p size the number of components of the
+ * returned value.
+ */
+ fs_reg
+ emit_typed_read(const fs_builder &bld, const fs_reg &surface,
+ const fs_reg &addr, unsigned dims, unsigned size)
+ {
+ return emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL,
+ addr, fs_reg(), surface, dims, size, size);
+ }
+
+ /**
+ * Emit a typed surface write opcode. \p dims determines the number of
+ * components of the address and \p size the number of components of the
+ * argument.
+ */
+ void
+ emit_typed_write(const fs_builder &bld, const fs_reg &surface,
+ const fs_reg &addr, const fs_reg &src,
+ unsigned dims, unsigned size)
+ {
+ emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
+ addr, src, surface, dims, size, 0);
+ }
+
+ /**
+ * Emit a typed surface atomic opcode. \p dims determines the number of
+ * components of the address and \p rsize the number of components of
+ * the returned value (either zero or one).
+ */
+ fs_reg
+ emit_typed_atomic(const fs_builder &bld, const fs_reg &surface,
+ const fs_reg &addr,
+ const fs_reg &src0, const fs_reg &src1,
+ unsigned dims, unsigned rsize, unsigned op,
+ brw_predicate pred)
+ {
+ /* FINISHME: Factor out this frequently recurring pattern into a
+ * helper function.
+ */
+ const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
+ const fs_reg srcs[] = { src0, src1 };
+ const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, n);
+ bld.LOAD_PAYLOAD(tmp, srcs, n, 0);
+
+ return emit_send(bld, SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
+ addr, tmp, surface, dims, op, rsize);
+ }
+ }
+}
+
+namespace {
+ namespace image_format_info {
+ /**
+ * Simple 4-tuple of scalars used to pass around per-color component
+ * values.
+ */
+ struct color_u {
+ color_u(unsigned x = 0) : r(x), g(x), b(x), a(x)
+ {
+ }
+
+ color_u(unsigned r, unsigned g, unsigned b, unsigned a) :
+ r(r), g(g), b(b), a(a)
+ {
+ }
+
+ unsigned
+ operator[](unsigned i) const
+ {
+ const unsigned xs[] = { r, g, b, a };
+ return xs[i];
+ }
+
+ unsigned r, g, b, a;
+ };
+
+ /**
+ * Return the per-channel bitfield widths for a given image format.
+ */
+ inline color_u
+ get_bit_widths(mesa_format format)
+ {
+ return color_u(_mesa_get_format_bits(format, GL_RED_BITS),
+ _mesa_get_format_bits(format, GL_GREEN_BITS),
+ _mesa_get_format_bits(format, GL_BLUE_BITS),
+ _mesa_get_format_bits(format, GL_ALPHA_BITS));
+ }
+
+ /**
+ * Return the per-channel bitfield shifts for a given image format.
+ */
+ inline color_u
+ get_bit_shifts(mesa_format format)
+ {
+ const color_u widths = get_bit_widths(format);
+ return color_u(0, widths.r, widths.r + widths.g,
+ widths.r + widths.g + widths.b);
+ }
+
+ /**
+ * Return true if all present components have the same bit width.
+ */
+ inline bool
+ is_homogeneous(mesa_format format)
+ {
+ const color_u widths = get_bit_widths(format);
+ return ((widths.g == 0 || widths.g == widths.r) &&
+ (widths.b == 0 || widths.b == widths.r) &&
+ (widths.a == 0 || widths.a == widths.r));
+ }
+
+ /**
+ * Return true if the format conversion boils down to a trivial copy.
+ */
+ inline bool
+ is_conversion_trivial(const brw_device_info *devinfo, mesa_format format)
+ {
+ return (get_bit_widths(format).r == 32 && is_homogeneous(format)) ||
+ format == brw_lower_mesa_image_format(devinfo, format);
+ }
+
+ /**
+ * Return true if the hardware natively supports some format with
+ * compatible bitfield layout, but possibly different data types.
+ */
+ inline bool
+ has_supported_bit_layout(const brw_device_info *devinfo,
+ mesa_format format)
+ {
+ const color_u widths = get_bit_widths(format);
+ const color_u lower_widths = get_bit_widths(
+ brw_lower_mesa_image_format(devinfo, format));
+
+ return (widths.r == lower_widths.r &&
+ widths.g == lower_widths.g &&
+ widths.b == lower_widths.b &&
+ widths.a == lower_widths.a);
+ }
+
+ /**
+ * Return true if we are required to spread individual components over
+ * several components of the format used by the hardware (RG32 and
+ * friends implemented as RGBA16UI).
+ */
+ inline bool
+ has_split_bit_layout(const brw_device_info *devinfo, mesa_format format)
+ {
+ const mesa_format lower_format =
+ brw_lower_mesa_image_format(devinfo, format);
+
+ return (_mesa_format_num_components(format) <
+ _mesa_format_num_components(lower_format));
+ }
+
+ /**
+ * Return true unless we have to fall back to untyped surface access.
+ * Fail!
+ */
+ inline bool
+ has_matching_typed_format(const brw_device_info *devinfo,
+ mesa_format format)
+ {
+ return (_mesa_get_format_bytes(format) <= 4 ||
+ (_mesa_get_format_bytes(format) <= 8 &&
+ (devinfo->gen >= 8 || devinfo->is_haswell)) ||
+ devinfo->gen >= 9);
+ }
+
+ /**
+ * Return true if the hardware returns garbage in the unused high bits
+ * of each component. This may happen on IVB because we rely on the
+ * undocumented behavior that typed reads from surfaces of the
+ * unsupported R8 and R16 formats return useful data in their least
+ * significant bits.
+ */
+ inline bool
+ has_undefined_high_bits(const brw_device_info *devinfo,
+ mesa_format format)
+ {
+ const mesa_format lower_format =
+ brw_lower_mesa_image_format(devinfo, format);
+
+ return (devinfo->gen == 7 && !devinfo->is_haswell &&
+ (lower_format == MESA_FORMAT_R_UINT16 ||
+ lower_format == MESA_FORMAT_R_UINT8));
+ }
+
+ /**
+ * Return true if the format represents values as signed integers
+ * requiring sign extension when unpacking.
+ */
+ inline bool
+ needs_sign_extension(mesa_format format)
+ {
+ return (_mesa_get_format_datatype(format) == GL_SIGNED_NORMALIZED ||
+ _mesa_get_format_datatype(format) == GL_INT);
+ }
+ }
+
+ namespace image_validity {
+ /**
+ * Check whether there is an image bound at the given index and write
+ * the comparison result to f0.0. Returns an appropriate predication
+ * mode to use on subsequent image operations.
+ */
+ brw_predicate
+ emit_surface_check(const fs_builder &bld, const fs_reg &image)
+ {
+ const brw_device_info *devinfo = bld.shader->devinfo;
+ const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET);
+
+ if (devinfo->gen == 7 && !devinfo->is_haswell) {
+ /* Check the first component of the size field to find out if the
+ * image is bound. Necessary on IVB for typed atomics because
+ * they don't seem to respect null surfaces and will happily
+ * corrupt or read random memory when no image is bound.
+ */
+ bld.CMP(bld.null_reg_ud(),
+ retype(size, BRW_REGISTER_TYPE_UD),
+ fs_reg(0), BRW_CONDITIONAL_NZ);
+
+ return BRW_PREDICATE_NORMAL;
+ } else {
+ /* More recent platforms implement compliant behavior when a null
+ * surface is bound.
+ */
+ return BRW_PREDICATE_NONE;
+ }
+ }
+
+ /**
+ * Check whether the provided coordinates are within the image bounds
+ * and write the comparison result to f0.0. Returns an appropriate
+ * predication mode to use on subsequent image operations.
+ */
+ brw_predicate
+ emit_bounds_check(const fs_builder &bld, const fs_reg &image,
+ const fs_reg &addr, unsigned dims)
+ {
+ const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET);
+
+ for (unsigned c = 0; c < dims; ++c)
+ set_predicate(c == 0 ? BRW_PREDICATE_NONE : BRW_PREDICATE_NORMAL,
+ bld.CMP(bld.null_reg_ud(),
+ offset(retype(addr, BRW_REGISTER_TYPE_UD), bld, c),
+ offset(size, bld, c),
+ BRW_CONDITIONAL_L));
+
+ return BRW_PREDICATE_NORMAL;
+ }
+ }
+
+ namespace image_coordinates {
+ /**
+ * Return the total number of coordinates needed to address a texel of
+ * the surface, which may be more than the sum of \p surf_dims and \p
+ * arr_dims if padding is required.
+ */
+ unsigned
+ num_image_coordinates(const fs_builder &bld,
+ unsigned surf_dims, unsigned arr_dims,
+ mesa_format format)
+ {
+ /* HSW in vec4 mode and our software coordinate handling for untyped
+ * reads want the array index to be at the Z component.
+ */
+ const bool array_index_at_z =
+ !image_format_info::has_matching_typed_format(
+ bld.shader->devinfo, format);
+ const unsigned zero_dims =
+ ((surf_dims == 1 && arr_dims == 1 && array_index_at_z) ? 1 : 0);
+
+ return surf_dims + zero_dims + arr_dims;
+ }
+
+ /**
+ * Transform image coordinates into the form expected by the
+ * implementation.
+ */
+ fs_reg
+ emit_image_coordinates(const fs_builder &bld, const fs_reg &addr,
+ unsigned surf_dims, unsigned arr_dims,
+ mesa_format format)
+ {
+ const unsigned dims =
+ num_image_coordinates(bld, surf_dims, arr_dims, format);
+
+ if (dims > surf_dims + arr_dims) {
+ assert(surf_dims == 1 && arr_dims == 1 && dims == 3);
+ /* The array index is required to be passed in as the Z component,
+ * insert a zero at the Y component to shift it to the right
+ * position.
+ *
+ * FINISHME: Factor out this frequently recurring pattern into a
+ * helper function.
+ */
+ const fs_reg srcs[] = { addr, fs_reg(0), offset(addr, bld, 1) };
+ const fs_reg dst = bld.vgrf(addr.type, dims);
+ bld.LOAD_PAYLOAD(dst, srcs, dims, 0);
+ return dst;
+ } else {
+ return addr;
+ }
+ }
+
+ /**
+ * Calculate the offset in memory of the texel given by \p coord.
+ *
+ * This is meant to be used with untyped surface messages to access a
+ * tiled surface, what involves taking into account the tiling and
+ * swizzling modes of the surface manually so it will hopefully not
+ * happen very often.
+ *
+ * The tiling algorithm implemented here matches either the X or Y
+ * tiling layouts supported by the hardware depending on the tiling
+ * coefficients passed to the program as uniforms. See Volume 1 Part 2
+ * Section 4.5 "Address Tiling Function" of the IVB PRM for an in-depth
+ * explanation of the hardware tiling format.
+ */
+ fs_reg
+ emit_address_calculation(const fs_builder &bld, const fs_reg &image,
+ const fs_reg &coord, unsigned dims)
+ {
+ const brw_device_info *devinfo = bld.shader->devinfo;
+ const fs_reg off = offset(image, bld, BRW_IMAGE_PARAM_OFFSET_OFFSET);
+ const fs_reg stride = offset(image, bld, BRW_IMAGE_PARAM_STRIDE_OFFSET);
+ const fs_reg tile = offset(image, bld, BRW_IMAGE_PARAM_TILING_OFFSET);
+ const fs_reg swz = offset(image, bld, BRW_IMAGE_PARAM_SWIZZLING_OFFSET);
+ const fs_reg addr = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+ const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+ const fs_reg minor = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+ const fs_reg major = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+ const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);
+
+ /* Shift the coordinates by the fixed surface offset. It may be
+ * non-zero if the image is a single slice of a higher-dimensional
+ * surface, or if a non-zero mipmap level of the surface is bound to
+ * the pipeline. The offset needs to be applied here rather than at
+ * surface state set-up time because the desired slice-level may
+ * start mid-tile, so simply shifting the surface base address
+ * wouldn't give a well-formed tiled surface in the general case.
+ */
+ for (unsigned c = 0; c < 2; ++c)
+ bld.ADD(offset(addr, bld, c), offset(off, bld, c),
+ (c < dims ?
+ offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, c) :
+ fs_reg(0)));
+
+ /* The layout of 3-D textures in memory is sort-of like a tiling
+ * format. At each miplevel, the slices are arranged in rows of
+ * 2^level slices per row. The slice row is stored in tmp.y and
+ * the slice within the row is stored in tmp.x.
+ *
+ * The layout of 2-D array textures and cubemaps is much simpler:
+ * Depending on whether the ARYSPC_LOD0 layout is in use, it will be
+ * stored in memory as an array of slices, each one being a 2-D
+ * arrangement of miplevels, or as a 2D arrangement of miplevels,
+ * each one being an array of slices. In either case the separation
+ * between slices of the same LOD is equal to the qpitch value
+ * provided as stride.w.
+ *
+ * This code can be made to handle both 2-D array and 3-D textures
+ * by passing in the miplevel as tile.z for 3-D textures and 0 in
+ * tile.z for 2-D array textures.
+ *
+ * See Volume 1 Part 1 of the Gen7 PRM, sections 6.18.4.7 "Surface
+ * Arrays" and 6.18.6 "3D Surfaces" for a more extensive discussion
+ * of the hardware 3D texture and 2D array layouts.
+ */
+ if (dims > 2) {
+ /* Decompose z into a major (tmp.y) and a minor (tmp.x)
+ * index.
+ */
+ bld.BFE(offset(tmp, bld, 0), offset(tile, bld, 2), fs_reg(0),
+ offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2));
+ bld.SHR(offset(tmp, bld, 1),
+ offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2),
+ offset(tile, bld, 2));
+
+ /* Take into account the horizontal (tmp.x) and vertical (tmp.y)
+ * slice offset.
+ */
+ for (unsigned c = 0; c < 2; ++c) {
+ bld.MUL(offset(tmp, bld, c),
+ offset(stride, bld, 2 + c), offset(tmp, bld, c));
+ bld.ADD(offset(addr, bld, c),
+ offset(addr, bld, c), offset(tmp, bld, c));
+ }
+ }
+
+ if (dims > 1) {
+ /* Calculate the major/minor x and y indices. In order to
+ * accommodate both X and Y tiling, the Y-major tiling format is
+ * treated as being a bunch of narrow X-tiles placed next to each
+ * other. This means that the tile width for Y-tiling is actually
+ * the width of one sub-column of the Y-major tile where each 4K
+ * tile has 8 512B sub-columns.
+ *
+ * The major Y value is the row of tiles in which the pixel lives.
+ * The major X value is the tile sub-column in which the pixel
+ * lives; for X tiling, this is the same as the tile column, for Y
+ * tiling, each tile has 8 sub-columns. The minor X and Y indices
+ * are the position within the sub-column.
+ */
+ for (unsigned c = 0; c < 2; ++c) {
+ /* Calculate the minor x and y indices. */
+ bld.BFE(offset(minor, bld, c), offset(tile, bld, c),
+ fs_reg(0), offset(addr, bld, c));
+
+ /* Calculate the major x and y indices. */
+ bld.SHR(offset(major, bld, c),
+ offset(addr, bld, c), offset(tile, bld, c));
+ }
+
+ /* Calculate the texel index from the start of the tile row and
+ * the vertical coordinate of the row.
+ * Equivalent to:
+ * tmp.x = (major.x << tile.y << tile.x) +
+ * (minor.y << tile.x) + minor.x
+ * tmp.y = major.y << tile.y
+ */
+ bld.SHL(tmp, major, offset(tile, bld, 1));
+ bld.ADD(tmp, tmp, offset(minor, bld, 1));
+ bld.SHL(tmp, tmp, offset(tile, bld, 0));
+ bld.ADD(tmp, tmp, minor);
+ bld.SHL(offset(tmp, bld, 1),
+ offset(major, bld, 1), offset(tile, bld, 1));
+
+ /* Add it to the start of the tile row. */
+ bld.MUL(offset(tmp, bld, 1),
+ offset(tmp, bld, 1), offset(stride, bld, 1));
+ bld.ADD(tmp, tmp, offset(tmp, bld, 1));
+
+ /* Multiply by the Bpp value. */
+ bld.MUL(dst, tmp, stride);
+
+ if (devinfo->gen < 8 && !devinfo->is_baytrail) {
+ /* Take into account the two dynamically specified shifts.
+ * Both are needed to implement swizzling of X-tiled
+ * surfaces. For Y-tiled surfaces only one bit needs to be
+ * XOR-ed with bit 6 of the memory address, so a swz value of
+ * 0xff (actually interpreted as 31 by the hardware) will be
+ * provided to cause the relevant bit of tmp.y to be zero and
+ * turn the first XOR into the identity. For linear surfaces
+ * or platforms lacking address swizzling both shifts will be
+ * 0xff causing the relevant bits of both tmp.x and .y to be
+ * zero, which effectively disables swizzling.
+ */
+ for (unsigned c = 0; c < 2; ++c)
+ bld.SHR(offset(tmp, bld, c), dst, offset(swz, bld, c));
+
+ /* XOR tmp.x and tmp.y with bit 6 of the memory address. */
+ bld.XOR(tmp, tmp, offset(tmp, bld, 1));
+ bld.AND(tmp, tmp, fs_reg(1 << 6));
+ bld.XOR(dst, dst, tmp);
+ }
+
+ } else {
+ /* Multiply by the Bpp/stride value. Note that the addr.y may be
+ * non-zero even if the image is one-dimensional because a
+ * vertical offset may have been applied above to select a
+ * non-zero slice or level of a higher-dimensional texture.
+ */
+ bld.MUL(offset(addr, bld, 1),
+ offset(addr, bld, 1), offset(stride, bld, 1));
+ bld.ADD(addr, addr, offset(addr, bld, 1));
+ bld.MUL(dst, addr, stride);
+ }
+
+ return dst;
+ }
+ }
+
+ namespace image_format_conversion {
+ using image_format_info::color_u;
+
+ namespace {
+ /**
+ * Maximum representable value in an unsigned integer with the given
+ * number of bits.
+ */
+ inline unsigned
+ scale(unsigned n)
+ {
+ return (1 << n) - 1;
+ }
+ }
+
+ /**
+ * Pack the vector \p src in a bitfield given the per-component bit
+ * shifts and widths. Note that bitfield components are not allowed to
+ * cross 32-bit boundaries.
+ */
+ fs_reg
+ emit_pack(const fs_builder &bld, const fs_reg &src,
+ const color_u &shifts, const color_u &widths)
+ {
+ const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
+ bool seen[4] = {};
+
+ for (unsigned c = 0; c < 4; ++c) {
+ if (widths[c]) {
+ const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
+
+ /* Shift each component left to the correct bitfield position. */
+ bld.SHL(tmp, offset(src, bld, c), fs_reg(shifts[c] % 32));
+
+ /* Add everything up. */
+ if (seen[shifts[c] / 32]) {
+ bld.OR(offset(dst, bld, shifts[c] / 32),
+ offset(dst, bld, shifts[c] / 32), tmp);
+ } else {
+ bld.MOV(offset(dst, bld, shifts[c] / 32), tmp);
+ seen[shifts[c] / 32] = true;
+ }
+ }
+ }
+
+ return dst;
+ }
+
+ /**
+ * Unpack a vector from the bitfield \p src given the per-component bit
+ * shifts and widths. Note that bitfield components are not allowed to
+ * cross 32-bit boundaries.
+ */
+ fs_reg
+ emit_unpack(const fs_builder &bld, const fs_reg &src,
+ const color_u &shifts, const color_u &widths)
+ {
+ const fs_reg dst = bld.vgrf(src.type, 4);
+
+ for (unsigned c = 0; c < 4; ++c) {
+ if (widths[c]) {
+ /* Shift left to discard the most significant bits. */
+ bld.SHL(offset(dst, bld, c),
+ offset(src, bld, shifts[c] / 32),
+ fs_reg(32 - shifts[c] % 32 - widths[c]));
+
+ /* Shift back to the least significant bits using an arithmetic
+ * shift to get sign extension on signed types.
+ */
+ bld.ASR(offset(dst, bld, c),
+ offset(dst, bld, c), fs_reg(32 - widths[c]));
+ }
+ }
+
+ return dst;
+ }
+
+ /**
+ * Convert an integer vector into another integer vector of the
+ * specified bit widths, properly handling overflow.
+ */
+ fs_reg
+ emit_convert_to_integer(const fs_builder &bld, const fs_reg &src,
+ const color_u &widths, bool is_signed)
+ {
+ const unsigned s = (is_signed ? 1 : 0);
+ const fs_reg dst = bld.vgrf(
+ is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4);
+ assert(src.type == dst.type);
+
+ for (unsigned c = 0; c < 4; ++c) {
+ if (widths[c]) {
+ /* Clamp to the maximum value. */
+ bld.emit_minmax(offset(dst, bld, c), offset(src, bld, c),
+ fs_reg((int)scale(widths[c] - s)),
+ BRW_CONDITIONAL_L);
+
+ /* Clamp to the minimum value. */
+ if (is_signed)
+ bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c),
+ fs_reg(-(int)scale(widths[c] - s) - 1),
+ BRW_CONDITIONAL_G);
+ }
+ }
+
+ return dst;
+ }
+
+ /**
+ * Convert a normalized fixed-point vector of the specified signedness
+ * and bit widths into a floating point vector.
+ */
+ fs_reg
+ emit_convert_from_scaled(const fs_builder &bld, const fs_reg &src,
+ const color_u &widths, bool is_signed)
+ {
+ const unsigned s = (is_signed ? 1 : 0);
+ const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
+
+ for (unsigned c = 0; c < 4; ++c) {
+ if (widths[c]) {
+ /* Convert to float. */
+ bld.MOV(offset(dst, bld, c), offset(src, bld, c));
+
+ /* Divide by the normalization constants. */
+ bld.MUL(offset(dst, bld, c), offset(dst, bld, c),
+ fs_reg(1.0f / scale(widths[c] - s)));
+
+ /* Clamp to the minimum value. */
+ if (is_signed)
+ bld.emit_minmax(offset(dst, bld, c),
+ offset(dst, bld, c), fs_reg(-1.0f),
+ BRW_CONDITIONAL_G);
+ }
+ }
+ return dst;
+ }
+
+ /**
+ * Convert a floating-point vector into a normalized fixed-point vector
+ * of the specified signedness and bit widths.
+ */
+ fs_reg
+ emit_convert_to_scaled(const fs_builder &bld, const fs_reg &src,
+ const color_u &widths, bool is_signed)
+ {
+ const unsigned s = (is_signed ? 1 : 0);
+ const fs_reg dst = bld.vgrf(
+ is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4);
+ const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+ for (unsigned c = 0; c < 4; ++c) {
+ if (widths[c]) {
+ /* Clamp the normalized floating-point argument. */
+ if (is_signed) {
+ bld.emit_minmax(offset(fdst, bld, c), offset(src, bld, c),
+ fs_reg(-1.0f), BRW_CONDITIONAL_G);
+
+ bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
+ fs_reg(1.0f), BRW_CONDITIONAL_L);
+ } else {
+ set_saturate(true, bld.MOV(offset(fdst, bld, c),
+ offset(src, bld, c)));
+ }
+
+ /* Multiply by the normalization constants. */
+ bld.MUL(offset(fdst, bld, c), offset(fdst, bld, c),
+ fs_reg((float)scale(widths[c] - s)));
+
+ /* Convert to integer. */
+ bld.RNDE(offset(fdst, bld, c), offset(fdst, bld, c));
+ bld.MOV(offset(dst, bld, c), offset(fdst, bld, c));
+ }
+ }
+
+ return dst;
+ }
+
+ /**
+ * Convert a floating point vector of the specified bit widths into a
+ * 32-bit floating point vector.
+ */
+ fs_reg
+ emit_convert_from_float(const fs_builder &bld, const fs_reg &src,
+ const color_u &widths)
+ {
+ const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
+ const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+ for (unsigned c = 0; c < 4; ++c) {
+ if (widths[c]) {
+ bld.MOV(offset(dst, bld, c), offset(src, bld, c));
+
+ /* Extend 10-bit and 11-bit floating point numbers to 15 bits.
+ * This works because they have a 5-bit exponent just like the
+ * 16-bit floating point format, and they have no sign bit.
+ */
+ if (widths[c] < 16)
+ bld.SHL(offset(dst, bld, c),
+ offset(dst, bld, c), fs_reg(15 - widths[c]));
+
+ /* Convert to 32-bit floating point. */
+ bld.F16TO32(offset(fdst, bld, c), offset(dst, bld, c));
+ }
+ }
+
+ return fdst;
+ }
+
+ /**
+ * Convert a vector into a floating point vector of the specified bit
+ * widths.
+ */
+ fs_reg
+ emit_convert_to_float(const fs_builder &bld, const fs_reg &src,
+ const color_u &widths)
+ {
+ const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
+ const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+ for (unsigned c = 0; c < 4; ++c) {
+ if (widths[c]) {
+ bld.MOV(offset(fdst, bld, c), offset(src, bld, c));
+
+ /* Clamp to the minimum value. */
+ if (widths[c] < 16)
+ bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
+ fs_reg(0.0f), BRW_CONDITIONAL_G);
+
+ /* Convert to 16-bit floating-point. */
+ bld.F32TO16(offset(dst, bld, c), offset(fdst, bld, c));
+
+ /* Discard the least significant bits to get floating point
+ * numbers of the requested width. This works because the
+ * 10-bit and 11-bit floating point formats have a 5-bit
+ * exponent just like the 16-bit format, and they have no sign
+ * bit.
+ */
+ if (widths[c] < 16)
+ bld.SHR(offset(dst, bld, c), offset(dst, bld, c),
+ fs_reg(15 - widths[c]));
+ }
+ }
+
+ return dst;
+ }
+
+ /**
+ * Fill missing components of a vector with 0, 0, 0, 1.
+ */
+ fs_reg
+ emit_pad(const fs_builder &bld, const fs_reg &src,
+ const color_u &widths)
+ {
+ const fs_reg dst = bld.vgrf(src.type, 4);
+ const unsigned pad[] = { 0, 0, 0, 1 };
+
+ for (unsigned c = 0; c < 4; ++c)
+ bld.MOV(offset(dst, bld, c),
+ widths[c] ? offset(src, bld, c) : fs_reg(pad[c]));
+
+ return dst;
+ }
+ }
+}
+
+namespace brw {
+ namespace image_access {
+ /**
+ * Load a vector from a surface of the given format and dimensionality
+ * at the given coordinates. \p surf_dims and \p arr_dims give the
+ * number of non-array and array coordinates of the image respectively.
+ */
+ fs_reg
+ emit_image_load(const fs_builder &bld,
+ const fs_reg &image, const fs_reg &addr,
+ unsigned surf_dims, unsigned arr_dims,
+ mesa_format format)
+ {
+ using namespace image_format_info;
+ using namespace image_format_conversion;
+ using namespace image_validity;
+ using namespace image_coordinates;
+ using namespace surface_access;
+ const brw_device_info *devinfo = bld.shader->devinfo;
+ const mesa_format lower_format =
+ brw_lower_mesa_image_format(devinfo, format);
+ fs_reg tmp;
+
+ /* Transform the image coordinates into actual surface coordinates. */
+ const fs_reg saddr =
+ emit_image_coordinates(bld, addr, surf_dims, arr_dims, format);
+ const unsigned dims =
+ num_image_coordinates(bld, surf_dims, arr_dims, format);
+
+ if (has_matching_typed_format(devinfo, format)) {
+ /* Hopefully we get here most of the time... */
+ tmp = emit_typed_read(bld, image, saddr, dims,
+ _mesa_format_num_components(lower_format));
+ } else {
+ /* Untyped surface reads return 32 bits of the surface per
+ * component, without any sort of unpacking or type conversion,
+ */
+ const unsigned size = _mesa_get_format_bytes(format) / 4;
+
+ /* they don't properly handle out of bounds access, so we have to
+ * check manually if the coordinates are valid and predicate the
+ * surface read on the result,
+ */
+ const brw_predicate pred =
+ emit_bounds_check(bld, image, saddr, dims);
+
+ /* and they don't know about surface coordinates, so we need to
+ * convert them to a raw memory offset.
+ */
+ const fs_reg laddr = emit_address_calculation(bld, image, saddr, dims);
+
+ tmp = emit_untyped_read(bld, image, laddr, 1, size, pred);
+
+ /* An out of bounds surface access should give zero as result. */
+ for (unsigned c = 0; c < 4; ++c)
+ set_predicate(pred, bld.SEL(offset(tmp, bld, c),
+ offset(tmp, bld, c), fs_reg(0)));
+ }
+
+ /* Set the register type to D instead of UD if the data type is
+ * represented as a signed integer in memory so that sign extension
+ * is handled correctly by unpack.
+ */
+ if (needs_sign_extension(format))
+ tmp = retype(tmp, BRW_REGISTER_TYPE_D);
+
+ if (!has_supported_bit_layout(devinfo, format)) {
+ /* Unpack individual vector components from the bitfield if the
+ * hardware is unable to do it for us.
+ */
+ if (has_split_bit_layout(devinfo, format))
+ tmp = emit_pack(bld, tmp, get_bit_shifts(lower_format),
+ get_bit_widths(lower_format));
+ else
+ tmp = emit_unpack(bld, tmp, get_bit_shifts(format),
+ get_bit_widths(format));
+
+ } else if ((needs_sign_extension(format) &&
+ !is_conversion_trivial(devinfo, format)) ||
+ has_undefined_high_bits(devinfo, format)) {
+ /* Perform a trivial unpack even though the bit layout matches in
+ * order to get the most significant bits of each component
+ * initialized properly.
+ */
+ tmp = emit_unpack(bld, tmp, color_u(0, 32, 64, 96),
+ get_bit_widths(format));
+ }
+
+ if (!_mesa_is_format_integer(format)) {
+ if (is_conversion_trivial(devinfo, format)) {
+ /* Just need to cast the vector to the target type. */
+ tmp = retype(tmp, BRW_REGISTER_TYPE_F);
+ } else {
+ /* Do the right sort of type conversion to float. */
+ if (_mesa_get_format_datatype(format) == GL_FLOAT)
+ tmp = emit_convert_from_float(
+ bld, tmp, get_bit_widths(format));
+ else
+ tmp = emit_convert_from_scaled(
+ bld, tmp, get_bit_widths(format),
+ _mesa_is_format_signed(format));
+ }
+ }
+
+ /* Initialize missing components of the result. */
+ return emit_pad(bld, tmp, get_bit_widths(format));
+ }
+
+ /**
+ * Store a vector in a surface of the given format and dimensionality at
+ * the given coordinates. \p surf_dims and \p arr_dims give the number
+ * of non-array and array coordinates of the image respectively.
+ */
+ void
+ emit_image_store(const fs_builder &bld, const fs_reg &image,
+ const fs_reg &addr, const fs_reg &src,
+ unsigned surf_dims, unsigned arr_dims,
+ mesa_format format)
+ {
+ using namespace image_format_info;
+ using namespace image_format_conversion;
+ using namespace image_validity;
+ using namespace image_coordinates;
+ using namespace surface_access;
+ const brw_device_info *devinfo = bld.shader->devinfo;
+
+ /* Transform the image coordinates into actual surface coordinates. */
+ const fs_reg saddr =
+ emit_image_coordinates(bld, addr, surf_dims, arr_dims, format);
+ const unsigned dims =
+ num_image_coordinates(bld, surf_dims, arr_dims, format);
+
+ if (format == MESA_FORMAT_NONE) {
+ /* We don't know what the format is, but that's fine because it
+ * implies write-only access, and typed surface writes are always
+ * able to take care of type conversion and packing for us.
+ */
+ emit_typed_write(bld, image, saddr, src, dims, 4);
+
+ } else {
+ const mesa_format lower_format =
+ brw_lower_mesa_image_format(devinfo, format);
+ fs_reg tmp = src;
+
+ if (!is_conversion_trivial(devinfo, format)) {
+ /* Do the right sort of type conversion. */
+ if (_mesa_get_format_datatype(format) == GL_FLOAT)
+ tmp = emit_convert_to_float(bld, tmp, get_bit_widths(format));
+
+ else if (_mesa_is_format_integer(format))
+ tmp = emit_convert_to_integer(bld, tmp, get_bit_widths(format),
+ _mesa_is_format_signed(format));
+
+ else
+ tmp = emit_convert_to_scaled(bld, tmp, get_bit_widths(format),
+ _mesa_is_format_signed(format));
+ }
+
+ /* We're down to bit manipulation at this point. */
+ tmp = retype(tmp, BRW_REGISTER_TYPE_UD);
+
+ if (!has_supported_bit_layout(devinfo, format)) {
+ /* Pack the vector components into a bitfield if the hardware
+ * is unable to do it for us.
+ */
+ if (has_split_bit_layout(devinfo, format))
+ tmp = emit_unpack(bld, tmp, get_bit_shifts(lower_format),
+ get_bit_widths(lower_format));
+
+ else
+ tmp = emit_pack(bld, tmp, get_bit_shifts(format),
+ get_bit_widths(format));
+ }
+
+ if (has_matching_typed_format(devinfo, format)) {
+ /* Hopefully we get here most of the time... */
+ emit_typed_write(bld, image, saddr, tmp, dims,
+ _mesa_format_num_components(lower_format));
+
+ } else {
+ /* Untyped surface writes store 32 bits per component to the
+ * surface, without any sort of packing or type conversion,
+ */
+ const unsigned size = _mesa_get_format_bytes(format) / 4;
+
+ /* they don't properly handle out-of-bounds access, so we have
+ * to check manually whether the coordinates are valid and predicate
+ * the surface write on the result,
+ */
+ const brw_predicate pred =
+ emit_bounds_check(bld, image, saddr, dims);
+
+ /* and, phew, since they don't know about surface coordinates, we
+ * need to convert them to a raw memory offset.
+ */
+ const fs_reg laddr = emit_address_calculation(
+ bld, image, saddr, dims);
+
+ emit_untyped_write(bld, image, laddr, tmp, 1, size, pred);
+ }
+ }
+ }
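For the store path just above, a comparable host-side sketch of the clamp, scale and pack sequence that emit_convert_to_scaled() and emit_pack() produce as shader code; the shifts and widths are again a hypothetical layout rather than a real format description.

#include <algorithm>
#include <array>
#include <cstdint>
#include <cstdio>

/* Host-side sketch of the store direction: convert each float component to
 * an unsigned normalized integer and OR it into a single dword.
 */
static uint32_t
pack_unorm(const std::array<float, 4> &rgba,
           const std::array<unsigned, 4> &shifts,
           const std::array<unsigned, 4> &widths)
{
   uint32_t dword = 0;

   for (unsigned c = 0; c < 4; ++c) {
      if (widths[c] == 0)
         continue;

      const uint32_t mask = (1u << widths[c]) - 1;

      /* Clamp to [0, 1], scale to [0, 2^n - 1] and round to nearest. */
      const float clamped = std::min(std::max(rgba[c], 0.0f), 1.0f);
      const uint32_t bits = (uint32_t)(clamped * (float)mask + 0.5f);

      dword |= (bits & mask) << shifts[c];
   }

   return dword;
}

int main()
{
   const uint32_t packed =
      pack_unorm({0.25f, 0.5f, 1.0f, 0.0f}, {0, 8, 16, 24}, {8, 8, 8, 8});

   printf("0x%08x\n", packed);
   return 0;
}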
+
+ /**
+ * Perform an atomic read-modify-write operation in a surface of the
+ * given dimensionality at the given coordinates. \p surf_dims and \p
+ * arr_dims give the number of non-array and array coordinates of the
+ * image respectively. Main building block of the imageAtomic GLSL
+ * built-ins.
+ */
+ fs_reg
+ emit_image_atomic(const fs_builder &bld,
+ const fs_reg &image, const fs_reg &addr,
+ const fs_reg &src0, const fs_reg &src1,
+ unsigned surf_dims, unsigned arr_dims,
+ unsigned rsize, unsigned op)
+ {
+ using namespace image_validity;
+ using namespace image_coordinates;
+ using namespace surface_access;
+ /* Avoid performing an atomic operation on an unbound surface. */
+ const brw_predicate pred = emit_surface_check(bld, image);
+
+ /* Transform the image coordinates into actual surface coordinates. */
+ const fs_reg saddr =
+ emit_image_coordinates(bld, addr, surf_dims, arr_dims,
+ MESA_FORMAT_R_UINT32);
+ const unsigned dims =
+ num_image_coordinates(bld, surf_dims, arr_dims,
+ MESA_FORMAT_R_UINT32);
+
+ /* Thankfully we can do without untyped atomics here. */
+ const fs_reg tmp = emit_typed_atomic(bld, image, saddr, src0, src1,
+ dims, rsize, op, pred);
+
+ /* An unbound surface access should give zero as the result. */
+ if (rsize)
+ set_predicate(pred, bld.SEL(tmp, tmp, fs_reg(0)));
+
+ return tmp;
+ }
+ }
+}
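The surface-check and bounds-check predicates above all feed the same pattern: a predicated SEL that forces invalid accesses to read back as zero rather than whatever the message happened to return. A scalar model, purely illustrative:

#include <cstdint>
#include <cstdio>

/* Scalar model of the predicated SEL used by emit_image_load() and
 * emit_image_atomic(): when the surface or bounds check fails, the result
 * of the access is replaced with zero.
 */
static uint32_t
predicated_result(bool access_valid, uint32_t message_result)
{
   /* SEL with the check predicate: valid -> keep result, invalid -> 0. */
   return access_valid ? message_result : 0u;
}

int main()
{
   printf("%u %u\n",
          predicated_result(true, 0xdeadbeefu),
          predicated_result(false, 0xdeadbeefu));
   return 0;
}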
diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h
new file mode 100644
index 00000000000..a3dd839955b
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h
@@ -0,0 +1,89 @@
+/* -*- c++ -*- */
+/*
+ * Copyright © 2013-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_FS_SURFACE_BUILDER_H
+#define BRW_FS_SURFACE_BUILDER_H
+
+#include "brw_fs_builder.h"
+#include "brw_context.h"
+
+namespace brw {
+ namespace surface_access {
+ fs_reg
+ emit_untyped_read(const fs_builder &bld,
+ const fs_reg &surface, const fs_reg &addr,
+ unsigned dims, unsigned size,
+ brw_predicate pred = BRW_PREDICATE_NONE);
+
+ void
+ emit_untyped_write(const fs_builder &bld, const fs_reg &surface,
+ const fs_reg &addr, const fs_reg &src,
+ unsigned dims, unsigned size,
+ brw_predicate pred = BRW_PREDICATE_NONE);
+
+ fs_reg
+ emit_untyped_atomic(const fs_builder &bld,
+ const fs_reg &surface, const fs_reg &addr,
+ const fs_reg &src0, const fs_reg &src1,
+ unsigned dims, unsigned rsize, unsigned op,
+ brw_predicate pred = BRW_PREDICATE_NONE);
+
+ fs_reg
+ emit_typed_read(const fs_builder &bld, const fs_reg &surface,
+ const fs_reg &addr, unsigned dims, unsigned size);
+
+ void
+ emit_typed_write(const fs_builder &bld, const fs_reg &surface,
+ const fs_reg &addr, const fs_reg &src,
+ unsigned dims, unsigned size);
+
+ fs_reg
+ emit_typed_atomic(const fs_builder &bld, const fs_reg &surface,
+ const fs_reg &addr,
+ const fs_reg &src0, const fs_reg &src1,
+ unsigned dims, unsigned rsize, unsigned op,
+ brw_predicate pred = BRW_PREDICATE_NONE);
+ }
+
+ namespace image_access {
+ fs_reg
+ emit_image_load(const fs_builder &bld,
+ const fs_reg &image, const fs_reg &addr,
+ unsigned surf_dims, unsigned arr_dims,
+ mesa_format format);
+
+ void
+ emit_image_store(const fs_builder &bld, const fs_reg &image,
+ const fs_reg &addr, const fs_reg &src,
+ unsigned surf_dims, unsigned arr_dims,
+ mesa_format format);
+ fs_reg
+ emit_image_atomic(const fs_builder &bld,
+ const fs_reg &image, const fs_reg &addr,
+ const fs_reg &src0, const fs_reg &src1,
+ unsigned surf_dims, unsigned arr_dims,
+ unsigned rsize, unsigned op);
+ }
+}
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
index 01d3a569858..96d4f375da2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
@@ -173,7 +173,7 @@ ir_vector_reference_visitor::visit_enter(ir_assignment *ir)
return visit_continue_with_parent;
}
if (ir->lhs->as_dereference_variable() &&
- is_power_of_two(ir->write_mask) &&
+ _mesa_is_pow_two(ir->write_mask) &&
!ir->condition) {
/* If we're writing just a channel, then channel-splitting the LHS is OK.
*/
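For context on the check in this hunk: the write mask carries one bit per vector channel, so writing a single channel means exactly one bit is set, which for a non-zero mask is the same as being a power of two. A standalone sketch with my own helper, not the actual _mesa_is_pow_two() macro:

#include <cassert>

/* A write mask carries one bit per channel (x = 1, y = 2, z = 4, w = 8),
 * so a single-channel write has exactly one bit set.
 */
static bool
writes_single_channel(unsigned write_mask)
{
   return write_mask != 0 && (write_mask & (write_mask - 1)) == 0;
}

int main()
{
   assert(writes_single_channel(0x1));   /* .x       */
   assert(writes_single_channel(0x8));   /* .w       */
   assert(!writes_single_channel(0x3));  /* .xy      */
   assert(!writes_single_channel(0x0));  /* no write */
   return 0;
}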
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 9a4bad6bcf5..111db8c4323 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -77,612 +77,6 @@ fs_visitor::emit_vs_system_value(int location)
return reg;
}
-fs_inst *
-fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
- fs_reg coordinate, int coord_components,
- fs_reg shadow_c,
- fs_reg lod, fs_reg dPdy, int grad_components,
- uint32_t sampler)
-{
- int mlen;
- int base_mrf = 1;
- bool simd16 = false;
- fs_reg orig_dst;
-
- /* g0 header. */
- mlen = 1;
-
- if (shadow_c.file != BAD_FILE) {
- for (int i = 0; i < coord_components; i++) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate);
- coordinate = offset(coordinate, 1);
- }
-
- /* gen4's SIMD8 sampler always has the slots for u,v,r present.
- * the unused slots must be zeroed.
- */
- for (int i = coord_components; i < 3; i++) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f));
- }
- mlen += 3;
-
- if (op == ir_tex) {
- /* There's no plain shadow compare message, so we use shadow
- * compare with a bias of 0.0.
- */
- bld.MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f));
- mlen++;
- } else if (op == ir_txb || op == ir_txl) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen), lod);
- mlen++;
- } else {
- unreachable("Should not get here.");
- }
-
- bld.MOV(fs_reg(MRF, base_mrf + mlen), shadow_c);
- mlen++;
- } else if (op == ir_tex) {
- for (int i = 0; i < coord_components; i++) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate);
- coordinate = offset(coordinate, 1);
- }
- /* zero the others. */
- for (int i = coord_components; i<3; i++) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f));
- }
- /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
- mlen += 3;
- } else if (op == ir_txd) {
- fs_reg &dPdx = lod;
-
- for (int i = 0; i < coord_components; i++) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate);
- coordinate = offset(coordinate, 1);
- }
- /* the slots for u and v are always present, but r is optional */
- mlen += MAX2(coord_components, 2);
-
- /* P = u, v, r
- * dPdx = dudx, dvdx, drdx
- * dPdy = dudy, dvdy, drdy
- *
- * 1-arg: Does not exist.
- *
- * 2-arg: dudx dvdx dudy dvdy
- * dPdx.x dPdx.y dPdy.x dPdy.y
- * m4 m5 m6 m7
- *
- * 3-arg: dudx dvdx drdx dudy dvdy drdy
- * dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z
- * m5 m6 m7 m8 m9 m10
- */
- for (int i = 0; i < grad_components; i++) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen), dPdx);
- dPdx = offset(dPdx, 1);
- }
- mlen += MAX2(grad_components, 2);
-
- for (int i = 0; i < grad_components; i++) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen), dPdy);
- dPdy = offset(dPdy, 1);
- }
- mlen += MAX2(grad_components, 2);
- } else if (op == ir_txs) {
- /* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */
- simd16 = true;
- bld.MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod);
- mlen += 2;
- } else {
- /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod
- * instructions. We'll need to do SIMD16 here.
- */
- simd16 = true;
- assert(op == ir_txb || op == ir_txl || op == ir_txf);
-
- for (int i = 0; i < coord_components; i++) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type),
- coordinate);
- coordinate = offset(coordinate, 1);
- }
-
- /* Initialize the rest of u/v/r with 0.0. Empirically, this seems to
- * be necessary for TXF (ld), but seems wise to do for all messages.
- */
- for (int i = coord_components; i < 3; i++) {
- bld.MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f));
- }
-
- /* lod/bias appears after u/v/r. */
- mlen += 6;
-
- bld.MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod);
- mlen++;
-
- /* The unused upper half. */
- mlen++;
- }
-
- if (simd16) {
- /* Now, since we're doing simd16, the return is 2 interleaved
- * vec4s where the odd-indexed ones are junk. We'll need to move
- * this weirdness around to the expected layout.
- */
- orig_dst = dst;
- dst = fs_reg(GRF, alloc.allocate(8), orig_dst.type);
- }
-
- enum opcode opcode;
- switch (op) {
- case ir_tex: opcode = SHADER_OPCODE_TEX; break;
- case ir_txb: opcode = FS_OPCODE_TXB; break;
- case ir_txl: opcode = SHADER_OPCODE_TXL; break;
- case ir_txd: opcode = SHADER_OPCODE_TXD; break;
- case ir_txs: opcode = SHADER_OPCODE_TXS; break;
- case ir_txf: opcode = SHADER_OPCODE_TXF; break;
- default:
- unreachable("not reached");
- }
-
- fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler));
- inst->base_mrf = base_mrf;
- inst->mlen = mlen;
- inst->header_size = 1;
- inst->regs_written = simd16 ? 8 : 4;
-
- if (simd16) {
- for (int i = 0; i < 4; i++) {
- bld.MOV(orig_dst, dst);
- orig_dst = offset(orig_dst, 1);
- dst = offset(dst, 2);
- }
- }
-
- return inst;
-}
-
-fs_inst *
-fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst,
- fs_reg coordinate, int vector_elements,
- fs_reg shadow_c, fs_reg lod,
- uint32_t sampler)
-{
- fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F, dispatch_width);
- bool has_lod = op == ir_txl || op == ir_txb || op == ir_txf;
-
- if (has_lod && shadow_c.file != BAD_FILE)
- no16("TXB and TXL with shadow comparison unsupported in SIMD16.");
-
- if (op == ir_txd)
- no16("textureGrad unsupported in SIMD16.");
-
- /* Copy the coordinates. */
- for (int i = 0; i < vector_elements; i++) {
- bld.MOV(retype(offset(message, i), coordinate.type), coordinate);
- coordinate = offset(coordinate, 1);
- }
-
- fs_reg msg_end = offset(message, vector_elements);
-
- /* Messages other than sample and ld require all three components */
- if (has_lod || shadow_c.file != BAD_FILE) {
- for (int i = vector_elements; i < 3; i++) {
- bld.MOV(offset(message, i), fs_reg(0.0f));
- }
- }
-
- if (has_lod) {
- fs_reg msg_lod = retype(offset(message, 3), op == ir_txf ?
- BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F);
- bld.MOV(msg_lod, lod);
- msg_end = offset(msg_lod, 1);
- }
-
- if (shadow_c.file != BAD_FILE) {
- fs_reg msg_ref = offset(message, 3 + has_lod);
- bld.MOV(msg_ref, shadow_c);
- msg_end = offset(msg_ref, 1);
- }
-
- enum opcode opcode;
- switch (op) {
- case ir_tex: opcode = SHADER_OPCODE_TEX; break;
- case ir_txb: opcode = FS_OPCODE_TXB; break;
- case ir_txd: opcode = SHADER_OPCODE_TXD; break;
- case ir_txl: opcode = SHADER_OPCODE_TXL; break;
- case ir_txs: opcode = SHADER_OPCODE_TXS; break;
- case ir_txf: opcode = SHADER_OPCODE_TXF; break;
- default: unreachable("not reached");
- }
-
- fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler));
- inst->base_mrf = message.reg - 1;
- inst->mlen = msg_end.reg - inst->base_mrf;
- inst->header_size = 1;
- inst->regs_written = 8;
-
- return inst;
-}
-
-/* gen5's sampler has slots for u, v, r, array index, then optional
- * parameters like shadow comparitor or LOD bias. If optional
- * parameters aren't present, those base slots are optional and don't
- * need to be included in the message.
- *
- * We don't fill in the unnecessary slots regardless, which may look
- * surprising in the disassembly.
- */
-fs_inst *
-fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst,
- fs_reg coordinate, int vector_elements,
- fs_reg shadow_c,
- fs_reg lod, fs_reg lod2, int grad_components,
- fs_reg sample_index, uint32_t sampler,
- bool has_offset)
-{
- int reg_width = dispatch_width / 8;
- unsigned header_size = 0;
-
- fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F, dispatch_width);
- fs_reg msg_coords = message;
-
- if (has_offset) {
- /* The offsets set up by the ir_texture visitor are in the
- * m1 header, so we can't go headerless.
- */
- header_size = 1;
- message.reg--;
- }
-
- for (int i = 0; i < vector_elements; i++) {
- bld.MOV(retype(offset(msg_coords, i), coordinate.type), coordinate);
- coordinate = offset(coordinate, 1);
- }
- fs_reg msg_end = offset(msg_coords, vector_elements);
- fs_reg msg_lod = offset(msg_coords, 4);
-
- if (shadow_c.file != BAD_FILE) {
- fs_reg msg_shadow = msg_lod;
- bld.MOV(msg_shadow, shadow_c);
- msg_lod = offset(msg_shadow, 1);
- msg_end = msg_lod;
- }
-
- enum opcode opcode;
- switch (op) {
- case ir_tex:
- opcode = SHADER_OPCODE_TEX;
- break;
- case ir_txb:
- bld.MOV(msg_lod, lod);
- msg_end = offset(msg_lod, 1);
-
- opcode = FS_OPCODE_TXB;
- break;
- case ir_txl:
- bld.MOV(msg_lod, lod);
- msg_end = offset(msg_lod, 1);
-
- opcode = SHADER_OPCODE_TXL;
- break;
- case ir_txd: {
- /**
- * P = u, v, r
- * dPdx = dudx, dvdx, drdx
- * dPdy = dudy, dvdy, drdy
- *
- * Load up these values:
- * - dudx dudy dvdx dvdy drdx drdy
- * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z
- */
- msg_end = msg_lod;
- for (int i = 0; i < grad_components; i++) {
- bld.MOV(msg_end, lod);
- lod = offset(lod, 1);
- msg_end = offset(msg_end, 1);
-
- bld.MOV(msg_end, lod2);
- lod2 = offset(lod2, 1);
- msg_end = offset(msg_end, 1);
- }
-
- opcode = SHADER_OPCODE_TXD;
- break;
- }
- case ir_txs:
- msg_lod = retype(msg_end, BRW_REGISTER_TYPE_UD);
- bld.MOV(msg_lod, lod);
- msg_end = offset(msg_lod, 1);
-
- opcode = SHADER_OPCODE_TXS;
- break;
- case ir_query_levels:
- msg_lod = msg_end;
- bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u));
- msg_end = offset(msg_lod, 1);
-
- opcode = SHADER_OPCODE_TXS;
- break;
- case ir_txf:
- msg_lod = offset(msg_coords, 3);
- bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), lod);
- msg_end = offset(msg_lod, 1);
-
- opcode = SHADER_OPCODE_TXF;
- break;
- case ir_txf_ms:
- msg_lod = offset(msg_coords, 3);
- /* lod */
- bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u));
- /* sample index */
- bld.MOV(retype(offset(msg_lod, 1), BRW_REGISTER_TYPE_UD), sample_index);
- msg_end = offset(msg_lod, 2);
-
- opcode = SHADER_OPCODE_TXF_CMS;
- break;
- case ir_lod:
- opcode = SHADER_OPCODE_LOD;
- break;
- case ir_tg4:
- opcode = SHADER_OPCODE_TG4;
- break;
- default:
- unreachable("not reached");
- }
-
- fs_inst *inst = bld.emit(opcode, dst, reg_undef, fs_reg(sampler));
- inst->base_mrf = message.reg;
- inst->mlen = msg_end.reg - message.reg;
- inst->header_size = header_size;
- inst->regs_written = 4 * reg_width;
-
- if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) {
- fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE)
- " disallowed by hardware\n");
- }
-
- return inst;
-}
-
-static bool
-is_high_sampler(const struct brw_device_info *devinfo, fs_reg sampler)
-{
- if (devinfo->gen < 8 && !devinfo->is_haswell)
- return false;
-
- return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16;
-}
-
-fs_inst *
-fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
- fs_reg coordinate, int coord_components,
- fs_reg shadow_c,
- fs_reg lod, fs_reg lod2, int grad_components,
- fs_reg sample_index, fs_reg mcs, fs_reg sampler,
- fs_reg offset_value)
-{
- int reg_width = dispatch_width / 8;
- unsigned header_size = 0;
-
- fs_reg *sources = ralloc_array(mem_ctx, fs_reg, MAX_SAMPLER_MESSAGE_SIZE);
- for (int i = 0; i < MAX_SAMPLER_MESSAGE_SIZE; i++) {
- sources[i] = vgrf(glsl_type::float_type);
- }
- int length = 0;
-
- if (op == ir_tg4 || offset_value.file != BAD_FILE ||
- is_high_sampler(devinfo, sampler)) {
- /* For general texture offsets (no txf workaround), we need a header to
- * put them in. Note that for SIMD16 we're making space for two actual
- * hardware registers here, so the emit will have to fix up for this.
- *
- * * ir4_tg4 needs to place its channel select in the header,
- * for interaction with ARB_texture_swizzle
- *
- * The sampler index is only 4-bits, so for larger sampler numbers we
- * need to offset the Sampler State Pointer in the header.
- */
- header_size = 1;
- sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
- length++;
- }
-
- if (shadow_c.file != BAD_FILE) {
- bld.MOV(sources[length], shadow_c);
- length++;
- }
-
- bool has_nonconstant_offset =
- offset_value.file != BAD_FILE && offset_value.file != IMM;
- bool coordinate_done = false;
-
- /* The sampler can only meaningfully compute LOD for fragment shader
- * messages. For all other stages, we change the opcode to ir_txl and
- * hardcode the LOD to 0.
- */
- if (stage != MESA_SHADER_FRAGMENT && op == ir_tex) {
- op = ir_txl;
- lod = fs_reg(0.0f);
- }
-
- /* Set up the LOD info */
- switch (op) {
- case ir_tex:
- case ir_lod:
- break;
- case ir_txb:
- bld.MOV(sources[length], lod);
- length++;
- break;
- case ir_txl:
- bld.MOV(sources[length], lod);
- length++;
- break;
- case ir_txd: {
- no16("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
-
- /* Load dPdx and the coordinate together:
- * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
- */
- for (int i = 0; i < coord_components; i++) {
- bld.MOV(sources[length], coordinate);
- coordinate = offset(coordinate, 1);
- length++;
-
- /* For cube map array, the coordinate is (u,v,r,ai) but there are
- * only derivatives for (u, v, r).
- */
- if (i < grad_components) {
- bld.MOV(sources[length], lod);
- lod = offset(lod, 1);
- length++;
-
- bld.MOV(sources[length], lod2);
- lod2 = offset(lod2, 1);
- length++;
- }
- }
-
- coordinate_done = true;
- break;
- }
- case ir_txs:
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod);
- length++;
- break;
- case ir_query_levels:
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u));
- length++;
- break;
- case ir_txf:
- /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r.
- * On Gen9 they are u, v, lod, r
- */
-
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
- coordinate = offset(coordinate, 1);
- length++;
-
- if (devinfo->gen >= 9) {
- if (coord_components >= 2) {
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
- coordinate = offset(coordinate, 1);
- }
- length++;
- }
-
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod);
- length++;
-
- for (int i = devinfo->gen >= 9 ? 2 : 1; i < coord_components; i++) {
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
- coordinate = offset(coordinate, 1);
- length++;
- }
-
- coordinate_done = true;
- break;
- case ir_txf_ms:
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index);
- length++;
-
- /* data from the multisample control surface */
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs);
- length++;
-
- /* there is no offsetting for this message; just copy in the integer
- * texture coordinates
- */
- for (int i = 0; i < coord_components; i++) {
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate);
- coordinate = offset(coordinate, 1);
- length++;
- }
-
- coordinate_done = true;
- break;
- case ir_tg4:
- if (has_nonconstant_offset) {
- if (shadow_c.file != BAD_FILE)
- no16("Gen7 does not support gather4_po_c in SIMD16 mode.");
-
- /* More crazy intermixing */
- for (int i = 0; i < 2; i++) { /* u, v */
- bld.MOV(sources[length], coordinate);
- coordinate = offset(coordinate, 1);
- length++;
- }
-
- for (int i = 0; i < 2; i++) { /* offu, offv */
- bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value);
- offset_value = offset(offset_value, 1);
- length++;
- }
-
- if (coord_components == 3) { /* r if present */
- bld.MOV(sources[length], coordinate);
- coordinate = offset(coordinate, 1);
- length++;
- }
-
- coordinate_done = true;
- }
- break;
- }
-
- /* Set up the coordinate (except for cases where it was done above) */
- if (!coordinate_done) {
- for (int i = 0; i < coord_components; i++) {
- bld.MOV(sources[length], coordinate);
- coordinate = offset(coordinate, 1);
- length++;
- }
- }
-
- int mlen;
- if (reg_width == 2)
- mlen = length * reg_width - header_size;
- else
- mlen = length * reg_width;
-
- fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
- BRW_REGISTER_TYPE_F, dispatch_width);
- bld.LOAD_PAYLOAD(src_payload, sources, length, header_size);
-
- /* Generate the SEND */
- enum opcode opcode;
- switch (op) {
- case ir_tex: opcode = SHADER_OPCODE_TEX; break;
- case ir_txb: opcode = FS_OPCODE_TXB; break;
- case ir_txl: opcode = SHADER_OPCODE_TXL; break;
- case ir_txd: opcode = SHADER_OPCODE_TXD; break;
- case ir_txf: opcode = SHADER_OPCODE_TXF; break;
- case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
- case ir_txs: opcode = SHADER_OPCODE_TXS; break;
- case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
- case ir_lod: opcode = SHADER_OPCODE_LOD; break;
- case ir_tg4:
- if (has_nonconstant_offset)
- opcode = SHADER_OPCODE_TG4_OFFSET;
- else
- opcode = SHADER_OPCODE_TG4;
- break;
- default:
- unreachable("not reached");
- }
- fs_inst *inst = bld.emit(opcode, dst, src_payload, sampler);
- inst->base_mrf = -1;
- inst->mlen = mlen;
- inst->header_size = header_size;
- inst->regs_written = 4 * reg_width;
-
- if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) {
- fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE)
- " disallowed by hardware\n");
- }
-
- return inst;
-}
-
fs_reg
fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
bool is_rect, uint32_t sampler, int texunit)
@@ -746,8 +140,8 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
coordinate = dst;
bld.MUL(dst, src, scale_x);
- dst = offset(dst, 1);
- src = offset(src, 1);
+ dst = offset(dst, bld, 1);
+ src = offset(src, bld, 1);
bld.MUL(dst, src, scale_y);
} else if (is_rect) {
/* On gen6+, the sampler handles the rectangle coordinates
@@ -760,7 +154,7 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
for (int i = 0; i < 2; i++) {
if (key_tex->gl_clamp_mask[i] & (1 << sampler)) {
fs_reg chan = coordinate;
- chan = offset(chan, i);
+ chan = offset(chan, bld, i);
set_condmod(BRW_CONDITIONAL_GE,
bld.emit(BRW_OPCODE_SEL, chan, chan, fs_reg(0.0f)));
@@ -785,7 +179,7 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
for (int i = 0; i < MIN2(coord_components, 3); i++) {
if (key_tex->gl_clamp_mask[i] & (1 << sampler)) {
fs_reg chan = coordinate;
- chan = offset(chan, i);
+ chan = offset(chan, bld, i);
set_saturate(true, bld.MOV(chan, chan));
}
}
@@ -795,31 +189,21 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
/* Sample from the MCS surface attached to this multisample texture. */
fs_reg
-fs_visitor::emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler)
+fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
+ const fs_reg &sampler)
{
- int reg_width = dispatch_width / 8;
- fs_reg payload = fs_reg(GRF, alloc.allocate(components * reg_width),
- BRW_REGISTER_TYPE_F, dispatch_width);
- fs_reg dest = vgrf(glsl_type::uvec4_type);
- fs_reg *sources = ralloc_array(mem_ctx, fs_reg, components);
-
- /* parameters are: u, v, r; missing parameters are treated as zero */
- for (int i = 0; i < components; i++) {
- sources[i] = vgrf(glsl_type::float_type);
- bld.MOV(retype(sources[i], BRW_REGISTER_TYPE_D), coordinate);
- coordinate = offset(coordinate, 1);
- }
-
- bld.LOAD_PAYLOAD(payload, sources, components, 0);
+ const fs_reg dest = vgrf(glsl_type::uvec4_type);
+ const fs_reg srcs[] = {
+ coordinate, fs_reg(), fs_reg(), fs_reg(), fs_reg(), fs_reg(),
+ sampler, fs_reg(), fs_reg(components), fs_reg(0)
+ };
+ fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs,
+ ARRAY_SIZE(srcs));
- fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS, dest, payload, sampler);
- inst->base_mrf = -1;
- inst->mlen = components * reg_width;
- inst->header_size = 0;
- inst->regs_written = 4 * reg_width; /* we only care about one reg of
- * response, but the sampler always
- * writes 4/8
- */
+ /* We only care about one reg of response, but the sampler always writes
+ * 4/8.
+ */
+ inst->regs_written = 4 * dispatch_width / 8;
return dest;
}
@@ -853,12 +237,20 @@ fs_visitor::emit_texture(ir_texture_opcode op,
for (int i=0; i<4; i++) {
bld.MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f));
- res = offset(res, 1);
+ res = offset(res, bld, 1);
}
return;
}
}
+ if (op == ir_query_levels) {
+ /* textureQueryLevels() is implemented in terms of TXS so we need to
+ * pass a valid LOD argument.
+ */
+ assert(lod.file == BAD_FILE);
+ lod = fs_reg(0u);
+ }
+
if (coordinate.file != BAD_FILE) {
/* FINISHME: Texture coordinate rescaling doesn't work with non-constant
* samplers. This should only be a problem with GL_CLAMP on Gen7.
@@ -871,26 +263,50 @@ fs_visitor::emit_texture(ir_texture_opcode op,
* samples, so don't worry about them.
*/
fs_reg dst = vgrf(glsl_type::get_instance(dest_type->base_type, 4, 1));
+ const fs_reg srcs[] = {
+ coordinate, shadow_c, lod, lod2,
+ sample_index, mcs, sampler_reg, offset_value,
+ fs_reg(coord_components), fs_reg(grad_components)
+ };
+ enum opcode opcode;
- if (devinfo->gen >= 7) {
- inst = emit_texture_gen7(op, dst, coordinate, coord_components,
- shadow_c, lod, lod2, grad_components,
- sample_index, mcs, sampler_reg,
- offset_value);
- } else if (devinfo->gen >= 5) {
- inst = emit_texture_gen5(op, dst, coordinate, coord_components,
- shadow_c, lod, lod2, grad_components,
- sample_index, sampler,
- offset_value.file != BAD_FILE);
- } else if (dispatch_width == 16) {
- inst = emit_texture_gen4_simd16(op, dst, coordinate, coord_components,
- shadow_c, lod, sampler);
- } else {
- inst = emit_texture_gen4(op, dst, coordinate, coord_components,
- shadow_c, lod, lod2, grad_components,
- sampler);
+ switch (op) {
+ case ir_tex:
+ opcode = SHADER_OPCODE_TEX_LOGICAL;
+ break;
+ case ir_txb:
+ opcode = FS_OPCODE_TXB_LOGICAL;
+ break;
+ case ir_txl:
+ opcode = SHADER_OPCODE_TXL_LOGICAL;
+ break;
+ case ir_txd:
+ opcode = SHADER_OPCODE_TXD_LOGICAL;
+ break;
+ case ir_txf:
+ opcode = SHADER_OPCODE_TXF_LOGICAL;
+ break;
+ case ir_txf_ms:
+ opcode = SHADER_OPCODE_TXF_CMS_LOGICAL;
+ break;
+ case ir_txs:
+ case ir_query_levels:
+ opcode = SHADER_OPCODE_TXS_LOGICAL;
+ break;
+ case ir_lod:
+ opcode = SHADER_OPCODE_LOD_LOGICAL;
+ break;
+ case ir_tg4:
+ opcode = (offset_value.file != BAD_FILE && offset_value.file != IMM ?
+ SHADER_OPCODE_TG4_OFFSET_LOGICAL : SHADER_OPCODE_TG4_LOGICAL);
+ break;
+ default:
+ unreachable("Invalid texture opcode.");
}
+ inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
+ inst->regs_written = 4 * dispatch_width / 8;
+
if (shadow_c.file != BAD_FILE)
inst->shadow_compare = true;
@@ -907,17 +323,17 @@ fs_visitor::emit_texture(ir_texture_opcode op,
/* fixup #layers for cube map arrays */
if (op == ir_txs && is_cube_array) {
- fs_reg depth = offset(dst, 2);
+ fs_reg depth = offset(dst, bld, 2);
fs_reg fixed_depth = vgrf(glsl_type::int_type);
bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, fs_reg(6));
fs_reg *fixed_payload = ralloc_array(mem_ctx, fs_reg, inst->regs_written);
- int components = inst->regs_written / (dst.width / 8);
+ int components = inst->regs_written / (inst->exec_size / 8);
for (int i = 0; i < components; i++) {
if (i == 2) {
fixed_payload[i] = fixed_depth;
} else {
- fixed_payload[i] = offset(dst, i);
+ fixed_payload[i] = offset(dst, bld, i);
}
}
bld.LOAD_PAYLOAD(dst, fixed_payload, components, 0);
@@ -952,7 +368,7 @@ fs_visitor::emit_gen6_gather_wa(uint8_t wa, fs_reg dst)
bld.ASR(dst, dst, fs_reg(32 - width));
}
- dst = offset(dst, 1);
+ dst = offset(dst, bld, 1);
}
}
@@ -989,7 +405,7 @@ fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components,
{
if (op == ir_query_levels) {
/* # levels is in .w */
- this->result = offset(orig_val, 3);
+ this->result = offset(orig_val, bld, 3);
return;
}
@@ -1010,15 +426,15 @@ fs_visitor::swizzle_result(ir_texture_opcode op, int dest_components,
for (int i = 0; i < 4; i++) {
int swiz = GET_SWZ(key_tex->swizzles[sampler], i);
fs_reg l = swizzled_result;
- l = offset(l, i);
+ l = offset(l, bld, i);
if (swiz == SWIZZLE_ZERO) {
bld.MOV(l, fs_reg(0.0f));
} else if (swiz == SWIZZLE_ONE) {
bld.MOV(l, fs_reg(1.0f));
} else {
- bld.MOV(l, offset(orig_val,
- GET_SWZ(key_tex->swizzles[sampler], i)));
+ bld.MOV(l, offset(orig_val, bld,
+ GET_SWZ(key_tex->swizzles[sampler], i)));
}
}
this->result = swizzled_result;
@@ -1114,118 +530,6 @@ fs_visitor::try_replace_with_sel()
return false;
}
-void
-fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
- fs_reg dst, fs_reg offset, fs_reg src0,
- fs_reg src1)
-{
- int reg_width = dispatch_width / 8;
- int length = 0;
-
- fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 4);
-
- sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
- /* Initialize the sample mask in the message header. */
- bld.exec_all().MOV(sources[0], fs_reg(0u));
-
- if (stage == MESA_SHADER_FRAGMENT) {
- if (((brw_wm_prog_data*)this->prog_data)->uses_kill) {
- bld.exec_all()
- .MOV(component(sources[0], 7), brw_flag_reg(0, 1));
- } else {
- bld.exec_all()
- .MOV(component(sources[0], 7),
- retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD));
- }
- } else {
- /* The execution mask is part of the side-band information sent together with
- * the message payload to the data port. It's implicitly ANDed with the sample
- * mask sent in the header to compute the actual set of channels that execute
- * the atomic operation.
- */
- assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_COMPUTE);
- bld.exec_all()
- .MOV(component(sources[0], 7), fs_reg(0xffffu));
- }
- length++;
-
- /* Set the atomic operation offset. */
- sources[1] = vgrf(glsl_type::uint_type);
- bld.MOV(sources[1], offset);
- length++;
-
- /* Set the atomic operation arguments. */
- if (src0.file != BAD_FILE) {
- sources[length] = vgrf(glsl_type::uint_type);
- bld.MOV(sources[length], src0);
- length++;
- }
-
- if (src1.file != BAD_FILE) {
- sources[length] = vgrf(glsl_type::uint_type);
- bld.MOV(sources[length], src1);
- length++;
- }
-
- int mlen = 1 + (length - 1) * reg_width;
- fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
- BRW_REGISTER_TYPE_UD, dispatch_width);
- bld.LOAD_PAYLOAD(src_payload, sources, length, 1);
-
- /* Emit the instruction. */
- fs_inst *inst = bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst, src_payload,
- fs_reg(surf_index), fs_reg(atomic_op));
- inst->mlen = mlen;
-}
-
-void
-fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
- fs_reg offset)
-{
- int reg_width = dispatch_width / 8;
-
- fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 2);
-
- sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
- /* Initialize the sample mask in the message header. */
- bld.exec_all()
- .MOV(sources[0], fs_reg(0u));
-
- if (stage == MESA_SHADER_FRAGMENT) {
- if (((brw_wm_prog_data*)this->prog_data)->uses_kill) {
- bld.exec_all()
- .MOV(component(sources[0], 7), brw_flag_reg(0, 1));
- } else {
- bld.exec_all()
- .MOV(component(sources[0], 7),
- retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD));
- }
- } else {
- /* The execution mask is part of the side-band information sent together with
- * the message payload to the data port. It's implicitly ANDed with the sample
- * mask sent in the header to compute the actual set of channels that execute
- * the atomic operation.
- */
- assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_COMPUTE);
- bld.exec_all()
- .MOV(component(sources[0], 7), fs_reg(0xffffu));
- }
-
- /* Set the surface read offset. */
- sources[1] = vgrf(glsl_type::uint_type);
- bld.MOV(sources[1], offset);
-
- int mlen = 1 + reg_width;
- fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
- BRW_REGISTER_TYPE_UD, dispatch_width);
- fs_inst *inst = bld.LOAD_PAYLOAD(src_payload, sources, 2, 1);
-
- /* Emit the instruction. */
- inst = bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst, src_payload,
- fs_reg(surf_index), fs_reg(1));
- inst->mlen = mlen;
-}
-
/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
void
fs_visitor::emit_dummy_fs()
@@ -1235,8 +539,8 @@ fs_visitor::emit_dummy_fs()
/* Everyone's favorite color. */
const float color[4] = { 1.0, 0.0, 1.0, 0.0 };
for (int i = 0; i < 4; i++) {
- bld.MOV(fs_reg(MRF, 2 + i * reg_width, BRW_REGISTER_TYPE_F,
- dispatch_width), fs_reg(color[i]));
+ bld.MOV(fs_reg(MRF, 2 + i * reg_width, BRW_REGISTER_TYPE_F),
+ fs_reg(color[i]));
}
fs_inst *write;
@@ -1315,14 +619,14 @@ fs_visitor::emit_interpolation_setup_gen4()
if (devinfo->has_pln && dispatch_width == 16) {
for (unsigned i = 0; i < 2; i++) {
- abld.half(i).ADD(half(offset(delta_xy, i), 0),
+ abld.half(i).ADD(half(offset(delta_xy, abld, i), 0),
half(this->pixel_x, i), xstart);
- abld.half(i).ADD(half(offset(delta_xy, i), 1),
+ abld.half(i).ADD(half(offset(delta_xy, abld, i), 1),
half(this->pixel_y, i), ystart);
}
} else {
- abld.ADD(offset(delta_xy, 0), this->pixel_x, xstart);
- abld.ADD(offset(delta_xy, 1), this->pixel_y, ystart);
+ abld.ADD(offset(delta_xy, abld, 0), this->pixel_x, xstart);
+ abld.ADD(offset(delta_xy, abld, 1), this->pixel_y, ystart);
}
abld = bld.annotate("compute pos.w and 1/pos.w");
@@ -1356,9 +660,10 @@ fs_visitor::emit_interpolation_setup_gen6()
* compute our pixel centers.
*/
fs_reg int_pixel_xy(GRF, alloc.allocate(dispatch_width / 8),
- BRW_REGISTER_TYPE_UW, dispatch_width * 2);
- abld.exec_all()
- .ADD(int_pixel_xy,
+ BRW_REGISTER_TYPE_UW);
+
+ const fs_builder dbld = abld.exec_all().group(dispatch_width * 2, 0);
+ dbld.ADD(int_pixel_xy,
fs_reg(stride(suboffset(g1_uw, 4), 1, 4, 0)),
fs_reg(brw_imm_v(0x11001010)));
@@ -1407,33 +712,6 @@ fs_visitor::emit_interpolation_setup_gen6()
}
}
-void
-fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
- unsigned exec_size, bool use_2nd_half)
-{
- brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
- fs_inst *inst;
-
- if (key->clamp_fragment_color) {
- fs_reg tmp = vgrf(glsl_type::vec4_type);
- assert(color.type == BRW_REGISTER_TYPE_F);
- for (unsigned i = 0; i < components; i++) {
- inst = bld.MOV(offset(tmp, i), offset(color, i));
- inst->saturate = true;
- }
- color = tmp;
- }
-
- if (exec_size < dispatch_width) {
- unsigned half_idx = use_2nd_half ? 1 : 0;
- for (unsigned i = 0; i < components; i++)
- dst[i] = half(offset(color, i), half_idx);
- } else {
- for (unsigned i = 0; i < components; i++)
- dst[i] = offset(color, i);
- }
-}
-
static enum brw_conditional_mod
cond_for_alpha_func(GLenum func)
{
@@ -1478,7 +756,7 @@ fs_visitor::emit_alpha_test()
BRW_CONDITIONAL_NEQ);
} else {
/* RT0 alpha */
- fs_reg color = offset(outputs[0], 3);
+ fs_reg color = offset(outputs[0], bld, 3);
/* f0.1 &= func(color, ref) */
cmp = abld.CMP(bld.null_reg_f(), color, fs_reg(key->alpha_test_ref),
@@ -1491,152 +769,36 @@ fs_visitor::emit_alpha_test()
fs_inst *
fs_visitor::emit_single_fb_write(const fs_builder &bld,
fs_reg color0, fs_reg color1,
- fs_reg src0_alpha, unsigned components,
- unsigned exec_size, bool use_2nd_half)
+ fs_reg src0_alpha, unsigned components)
{
assert(stage == MESA_SHADER_FRAGMENT);
brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
- brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
- int header_size = 2, payload_header_size;
-
- /* We can potentially have a message length of up to 15, so we have to set
- * base_mrf to either 0 or 1 in order to fit in m0..m15.
- */
- fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 15);
- int length = 0;
-
- /* From the Sandy Bridge PRM, volume 4, page 198:
- *
- * "Dispatched Pixel Enables. One bit per pixel indicating
- * which pixels were originally enabled when the thread was
- * dispatched. This field is only required for the end-of-
- * thread message and on all dual-source messages."
- */
- if (devinfo->gen >= 6 &&
- (devinfo->is_haswell || devinfo->gen >= 8 || !prog_data->uses_kill) &&
- color1.file == BAD_FILE &&
- key->nr_color_regions == 1) {
- header_size = 0;
- }
-
- if (header_size != 0) {
- assert(header_size == 2);
- /* Allocate 2 registers for a header */
- length += 2;
- }
- if (payload.aa_dest_stencil_reg) {
- sources[length] = fs_reg(GRF, alloc.allocate(1));
- bld.exec_all().annotate("FB write stencil/AA alpha")
- .MOV(sources[length],
- fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)));
- length++;
- }
-
- prog_data->uses_omask =
- prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
- if (prog_data->uses_omask) {
- assert(this->sample_mask.file != BAD_FILE);
- /* Hand over gl_SampleMask. Only lower 16 bits are relevant. Since
- * it's unsinged single words, one vgrf is always 16-wide.
- */
- sources[length] = fs_reg(GRF, alloc.allocate(1),
- BRW_REGISTER_TYPE_UW, 16);
- bld.exec_all().annotate("FB write oMask")
- .emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask);
- length++;
- }
-
- payload_header_size = length;
-
- if (color0.file == BAD_FILE) {
- /* Even if there's no color buffers enabled, we still need to send
- * alpha out the pipeline to our null renderbuffer to support
- * alpha-testing, alpha-to-coverage, and so on.
- */
- if (this->outputs[0].file != BAD_FILE)
- setup_color_payload(&sources[length + 3], offset(this->outputs[0], 3),
- 1, exec_size, false);
- length += 4;
- } else if (color1.file == BAD_FILE) {
- if (src0_alpha.file != BAD_FILE) {
- setup_color_payload(&sources[length], src0_alpha, 1, exec_size, false);
- length++;
- }
-
- setup_color_payload(&sources[length], color0, components,
- exec_size, use_2nd_half);
- length += 4;
- } else {
- setup_color_payload(&sources[length], color0, components,
- exec_size, use_2nd_half);
- length += 4;
- setup_color_payload(&sources[length], color1, components,
- exec_size, use_2nd_half);
- length += 4;
- }
+ /* Hand over gl_FragDepth or the payload depth. */
+ const fs_reg dst_depth = (payload.dest_depth_reg ?
+ fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)) :
+ fs_reg());
+ fs_reg src_depth;
if (source_depth_to_render_target) {
- if (devinfo->gen == 6) {
- /* For outputting oDepth on gen6, SIMD8 writes have to be
- * used. This would require SIMD8 moves of each half to
- * message regs, kind of like pre-gen5 SIMD16 FB writes.
- * Just bail on doing so for now.
- */
- no16("Missing support for simd16 depth writes on gen6\n");
- }
-
- if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
- /* Hand over gl_FragDepth. */
- assert(this->frag_depth.file != BAD_FILE);
- if (exec_size < dispatch_width) {
- sources[length] = half(this->frag_depth, use_2nd_half);
- } else {
- sources[length] = this->frag_depth;
- }
- } else {
- /* Pass through the payload depth. */
- sources[length] = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0));
- }
- length++;
+ if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+ src_depth = frag_depth;
+ else
+ src_depth = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0));
}
- if (payload.dest_depth_reg)
- sources[length++] = fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0));
-
- const fs_builder ubld = bld.group(exec_size, use_2nd_half);
- fs_inst *load;
- fs_inst *write;
- if (devinfo->gen >= 7) {
- /* Send from the GRF */
- fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F, exec_size);
- load = ubld.LOAD_PAYLOAD(payload, sources, length, payload_header_size);
- payload.reg = alloc.allocate(load->regs_written);
- load->dst = payload;
- write = ubld.emit(FS_OPCODE_FB_WRITE, reg_undef, payload);
- write->base_mrf = -1;
- } else {
- /* Send from the MRF */
- load = ubld.LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F, exec_size),
- sources, length, payload_header_size);
-
- /* On pre-SNB, we have to interlace the color values. LOAD_PAYLOAD
- * will do this for us if we just give it a COMPR4 destination.
- */
- if (devinfo->gen < 6 && exec_size == 16)
- load->dst.reg |= BRW_MRF_COMPR4;
-
- write = ubld.emit(FS_OPCODE_FB_WRITE);
- write->exec_size = exec_size;
- write->base_mrf = 1;
- }
+ const fs_reg sources[] = {
+ color0, color1, src0_alpha, src_depth, dst_depth, sample_mask,
+ fs_reg(components)
+ };
+ fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(),
+ sources, ARRAY_SIZE(sources));
- write->mlen = load->regs_written;
- write->header_size = header_size;
if (prog_data->uses_kill) {
write->predicate = BRW_PREDICATE_NORMAL;
write->flag_subreg = 1;
}
+
return write;
}
@@ -1648,37 +810,24 @@ fs_visitor::emit_fb_writes()
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
fs_inst *inst = NULL;
+
+ if (source_depth_to_render_target && devinfo->gen == 6) {
+ /* For outputting oDepth on gen6, SIMD8 writes have to be used. This
+ * would require SIMD8 moves of each half to message regs, e.g. by using
+ * the SIMD lowering pass. Unfortunately this is more difficult than it
+ * sounds because the SIMD8 single-source message lacks channel selects
+ * for the second and third subspans.
+ */
+ no16("Missing support for simd16 depth writes on gen6\n");
+ }
+
if (do_dual_src) {
const fs_builder abld = bld.annotate("FB dual-source write");
inst = emit_single_fb_write(abld, this->outputs[0],
- this->dual_src_output, reg_undef, 4, 8);
+ this->dual_src_output, reg_undef, 4);
inst->target = 0;
- /* SIMD16 dual source blending requires to send two SIMD8 dual source
- * messages, where each message contains color data for 8 pixels. Color
- * data for the first group of pixels is stored in the "lower" half of
- * the color registers, so in SIMD16, the previous message did:
- * m + 0: r0
- * m + 1: g0
- * m + 2: b0
- * m + 3: a0
- *
- * Here goes the second message, which packs color data for the
- * remaining 8 pixels. Color data for these pixels is stored in the
- * "upper" half of the color registers, so we need to do:
- * m + 0: r1
- * m + 1: g1
- * m + 2: b1
- * m + 3: a1
- */
- if (dispatch_width == 16) {
- inst = emit_single_fb_write(abld, this->outputs[0],
- this->dual_src_output, reg_undef, 4, 8,
- true);
- inst->target = 0;
- }
-
prog_data->dual_src_blend = true;
} else {
for (int target = 0; target < key->nr_color_regions; target++) {
@@ -1691,12 +840,11 @@ fs_visitor::emit_fb_writes()
fs_reg src0_alpha;
if (devinfo->gen >= 6 && key->replicate_alpha && target != 0)
- src0_alpha = offset(outputs[0], 3);
+ src0_alpha = offset(outputs[0], bld, 3);
inst = emit_single_fb_write(abld, this->outputs[target], reg_undef,
src0_alpha,
- this->output_components[target],
- dispatch_width);
+ this->output_components[target]);
inst->target = target;
}
}
@@ -1706,8 +854,15 @@ fs_visitor::emit_fb_writes()
* alpha out the pipeline to our null renderbuffer to support
* alpha-testing, alpha-to-coverage, and so on.
*/
- inst = emit_single_fb_write(bld, reg_undef, reg_undef, reg_undef, 0,
- dispatch_width);
+ /* FINISHME: Factor out this frequently recurring pattern into a
+ * helper function.
+ */
+ const fs_reg srcs[] = { reg_undef, reg_undef,
+ reg_undef, offset(this->outputs[0], bld, 3) };
+ const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
+ bld.LOAD_PAYLOAD(tmp, srcs, 4, 0);
+
+ inst = emit_single_fb_write(bld, tmp, reg_undef, reg_undef, 4);
inst->target = 0;
}
@@ -1730,6 +885,12 @@ fs_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
}
}
+/**
+ * Lower legacy fixed-function and gl_ClipVertex clipping to clip distances.
+ *
+ * This does nothing if the shader uses gl_ClipDistance or user clipping is
+ * disabled altogether.
+ */
void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
{
struct brw_vue_prog_data *vue_prog_data =
@@ -1737,6 +898,10 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
const struct brw_vue_prog_key *key =
(const struct brw_vue_prog_key *) this->key;
+ /* Bail unless some sort of legacy clipping is enabled */
+ if (!key->userclip_active || prog->UsesClipDistanceOut)
+ return;
+
/* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
*
* "If a linked set of shaders forming the vertex stage contains no
@@ -1774,13 +939,13 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
abld.MUL(output, outputs[clip_vertex], u);
for (int j = 1; j < 4; j++) {
u.reg = userplane[i].reg + j;
- abld.MAD(output, output, offset(outputs[clip_vertex], j), u);
+ abld.MAD(output, output, offset(outputs[clip_vertex], bld, j), u);
}
}
}
void
-fs_visitor::emit_urb_writes(gl_clip_plane *clip_planes)
+fs_visitor::emit_urb_writes()
{
int slot, urb_offset, length;
struct brw_vs_prog_data *vs_prog_data =
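The MUL plus MAD sequence in the compute_clip_distance() hunk above evaluates, for each enabled plane, the dot product of the user clip plane with the clip vertex (or position). A scalar sketch with made-up plane and vertex values:

#include <cstdio>

/* Scalar model of the MUL + MAD sequence: each written clip distance is the
 * four-component dot product of a user clip plane with the clip vertex.
 */
static float
clip_distance(const float plane[4], const float vertex[4])
{
   float d = plane[0] * vertex[0];          /* MUL             */
   for (int j = 1; j < 4; j++)
      d = d + plane[j] * vertex[j];         /* MAD, once per j */
   return d;
}

int main()
{
   const float plane[4] = { 0.0f, 1.0f, 0.0f, -2.0f };
   const float vertex[4] = { 3.0f, 5.0f, 0.0f, 1.0f };

   /* Positive result: the vertex is on the visible side of the plane. */
   printf("%f\n", clip_distance(plane, vertex));
   return 0;
}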
@@ -1793,21 +958,24 @@ fs_visitor::emit_urb_writes(gl_clip_plane *clip_planes)
bool flush;
fs_reg sources[8];
- /* Lower legacy ff and ClipVertex clipping to clip distances */
- if (key->base.userclip_active && !prog->UsesClipDistanceOut)
- compute_clip_distance(clip_planes);
-
/* If we don't have any valid slots to write, just do a minimal urb write
- * send to terminate the shader. */
+ * send to terminate the shader. This includes 1 slot of undefined data,
+ * because it's invalid to write 0 data:
+ *
+ * From the Broadwell PRM, Volume 7: 3D Media GPGPU, Shared Functions -
+ * Unified Return Buffer (URB) > URB_SIMD8_Write and URB_SIMD8_Read >
+ * Write Data Payload:
+ *
+ * "The write data payload can be between 1 and 8 message phases long."
+ */
if (vue_map->slots_valid == 0) {
-
- fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+ fs_reg payload = fs_reg(GRF, alloc.allocate(2), BRW_REGISTER_TYPE_UD);
bld.exec_all().MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0),
BRW_REGISTER_TYPE_UD)));
fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
inst->eot = true;
- inst->mlen = 1;
+ inst->mlen = 2;
inst->offset = 1;
return;
}
@@ -1888,13 +1056,13 @@ fs_visitor::emit_urb_writes(gl_clip_plane *clip_planes)
*/
for (int i = 0; i < 4; i++) {
reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type);
- src = offset(this->outputs[varying], i);
+ src = offset(this->outputs[varying], bld, i);
set_saturate(true, bld.MOV(reg, src));
sources[length++] = reg;
}
} else {
for (int i = 0; i < 4; i++)
- sources[length++] = offset(this->outputs[varying], i);
+ sources[length++] = offset(this->outputs[varying], bld, i);
}
break;
}
@@ -1911,7 +1079,7 @@ fs_visitor::emit_urb_writes(gl_clip_plane *clip_planes)
if (flush) {
fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1);
fs_reg payload = fs_reg(GRF, alloc.allocate(length + 1),
- BRW_REGISTER_TYPE_F, dispatch_width);
+ BRW_REGISTER_TYPE_F);
payload_sources[0] =
fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
@@ -1944,7 +1112,7 @@ fs_visitor::emit_cs_terminate()
*/
struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
- bld.exec_all().MOV(payload, g0);
+ bld.group(8, 0).exec_all().MOV(payload, g0);
/* Send a message to the thread spawner to terminate the thread. */
fs_inst *inst = bld.exec_all()
@@ -2012,7 +1180,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
this->no16_msg = NULL;
this->nir_locals = NULL;
- this->nir_globals = NULL;
+ this->nir_ssa_values = NULL;
memset(&this->payload, 0, sizeof(this->payload));
memset(this->outputs, 0, sizeof(this->outputs));
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 45c132b4a9e..4ad65215756 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -68,12 +68,16 @@ brw_compile_gs_prog(struct brw_context *brw,
/* We also upload clip plane data as uniforms */
param_count += MAX_CLIP_PLANES * 4;
+ param_count += gs->NumImages * BRW_IMAGE_PARAM_SIZE;
c.prog_data.base.base.param =
rzalloc_array(NULL, const gl_constant_value *, param_count);
c.prog_data.base.base.pull_param =
rzalloc_array(NULL, const gl_constant_value *, param_count);
+ c.prog_data.base.base.image_param =
+ rzalloc_array(NULL, struct brw_image_param, gs->NumImages);
c.prog_data.base.base.nr_params = param_count;
+ c.prog_data.base.base.nr_image_params = gs->NumImages;
if (brw->gen >= 7) {
if (gp->program.OutputType == GL_POINTS) {
@@ -270,16 +274,6 @@ brw_compile_gs_prog(struct brw_context *brw,
return false;
}
- /* Scratch space is used for register spilling */
- if (c.base.last_scratch) {
- perf_debug("Geometry shader triggered register spilling. "
- "Try reducing the number of live vec4 values to "
- "improve performance.\n");
-
- c.prog_data.base.base.total_scratch
- = brw_get_scratch_size(c.base.last_scratch*REG_SIZE);
- }
-
output->mem_ctx = mem_ctx;
output->program = program;
output->program_size = program_size;
diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
index 0b8bfc3d9bd..0bb307432d0 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
@@ -119,3 +119,28 @@ const struct brw_tracked_state brw_gs_abo_surfaces = {
},
.emit = brw_upload_gs_abo_surfaces,
};
+
+static void
+brw_upload_gs_image_surfaces(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+ /* BRW_NEW_GEOMETRY_PROGRAM */
+ struct gl_shader_program *prog =
+ ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
+
+ if (prog) {
+ /* BRW_NEW_GS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
+ brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY],
+ &brw->gs.base, &brw->gs.prog_data->base.base);
+ }
+}
+
+const struct brw_tracked_state brw_gs_image_surfaces = {
+ .dirty = {
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_GEOMETRY_PROGRAM |
+ BRW_NEW_GS_PROG_DATA |
+ BRW_NEW_IMAGE_UNITS,
+ },
+ .emit = brw_upload_gs_image_surfaces,
+};
diff --git a/src/mesa/drivers/dri/i965/brw_inst.h b/src/mesa/drivers/dri/i965/brw_inst.h
index 7a8c210118c..46eff1dd381 100644
--- a/src/mesa/drivers/dri/i965/brw_inst.h
+++ b/src/mesa/drivers/dri/i965/brw_inst.h
@@ -683,9 +683,9 @@ brw_inst_bits(const brw_inst *inst, unsigned high, unsigned low)
high %= 64;
low %= 64;
- const uint64_t mask = (((1ull << (high - low + 1)) - 1) << low);
+ const uint64_t mask = (1ull << (high - low + 1)) - 1;
- return (inst->data[word] & mask) >> low;
+ return (inst->data[word] >> low) & mask;
}
/**
@@ -702,12 +702,12 @@ brw_inst_set_bits(brw_inst *inst, unsigned high, unsigned low, uint64_t value)
high %= 64;
low %= 64;
- const uint64_t mask = (((1ull << (high - low + 1)) - 1) << low);
+ const uint64_t mask = ((1ull << (high - low + 1)) - 1) << low;
/* Make sure the supplied value actually fits in the given bitfield. */
assert((value & (mask >> low)) == value);
- inst->data[word] = (inst->data[word] & ~mask) | ((value << low) & mask);
+ inst->data[word] = (inst->data[word] & ~mask) | (value << low);
}
#undef BRW_IA16_ADDR_IMM
@@ -731,9 +731,9 @@ typedef struct {
static inline unsigned
brw_compact_inst_bits(brw_compact_inst *inst, unsigned high, unsigned low)
{
- const uint64_t mask = (((1ull << (high - low + 1)) - 1) << low);
+ const uint64_t mask = (1ull << (high - low + 1)) - 1;
- return (inst->data & mask) >> low;
+ return (inst->data >> low) & mask;
}
/**
@@ -745,12 +745,12 @@ static inline void
brw_compact_inst_set_bits(brw_compact_inst *inst, unsigned high, unsigned low,
uint64_t value)
{
- const uint64_t mask = (((1ull << (high - low + 1)) - 1) << low);
+ const uint64_t mask = ((1ull << (high - low + 1)) - 1) << low;
/* Make sure the supplied value actually fits in the given bitfield. */
assert((value & (mask >> low)) == value);
- inst->data = (inst->data & ~mask) | ((value << low) & mask);
+ inst->data = (inst->data & ~mask) | (value << low);
}
#define F(name, high, low) \
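A standalone model of the reworked bitfield helpers above: extraction now shifts first and then masks, and insertion relies on the assert that the value already fits the field instead of masking again after the shift. Field bounds are assumed to stay below 64 bits, as in the real opcode tables.

#include <cassert>
#include <cstdint>

/* Shift-then-mask extraction, as in the updated brw_inst_bits(). */
static uint64_t
get_bits(uint64_t data, unsigned high, unsigned low)
{
   const uint64_t mask = (1ull << (high - low + 1)) - 1;
   return (data >> low) & mask;
}

/* Insertion without the trailing mask, as in the updated brw_inst_set_bits():
 * the assert guarantees the value cannot spill into neighbouring fields.
 */
static uint64_t
set_bits(uint64_t data, unsigned high, unsigned low, uint64_t value)
{
   const uint64_t mask = ((1ull << (high - low + 1)) - 1) << low;
   assert((value & (mask >> low)) == value);
   return (data & ~mask) | (value << low);
}

int main()
{
   uint64_t d = 0;
   d = set_bits(d, 15, 8, 0xab);
   assert(get_bits(d, 15, 8) == 0xab);
   assert(get_bits(d, 7, 0) == 0);
   return 0;
}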
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index 96dc20da3cf..97c6f8b2500 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -44,11 +44,16 @@ public:
fs_reg(struct brw_reg fixed_hw_reg);
fs_reg(enum register_file file, int reg);
fs_reg(enum register_file file, int reg, enum brw_reg_type type);
- fs_reg(enum register_file file, int reg, enum brw_reg_type type, uint8_t width);
bool equals(const fs_reg &r) const;
bool is_contiguous() const;
+ /**
+ * Return the size in bytes of a single logical component of the
+ * register assuming the given execution width.
+ */
+ unsigned component_size(unsigned width) const;
+
/** Smear a channel of the reg to all channels. */
fs_reg &set_smear(unsigned subreg);
@@ -60,14 +65,6 @@ public:
fs_reg *reladdr;
- /**
- * The register width. This indicates how many hardware values are
- * represented by each virtual value. Valid values are 1, 8, or 16.
- * For immediate values, this is 1. Most of the rest of the time, it
- * will be equal to the dispatch width.
- */
- uint8_t width;
-
/** Register region horizontal stride */
uint8_t stride;
};
@@ -129,33 +126,10 @@ horiz_offset(fs_reg reg, unsigned delta)
}
static inline fs_reg
-offset(fs_reg reg, unsigned delta)
-{
- switch (reg.file) {
- case BAD_FILE:
- break;
- case GRF:
- case MRF:
- case ATTR:
- return byte_offset(reg,
- delta * MAX2(reg.width * reg.stride, 1) *
- type_sz(reg.type));
- case UNIFORM:
- reg.reg_offset += delta;
- break;
- default:
- assert(delta == 0);
- }
- return reg;
-}
-
-static inline fs_reg
component(fs_reg reg, unsigned idx)
{
assert(reg.subreg_offset == 0);
- assert(idx < reg.width);
reg.subreg_offset = idx * type_sz(reg.type);
- reg.width = 1;
reg.stride = 0;
return reg;
}
@@ -163,7 +137,7 @@ component(fs_reg reg, unsigned idx)
static inline bool
is_uniform(const fs_reg &reg)
{
- return (reg.width == 1 || reg.stride == 0 || reg.is_null()) &&
+ return (reg.stride == 0 || reg.is_null()) &&
(!reg.reladdr || is_uniform(*reg.reladdr));
}
@@ -185,8 +159,6 @@ half(fs_reg reg, unsigned idx)
case GRF:
case MRF:
- assert(reg.width == 16);
- reg.width = 8;
return horiz_offset(reg, 8 * idx);
case ATTR:
@@ -210,20 +182,13 @@ public:
fs_inst();
fs_inst(enum opcode opcode, uint8_t exec_size);
- fs_inst(enum opcode opcode, const fs_reg &dst);
+ fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst);
fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
const fs_reg &src0);
- fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0);
fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
const fs_reg &src0, const fs_reg &src1);
- fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
- const fs_reg &src1);
fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
const fs_reg &src0, const fs_reg &src1, const fs_reg &src2);
- fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
- const fs_reg &src1, const fs_reg &src2);
- fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg src[],
- unsigned sources);
fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
const fs_reg src[], unsigned sources);
fs_inst(const fs_inst &that);
@@ -236,6 +201,7 @@ public:
bool is_send_from_grf() const;
bool is_partial_write() const;
bool is_copy_payload(const brw::simple_allocator &grf_alloc) const;
+ unsigned components_read(unsigned i) const;
int regs_read(int arg) const;
bool can_do_source_mods(const struct brw_device_info *devinfo);
bool has_side_effects() const;
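The component_size() method declared above is only prototyped in this header hunk; judging from the byte_offset() arithmetic in the offset() helper removed above, a plausible sketch of its definition (an inference, not the actual brw_fs.cpp code) is:

/* Inferred sketch only: bytes covered by one logical component of the
 * register at the given execution width, using the driver's MAX2 and
 * type_sz helpers. */
unsigned
fs_reg::component_size(unsigned width) const
{
   return MAX2(width * stride, 1) * type_sz(type);
}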
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index fceacae0e51..966a410a15d 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -113,6 +113,8 @@ public:
dst_reg(register_file file, int reg);
dst_reg(register_file file, int reg, const glsl_type *type,
unsigned writemask);
+ dst_reg(register_file file, int reg, brw_reg_type type,
+ unsigned writemask);
dst_reg(struct brw_reg reg);
dst_reg(class vec4_visitor *v, const struct glsl_type *type);
diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index 49f2e3e498c..f5ecbb54989 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -128,7 +128,7 @@ brw_bind_rep_write_shader(struct brw_context *brw, float *color)
_mesa_AttachShader(clear->shader_prog, vs);
_mesa_DeleteShader(vs);
_mesa_BindAttribLocation(clear->shader_prog, 0, "position");
- _mesa_ObjectLabel(GL_PROGRAM, clear->shader_prog, -1, "meta clear");
+ _mesa_ObjectLabel(GL_PROGRAM, clear->shader_prog, -1, "meta repclear");
_mesa_LinkProgram(clear->shader_prog);
clear->color_location =
@@ -200,7 +200,7 @@ brw_draw_rectlist(struct gl_context *ctx, struct rect *rect, int num_instances)
brw_draw_prims(ctx, &prim, 1, NULL,
GL_TRUE, start, start + count - 1,
- NULL, NULL);
+ NULL, 0, NULL);
}
static void
@@ -348,7 +348,7 @@ is_color_fast_clear_compatible(struct brw_context *brw,
}
for (int i = 0; i < 4; i++) {
- if (color->f[i] != 0.0 && color->f[i] != 1.0 &&
+ if (color->f[i] != 0.0f && color->f[i] != 1.0f &&
_mesa_format_has_color_component(format, i)) {
return false;
}
@@ -366,7 +366,7 @@ compute_fast_clear_color_bits(const union gl_color_union *color)
uint32_t bits = 0;
for (int i = 0; i < 4; i++) {
/* Testing for non-0 works for integer and float colors */
- if (color->f[i] != 0.0)
+ if (color->f[i] != 0.0f)
bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
}
return bits;
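As a worked example of the loop above (a standalone sketch; the shift value 28 is assumed here purely for illustration): a clear color of (1, 0, 0, 1) has non-zero red and alpha channels, so two bits get set, counted down from the top of the field.

#include <stdint.h>
#include <stdio.h>

#define CLEAR_COLOR_SHIFT 28   /* stand-in for GEN7_SURFACE_CLEAR_COLOR_SHIFT */

int
main(void)
{
   const float color[4] = { 1.0f, 0.0f, 0.0f, 1.0f };
   uint32_t bits = 0;

   for (int i = 0; i < 4; i++) {
      if (color[i] != 0.0f)
         bits |= 1u << (CLEAR_COLOR_SHIFT + (3 - i));
   }

   printf("0x%08x\n", bits);   /* prints 0x90000000: bits 31 (red) and 28 (alpha) */
   return 0;
}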
@@ -623,7 +623,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
* write-flush must be issued before sending any DRAW commands on that
* render target.
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
/* If we had to fall back to plain clear for any buffers, clear those now
* by calling into meta.
@@ -677,7 +677,7 @@ brw_meta_resolve_color(struct brw_context *brw,
GLuint fbo, rbo;
struct rect rect;
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
_mesa_meta_begin(ctx, MESA_META_ALL);
diff --git a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
index d079197a2a9..aa6df16eb04 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
@@ -239,10 +239,10 @@ setup_coord_coeff(GLuint prog, GLuint multiplier, GLuint offset,
if (mirror) {
_mesa_Uniform1f(multiplier, -scale);
- _mesa_Uniform1f(offset, src_0 + (dst_1 - 0.5) * scale);
+ _mesa_Uniform1f(offset, src_0 + (dst_1 - 0.5f) * scale);
} else {
_mesa_Uniform1f(multiplier, scale);
- _mesa_Uniform1f(offset, src_0 + (-dst_0 + 0.5) * scale);
+ _mesa_Uniform1f(offset, src_0 + (-dst_0 + 0.5f) * scale);
}
}
@@ -500,11 +500,11 @@ brw_meta_fbo_stencil_blit(struct brw_context *brw,
.mirror_x = mirror_x, .mirror_y = mirror_y };
adjust_mip_level(dst_mt, dst_irb->mt_level, dst_irb->mt_layer, &dims);
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
_mesa_meta_begin(ctx, MESA_META_ALL);
brw_meta_stencil_blit(brw,
dst_mt, dst_irb->mt_level, dst_irb->mt_layer, &dims);
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
}
void
@@ -524,7 +524,7 @@ brw_meta_stencil_updownsample(struct brw_context *brw,
if (dst->stencil_mt)
dst = dst->stencil_mt;
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
_mesa_meta_begin(ctx, MESA_META_ALL);
_mesa_GenFramebuffers(1, &fbo);
@@ -535,7 +535,7 @@ brw_meta_stencil_updownsample(struct brw_context *brw,
GL_RENDERBUFFER, rbo);
brw_meta_stencil_blit(brw, dst, 0, 0, &dims);
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
_mesa_DeleteRenderbuffers(1, &rbo);
_mesa_DeleteFramebuffers(1, &fbo);
diff --git a/src/mesa/drivers/dri/i965/brw_meta_updownsample.c b/src/mesa/drivers/dri/i965/brw_meta_updownsample.c
index 21507b1ad2a..f39d50a69e6 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_updownsample.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_updownsample.c
@@ -116,7 +116,7 @@ brw_meta_updownsample(struct brw_context *brw,
blit_bit = GL_COLOR_BUFFER_BIT;
}
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
_mesa_meta_begin(ctx, MESA_META_ALL);
_mesa_GenFramebuffers(2, fbos);
@@ -147,5 +147,5 @@ brw_meta_updownsample(struct brw_context *brw,
_mesa_meta_end(ctx);
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
}
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 5a4515b582d..e9d9467d330 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -44,7 +44,8 @@
#include "main/glformats.h"
/* Constant single cliprect for framebuffer object or DRI2 drawing */
-static void upload_drawing_rect(struct brw_context *brw)
+static void
+upload_drawing_rect(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
const struct gl_framebuffer *fb = ctx->DrawBuffer;
@@ -73,7 +74,8 @@ const struct brw_tracked_state brw_drawing_rect = {
* The state pointers in this packet are all relative to the general state
* base address set by CMD_STATE_BASE_ADDRESS, which is 0.
*/
-static void upload_pipelined_state_pointers(struct brw_context *brw )
+static void
+upload_pipelined_state_pointers(struct brw_context *brw)
{
if (brw->gen == 5) {
/* Need to flush before changing clip max threads for errata. */
@@ -104,7 +106,8 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
brw->ctx.NewDriverState |= BRW_NEW_PSP;
}
-static void upload_psp_urb_cbs(struct brw_context *brw )
+static void
+upload_psp_urb_cbs(struct brw_context *brw)
{
upload_pipelined_state_pointers(brw);
brw_upload_urb_fence(brw);
@@ -580,7 +583,7 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw,
* non-pipelined state that will need the PIPE_CONTROL workaround.
*/
if (brw->gen == 6) {
- intel_emit_depth_stall_flushes(brw);
+ brw_emit_depth_stall_flushes(brw);
}
unsigned int len;
@@ -700,13 +703,11 @@ const struct brw_tracked_state brw_depthbuffer = {
.emit = brw_emit_depthbuffer,
};
-
-
-/***********************************************************************
+/**
* Polygon stipple packet
*/
-
-static void upload_polygon_stipple(struct brw_context *brw)
+static void
+upload_polygon_stipple(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
GLuint i;
@@ -728,8 +729,7 @@ static void upload_polygon_stipple(struct brw_context *brw)
if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
for (i = 0; i < 32; i++)
OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
- }
- else {
+ } else {
for (i = 0; i < 32; i++)
OUT_BATCH(ctx->PolygonStipple[i]);
}
@@ -745,12 +745,11 @@ const struct brw_tracked_state brw_polygon_stipple = {
.emit = upload_polygon_stipple
};
-
-/***********************************************************************
+/**
* Polygon stipple offset packet
*/
-
-static void upload_polygon_stipple_offset(struct brw_context *brw)
+static void
+upload_polygon_stipple_offset(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
@@ -785,10 +784,11 @@ const struct brw_tracked_state brw_polygon_stipple_offset = {
.emit = upload_polygon_stipple_offset
};
-/**********************************************************************
+/**
* AA Line parameters
*/
-static void upload_aa_line_parameters(struct brw_context *brw)
+static void
+upload_aa_line_parameters(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
@@ -815,11 +815,11 @@ const struct brw_tracked_state brw_aa_line_parameters = {
.emit = upload_aa_line_parameters
};
-/***********************************************************************
+/**
* Line stipple packet
*/
-
-static void upload_line_stipple(struct brw_context *brw)
+static void
+upload_line_stipple(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
GLfloat tmp;
@@ -834,13 +834,12 @@ static void upload_line_stipple(struct brw_context *brw)
if (brw->gen >= 7) {
/* in U1.16 */
- tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
+ tmp = 1.0f / ctx->Line.StippleFactor;
tmpi = tmp * (1<<16);
OUT_BATCH(tmpi << 15 | ctx->Line.StippleFactor);
- }
- else {
+ } else {
/* in U1.13 */
- tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
+ tmp = 1.0f / ctx->Line.StippleFactor;
tmpi = tmp * (1<<13);
OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
}
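To make the fixed-point conversion above concrete (a standalone sketch): with a stipple factor of 3, the inverse 1/3 becomes roughly 21845 in U1.16 on gen7+ and 2730 in U1.13 on older parts.

#include <stdio.h>

int
main(void)
{
   const int stipple_factor = 3;              /* example ctx->Line.StippleFactor */
   const float inv = 1.0f / stipple_factor;   /* ~0.3333 */

   const int u1_16 = inv * (1 << 16);   /* gen7+:    21845 */
   const int u1_13 = inv * (1 << 13);   /* pre-gen7:  2730 */

   printf("U1.16 = %d, U1.13 = %d\n", u1_16, u1_13);
   return 0;
}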
@@ -856,7 +855,6 @@ const struct brw_tracked_state brw_line_stipple = {
.emit = upload_line_stipple
};
-
void
brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
{
@@ -872,11 +870,9 @@ brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
ADVANCE_BATCH();
}
-
-/***********************************************************************
+/**
* Misc invariant state packets
*/
-
void
brw_upload_invariant_state(struct brw_context *brw)
{
@@ -930,7 +926,8 @@ const struct brw_tracked_state brw_invariant_state = {
* surface state objects, but not the surfaces that the surface state
* objects point to.
*/
-static void upload_state_base_address( struct brw_context *brw )
+static void
+upload_state_base_address(struct brw_context *brw)
{
/* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
* vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index e7e16b6686a..79e31d86759 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -27,19 +27,27 @@
#include "program/prog_to_nir.h"
static void
-nir_optimize(nir_shader *nir)
+nir_optimize(nir_shader *nir, bool is_scalar)
{
bool progress;
do {
progress = false;
nir_lower_vars_to_ssa(nir);
nir_validate_shader(nir);
- nir_lower_alu_to_scalar(nir);
- nir_validate_shader(nir);
+
+ if (is_scalar) {
+ nir_lower_alu_to_scalar(nir);
+ nir_validate_shader(nir);
+ }
+
progress |= nir_copy_prop(nir);
nir_validate_shader(nir);
- nir_lower_phis_to_scalar(nir);
- nir_validate_shader(nir);
+
+ if (is_scalar) {
+ nir_lower_phis_to_scalar(nir);
+ nir_validate_shader(nir);
+ }
+
progress |= nir_copy_prop(nir);
nir_validate_shader(nir);
progress |= nir_opt_dce(nir);
@@ -57,33 +65,12 @@ nir_optimize(nir_shader *nir)
} while (progress);
}
-static bool
-count_nir_instrs_in_block(nir_block *block, void *state)
-{
- int *count = (int *) state;
- nir_foreach_instr(block, instr) {
- *count = *count + 1;
- }
- return true;
-}
-
-static int
-count_nir_instrs(nir_shader *nir)
-{
- int count = 0;
- nir_foreach_overload(nir, overload) {
- if (!overload->impl)
- continue;
- nir_foreach_block(overload->impl, count_nir_instrs_in_block, &count);
- }
- return count;
-}
-
nir_shader *
brw_create_nir(struct brw_context *brw,
const struct gl_shader_program *shader_prog,
const struct gl_program *prog,
- gl_shader_stage stage)
+ gl_shader_stage stage,
+ bool is_scalar)
{
struct gl_context *ctx = &brw->ctx;
const nir_shader_compiler_options *options =
@@ -100,16 +87,15 @@ brw_create_nir(struct brw_context *brw,
}
nir_validate_shader(nir);
- brw_process_nir(nir, brw->intelScreen->devinfo, shader_prog, stage);
+ brw_process_nir(nir, brw->intelScreen->devinfo, shader_prog, stage, is_scalar);
static GLuint msg_id = 0;
_mesa_gl_debug(&brw->ctx, &msg_id,
MESA_DEBUG_SOURCE_SHADER_COMPILER,
MESA_DEBUG_TYPE_OTHER,
MESA_DEBUG_SEVERITY_NOTIFICATION,
- "%s NIR shader: %d inst\n",
- _mesa_shader_stage_to_abbrev(stage),
- count_nir_instrs(nir));
+ "%s NIR shader:\n",
+ _mesa_shader_stage_to_abbrev(stage));
return nir;
}
@@ -118,7 +104,7 @@ void
brw_process_nir(nir_shader *nir,
const struct brw_device_info *devinfo,
const struct gl_shader_program *shader_prog,
- gl_shader_stage stage)
+ gl_shader_stage stage, bool is_scalar)
{
bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
@@ -134,22 +120,33 @@ brw_process_nir(nir_shader *nir,
nir_split_var_copies(nir);
nir_validate_shader(nir);
- nir_optimize(nir);
+ nir_optimize(nir, is_scalar);
/* Lower a bunch of stuff */
nir_lower_var_copies(nir);
nir_validate_shader(nir);
/* Get rid of split copies */
- nir_optimize(nir);
+ nir_optimize(nir, is_scalar);
+
+ if (is_scalar) {
+ nir_assign_var_locations_direct_first(nir, &nir->uniforms,
+ &nir->num_direct_uniforms,
+ &nir->num_uniforms,
+ is_scalar);
+ nir_assign_var_locations(&nir->outputs, &nir->num_outputs, is_scalar);
+ } else {
+ nir_assign_var_locations(&nir->uniforms,
+ &nir->num_uniforms,
+ is_scalar);
- nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms,
- &nir->num_direct_uniforms,
- &nir->num_uniforms);
- nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs);
- nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs);
+ foreach_list_typed(nir_variable, var, node, &nir->outputs)
+ var->data.driver_location = var->data.location;
+ }
+ nir_assign_var_locations(&nir->inputs, &nir->num_inputs, is_scalar);
+
+ nir_lower_io(nir, is_scalar);
- nir_lower_io(nir);
nir_validate_shader(nir);
nir_remove_dead_variables(nir);
@@ -168,7 +165,7 @@ brw_process_nir(nir_shader *nir,
nir_lower_atomics(nir);
nir_validate_shader(nir);
- nir_optimize(nir);
+ nir_optimize(nir, is_scalar);
if (devinfo->gen >= 6) {
/* Try and fuse multiply-adds */
@@ -201,9 +198,14 @@ brw_process_nir(nir_shader *nir,
nir_print_shader(nir, stderr);
}
- nir_convert_from_ssa(nir);
+ nir_convert_from_ssa(nir, is_scalar);
nir_validate_shader(nir);
+ if (!is_scalar) {
+ nir_lower_vec_to_movs(nir);
+ nir_validate_shader(nir);
+ }
+
/* This is the last pass we run before we start emitting stuff. It
* determines when we need to insert boolean resolves on Gen <= 5. We
* run it last because it stashes data in instr->pass_flags and we don't
@@ -220,3 +222,42 @@ brw_process_nir(nir_shader *nir,
nir_print_shader(nir, stderr);
}
}
+
+enum brw_reg_type
+brw_type_for_nir_type(nir_alu_type type)
+{
+ switch (type) {
+ case nir_type_unsigned:
+ return BRW_REGISTER_TYPE_UD;
+ case nir_type_bool:
+ case nir_type_int:
+ return BRW_REGISTER_TYPE_D;
+ case nir_type_float:
+ return BRW_REGISTER_TYPE_F;
+ default:
+ unreachable("unknown type");
+ }
+
+ return BRW_REGISTER_TYPE_F;
+}
+
+/* Returns the glsl_base_type corresponding to a nir_alu_type.
+ * This is used by both brw_vec4_nir and brw_fs_nir.
+ */
+enum glsl_base_type
+brw_glsl_base_type_for_nir_type(nir_alu_type type)
+{
+ switch (type) {
+ case nir_type_float:
+ return GLSL_TYPE_FLOAT;
+
+ case nir_type_int:
+ return GLSL_TYPE_INT;
+
+ case nir_type_unsigned:
+ return GLSL_TYPE_UINT;
+
+ default:
+ unreachable("bad type");
+ }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
index 8487cef0901..5a1358890cc 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -24,6 +24,7 @@
#pragma once
#include "brw_context.h"
+#include "brw_reg.h"
#include "glsl/nir/nir.h"
#ifdef __cplusplus
@@ -77,13 +78,18 @@ void brw_nir_analyze_boolean_resolves(nir_shader *nir);
nir_shader *brw_create_nir(struct brw_context *brw,
const struct gl_shader_program *shader_prog,
const struct gl_program *prog,
- gl_shader_stage stage);
+ gl_shader_stage stage,
+ bool is_scalar);
+
+enum brw_reg_type brw_type_for_nir_type(nir_alu_type type);
+
+enum glsl_base_type brw_glsl_base_type_for_nir_type(nir_alu_type type);
void
brw_process_nir(nir_shader *nir,
const struct brw_device_info *devinfo,
const struct gl_shader_program *shader_prog,
- gl_shader_stage stage);
+ gl_shader_stage stage, bool is_scalar);
#ifdef __cplusplus
}
diff --git a/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c b/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c
index f0b018cf84a..c995d2b7e2d 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c
+++ b/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c
@@ -43,8 +43,8 @@
static uint8_t
get_resolve_status_for_src(nir_src *src)
{
- nir_instr *src_instr = nir_src_get_parent_instr(src);
- if (src_instr) {
+ if (src->is_ssa) {
+ nir_instr *src_instr = src->ssa->parent_instr;
uint8_t resolve_status = src_instr->pass_flags & BRW_NIR_BOOLEAN_MASK;
/* If the source instruction needs resolve, then from the perspective
@@ -66,8 +66,8 @@ get_resolve_status_for_src(nir_src *src)
static bool
src_mark_needs_resolve(nir_src *src, void *void_state)
{
- nir_instr *src_instr = nir_src_get_parent_instr(src);
- if (src_instr) {
+ if (src->is_ssa) {
+ nir_instr *src_instr = src->ssa->parent_instr;
uint8_t resolve_status = src_instr->pass_flags & BRW_NIR_BOOLEAN_MASK;
/* If the source instruction is unresolved, then mark it as needing
@@ -109,28 +109,27 @@ analyze_boolean_resolves_block(nir_block *block, void *void_state)
uint8_t resolve_status;
nir_alu_instr *alu = nir_instr_as_alu(instr);
switch (alu->op) {
- case nir_op_flt:
- case nir_op_ilt:
- case nir_op_ult:
- case nir_op_fge:
- case nir_op_ige:
- case nir_op_uge:
- case nir_op_feq:
- case nir_op_ieq:
- case nir_op_fne:
- case nir_op_ine:
- case nir_op_f2b:
- case nir_op_i2b:
- /* This instruction will turn into a CMP when we actually emit
- * so the result will have to be resolved before it can be used.
+ case nir_op_bany2:
+ case nir_op_bany3:
+ case nir_op_bany4:
+ case nir_op_ball_fequal2:
+ case nir_op_ball_iequal2:
+ case nir_op_ball_fequal3:
+ case nir_op_ball_iequal3:
+ case nir_op_ball_fequal4:
+ case nir_op_ball_iequal4:
+ case nir_op_bany_fnequal2:
+ case nir_op_bany_inequal2:
+ case nir_op_bany_fnequal3:
+ case nir_op_bany_inequal3:
+ case nir_op_bany_fnequal4:
+ case nir_op_bany_inequal4:
+ /* These are only implemented by the vec4 backend and its
+ * implementation emits resolved booleans. At some point in the
+ * future, this may change and we'll have to remove some of the
+ * above cases.
*/
- resolve_status = BRW_NIR_BOOLEAN_UNRESOLVED;
-
- /* Even though the destination is allowed to be left unresolved,
- * the sources are treated as regular integers or floats so
- * they need to be resolved.
- */
- nir_foreach_src(instr, src_mark_needs_resolve, NULL);
+ resolve_status = BRW_NIR_BOOLEAN_NO_RESOLVE;
break;
case nir_op_imov:
@@ -169,14 +168,28 @@ analyze_boolean_resolves_block(nir_block *block, void *void_state)
}
default:
- resolve_status = BRW_NIR_NON_BOOLEAN;
+ if (nir_op_infos[alu->op].output_type == nir_type_bool) {
+ /* These instructions will turn into a CMP when we actually emit
+ * them so the result will have to be resolved before it can be
+ * used.
+ */
+ resolve_status = BRW_NIR_BOOLEAN_UNRESOLVED;
+
+ /* Even though the destination is allowed to be left
+ * unresolved, the sources are treated as regular integers or
+ * floats so they need to be resolved.
+ */
+ nir_foreach_src(instr, src_mark_needs_resolve, NULL);
+ } else {
+ resolve_status = BRW_NIR_NON_BOOLEAN;
+ }
}
- /* If the destination is SSA-like, go ahead allow unresolved booleans.
+ /* If the destination is SSA, go ahead and allow unresolved booleans.
* If the destination register doesn't have a well-defined parent_instr
* we need to resolve immediately.
*/
- if (alu->dest.dest.reg.reg->parent_instr == NULL &&
+ if (!alu->dest.dest.is_ssa &&
resolve_status == BRW_NIR_BOOLEAN_UNRESOLVED) {
resolve_status = BRW_NIR_BOOLEAN_NEEDS_RESOLVE;
}
diff --git a/src/mesa/drivers/dri/i965/brw_performance_monitor.c b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
index 2c8cd491a8e..7e90e8a8fa1 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_monitor.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_monitor.c
@@ -581,7 +581,7 @@ snapshot_statistics_registers(struct brw_context *brw,
const int group = PIPELINE_STATS_COUNTERS;
const int num_counters = ctx->PerfMonitor.Groups[group].NumCounters;
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
for (int i = 0; i < num_counters; i++) {
if (BITSET_TEST(monitor->base.ActiveCounters[group], i)) {
@@ -687,7 +687,7 @@ stop_oa_counters(struct brw_context *brw)
* The amount of batch space it takes to emit an MI_REPORT_PERF_COUNT snapshot,
* including the required PIPE_CONTROL flushes.
*
- * Sandybridge is the worst case scenario: intel_batchbuffer_emit_mi_flush
+ * Sandybridge is the worst case scenario: brw_emit_mi_flush
* expands to three PIPE_CONTROLs which are 4 DWords each. We have to flush
* before and after MI_REPORT_PERF_COUNT, so multiply by two. Finally, add
* the 3 DWords for MI_REPORT_PERF_COUNT itself.
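Spelling out the sizing arithmetic in that comment (the actual #define sits outside this hunk, so the expression below is inferred from the comment rather than copied from the source):

/* Worst case (Sandybridge), per the comment above:
 *   one flush  = 3 PIPE_CONTROLs x 4 DWords = 12 DWords
 *   flush before + flush after              = 24 DWords
 *   MI_REPORT_PERF_COUNT itself             =  3 DWords
 *   total                                   = 27 DWords
 */
#define MI_REPORT_PERF_COUNT_BATCH_DWORDS (2 * (3 * 4) + 3)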
@@ -710,10 +710,10 @@ emit_mi_report_perf_count(struct brw_context *brw,
/* Make sure the commands to take a snapshot fits in a single batch. */
intel_batchbuffer_require_space(brw, MI_REPORT_PERF_COUNT_BATCH_DWORDS * 4,
RENDER_RING);
- int batch_used = brw->batch.used;
+ int batch_used = USED_BATCH(brw->batch);
/* Reports apparently don't always get written unless we flush first. */
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
if (brw->gen == 5) {
/* Ironlake requires two MI_REPORT_PERF_COUNT commands to write all
@@ -751,10 +751,10 @@ emit_mi_report_perf_count(struct brw_context *brw,
}
/* Reports apparently don't always get written unless we flush after. */
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
(void) batch_used;
- assert(brw->batch.used - batch_used <= MI_REPORT_PERF_COUNT_BATCH_DWORDS * 4);
+ assert(USED_BATCH(brw->batch) - batch_used <= MI_REPORT_PERF_COUNT_BATCH_DWORDS * 4);
}
/**
@@ -1386,7 +1386,7 @@ void
brw_perf_monitor_new_batch(struct brw_context *brw)
{
assert(brw->batch.ring == RENDER_RING);
- assert(brw->gen < 6 || brw->batch.used == 0);
+ assert(brw->gen < 6 || USED_BATCH(brw->batch) == 0);
if (brw->perfmon.oa_users == 0)
return;
diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c
new file mode 100644
index 00000000000..7ee3cb680f7
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_context.h"
+#include "intel_batchbuffer.h"
+#include "intel_fbo.h"
+#include "intel_reg.h"
+
+/**
+ * According to the latest documentation, any PIPE_CONTROL with the
+ * "Command Streamer Stall" bit set must also have another bit set,
+ * with five different options:
+ *
+ * - Render Target Cache Flush
+ * - Depth Cache Flush
+ * - Stall at Pixel Scoreboard
+ * - Post-Sync Operation
+ * - Depth Stall
+ *
+ * I chose "Stall at Pixel Scoreboard" since we've used it effectively
+ * in the past, but the choice is fairly arbitrary.
+ */
+static void
+gen8_add_cs_stall_workaround_bits(uint32_t *flags)
+{
+ uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_WRITE_IMMEDIATE |
+ PIPE_CONTROL_WRITE_DEPTH_COUNT |
+ PIPE_CONTROL_WRITE_TIMESTAMP |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD |
+ PIPE_CONTROL_DEPTH_STALL;
+
+ /* If we're doing a CS stall, and don't already have one of the
+ * workaround bits set, add "Stall at Pixel Scoreboard."
+ */
+ if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0)
+ *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
+}
+
+/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
+ *
+ * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
+ * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
+ *
+ * Note that the kernel does CS stalls between batches, so we only need
+ * to count them within a batch.
+ */
+static uint32_t
+gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
+{
+ if (brw->gen == 7 && !brw->is_haswell) {
+ if (flags & PIPE_CONTROL_CS_STALL) {
+ /* If we're doing a CS stall, reset the counter and carry on. */
+ brw->pipe_controls_since_last_cs_stall = 0;
+ return 0;
+ }
+
+ /* If this is the fourth pipe control without a CS stall, do one now. */
+ if (++brw->pipe_controls_since_last_cs_stall == 4) {
+ brw->pipe_controls_since_last_cs_stall = 0;
+ return PIPE_CONTROL_CS_STALL;
+ }
+ }
+ return 0;
+}
+
+/**
+ * Emit a PIPE_CONTROL with various flushing flags.
+ *
+ * The caller is responsible for deciding what flags are appropriate for the
+ * given generation.
+ */
+void
+brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
+{
+ if (brw->gen >= 8) {
+ gen8_add_cs_stall_workaround_bits(&flags);
+
+ BEGIN_BATCH(6);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
+ OUT_BATCH(flags);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ } else if (brw->gen >= 6) {
+ flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
+
+ BEGIN_BATCH(5);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
+ OUT_BATCH(flags);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(4);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ }
+}
+
+/**
+ * Emit a PIPE_CONTROL that writes to a buffer object.
+ *
+ * \p flags should contain one of the following items:
+ * - PIPE_CONTROL_WRITE_IMMEDIATE
+ * - PIPE_CONTROL_WRITE_TIMESTAMP
+ * - PIPE_CONTROL_WRITE_DEPTH_COUNT
+ */
+void
+brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
+ drm_intel_bo *bo, uint32_t offset,
+ uint32_t imm_lower, uint32_t imm_upper)
+{
+ if (brw->gen >= 8) {
+ gen8_add_cs_stall_workaround_bits(&flags);
+
+ BEGIN_BATCH(6);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
+ OUT_BATCH(flags);
+ OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ offset);
+ OUT_BATCH(imm_lower);
+ OUT_BATCH(imm_upper);
+ ADVANCE_BATCH();
+ } else if (brw->gen >= 6) {
+ flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
+
+ /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24
+ * on later platforms. We always use PPGTT on Gen7+.
+ */
+ unsigned gen6_gtt = brw->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0;
+
+ BEGIN_BATCH(5);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
+ OUT_BATCH(flags);
+ OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ gen6_gtt | offset);
+ OUT_BATCH(imm_lower);
+ OUT_BATCH(imm_upper);
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(4);
+ OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
+ OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ PIPE_CONTROL_GLOBAL_GTT_WRITE | offset);
+ OUT_BATCH(imm_lower);
+ OUT_BATCH(imm_upper);
+ ADVANCE_BATCH();
+ }
+}
+
+/**
+ * Restriction [DevSNB, DevIVB]:
+ *
+ * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
+ * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
+ * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
+ * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
+ * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
+ * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
+ * unless SW can otherwise guarantee that the pipeline from WM onwards is
+ * already flushed (e.g., via a preceding MI_FLUSH).
+ */
+void
+brw_emit_depth_stall_flushes(struct brw_context *brw)
+{
+ assert(brw->gen >= 6 && brw->gen <= 9);
+
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_CACHE_FLUSH);
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
+}
+
+/**
+ * From the Ivybridge PRM, Volume 2 Part 1, Section 3.2 (VS Stage Input):
+ * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
+ * stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
+ * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
+ * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL needs
+ * to be sent before any combination of VS associated 3DSTATE."
+ */
+void
+gen7_emit_vs_workaround_flush(struct brw_context *brw)
+{
+ assert(brw->gen == 7);
+ brw_emit_pipe_control_write(brw,
+ PIPE_CONTROL_WRITE_IMMEDIATE
+ | PIPE_CONTROL_DEPTH_STALL,
+ brw->workaround_bo, 0,
+ 0, 0);
+}
+
+
+/**
+ * Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set.
+ */
+void
+gen7_emit_cs_stall_flush(struct brw_context *brw)
+{
+ brw_emit_pipe_control_write(brw,
+ PIPE_CONTROL_CS_STALL
+ | PIPE_CONTROL_WRITE_IMMEDIATE,
+ brw->workaround_bo, 0,
+ 0, 0);
+}
+
+
+/**
+ * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
+ * implementing two workarounds on gen6. From section 1.4.7.1
+ * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
+ *
+ * [DevSNB-C+{W/A}] Before any depth stall flush (including those
+ * produced by non-pipelined state commands), software needs to first
+ * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
+ * 0.
+ *
+ * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
+ * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
+ *
+ * And the workaround for these two requires this workaround first:
+ *
+ * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * BEFORE the pipe-control with a post-sync op and no write-cache
+ * flushes.
+ *
+ * And this last workaround is tricky because of the requirements on
+ * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
+ * volume 2 part 1:
+ *
+ * "1 of the following must also be set:
+ * - Render Target Cache Flush Enable ([12] of DW1)
+ * - Depth Cache Flush Enable ([0] of DW1)
+ * - Stall at Pixel Scoreboard ([1] of DW1)
+ * - Depth Stall ([13] of DW1)
+ * - Post-Sync Operation ([13] of DW1)
+ * - Notify Enable ([8] of DW1)"
+ *
+ * The cache flushes require the workaround flush that triggered this
+ * one, so we can't use it. Depth stall would trigger the same.
+ * Post-sync nonzero is what triggered this second workaround, so we
+ * can't use that one either. Notify enable is IRQs, which aren't
+ * really our business. That leaves only stall at scoreboard.
+ */
+void
+brw_emit_post_sync_nonzero_flush(struct brw_context *brw)
+{
+ brw_emit_pipe_control_flush(brw,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD);
+
+ brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE,
+ brw->workaround_bo, 0, 0, 0);
+}
+
+/* Emit a pipelined flush to either flush render and texture cache for
+ * reading from a FBO-drawn texture, or flush so that frontbuffer
+ * render appears on the screen in DRI1.
+ *
+ * This is also used for the always_flush_cache driconf debug option.
+ */
+void
+brw_emit_mi_flush(struct brw_context *brw)
+{
+ if (brw->batch.ring == BLT_RING && brw->gen >= 6) {
+ BEGIN_BATCH_BLT(4);
+ OUT_BATCH(MI_FLUSH_DW);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ } else {
+ int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH;
+ if (brw->gen >= 6) {
+ if (brw->gen == 9) {
+ /* Hardware workaround: SKL
+ *
+ * Emit Pipe Control with all bits set to zero before emitting
+ * a Pipe Control with VF Cache Invalidate set.
+ */
+ brw_emit_pipe_control_flush(brw, 0);
+ }
+
+ flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_VF_CACHE_INVALIDATE |
+ PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+ PIPE_CONTROL_CS_STALL;
+
+ if (brw->gen == 6) {
+ /* Hardware workaround: SNB B-Spec says:
+ *
+ * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
+ * Flush Enable =1, a PIPE_CONTROL with any non-zero
+ * post-sync-op is required.
+ */
+ brw_emit_post_sync_nonzero_flush(brw);
+ }
+ }
+ brw_emit_pipe_control_flush(brw, flags);
+ }
+
+ brw_render_cache_set_clear(brw);
+}
+
+int
+brw_init_pipe_control(struct brw_context *brw,
+ const struct brw_device_info *devinfo)
+{
+ if (devinfo->gen < 6)
+ return 0;
+
+ /* We can't just use brw_state_batch to get a chunk of space for
+ * the gen6 workaround because it involves actually writing to
+ * the buffer, and the kernel doesn't let us write to the batch.
+ */
+ brw->workaround_bo = drm_intel_bo_alloc(brw->bufmgr,
+ "pipe_control workaround",
+ 4096, 4096);
+ if (brw->workaround_bo == NULL)
+ return -ENOMEM;
+
+ brw->pipe_controls_since_last_cs_stall = 0;
+
+ return 0;
+}
+
+void
+brw_fini_pipe_control(struct brw_context *brw)
+{
+ drm_intel_bo_unreference(brw->workaround_bo);
+}
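For context, a hedged usage sketch of the write helper defined above (the query_bo variable is illustrative, not code from this patch): a pipelined timestamp can be recorded into a buffer object like so.

/* Illustrative only: ask the hardware to write the 64-bit timestamp into
 * query_bo at byte offset 0 once prior work reaches the PIPE_CONTROL. */
brw_emit_pipe_control_write(brw,
                            PIPE_CONTROL_WRITE_TIMESTAMP,
                            query_bo, 0,
                            0, 0);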
diff --git a/src/mesa/drivers/dri/i965/brw_primitive_restart.c b/src/mesa/drivers/dri/i965/brw_primitive_restart.c
index 2c7a7e8b8dd..6ed79d7cb75 100644
--- a/src/mesa/drivers/dri/i965/brw_primitive_restart.c
+++ b/src/mesa/drivers/dri/i965/brw_primitive_restart.c
@@ -161,7 +161,8 @@ brw_handle_primitive_restart(struct gl_context *ctx,
/* Cut index should work for primitive restart, so use it
*/
brw->prim_restart.enable_cut_index = true;
- brw_draw_prims(ctx, prims, nr_prims, ib, GL_FALSE, -1, -1, NULL, indirect);
+ brw_draw_prims(ctx, prims, nr_prims, ib, GL_FALSE, -1, -1, NULL, 0,
+ indirect);
brw->prim_restart.enable_cut_index = false;
} else {
/* Not all the primitive draw modes are supported by the cut index,
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index ea128ccb670..5a54cd39076 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -143,7 +143,7 @@ brwProgramStringNotify(struct gl_context *ctx,
brw_add_texrect_params(prog);
if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions) {
- prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT);
+ prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
}
brw_fs_precompile(ctx, NULL, prog);
@@ -169,7 +169,8 @@ brwProgramStringNotify(struct gl_context *ctx,
brw_add_texrect_params(prog);
if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions) {
- prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX);
+ prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
+ brw->intelScreen->compiler->scalar_vs);
}
brw_vs_precompile(ctx, NULL, prog);
@@ -196,7 +197,7 @@ brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
unsigned bits = (PIPE_CONTROL_DATA_CACHE_INVALIDATE |
PIPE_CONTROL_NO_WRITE |
PIPE_CONTROL_CS_STALL);
- assert(brw->gen >= 7 && brw->gen <= 8);
+ assert(brw->gen >= 7 && brw->gen <= 9);
if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
GL_ELEMENT_ARRAY_BARRIER_BIT |
@@ -574,10 +575,13 @@ brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
struct gl_shader *shader, struct gl_program *prog)
{
if (shader_prog) {
- fprintf(stderr,
- "GLSL IR for native %s shader %d:\n", stage, shader_prog->Name);
- _mesa_print_ir(stderr, shader->ir, NULL);
- fprintf(stderr, "\n\n");
+ if (shader->ir) {
+ fprintf(stderr,
+ "GLSL IR for native %s shader %d:\n",
+ stage, shader_prog->Name);
+ _mesa_print_ir(stderr, shader->ir, NULL);
+ fprintf(stderr, "\n\n");
+ }
} else {
fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
stage, prog->Id, stage);
diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c
index aea4d9b77d3..d6b012c392e 100644
--- a/src/mesa/drivers/dri/i965/brw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -497,13 +497,22 @@ brw_get_timestamp(struct gl_context *ctx)
struct brw_context *brw = brw_context(ctx);
uint64_t result = 0;
- drm_intel_reg_read(brw->bufmgr, TIMESTAMP, &result);
+ switch (brw->intelScreen->hw_has_timestamp) {
+ case 3: /* New kernel, always full 36bit accuracy */
+ drm_intel_reg_read(brw->bufmgr, TIMESTAMP | 1, &result);
+ break;
+ case 2: /* 64bit kernel, result is left-shifted by 32bits, losing 4bits */
+ drm_intel_reg_read(brw->bufmgr, TIMESTAMP, &result);
+ result = result >> 32;
+ break;
+ case 1: /* 32bit kernel, result is 36bit wide but may be inaccurate! */
+ drm_intel_reg_read(brw->bufmgr, TIMESTAMP, &result);
+ break;
+ }
/* See logic in brw_queryobj_get_results() */
- result = result >> 32;
result *= 80;
result &= (1ull << 36) - 1;
-
return result;
}
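A small standalone sketch of the post-processing shown above (the multiply-by-80 tick scale and the 36-bit wrap are taken from the code; the raw tick count is made up):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
   uint64_t result = 1000000;            /* hypothetical raw timestamp ticks */

   result *= 80;                         /* scale ticks, as in brw_get_timestamp() */
   result &= (1ull << 36) - 1;           /* the counter wraps at 36 bits */

   printf("%llu\n", (unsigned long long) result);   /* 80000000 */
   return 0;
}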
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index c8b134103bb..31806f769bd 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -853,7 +853,7 @@ static inline struct brw_reg
spread(struct brw_reg reg, unsigned s)
{
if (s) {
- assert(is_power_of_two(s));
+ assert(_mesa_is_pow_two(s));
if (reg.hstride)
reg.hstride += cvt(s) - 1;
@@ -950,6 +950,12 @@ brw_set_writemask(struct brw_reg reg, unsigned mask)
return reg;
}
+static inline unsigned
+brw_writemask_for_size(unsigned n)
+{
+ return (1 << n) - 1;
+}
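/* For illustration: brw_writemask_for_size(3) == 0x7 selects x/y/z, and
 * brw_writemask_for_size(4) == 0xf selects all four components. */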
+
static inline struct brw_reg
negate(struct brw_reg reg)
{
diff --git a/src/mesa/drivers/dri/i965/brw_sampler_state.c b/src/mesa/drivers/dri/i965/brw_sampler_state.c
index 22ccbfe8461..2021bb3b460 100644
--- a/src/mesa/drivers/dri/i965/brw_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sampler_state.c
@@ -425,11 +425,11 @@ brw_update_sampler_state(struct brw_context *brw,
/* Enable anisotropic filtering if desired. */
unsigned max_anisotropy = BRW_ANISORATIO_2;
- if (sampler->MaxAnisotropy > 1.0) {
+ if (sampler->MaxAnisotropy > 1.0f) {
min_filter = BRW_MAPFILTER_ANISOTROPIC;
mag_filter = BRW_MAPFILTER_ANISOTROPIC;
- if (sampler->MaxAnisotropy > 2.0) {
+ if (sampler->MaxAnisotropy > 2.0f) {
max_anisotropy =
MIN2((sampler->MaxAnisotropy - 2) / 2, BRW_ANISORATIO_16);
}
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index ee0add5d765..b49961fff68 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -1314,8 +1314,8 @@ fs_instruction_scheduler::choose_instruction_to_schedule()
* single-result send is probably actually reducing register
* pressure.
*/
- if (inst->regs_written <= inst->dst.width / 8 &&
- chosen_inst->regs_written > chosen_inst->dst.width / 8) {
+ if (inst->regs_written <= inst->exec_size / 8 &&
+ chosen_inst->regs_written > chosen_inst->exec_size / 8) {
chosen = n;
continue;
} else if (inst->regs_written > chosen_inst->regs_written) {
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index 5d9892214a9..b126f82ebbf 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -45,7 +45,7 @@ static void upload_sf_vp(struct brw_context *brw)
struct gl_context *ctx = &brw->ctx;
struct brw_sf_viewport *sfv;
GLfloat y_scale, y_bias;
- double scale[3], translate[3];
+ float scale[3], translate[3];
const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE,
@@ -220,7 +220,7 @@ static void upload_sf_unit( struct brw_context *brw )
/* _NEW_LINE */
sf->sf6.line_width =
- CLAMP(ctx->Line.Width, 1.0, ctx->Const.MaxLineWidth) * (1<<1);
+ CLAMP(ctx->Line.Width, 1.0f, ctx->Const.MaxLineWidth) * (1<<1);
sf->sf6.line_endcap_aa_region_width = 1;
if (ctx->Line.SmoothFlag)
@@ -259,9 +259,10 @@ static void upload_sf_unit( struct brw_context *brw )
/* _NEW_POINT */
sf->sf7.sprite_point = ctx->Point.PointSprite;
- sf->sf7.point_size = CLAMP(rint(CLAMP(ctx->Point.Size,
- ctx->Point.MinSize,
- ctx->Point.MaxSize)), 1, 255) * (1<<3);
+ sf->sf7.point_size = CLAMP(rintf(CLAMP(ctx->Point.Size,
+ ctx->Point.MinSize,
+ ctx->Point.MaxSize)), 1.0f, 255.0f) *
+ (1<<3);
/* _NEW_PROGRAM | _NEW_POINT */
sf->sf7.use_point_size_state = !(ctx->VertexProgram.PointSizeEnabled ||
ctx->Point._Attenuated);
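As a concrete example of the fixed-point packing above (a simplified standalone sketch; the line-width clamp bound is a stand-in for ctx->Const.MaxLineWidth): a 2.5-pixel point size rounds to 2 and is stored in 1/8-pixel units as 16, and a 1.5-pixel line width is stored in 1/2-pixel units as 3.

#include <math.h>
#include <stdio.h>

#define CLAMP(x, lo, hi) ((x) < (lo) ? (lo) : ((x) > (hi) ? (hi) : (x)))

int
main(void)
{
   const float point_size = 2.5f;   /* hypothetical point size */
   const float line_width = 1.5f;   /* hypothetical line width */

   /* Point size field: rounded, clamped to [1, 255], in 1/8-pixel units. */
   const int point_field = CLAMP(rintf(point_size), 1.0f, 255.0f) * (1 << 3);
   /* Line width field: clamped, in 1/2-pixel units (10.0f is a stand-in max). */
   const int line_field = CLAMP(line_width, 1.0f, 10.0f) * (1 << 1);

   printf("point=%d line=%d\n", point_field, line_field);   /* point=16 line=3 */
   return 0;
}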
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 06393c8ff2b..67b8dde7cc8 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -113,22 +113,32 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
(i == MESA_SHADER_FRAGMENT);
compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
compiler->glsl_compiler_options[i].LowerClipDistance = true;
+
+ /* !ARB_gpu_shader5 */
+ if (devinfo->gen < 7)
+ compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
}
compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = true;
compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
- if (compiler->scalar_vs) {
- /* If we're using the scalar backend for vertex shaders, we need to
- * configure these accordingly.
- */
- compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
- compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
- compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = false;
+ if (compiler->scalar_vs || brw_env_var_as_boolean("INTEL_USE_NIR", false)) {
+ if (compiler->scalar_vs) {
+ /* If we're using the scalar backend for vertex shaders, we need to
+ * configure these accordingly.
+ */
+ compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
+ compiler->glsl_compiler_options[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
+ compiler->glsl_compiler_options[MESA_SHADER_VERTEX].OptimizeForAOS = false;
+ }
compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions = nir_options;
}
+ if (brw_env_var_as_boolean("INTEL_USE_NIR", false)) {
+ compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].NirOptions = nir_options;
+ }
+
compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions = nir_options;
compiler->glsl_compiler_options[MESA_SHADER_COMPUTE].NirOptions = nir_options;
@@ -229,7 +239,8 @@ brw_lower_packing_builtins(struct brw_context *brw,
}
static void
-process_glsl_ir(struct brw_context *brw,
+process_glsl_ir(gl_shader_stage stage,
+ struct brw_context *brw,
struct gl_shader_program *shader_prog,
struct gl_shader *shader)
{
@@ -255,7 +266,9 @@ process_glsl_ir(struct brw_context *brw,
EXP_TO_EXP2 |
LOG_TO_LOG2 |
bitfield_insert |
- LDEXP_TO_ARITH);
+ LDEXP_TO_ARITH |
+ CARRY_TO_ARITH |
+ BORROW_TO_ARITH);
/* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this,
* if-statements need to be flattened.
@@ -275,15 +288,17 @@ process_glsl_ir(struct brw_context *brw,
lower_quadop_vector(shader->ir, false);
bool lowered_variable_indexing =
- lower_variable_index_to_cond_assign(shader->ir,
+ lower_variable_index_to_cond_assign((gl_shader_stage)stage,
+ shader->ir,
options->EmitNoIndirectInput,
options->EmitNoIndirectOutput,
options->EmitNoIndirectTemp,
options->EmitNoIndirectUniform);
if (unlikely(brw->perf_debug && lowered_variable_indexing)) {
- perf_debug("Unsupported form of variable indexing in FS; falling "
- "back to very inefficient code generation\n");
+ perf_debug("Unsupported form of variable indexing in %s; falling "
+ "back to very inefficient code generation\n",
+ _mesa_shader_stage_to_abbrev(shader->Stage));
}
lower_ubo_reference(shader, shader->ir);
@@ -308,7 +323,7 @@ process_glsl_ir(struct brw_context *brw,
} while (progress);
if (options->NirOptions != NULL)
- lower_output_reads(shader->ir);
+ lower_output_reads(stage, shader->ir);
validate_ir_tree(shader->ir);
@@ -352,7 +367,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
_mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog);
- process_glsl_ir(brw, shProg, shader);
+ process_glsl_ir((gl_shader_stage) stage, brw, shProg, shader);
/* Make a pass over the IR to add state references for any built-in
* uniforms that are used. This has to be done now (during linking).
@@ -387,8 +402,10 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
brw_add_texrect_params(prog);
- if (options->NirOptions)
- prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage);
+ if (options->NirOptions) {
+ prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
+ is_scalar_shader_stage(brw, stage));
+ }
_mesa_reference_program(ctx, &prog, NULL);
}
@@ -422,6 +439,7 @@ brw_type_for_base_type(const struct glsl_type *type)
return BRW_REGISTER_TYPE_F;
case GLSL_TYPE_INT:
case GLSL_TYPE_BOOL:
+ case GLSL_TYPE_SUBROUTINE:
return BRW_REGISTER_TYPE_D;
case GLSL_TYPE_UINT:
return BRW_REGISTER_TYPE_UD;
@@ -528,6 +546,8 @@ brw_instruction_name(enum opcode op)
return opcode_descs[op].name;
case FS_OPCODE_FB_WRITE:
return "fb_write";
+ case FS_OPCODE_FB_WRITE_LOGICAL:
+ return "fb_write_logical";
case FS_OPCODE_BLORP_FB_WRITE:
return "blorp_fb_write";
case FS_OPCODE_REP_FB_WRITE:
@@ -556,43 +576,80 @@ brw_instruction_name(enum opcode op)
case SHADER_OPCODE_TEX:
return "tex";
+ case SHADER_OPCODE_TEX_LOGICAL:
+ return "tex_logical";
case SHADER_OPCODE_TXD:
return "txd";
+ case SHADER_OPCODE_TXD_LOGICAL:
+ return "txd_logical";
case SHADER_OPCODE_TXF:
return "txf";
+ case SHADER_OPCODE_TXF_LOGICAL:
+ return "txf_logical";
case SHADER_OPCODE_TXL:
return "txl";
+ case SHADER_OPCODE_TXL_LOGICAL:
+ return "txl_logical";
case SHADER_OPCODE_TXS:
return "txs";
+ case SHADER_OPCODE_TXS_LOGICAL:
+ return "txs_logical";
case FS_OPCODE_TXB:
return "txb";
+ case FS_OPCODE_TXB_LOGICAL:
+ return "txb_logical";
case SHADER_OPCODE_TXF_CMS:
return "txf_cms";
+ case SHADER_OPCODE_TXF_CMS_LOGICAL:
+ return "txf_cms_logical";
case SHADER_OPCODE_TXF_UMS:
return "txf_ums";
+ case SHADER_OPCODE_TXF_UMS_LOGICAL:
+ return "txf_ums_logical";
case SHADER_OPCODE_TXF_MCS:
return "txf_mcs";
+ case SHADER_OPCODE_TXF_MCS_LOGICAL:
+ return "txf_mcs_logical";
case SHADER_OPCODE_LOD:
return "lod";
+ case SHADER_OPCODE_LOD_LOGICAL:
+ return "lod_logical";
case SHADER_OPCODE_TG4:
return "tg4";
+ case SHADER_OPCODE_TG4_LOGICAL:
+ return "tg4_logical";
case SHADER_OPCODE_TG4_OFFSET:
return "tg4_offset";
+ case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
+ return "tg4_offset_logical";
+
case SHADER_OPCODE_SHADER_TIME_ADD:
return "shader_time_add";
case SHADER_OPCODE_UNTYPED_ATOMIC:
return "untyped_atomic";
+ case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+ return "untyped_atomic_logical";
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
return "untyped_surface_read";
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
+ return "untyped_surface_read_logical";
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
return "untyped_surface_write";
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
+ return "untyped_surface_write_logical";
case SHADER_OPCODE_TYPED_ATOMIC:
return "typed_atomic";
+ case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
+ return "typed_atomic_logical";
case SHADER_OPCODE_TYPED_SURFACE_READ:
return "typed_surface_read";
+ case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
+ return "typed_surface_read_logical";
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
return "typed_surface_write";
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
+ return "typed_surface_write_logical";
case SHADER_OPCODE_MEMORY_FENCE:
return "memory_fence";
@@ -653,8 +710,6 @@ brw_instruction_name(enum opcode op)
case FS_OPCODE_DISCARD_JUMP:
return "discard_jump";
- case FS_OPCODE_SET_OMASK:
- return "set_omask";
case FS_OPCODE_SET_SAMPLE_ID:
return "set_sample_id";
case FS_OPCODE_SET_SIMD4X2_OFFSET:
@@ -724,6 +779,8 @@ brw_instruction_name(enum opcode op)
return "cs_terminate";
case SHADER_OPCODE_BARRIER:
return "barrier";
+ case SHADER_OPCODE_MULH:
+ return "mulh";
}
unreachable("not reached");
@@ -942,6 +999,7 @@ backend_instruction::is_commutative() const
case BRW_OPCODE_XOR:
case BRW_OPCODE_ADD:
case BRW_OPCODE_MUL:
+ case SHADER_OPCODE_MULH:
return true;
case BRW_OPCODE_SEL:
/* MIN and MAX are commutative. */
@@ -1049,6 +1107,7 @@ backend_instruction::can_do_saturate() const
case BRW_OPCODE_MATH:
case BRW_OPCODE_MOV:
case BRW_OPCODE_MUL:
+ case SHADER_OPCODE_MULH:
case BRW_OPCODE_PLN:
case BRW_OPCODE_RNDD:
case BRW_OPCODE_RNDE:
@@ -1147,10 +1206,14 @@ backend_instruction::has_side_effects() const
{
switch (opcode) {
case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
case SHADER_OPCODE_TYPED_ATOMIC:
+ case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
case SHADER_OPCODE_MEMORY_FENCE:
case SHADER_OPCODE_URB_WRITE_SIMD8:
case FS_OPCODE_FB_WRITE:
@@ -1356,3 +1419,34 @@ backend_shader::assign_common_binding_table_offsets(uint32_t next_binding_table_
/* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
}
+
+void
+backend_shader::setup_image_uniform_values(const gl_uniform_storage *storage)
+{
+ const unsigned stage = _mesa_program_enum_to_shader_stage(prog->Target);
+
+ for (unsigned i = 0; i < MAX2(storage->array_elements, 1); i++) {
+ const unsigned image_idx = storage->image[stage].index + i;
+ const brw_image_param *param = &stage_prog_data->image_param[image_idx];
+
+ /* Upload the brw_image_param structure. The order is expected to match
+ * the BRW_IMAGE_PARAM_*_OFFSET defines.
+ */
+ setup_vector_uniform_values(
+ (const gl_constant_value *)&param->surface_idx, 1);
+ setup_vector_uniform_values(
+ (const gl_constant_value *)param->offset, 2);
+ setup_vector_uniform_values(
+ (const gl_constant_value *)param->size, 3);
+ setup_vector_uniform_values(
+ (const gl_constant_value *)param->stride, 4);
+ setup_vector_uniform_values(
+ (const gl_constant_value *)param->tiling, 3);
+ setup_vector_uniform_values(
+ (const gl_constant_value *)param->swizzling, 2);
+
+ brw_mark_surface_used(
+ stage_prog_data,
+ stage_prog_data->binding_table.image_start + image_idx);
+ }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index b2c1a0b8d69..2cc97f24972 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -26,6 +26,7 @@
#include "brw_defines.h"
#include "main/compiler.h"
#include "glsl/ir.h"
+#include "program/prog_parameter.h"
#ifdef __cplusplus
#include "brw_ir_allocator.h"
@@ -268,6 +269,10 @@ public:
void assign_common_binding_table_offsets(uint32_t next_binding_table_offset);
virtual void invalidate_live_intervals() = 0;
+
+ virtual void setup_vector_uniform_values(const gl_constant_value *values,
+ unsigned n) = 0;
+ void setup_image_uniform_values(const gl_uniform_storage *storage);
};
uint32_t brw_texture_offset(int *offsets, unsigned num_components);
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 987672f8815..78a1f874b4e 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -72,8 +72,10 @@ extern const struct brw_tracked_state brw_vs_samplers;
extern const struct brw_tracked_state brw_gs_samplers;
extern const struct brw_tracked_state brw_vs_ubo_surfaces;
extern const struct brw_tracked_state brw_vs_abo_surfaces;
+extern const struct brw_tracked_state brw_vs_image_surfaces;
extern const struct brw_tracked_state brw_gs_ubo_surfaces;
extern const struct brw_tracked_state brw_gs_abo_surfaces;
+extern const struct brw_tracked_state brw_gs_image_surfaces;
extern const struct brw_tracked_state brw_vs_unit;
extern const struct brw_tracked_state brw_gs_prog;
extern const struct brw_tracked_state brw_wm_prog;
@@ -84,7 +86,9 @@ extern const struct brw_tracked_state brw_gs_binding_table;
extern const struct brw_tracked_state brw_vs_binding_table;
extern const struct brw_tracked_state brw_wm_ubo_surfaces;
extern const struct brw_tracked_state brw_wm_abo_surfaces;
+extern const struct brw_tracked_state brw_wm_image_surfaces;
extern const struct brw_tracked_state brw_cs_abo_surfaces;
+extern const struct brw_tracked_state brw_cs_image_surfaces;
extern const struct brw_tracked_state brw_wm_unit;
extern const struct brw_tracked_state brw_interpolation_map;
@@ -121,7 +125,6 @@ extern const struct brw_tracked_state gen6_wm_state;
extern const struct brw_tracked_state gen7_depthbuffer;
extern const struct brw_tracked_state gen7_clip_state;
extern const struct brw_tracked_state gen7_disable_stages;
-extern const struct brw_tracked_state gen7_gs_push_constants;
extern const struct brw_tracked_state gen7_gs_state;
extern const struct brw_tracked_state gen7_ps_state;
extern const struct brw_tracked_state gen7_push_constant_space;
@@ -132,6 +135,7 @@ extern const struct brw_tracked_state gen7_sol_state;
extern const struct brw_tracked_state gen7_urb;
extern const struct brw_tracked_state gen7_vs_state;
extern const struct brw_tracked_state gen7_wm_state;
+extern const struct brw_tracked_state gen7_hw_binding_tables;
extern const struct brw_tracked_state haswell_cut_index;
extern const struct brw_tracked_state gen8_blend_state;
extern const struct brw_tracked_state gen8_disable_stages;
@@ -266,15 +270,6 @@ void brw_update_renderbuffer_surfaces(struct brw_context *brw,
uint32_t render_target_start,
uint32_t *surf_offset);
-/* gen7_wm_state.c */
-void
-gen7_upload_ps_state(struct brw_context *brw,
- const struct gl_fragment_program *fp,
- const struct brw_stage_state *stage_state,
- const struct brw_wm_prog_data *prog_data,
- bool enable_dual_src_blend, unsigned sample_mask,
- unsigned fast_clear_op);
-
/* gen7_wm_surface_state.c */
uint32_t gen7_surface_tiling_mode(uint32_t tiling);
uint32_t gen7_surface_msaa_bits(unsigned num_samples, enum intel_msaa_layout l);
@@ -372,6 +367,20 @@ gen7_upload_constant_state(struct brw_context *brw,
const struct brw_stage_state *stage_state,
bool active, unsigned opcode);
+void gen7_rs_control(struct brw_context *brw, int enable);
+
+void gen7_edit_hw_binding_table_entry(struct brw_context *brw,
+ gl_shader_stage stage,
+ uint32_t index,
+ uint32_t surf_offset);
+void gen7_update_binding_table_from_array(struct brw_context *brw,
+ gl_shader_stage stage,
+ const uint32_t* binding_table,
+ int num_surfaces);
+void gen7_enable_hw_binding_tables(struct brw_context *brw);
+void gen7_disable_hw_binding_tables(struct brw_context *brw);
+void gen7_reset_hw_bt_pool_offsets(struct brw_context *brw);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c
index a405a80ef6e..d79e0ea00c7 100644
--- a/src/mesa/drivers/dri/i965/brw_state_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_state_batch.c
@@ -87,7 +87,7 @@ brw_annotate_aub(struct brw_context *brw)
drm_intel_aub_annotation annotations[annotation_count];
int a = 0;
make_annotation(&annotations[a++], AUB_TRACE_TYPE_BATCH, 0,
- 4*brw->batch.used);
+ 4 * USED_BATCH(brw->batch));
for (int i = brw->state_batch_count; i-- > 0; ) {
uint32_t type = brw->state_batch_list[i].type;
uint32_t start_offset = brw->state_batch_list[i].offset;
@@ -136,7 +136,7 @@ __brw_state_batch(struct brw_context *brw,
* space, then flush and try again.
*/
if (batch->state_batch_offset < size ||
- offset < 4*batch->used + batch->reserved_space) {
+ offset < 4 * USED_BATCH(*batch) + batch->reserved_space) {
intel_batchbuffer_flush(brw);
offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
}
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index 24778d25379..5effb4c8829 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -200,36 +200,23 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
}
/**
- * Attempts to find an item in the cache with identical data and aux
- * data to use
+ * Attempts to find an item in the cache with identical data.
*/
-static bool
-brw_try_upload_using_copy(struct brw_cache *cache,
- struct brw_cache_item *result_item,
- const void *data,
- const void *aux)
+static const struct brw_cache_item *
+brw_lookup_prog(const struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *data, unsigned data_size)
{
- struct brw_context *brw = cache->brw;
+ const struct brw_context *brw = cache->brw;
int i;
- struct brw_cache_item *item;
+ const struct brw_cache_item *item;
for (i = 0; i < cache->size; i++) {
for (item = cache->items[i]; item; item = item->next) {
- const void *item_aux = item->key + item->key_size;
int ret;
- if (item->cache_id != result_item->cache_id ||
- item->size != result_item->size ||
- item->aux_size != result_item->aux_size) {
- continue;
- }
-
- if (cache->aux_compare[result_item->cache_id]) {
- if (!cache->aux_compare[result_item->cache_id](item_aux, aux))
- continue;
- } else if (memcmp(item_aux, aux, item->aux_size) != 0) {
+ if (item->cache_id != cache_id || item->size != data_size)
continue;
- }
if (!brw->has_llc)
drm_intel_bo_map(cache->bo, false);
@@ -239,27 +226,24 @@ brw_try_upload_using_copy(struct brw_cache *cache,
if (ret)
continue;
- result_item->offset = item->offset;
-
- return true;
+ return item;
}
}
- return false;
+ return NULL;
}
-static void
-brw_upload_item_data(struct brw_cache *cache,
- struct brw_cache_item *item,
- const void *data)
+static uint32_t
+brw_alloc_item_data(struct brw_cache *cache, uint32_t size)
{
+ uint32_t offset;
struct brw_context *brw = cache->brw;
/* Allocate space in the cache BO for our new program. */
- if (cache->next_offset + item->size > cache->bo->size) {
+ if (cache->next_offset + size > cache->bo->size) {
uint32_t new_size = cache->bo->size * 2;
- while (cache->next_offset + item->size > new_size)
+ while (cache->next_offset + size > new_size)
new_size *= 2;
brw_cache_new_bo(cache, new_size);
@@ -273,10 +257,12 @@ brw_upload_item_data(struct brw_cache *cache,
brw_cache_new_bo(cache, cache->bo->size);
}
- item->offset = cache->next_offset;
+ offset = cache->next_offset;
/* Programs are always 64-byte aligned, so set up the next one now */
- cache->next_offset = ALIGN(item->offset + item->size, 64);
+ cache->next_offset = ALIGN(offset + size, 64);
+
+ return offset;
}
void
@@ -293,6 +279,8 @@ brw_upload_cache(struct brw_cache *cache,
{
struct brw_context *brw = cache->brw;
struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
+ const struct brw_cache_item *matching_data =
+ brw_lookup_prog(cache, cache_id, data, data_size);
GLuint hash;
void *tmp;
@@ -304,15 +292,23 @@ brw_upload_cache(struct brw_cache *cache,
hash = hash_key(item);
item->hash = hash;
- /* If we can find a matching prog/prog_data combo in the cache
- * already, then reuse the existing stuff. This will mean not
- * flagging CACHE_NEW_* when transitioning between the two
- * equivalent hash keys. This is notably useful for programs
- * generating shaders at runtime, where multiple shaders may
- * compile to the thing in our backend.
+ /* If we can find a matching prog in the cache already, then reuse the
+ * existing stuff without creating a new copy in the underlying buffer
+ * object. This is notably useful for programs generating shaders at
+ * runtime, where multiple shaders may compile to the same thing in our
+ * backend.
*/
- if (!brw_try_upload_using_copy(cache, item, data, aux)) {
- brw_upload_item_data(cache, item, data);
+ if (matching_data) {
+ item->offset = matching_data->offset;
+ } else {
+ item->offset = brw_alloc_item_data(cache, data_size);
+
+ /* Copy data to the buffer */
+ if (brw->has_llc) {
+ memcpy((char *)cache->bo->virtual + item->offset, data, data_size);
+ } else {
+ drm_intel_bo_subdata(cache->bo, item->offset, data_size, data);
+ }
}
/* Set up the memory containing the key and aux_data */
@@ -323,7 +319,7 @@ brw_upload_cache(struct brw_cache *cache,
item->key = tmp;
- if (cache->n_items > cache->size * 1.5)
+ if (cache->n_items > cache->size * 1.5f)
rehash(cache);
hash %= cache->size;
@@ -331,13 +327,6 @@ brw_upload_cache(struct brw_cache *cache,
cache->items[hash] = item;
cache->n_items++;
- /* Copy data to the buffer */
- if (brw->has_llc) {
- memcpy((char *) cache->bo->virtual + item->offset, data, data_size);
- } else {
- drm_intel_bo_subdata(cache->bo, item->offset, data_size, data);
- }
-
*out_offset = item->offset;
*(void **)out_aux = (void *)((char *)item->key + item->key_size);
cache->brw->ctx.NewDriverState |= 1 << cache_id;
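
The brw_state_cache.c changes above split the old copy-based upload into a pure lookup (brw_lookup_prog) plus an allocator (brw_alloc_item_data): identical program binaries now share a single range of the cache buffer, and only a lookup miss copies data into it. A rough standalone model of that look-up-or-append pattern follows; the names and the plain malloc'ed pool are made up for illustration and stand in for the real brw_cache and its libdrm buffer object (there is no BO growth handling here, unlike brw_cache_new_bo).

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

struct item { uint32_t offset; size_t size; };

static unsigned char *pool;                /* stands in for cache->bo */
static size_t pool_used, pool_size = 1024;
static struct item items[64];
static unsigned n_items;

/* Return an existing item with identical bytes, or NULL (cf. brw_lookup_prog). */
static const struct item *lookup(const void *data, size_t size)
{
   for (unsigned i = 0; i < n_items; i++)
      if (items[i].size == size &&
          memcmp(pool + items[i].offset, data, size) == 0)
         return &items[i];
   return NULL;
}

/* Reserve space for new data, 64-byte aligned (cf. brw_alloc_item_data). */
static uint32_t alloc_item_data(size_t size)
{
   uint32_t offset = pool_used;
   pool_used = (offset + size + 63) & ~(size_t)63;
   return offset;
}

static uint32_t upload(const void *data, size_t size)
{
   const struct item *match = lookup(data, size);
   uint32_t offset = match ? match->offset : alloc_item_data(size);
   if (!match)
      memcpy(pool + offset, data, size);   /* only a miss copies data */
   items[n_items++] = (struct item){ offset, size };
   return offset;
}

int main(void)
{
   pool = calloc(1, pool_size);
   uint32_t a = upload("shader-A", 9);
   uint32_t b = upload("shader-A", 9);   /* identical data: offset reused */
   printf("%u %u\n", a, b);              /* prints "0 0" */
   free(pool);
   return 0;
}
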
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 08d1ac28885..9de42ce8503 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -192,6 +192,12 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
&gen6_color_calc_state, /* must do before cc unit */
&gen6_depth_stencil_state, /* must do before cc unit */
+ &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
+
+ &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
+ &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
+ &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
+
&gen6_vs_push_constants, /* Before vs_state */
&gen6_gs_push_constants, /* Before gs_state */
&gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
@@ -251,6 +257,7 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
static const struct brw_tracked_state *gen7_compute_atoms[] =
{
&brw_state_base_address,
+ &brw_cs_image_surfaces,
&brw_cs_abo_surfaces,
&brw_cs_state,
};
@@ -268,6 +275,12 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
&gen8_blend_state,
&gen6_color_calc_state,
+ &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
+
+ &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
+ &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
+ &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
+
&gen6_vs_push_constants, /* Before vs_state */
&gen6_gs_push_constants, /* Before gs_state */
&gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
@@ -334,6 +347,7 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
static const struct brw_tracked_state *gen8_compute_atoms[] =
{
&gen8_state_base_address,
+ &brw_cs_image_surfaces,
&brw_cs_abo_surfaces,
&brw_cs_state,
};
@@ -349,7 +363,7 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
return;
if (brw->gen == 6)
- intel_emit_post_sync_nonzero_flush(brw);
+ brw_emit_post_sync_nonzero_flush(brw);
brw_upload_invariant_state(brw);
@@ -468,6 +482,7 @@ void brw_init_state( struct brw_context *brw )
ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
+ ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
}
@@ -581,6 +596,7 @@ static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_STATS_WM),
DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
+ DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP),
DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
@@ -710,7 +726,7 @@ brw_upload_pipeline_state(struct brw_context *brw,
/* Emit Sandybridge workaround flushes on every primitive, for safety. */
if (brw->gen == 6)
- intel_emit_post_sync_nonzero_flush(brw);
+ brw_emit_post_sync_nonzero_flush(brw);
brw_upload_programs(brw, pipeline);
merge_ctx_state(brw, &state);
diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c
index 05016067bba..a33fd88a026 100644
--- a/src/mesa/drivers/dri/i965/brw_surface_formats.c
+++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c
@@ -813,3 +813,112 @@ brw_depth_format(struct brw_context *brw, mesa_format format)
unreachable("Unexpected depth format.");
}
}
+
+mesa_format
+brw_lower_mesa_image_format(const struct brw_device_info *devinfo,
+ mesa_format format)
+{
+ switch (format) {
+ /* These are never lowered. Up to BDW we'll have to fall back to untyped
+ * surface access for 128bpp formats.
+ */
+ case MESA_FORMAT_RGBA_UINT32:
+ case MESA_FORMAT_RGBA_SINT32:
+ case MESA_FORMAT_RGBA_FLOAT32:
+ case MESA_FORMAT_R_UINT32:
+ case MESA_FORMAT_R_SINT32:
+ case MESA_FORMAT_R_FLOAT32:
+ return format;
+
+ /* From HSW to BDW the only 64bpp format supported for typed access is
+ * RGBA_UINT16. IVB falls back to untyped.
+ */
+ case MESA_FORMAT_RGBA_UINT16:
+ case MESA_FORMAT_RGBA_SINT16:
+ case MESA_FORMAT_RGBA_FLOAT16:
+ case MESA_FORMAT_RG_UINT32:
+ case MESA_FORMAT_RG_SINT32:
+ case MESA_FORMAT_RG_FLOAT32:
+ return (devinfo->gen >= 9 ? format :
+ devinfo->gen >= 8 || devinfo->is_haswell ?
+ MESA_FORMAT_RGBA_UINT16 : MESA_FORMAT_RG_UINT32);
+
+ /* Up to BDW no SINT or FLOAT formats of less than 32 bits per component
+ * are supported. IVB doesn't support formats with more than one component
+ * for typed access. For 8 and 16 bpp formats IVB relies on the
+ * undocumented behavior that typed reads from R_UINT8 and R_UINT16
+ * surfaces actually do a 32-bit misaligned read. The alternative would be
+ * to use two surface state entries with different formats for each image,
+ * one for reading (using R_UINT32) and another one for writing (using
+ * R_UINT8 or R_UINT16), but that would complicate the shaders we generate
+ * even more.
+ */
+ case MESA_FORMAT_RGBA_UINT8:
+ case MESA_FORMAT_RGBA_SINT8:
+ return (devinfo->gen >= 9 ? format :
+ devinfo->gen >= 8 || devinfo->is_haswell ?
+ MESA_FORMAT_RGBA_UINT8 : MESA_FORMAT_R_UINT32);
+
+ case MESA_FORMAT_RG_UINT16:
+ case MESA_FORMAT_RG_SINT16:
+ case MESA_FORMAT_RG_FLOAT16:
+ return (devinfo->gen >= 9 ? format :
+ devinfo->gen >= 8 || devinfo->is_haswell ?
+ MESA_FORMAT_RG_UINT16 : MESA_FORMAT_R_UINT32);
+
+ case MESA_FORMAT_RG_UINT8:
+ case MESA_FORMAT_RG_SINT8:
+ return (devinfo->gen >= 9 ? format :
+ devinfo->gen >= 8 || devinfo->is_haswell ?
+ MESA_FORMAT_RG_UINT8 : MESA_FORMAT_R_UINT16);
+
+ case MESA_FORMAT_R_UINT16:
+ case MESA_FORMAT_R_FLOAT16:
+ case MESA_FORMAT_R_SINT16:
+ return (devinfo->gen >= 9 ? format : MESA_FORMAT_R_UINT16);
+
+ case MESA_FORMAT_R_UINT8:
+ case MESA_FORMAT_R_SINT8:
+ return (devinfo->gen >= 9 ? format : MESA_FORMAT_R_UINT8);
+
+ /* Neither the 2/10/10/10 nor the 11/11/10 packed formats are supported
+ * by the hardware.
+ */
+ case MESA_FORMAT_R10G10B10A2_UINT:
+ case MESA_FORMAT_R10G10B10A2_UNORM:
+ case MESA_FORMAT_R11G11B10_FLOAT:
+ return MESA_FORMAT_R_UINT32;
+
+ /* No normalized fixed-point formats are supported by the hardware. */
+ case MESA_FORMAT_RGBA_UNORM16:
+ case MESA_FORMAT_RGBA_SNORM16:
+ return (devinfo->gen >= 8 || devinfo->is_haswell ?
+ MESA_FORMAT_RGBA_UINT16 : MESA_FORMAT_RG_UINT32);
+
+ case MESA_FORMAT_R8G8B8A8_UNORM:
+ case MESA_FORMAT_R8G8B8A8_SNORM:
+ return (devinfo->gen >= 8 || devinfo->is_haswell ?
+ MESA_FORMAT_RGBA_UINT8 : MESA_FORMAT_R_UINT32);
+
+ case MESA_FORMAT_R16G16_UNORM:
+ case MESA_FORMAT_R16G16_SNORM:
+ return (devinfo->gen >= 8 || devinfo->is_haswell ?
+ MESA_FORMAT_RG_UINT16 : MESA_FORMAT_R_UINT32);
+
+ case MESA_FORMAT_R8G8_UNORM:
+ case MESA_FORMAT_R8G8_SNORM:
+ return (devinfo->gen >= 8 || devinfo->is_haswell ?
+ MESA_FORMAT_RG_UINT8 : MESA_FORMAT_R_UINT16);
+
+ case MESA_FORMAT_R_UNORM16:
+ case MESA_FORMAT_R_SNORM16:
+ return MESA_FORMAT_R_UINT16;
+
+ case MESA_FORMAT_R_UNORM8:
+ case MESA_FORMAT_R_SNORM8:
+ return MESA_FORMAT_R_UINT8;
+
+ default:
+ unreachable("Unknown image format");
+ }
+}
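
brw_lower_mesa_image_format() above is a pure function of the format and the device generation, so its behaviour is easy to spot-check. For example, the 64 bpp case MESA_FORMAT_RG_FLOAT32 is kept as-is on Gen9+, lowered to MESA_FORMAT_RGBA_UINT16 on Haswell and Broadwell (the one 64 bpp layout with typed access there), and lowered to MESA_FORMAT_RG_UINT32 for untyped access on Ivybridge. A minimal model of that three-way decision, with hypothetical stand-ins for the real enums and for brw_device_info:

#include <stdio.h>
#include <stdbool.h>

/* Hypothetical stand-ins for brw_device_info and mesa_format. */
struct devinfo { int gen; bool is_haswell; };
enum fmt { RG_FLOAT32, RGBA_UINT16, RG_UINT32 };

/* Models the MESA_FORMAT_RG_FLOAT32 case of brw_lower_mesa_image_format(). */
static enum fmt lower_rg_float32(const struct devinfo *d)
{
   return d->gen >= 9 ? RG_FLOAT32 :
          d->gen >= 8 || d->is_haswell ? RGBA_UINT16 : RG_UINT32;
}

int main(void)
{
   const struct devinfo ivb = { 7, false }, hsw = { 7, true }, skl = { 9, false };
   printf("%d %d %d\n", lower_rg_float32(&ivb),   /* 2: RG_UINT32   */
                        lower_rg_float32(&hsw),   /* 1: RGBA_UINT16 */
                        lower_rg_float32(&skl));  /* 0: RG_FLOAT32  */
   return 0;
}
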
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 998d8c42770..b8b03932065 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -63,7 +63,7 @@ tr_mode_horizontal_texture_alignment(const struct brw_context *brw,
int i = 0;
/* Alignment computations below assume bpp >= 8 and a power of 2. */
- assert (bpp >= 8 && bpp <= 128 && is_power_of_two(bpp));
+ assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp));
switch(mt->target) {
case GL_TEXTURE_1D:
@@ -95,7 +95,7 @@ tr_mode_horizontal_texture_alignment(const struct brw_context *brw,
ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ?
align_yf[i] : align_ys[i];
- assert(is_power_of_two(mt->num_samples));
+ assert(_mesa_is_pow_two(mt->num_samples));
switch (mt->num_samples) {
case 2:
@@ -199,7 +199,7 @@ tr_mode_vertical_texture_alignment(const struct brw_context *brw,
mt->target != GL_TEXTURE_1D_ARRAY);
/* Alignment computations below assume bpp >= 8 and a power of 2. */
- assert (bpp >= 8 && bpp <= 128 && is_power_of_two(bpp)) ;
+ assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp)) ;
switch(mt->target) {
case GL_TEXTURE_2D:
@@ -226,7 +226,7 @@ tr_mode_vertical_texture_alignment(const struct brw_context *brw,
ret_align = mt->tr_mode == INTEL_MIPTREE_TRMODE_YF ?
align_yf[i] : align_ys[i];
- assert(is_power_of_two(mt->num_samples));
+ assert(_mesa_is_pow_two(mt->num_samples));
switch (mt->num_samples) {
case 4:
@@ -366,9 +366,8 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
mt->total_width = mt->physical_width0;
- if (mt->compressed) {
- mt->total_width = ALIGN(mt->physical_width0, mt->align_w);
- }
+ if (mt->compressed)
+ mt->total_width = ALIGN(mt->total_width, bw);
/* May need to adjust width to accommodate the placement of
* the 2nd mipmap. This occurs when the alignment
@@ -433,9 +432,7 @@ brw_miptree_get_horizontal_slice_pitch(const struct brw_context *brw,
const struct intel_mipmap_tree *mt,
unsigned level)
{
- assert(brw->gen < 9);
-
- if (mt->target == GL_TEXTURE_3D ||
+ if ((brw->gen < 9 && mt->target == GL_TEXTURE_3D) ||
(brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP)) {
return ALIGN(minify(mt->physical_width0, level), mt->align_w);
} else {
@@ -615,8 +612,8 @@ brw_miptree_layout_texture_3d(struct brw_context *brw,
*/
static uint32_t
brw_miptree_choose_tiling(struct brw_context *brw,
- enum intel_miptree_tiling_mode requested,
- const struct intel_mipmap_tree *mt)
+ const struct intel_mipmap_tree *mt,
+ uint32_t layout_flags)
{
if (mt->format == MESA_FORMAT_S_UINT8) {
/* The stencil buffer is W tiled. However, we request from the kernel a
@@ -625,15 +622,18 @@ brw_miptree_choose_tiling(struct brw_context *brw,
return I915_TILING_NONE;
}
+ /* Changing the tiling of miptrees with pre-allocated BOs is not supported. */
+ assert((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0);
+
/* Some usages may want only one type of tiling, like depth miptrees (Y
* tiled), or temporary BOs for uploading data once (linear).
*/
- switch (requested) {
- case INTEL_MIPTREE_TILING_ANY:
+ switch (layout_flags & MIPTREE_LAYOUT_TILING_ANY) {
+ case MIPTREE_LAYOUT_TILING_ANY:
break;
- case INTEL_MIPTREE_TILING_Y:
+ case MIPTREE_LAYOUT_TILING_Y:
return I915_TILING_Y;
- case INTEL_MIPTREE_TILING_NONE:
+ case MIPTREE_LAYOUT_TILING_NONE:
return I915_TILING_NONE;
}
@@ -762,16 +762,13 @@ intel_miptree_set_total_width_height(struct brw_context *brw,
mt->total_width, mt->total_height, mt->cpp);
}
-void
-brw_miptree_layout(struct brw_context *brw,
- struct intel_mipmap_tree *mt,
- enum intel_miptree_tiling_mode requested,
- uint32_t layout_flags)
+static void
+intel_miptree_set_alignment(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ uint32_t layout_flags)
{
bool gen6_hiz_or_stencil = false;
- mt->tr_mode = INTEL_MIPTREE_TRMODE_NONE;
-
if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
const GLenum base_format = _mesa_get_format_base_format(mt->format);
gen6_hiz_or_stencil = _mesa_is_depth_or_stencil_format(base_format);
@@ -806,7 +803,16 @@ brw_miptree_layout(struct brw_context *brw,
intel_horizontal_texture_alignment_unit(brw, mt, layout_flags);
mt->align_h = intel_vertical_texture_alignment_unit(brw, mt);
}
+}
+
+void
+brw_miptree_layout(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ uint32_t layout_flags)
+{
+ mt->tr_mode = INTEL_MIPTREE_TRMODE_NONE;
+ intel_miptree_set_alignment(brw, mt, layout_flags);
intel_miptree_set_total_width_height(brw, mt);
if (!mt->total_width || !mt->total_height) {
@@ -825,6 +831,6 @@ brw_miptree_layout(struct brw_context *brw,
}
if ((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0)
- mt->tiling = brw_miptree_choose_tiling(brw, requested, mt);
+ mt->tiling = brw_miptree_choose_tiling(brw, mt, layout_flags);
}
diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c
index 6fcf1b0cb1d..6078c3810d4 100644
--- a/src/mesa/drivers/dri/i965/brw_urb.c
+++ b/src/mesa/drivers/dri/i965/brw_urb.c
@@ -249,10 +249,10 @@ void brw_upload_urb_fence(struct brw_context *brw)
uf.bits1.cs_fence = brw->urb.size;
/* erratum: URB_FENCE must not cross a 64byte cacheline */
- if ((brw->batch.used & 15) > 12) {
- int pad = 16 - (brw->batch.used & 15);
+ if ((USED_BATCH(brw->batch) & 15) > 12) {
+ int pad = 16 - (USED_BATCH(brw->batch) & 15);
do
- brw->batch.map[brw->batch.used++] = MI_NOOP;
+ *brw->batch.map_next++ = MI_NOOP;
while (--pad);
}
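
The URB_FENCE workaround above pads with MI_NOOP up to the next 16-dword boundary whenever fewer than four dwords remain in the current 64-byte cache line, i.e. when (USED_BATCH(brw->batch) & 15) > 12, so the fence is emitted starting on a fresh line. A small worked check of that arithmetic, independent of the real batchbuffer code:

#include <assert.h>
#include <stdio.h>

/* Model the padding rule: pad to the next 16-dword line whenever fewer than
 * four dwords are left in the current one. */
static int pad_dwords(unsigned used_dwords)
{
   return (used_dwords & 15) > 12 ? 16 - (used_dwords & 15) : 0;
}

int main(void)
{
   /* e.g. 14 dwords into the line: 2 MI_NOOPs bring the write pointer to the
    * next line, so the fence occupies dwords 16..18 and never straddles a
    * 64-byte boundary. */
   unsigned used = 14;
   int pad = pad_dwords(used);
   printf("pad = %d\n", pad);        /* pad = 2 */
   assert((used + pad) % 16 == 0);
   return 0;
}
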
diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h
index 04e4e944118..68f4318d371 100644
--- a/src/mesa/drivers/dri/i965/brw_util.h
+++ b/src/mesa/drivers/dri/i965/brw_util.h
@@ -53,14 +53,14 @@ brw_get_line_width(struct brw_context *brw)
float line_width =
CLAMP(!brw->ctx.Multisample._Enabled && !brw->ctx.Line.SmoothFlag
? roundf(brw->ctx.Line.Width) : brw->ctx.Line.Width,
- 0.0, brw->ctx.Const.MaxLineWidth);
+ 0.0f, brw->ctx.Const.MaxLineWidth);
uint32_t line_width_u3_7 = U_FIXED(line_width, 7);
/* Line width of 0 is not allowed when MSAA enabled */
if (brw->ctx.Multisample._Enabled) {
if (line_width_u3_7 == 0)
line_width_u3_7 = 1;
- } else if (brw->ctx.Line.SmoothFlag && line_width < 1.5) {
+ } else if (brw->ctx.Line.SmoothFlag && line_width < 1.5f) {
/* For 1 pixel line thickness or less, the general
* anti-aliasing algorithm gives up, and a garbage line is
* generated. Setting a Line Width of 0.0 specifies the
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 05f188fe116..63f75da7e99 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -171,6 +171,17 @@ dst_reg::dst_reg(register_file file, int reg, const glsl_type *type,
this->writemask = writemask;
}
+dst_reg::dst_reg(register_file file, int reg, brw_reg_type type,
+ unsigned writemask)
+{
+ init();
+
+ this->file = file;
+ this->reg = reg;
+ this->type = type;
+ this->writemask = writemask;
+}
+
dst_reg::dst_reg(struct brw_reg reg)
{
init();
@@ -1709,6 +1720,9 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
bool
vec4_visitor::run(gl_clip_plane *clip_planes)
{
+ bool use_vec4_nir =
+ compiler->glsl_compiler_options[stage].NirOptions != NULL;
+
sanity_param_count = prog->Parameters->NumParameters;
if (shader_time_index >= 0)
@@ -1718,10 +1732,15 @@ vec4_visitor::run(gl_clip_plane *clip_planes)
emit_prolog();
- /* Generate VS IR for main(). (the visitor only descends into
- * functions called "main").
- */
- if (shader) {
+ if (use_vec4_nir) {
+ assert(prog->nir != NULL);
+ emit_nir_code();
+ if (failed)
+ return false;
+ } else if (shader) {
+ /* Generate VS IR for main(). (the visitor only descends into
+ * functions called "main").
+ */
visit_instructions(shader->base.ir);
} else {
emit_program_code();
@@ -1741,7 +1760,7 @@ vec4_visitor::run(gl_clip_plane *clip_planes)
* that we have reladdr computations available for CSE, since we'll
* often do repeated subexpressions for those.
*/
- if (shader) {
+ if (shader || use_vec4_nir) {
move_grf_array_access_to_scratch();
move_uniform_array_access_to_pull_constants();
} else {
@@ -1827,15 +1846,30 @@ vec4_visitor::run(gl_clip_plane *clip_planes)
}
}
- while (!reg_allocate()) {
- if (failed)
- return false;
+ bool allocated_without_spills = reg_allocate();
+
+ if (!allocated_without_spills) {
+ compiler->shader_perf_log(log_data,
+ "%s shader triggered register spilling. "
+ "Try reducing the number of live vec4 values "
+ "to improve performance.\n",
+ stage_name);
+
+ while (!reg_allocate()) {
+ if (failed)
+ return false;
+ }
}
opt_schedule_instructions();
opt_set_dependency_control();
+ if (last_scratch > 0) {
+ prog_data->base.total_scratch =
+ brw_get_scratch_size(last_scratch * REG_SIZE);
+ }
+
/* If any state parameters were appended, then ParameterValues could have
* been realloced, in which case the driver uniform storage set up by
* _mesa_associate_uniform_storage() would point to freed memory. Make
@@ -1857,10 +1891,11 @@ extern "C" {
*/
const unsigned *
brw_vs_emit(struct brw_context *brw,
- struct gl_shader_program *prog,
- struct brw_vs_compile *c,
- struct brw_vs_prog_data *prog_data,
void *mem_ctx,
+ const struct brw_vs_prog_key *key,
+ struct brw_vs_prog_data *prog_data,
+ struct gl_vertex_program *vp,
+ struct gl_shader_program *prog,
unsigned *final_assembly_size)
{
bool start_busy = false;
@@ -1879,29 +1914,31 @@ brw_vs_emit(struct brw_context *brw,
int st_index = -1;
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
- st_index = brw_get_shader_time_index(brw, prog, &c->vp->program.Base,
- ST_VS);
+ st_index = brw_get_shader_time_index(brw, prog, &vp->Base, ST_VS);
if (unlikely(INTEL_DEBUG & DEBUG_VS) && shader->base.ir)
- brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base);
+ brw_dump_ir("vertex", prog, &shader->base, &vp->Base);
+
+ if (!vp->Base.nir &&
+ (brw->intelScreen->compiler->scalar_vs ||
+ brw->intelScreen->compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions != NULL)) {
+ /* Normally we generate NIR in LinkShader() or
+ * ProgramStringNotify(), but Mesa's fixed-function vertex program
+ * handling doesn't notify the driver at all. Just do it here, at
+ * the last minute, even though it's lame.
+ */
+ assert(vp->Base.Id == 0 && prog == NULL);
+ vp->Base.nir =
+ brw_create_nir(brw, NULL, &vp->Base, MESA_SHADER_VERTEX,
+ brw->intelScreen->compiler->scalar_vs);
+ }
if (brw->intelScreen->compiler->scalar_vs) {
- if (!c->vp->program.Base.nir) {
- /* Normally we generate NIR in LinkShader() or
- * ProgramStringNotify(), but Mesa's fixed-function vertex program
- * handling doesn't notify the driver at all. Just do it here, at
- * the last minute, even though it's lame.
- */
- assert(c->vp->program.Base.Id == 0 && prog == NULL);
- c->vp->program.Base.nir =
- brw_create_nir(brw, NULL, &c->vp->program.Base, MESA_SHADER_VERTEX);
- }
-
prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
fs_visitor v(brw->intelScreen->compiler, brw,
- mem_ctx, MESA_SHADER_VERTEX, &c->key,
- &prog_data->base.base, prog, &c->vp->program.Base,
+ mem_ctx, MESA_SHADER_VERTEX, key,
+ &prog_data->base.base, prog, &vp->Base,
8, st_index);
if (!v.run_vs(brw_select_clip_planes(&brw->ctx))) {
if (prog) {
@@ -1916,8 +1953,8 @@ brw_vs_emit(struct brw_context *brw,
}
fs_generator g(brw->intelScreen->compiler, brw,
- mem_ctx, (void *) &c->key, &prog_data->base.base,
- &c->vp->program.Base, v.promoted_constants,
+ mem_ctx, (void *) key, &prog_data->base.base,
+ &vp->Base, v.promoted_constants,
v.runtime_check_aads_emit, "VS");
if (INTEL_DEBUG & DEBUG_VS) {
char *name;
@@ -1927,21 +1964,19 @@ brw_vs_emit(struct brw_context *brw,
prog->Name);
} else {
name = ralloc_asprintf(mem_ctx, "vertex program %d",
- c->vp->program.Base.Id);
+ vp->Base.Id);
}
g.enable_debug(name);
}
g.generate_code(v.cfg, 8);
assembly = g.get_assembly(final_assembly_size);
-
- c->base.last_scratch = v.last_scratch;
}
if (!assembly) {
prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
- vec4_vs_visitor v(brw->intelScreen->compiler,
- c, prog_data, prog, mem_ctx, st_index,
+ vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data,
+ vp, prog, mem_ctx, st_index,
!_mesa_is_gles3(&brw->ctx));
if (!v.run(brw_select_clip_planes(&brw->ctx))) {
if (prog) {
@@ -1956,14 +1991,14 @@ brw_vs_emit(struct brw_context *brw,
}
vec4_generator g(brw->intelScreen->compiler, brw,
- prog, &c->vp->program.Base, &prog_data->base,
+ prog, &vp->Base, &prog_data->base,
mem_ctx, INTEL_DEBUG & DEBUG_VS, "vertex", "VS");
assembly = g.generate_assembly(v.cfg, final_assembly_size);
}
if (unlikely(brw->perf_debug) && shader) {
if (shader->compiled_once) {
- brw_vs_debug_recompile(brw, prog, &c->key);
+ brw_vs_debug_recompile(brw, prog, key);
}
if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
perf_debug("VS compile took %.03f ms and stalled the GPU\n",
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 2ac16932189..341c516b39a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -45,12 +45,9 @@ extern "C" {
#endif
#include "glsl/ir.h"
+#include "glsl/nir/nir.h"
-struct brw_vec4_compile {
- GLuint last_scratch; /**< measured in 32-byte (register size) units */
-};
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -77,7 +74,7 @@ class vec4_visitor : public backend_shader, public ir_visitor
{
public:
vec4_visitor(const struct brw_compiler *compiler,
- struct brw_vec4_compile *c,
+ void *log_data,
struct gl_program *prog,
const struct brw_vue_prog_key *key,
struct brw_vue_prog_data *prog_data,
@@ -103,7 +100,6 @@ public:
return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
}
- struct brw_vec4_compile * const c;
const struct brw_vue_prog_key * const key;
struct brw_vue_prog_data * const prog_data;
unsigned int sanity_param_count;
@@ -181,9 +177,12 @@ public:
void fail(const char *msg, ...);
void setup_uniform_clipplane_values(gl_clip_plane *clip_planes);
+ virtual void setup_vector_uniform_values(const gl_constant_value *values,
+ unsigned n);
void setup_uniform_values(ir_variable *ir);
void setup_builtin_uniform_values(ir_variable *ir);
int setup_uniforms(int payload_reg);
+
bool reg_allocate_trivial();
bool reg_allocate();
void evaluate_spill_costs(float *spill_costs, bool *no_spill);
@@ -292,14 +291,17 @@ public:
void emit_bool_to_cond_code(ir_rvalue *ir, enum brw_predicate *predicate);
void emit_if_gen6(ir_if *ir);
- void emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
- src_reg src0, src_reg src1);
+ vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
+ src_reg src0, src_reg src1);
- void emit_lrp(const dst_reg &dst,
- const src_reg &x, const src_reg &y, const src_reg &a);
+ vec4_instruction *emit_lrp(const dst_reg &dst, const src_reg &x,
+ const src_reg &y, const src_reg &a);
- /** Copy any live channel from \p src to the first channel of \p dst. */
- void emit_uniformize(const dst_reg &dst, const src_reg &src);
+ /**
+ * Copy any live channel from \p src to the first channel of the
+ * result.
+ */
+ src_reg emit_uniformize(const src_reg &src);
void emit_block_move(dst_reg *dst, src_reg *src,
const struct glsl_type *type, brw_predicate predicate);
@@ -317,11 +319,13 @@ public:
void emit_scalar(ir_instruction *ir, enum prog_opcode op,
dst_reg dst, src_reg src0, src_reg src1);
- src_reg fix_3src_operand(src_reg src);
+ src_reg fix_3src_operand(const src_reg &src);
+ src_reg resolve_source_modifiers(const src_reg &src);
+
+ vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1 = src_reg());
- void emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
- const src_reg &src1 = src_reg());
- src_reg fix_math_operand(src_reg src);
+ src_reg fix_math_operand(const src_reg &src);
void emit_pack_half_2x16(dst_reg dst, src_reg src0);
void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
@@ -330,10 +334,27 @@ public:
void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);
- uint32_t gather_channel(ir_texture *ir, uint32_t sampler);
- src_reg emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler);
+ void emit_texture(ir_texture_opcode op,
+ dst_reg dest,
+ const glsl_type *dest_type,
+ src_reg coordinate,
+ int coord_components,
+ src_reg shadow_comparitor,
+ src_reg lod, src_reg lod2,
+ src_reg sample_index,
+ uint32_t constant_offset,
+ src_reg offset_value,
+ src_reg mcs,
+ bool is_cube_array,
+ uint32_t sampler, src_reg sampler_reg);
+
+ uint32_t gather_channel(unsigned gather_component, uint32_t sampler);
+ src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
+ src_reg sampler);
void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);
- void swizzle_result(ir_texture *ir, src_reg orig_val, uint32_t sampler);
+ void swizzle_result(ir_texture_opcode op, dst_reg dest,
+ src_reg orig_val, uint32_t sampler,
+ const glsl_type *dest_type);
void emit_ndc_computation();
void emit_psiz_and_flags(dst_reg reg);
@@ -388,13 +409,53 @@ public:
void visit_atomic_counter_intrinsic(ir_call *ir);
+ int type_size(const struct glsl_type *type);
+ bool is_high_sampler(src_reg sampler);
+
+ virtual void emit_nir_code();
+ virtual void nir_setup_inputs(nir_shader *shader);
+ virtual void nir_setup_uniforms(nir_shader *shader);
+ virtual void nir_setup_uniform(nir_variable *var);
+ virtual void nir_setup_builtin_uniform(nir_variable *var);
+ virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
+ virtual void nir_setup_system_values(nir_shader *shader);
+ virtual void nir_emit_impl(nir_function_impl *impl);
+ virtual void nir_emit_cf_list(exec_list *list);
+ virtual void nir_emit_if(nir_if *if_stmt);
+ virtual void nir_emit_loop(nir_loop *loop);
+ virtual void nir_emit_block(nir_block *block);
+ virtual void nir_emit_instr(nir_instr *instr);
+ virtual void nir_emit_load_const(nir_load_const_instr *instr);
+ virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
+ virtual void nir_emit_alu(nir_alu_instr *instr);
+ virtual void nir_emit_jump(nir_jump_instr *instr);
+ virtual void nir_emit_texture(nir_tex_instr *instr);
+
+ dst_reg get_nir_dest(nir_dest dest, enum brw_reg_type type);
+ dst_reg get_nir_dest(nir_dest dest, nir_alu_type type);
+ dst_reg get_nir_dest(nir_dest dest);
+ src_reg get_nir_src(nir_src src, enum brw_reg_type type,
+ unsigned num_components = 4);
+ src_reg get_nir_src(nir_src src, nir_alu_type type,
+ unsigned num_components = 4);
+ src_reg get_nir_src(nir_src src,
+ unsigned num_components = 4);
+
+ virtual dst_reg *make_reg_for_system_value(int location,
+ const glsl_type *type) = 0;
+
+ dst_reg *nir_locals;
+ dst_reg *nir_ssa_values;
+ src_reg *nir_inputs;
+ unsigned *nir_uniform_driver_location;
+ dst_reg *nir_system_values;
+
protected:
void emit_vertex();
void lower_attributes_to_hw_regs(const int *attribute_map,
bool interleaved);
void setup_payload_interference(struct ra_graph *g, int first_payload_node,
int reg_node_count);
- virtual dst_reg *make_reg_for_system_value(ir_variable *ir) = 0;
virtual void assign_binding_table_offsets();
virtual void setup_payload() = 0;
virtual void emit_prolog() = 0;
@@ -403,6 +464,8 @@ protected:
virtual void emit_urb_write_header(int mrf) = 0;
virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
virtual int compute_array_stride(ir_dereference_array *ir);
+ virtual void gs_emit_vertex(int stream_id);
+ virtual void gs_end_primitive();
private:
/**
@@ -411,6 +474,8 @@ private:
const bool no_spills;
int shader_time_index;
+
+ unsigned last_scratch; /**< measured in 32-byte (register size) units */
};
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 2d9afa8145f..5a15eb89766 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -179,6 +179,7 @@ try_constant_propagate(const struct brw_device_info *devinfo,
case BRW_OPCODE_MACH:
case BRW_OPCODE_MUL:
+ case SHADER_OPCODE_MULH:
case BRW_OPCODE_ADD:
case BRW_OPCODE_OR:
case BRW_OPCODE_AND:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index c9fe0cebf27..5a277f74c44 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -62,6 +62,7 @@ is_expression(const vec4_instruction *const inst)
case BRW_OPCODE_CMPN:
case BRW_OPCODE_ADD:
case BRW_OPCODE_MUL:
+ case SHADER_OPCODE_MULH:
case BRW_OPCODE_FRC:
case BRW_OPCODE_RNDU:
case BRW_OPCODE_RNDD:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index d2de2f0be25..92050b94d33 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1465,19 +1465,15 @@ vec4_generator::generate_code(const cfg_t *cfg)
break;
case SHADER_OPCODE_UNTYPED_ATOMIC:
- assert(src[1].file == BRW_IMMEDIATE_VALUE &&
- src[2].file == BRW_IMMEDIATE_VALUE);
+ assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_atomic(p, dst, src[0], src[1], src[2].dw1.ud, inst->mlen,
!inst->dst.is_null());
- brw_mark_surface_used(&prog_data->base, src[1].dw1.ud);
break;
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
- assert(src[1].file == BRW_IMMEDIATE_VALUE &&
- src[2].file == BRW_IMMEDIATE_VALUE);
+ assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen,
src[2].dw1.ud);
- brw_mark_surface_used(&prog_data->base, src[1].dw1.ud);
break;
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
@@ -1549,7 +1545,7 @@ vec4_generator::generate_code(const cfg_t *cfg)
*
* where they pack the four bytes from the low and high four DW.
*/
- assert(is_power_of_two(dst.dw1.bits.writemask) &&
+ assert(_mesa_is_pow_two(dst.dw1.bits.writemask) &&
dst.dw1.bits.writemask != 0);
unsigned offset = __builtin_ctz(dst.dw1.bits.writemask);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
new file mode 100644
index 00000000000..d85fb6f31ec
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp
@@ -0,0 +1,118 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_nir.h"
+#include "brw_vec4_gs_visitor.h"
+
+namespace brw {
+
+void
+vec4_gs_visitor::nir_setup_inputs(nir_shader *shader)
+{
+ nir_inputs = ralloc_array(mem_ctx, src_reg, shader->num_inputs);
+
+ foreach_list_typed(nir_variable, var, node, &shader->inputs) {
+ int offset = var->data.driver_location;
+ if (var->type->base_type == GLSL_TYPE_ARRAY) {
+ /* Geometry shader inputs are arrays, but they use an unusual array
+ * layout: instead of all array elements for a given geometry shader
+ * input being stored consecutively, all geometry shader inputs are
+ * interleaved into one giant array. At this stage of compilation, we
+ * assume that the stride of the array is BRW_VARYING_SLOT_COUNT.
+ * Later, setup_attributes() will remap our accesses to the actual
+ * input array.
+ */
+ assert(var->type->length > 0);
+ int length = var->type->length;
+ int size = type_size(var->type) / length;
+ for (int i = 0; i < length; i++) {
+ int location = var->data.location + i * BRW_VARYING_SLOT_COUNT;
+ for (int j = 0; j < size; j++) {
+ src_reg src = src_reg(ATTR, location + j, var->type);
+ src = retype(src, brw_type_for_base_type(var->type));
+ nir_inputs[offset] = src;
+ offset++;
+ }
+ }
+ } else {
+ int size = type_size(var->type);
+ for (int i = 0; i < size; i++) {
+ src_reg src = src_reg(ATTR, var->data.location + i, var->type);
+ src = retype(src, brw_type_for_base_type(var->type));
+ nir_inputs[offset] = src;
+ offset++;
+ }
+ }
+ }
+}
+
+void
+vec4_gs_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
+{
+ dst_reg *reg;
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_invocation_id:
+ reg = &this->nir_system_values[SYSTEM_VALUE_INVOCATION_ID];
+ if (reg->file == BAD_FILE)
+ *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_INVOCATION_ID,
+ glsl_type::int_type);
+ break;
+
+ default:
+ vec4_visitor::nir_setup_system_value_intrinsic(instr);
+ }
+
+}
+
+void
+vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
+{
+ dst_reg dest;
+ src_reg src;
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_emit_vertex: {
+ int stream_id = instr->const_index[0];
+ gs_emit_vertex(stream_id);
+ break;
+ }
+
+ case nir_intrinsic_end_primitive:
+ gs_end_primitive();
+ break;
+
+ case nir_intrinsic_load_invocation_id: {
+ src_reg invocation_id =
+ src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]);
+ assert(invocation_id.file != BAD_FILE);
+ dest = get_nir_dest(instr->dest, invocation_id.type);
+ emit(MOV(dest, invocation_id));
+ break;
+ }
+
+ default:
+ vec4_visitor::nir_emit_intrinsic(instr);
+ }
+}
+}
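
nir_setup_inputs() above depends on the interleaved layout described in its comment: array element i of a geometry shader input (one element per input vertex) does not follow element i-1 of the same input; it lives one whole BRW_VARYING_SLOT_COUNT stride further along, next to element i of every other input. A self-contained sketch of that index computation, using a made-up slot count and location rather than the real driver constants:

#include <stdio.h>

/* Hypothetical stand-in for BRW_VARYING_SLOT_COUNT. */
#define SLOT_COUNT 40

/* ATTR register index for the 'slot'-th vec4 of array element 'vertex' of an
 * input whose first location is 'location', under the interleaved GS layout. */
static int gs_input_attr(int location, int vertex, int slot)
{
   return location + slot + vertex * SLOT_COUNT;
}

int main(void)
{
   /* A vec4 input at location 3, read for input vertices 0..2. */
   for (int v = 0; v < 3; v++)
      printf("vertex %d -> ATTR %d\n", v, gs_input_attr(3, v, 0));
   /* vertex 0 -> ATTR 3, vertex 1 -> ATTR 43, vertex 2 -> ATTR 83 */
   return 0;
}
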
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 69bcf5afc51..019efecac66 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -35,12 +35,14 @@ const unsigned MAX_GS_INPUT_VERTICES = 6;
namespace brw {
vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
+ void *log_data,
struct brw_gs_compile *c,
struct gl_shader_program *prog,
void *mem_ctx,
bool no_spills,
int shader_time_index)
- : vec4_visitor(compiler, &c->base, &c->gp->program.Base, &c->key.base,
+ : vec4_visitor(compiler, log_data,
+ &c->gp->program.Base, &c->key.base,
&c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx,
no_spills, shader_time_index),
c(c)
@@ -49,11 +51,12 @@ vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
dst_reg *
-vec4_gs_visitor::make_reg_for_system_value(ir_variable *ir)
+vec4_gs_visitor::make_reg_for_system_value(int location,
+ const glsl_type *type)
{
- dst_reg *reg = new(mem_ctx) dst_reg(this, ir->type);
+ dst_reg *reg = new(mem_ctx) dst_reg(this, type);
- switch (ir->data.location) {
+ switch (location) {
case SYSTEM_VALUE_INVOCATION_ID:
this->current_annotation = "initialize gl_InvocationID";
emit(GS_OPCODE_GET_INSTANCE_ID, *reg);
@@ -346,90 +349,82 @@ vec4_gs_visitor::emit_control_data_bits()
if (c->control_data_header_size_bits > 128)
urb_write_flags = urb_write_flags | BRW_URB_WRITE_PER_SLOT_OFFSET;
- /* If vertex_count is 0, then no control data bits have been accumulated
- * yet, so we should do nothing.
+ /* If we are using either channel masks or a per-slot offset, then we
+ * need to figure out which DWORD we are trying to write to, using the
+ * formula:
+ *
+ * dword_index = (vertex_count - 1) * bits_per_vertex / 32
+ *
+ * Since bits_per_vertex is a power of two, and is known at compile
+ * time, this can be optimized to:
+ *
+ * dword_index = (vertex_count - 1) >> (6 - log2(bits_per_vertex))
*/
- emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_NEQ));
- emit(IF(BRW_PREDICATE_NORMAL));
- {
- /* If we are using either channel masks or a per-slot offset, then we
- * need to figure out which DWORD we are trying to write to, using the
- * formula:
- *
- * dword_index = (vertex_count - 1) * bits_per_vertex / 32
- *
- * Since bits_per_vertex is a power of two, and is known at compile
- * time, this can be optimized to:
- *
- * dword_index = (vertex_count - 1) >> (6 - log2(bits_per_vertex))
+ src_reg dword_index(this, glsl_type::uint_type);
+ if (urb_write_flags) {
+ src_reg prev_count(this, glsl_type::uint_type);
+ emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu));
+ unsigned log2_bits_per_vertex =
+ _mesa_fls(c->control_data_bits_per_vertex);
+ emit(SHR(dst_reg(dword_index), prev_count,
+ (uint32_t) (6 - log2_bits_per_vertex)));
+ }
+
+ /* Start building the URB write message. The first MRF gets a copy of
+ * R0.
+ */
+ int base_mrf = 1;
+ dst_reg mrf_reg(MRF, base_mrf);
+ src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+ vec4_instruction *inst = emit(MOV(mrf_reg, r0));
+ inst->force_writemask_all = true;
+
+ if (urb_write_flags & BRW_URB_WRITE_PER_SLOT_OFFSET) {
+ /* Set the per-slot offset to dword_index / 4, so that we'll write to
+ * the appropriate OWORD within the control data header.
*/
- src_reg dword_index(this, glsl_type::uint_type);
- if (urb_write_flags) {
- src_reg prev_count(this, glsl_type::uint_type);
- emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu));
- unsigned log2_bits_per_vertex =
- _mesa_fls(c->control_data_bits_per_vertex);
- emit(SHR(dst_reg(dword_index), prev_count,
- (uint32_t) (6 - log2_bits_per_vertex)));
- }
+ src_reg per_slot_offset(this, glsl_type::uint_type);
+ emit(SHR(dst_reg(per_slot_offset), dword_index, 2u));
+ emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u);
+ }
- /* Start building the URB write message. The first MRF gets a copy of
- * R0.
+ if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) {
+ /* Set the channel masks to 1 << (dword_index % 4), so that we'll
+ * write to the appropriate DWORD within the OWORD. We need to do
+ * this computation with force_writemask_all, otherwise garbage data
+ * from invocation 0 might clobber the mask for invocation 1 when
+ * GS_OPCODE_PREPARE_CHANNEL_MASKS tries to OR the two masks
+ * together.
*/
- int base_mrf = 1;
- dst_reg mrf_reg(MRF, base_mrf);
- src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
- vec4_instruction *inst = emit(MOV(mrf_reg, r0));
+ src_reg channel(this, glsl_type::uint_type);
+ inst = emit(AND(dst_reg(channel), dword_index, 3u));
inst->force_writemask_all = true;
-
- if (urb_write_flags & BRW_URB_WRITE_PER_SLOT_OFFSET) {
- /* Set the per-slot offset to dword_index / 4, to that we'll write to
- * the appropriate OWORD within the control data header.
- */
- src_reg per_slot_offset(this, glsl_type::uint_type);
- emit(SHR(dst_reg(per_slot_offset), dword_index, 2u));
- emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u);
- }
-
- if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) {
- /* Set the channel masks to 1 << (dword_index % 4), so that we'll
- * write to the appropriate DWORD within the OWORD. We need to do
- * this computation with force_writemask_all, otherwise garbage data
- * from invocation 0 might clobber the mask for invocation 1 when
- * GS_OPCODE_PREPARE_CHANNEL_MASKS tries to OR the two masks
- * together.
- */
- src_reg channel(this, glsl_type::uint_type);
- inst = emit(AND(dst_reg(channel), dword_index, 3u));
- inst->force_writemask_all = true;
- src_reg one(this, glsl_type::uint_type);
- inst = emit(MOV(dst_reg(one), 1u));
- inst->force_writemask_all = true;
- src_reg channel_mask(this, glsl_type::uint_type);
- inst = emit(SHL(dst_reg(channel_mask), one, channel));
- inst->force_writemask_all = true;
- emit(GS_OPCODE_PREPARE_CHANNEL_MASKS, dst_reg(channel_mask),
- channel_mask);
- emit(GS_OPCODE_SET_CHANNEL_MASKS, mrf_reg, channel_mask);
- }
-
- /* Store the control data bits in the message payload and send it. */
- dst_reg mrf_reg2(MRF, base_mrf + 1);
- inst = emit(MOV(mrf_reg2, this->control_data_bits));
+ src_reg one(this, glsl_type::uint_type);
+ inst = emit(MOV(dst_reg(one), 1u));
inst->force_writemask_all = true;
- inst = emit(GS_OPCODE_URB_WRITE);
- inst->urb_write_flags = urb_write_flags;
- /* We need to increment Global Offset by 256-bits to make room for
- * Broadwell's extra "Vertex Count" payload at the beginning of the
- * URB entry. Since this is an OWord message, Global Offset is counted
- * in 128-bit units, so we must set it to 2.
- */
- if (devinfo->gen >= 8)
- inst->offset = 2;
- inst->base_mrf = base_mrf;
- inst->mlen = 2;
+ src_reg channel_mask(this, glsl_type::uint_type);
+ inst = emit(SHL(dst_reg(channel_mask), one, channel));
+ inst->force_writemask_all = true;
+ emit(GS_OPCODE_PREPARE_CHANNEL_MASKS, dst_reg(channel_mask),
+ channel_mask);
+ emit(GS_OPCODE_SET_CHANNEL_MASKS, mrf_reg, channel_mask);
}
- emit(BRW_OPCODE_ENDIF);
+
+ /* Store the control data bits in the message payload and send it. */
+ dst_reg mrf_reg2(MRF, base_mrf + 1);
+ inst = emit(MOV(mrf_reg2, this->control_data_bits));
+ inst->force_writemask_all = true;
+ inst = emit(GS_OPCODE_URB_WRITE);
+ inst->urb_write_flags = urb_write_flags;
+ /* We need to increment Global Offset by 256-bits to make room for
+ * Broadwell's extra "Vertex Count" payload at the beginning of the
+ * URB entry. Since this is an OWord message, Global Offset is counted
+ * in 128-bit units, so we must set it to 2.
+ */
+ if (devinfo->gen >= 8)
+ inst->offset = 2;
+ inst->base_mrf = base_mrf;
+ inst->mlen = 2;
}
void
@@ -472,7 +467,7 @@ vec4_gs_visitor::set_stream_control_data_bits(unsigned stream_id)
}
void
-vec4_gs_visitor::visit(ir_emit_vertex *ir)
+vec4_gs_visitor::gs_emit_vertex(int stream_id)
{
this->current_annotation = "emit vertex: safety check";
@@ -486,7 +481,7 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir)
* be recorded by transform feedback, we can simply discard all geometry
* bound to these streams when transform feedback is disabled.
*/
- if (ir->stream_id() > 0 && shader_prog->TransformFeedback.NumVarying == 0)
+ if (stream_id > 0 && shader_prog->TransformFeedback.NumVarying == 0)
return;
/* To ensure that we don't output more vertices than the shader specified
@@ -529,9 +524,17 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir)
emit(AND(dst_null_d(), this->vertex_count,
(uint32_t) (32 / c->control_data_bits_per_vertex - 1)));
inst->conditional_mod = BRW_CONDITIONAL_Z;
+
emit(IF(BRW_PREDICATE_NORMAL));
{
+ /* If vertex_count is 0, then no control data bits have been
+ * accumulated yet, so we skip emitting them.
+ */
+ emit(CMP(dst_null_d(), this->vertex_count, 0u,
+ BRW_CONDITIONAL_NEQ));
+ emit(IF(BRW_PREDICATE_NORMAL));
emit_control_data_bits();
+ emit(BRW_OPCODE_ENDIF);
/* Reset control_data_bits to 0 so we can start accumulating a new
* batch.
@@ -557,7 +560,7 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir)
c->prog_data.control_data_format ==
GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
this->current_annotation = "emit vertex: Stream control data bits";
- set_stream_control_data_bits(ir->stream_id());
+ set_stream_control_data_bits(stream_id);
}
this->current_annotation = "emit vertex: increment vertex count";
@@ -570,7 +573,13 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir)
}
void
-vec4_gs_visitor::visit(ir_end_primitive *)
+vec4_gs_visitor::visit(ir_emit_vertex *ir)
+{
+ gs_emit_vertex(ir->stream_id());
+}
+
+void
+vec4_gs_visitor::gs_end_primitive()
{
/* We can only do EndPrimitive() functionality when the control data
* consists of cut bits. Fortunately, the only time it isn't is when the
@@ -620,6 +629,12 @@ vec4_gs_visitor::visit(ir_end_primitive *)
emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
}
+void
+vec4_gs_visitor::visit(ir_end_primitive *)
+{
+ gs_end_primitive();
+}
+
static const unsigned *
generate_assembly(struct brw_context *brw,
struct gl_shader_program *shader_prog,
@@ -662,7 +677,7 @@ brw_gs_emit(struct brw_context *brw,
likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) {
c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
- vec4_gs_visitor v(brw->intelScreen->compiler,
+ vec4_gs_visitor v(brw->intelScreen->compiler, brw,
c, prog, mem_ctx, true /* no_spills */, st_index);
if (v.run(NULL /* clip planes */)) {
return generate_assembly(brw, prog, &c->gp->program.Base,
@@ -704,11 +719,11 @@ brw_gs_emit(struct brw_context *brw,
const unsigned *ret = NULL;
if (brw->gen >= 7)
- gs = new vec4_gs_visitor(brw->intelScreen->compiler,
+ gs = new vec4_gs_visitor(brw->intelScreen->compiler, brw,
c, prog, mem_ctx, false /* no_spills */,
st_index);
else
- gs = new gen6_gs_visitor(brw->intelScreen->compiler,
+ gs = new gen6_gs_visitor(brw->intelScreen->compiler, brw,
c, prog, mem_ctx, false /* no_spills */,
st_index);
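
emit_control_data_bits() above locates the DWORD to update as dword_index = (vertex_count - 1) * bits_per_vertex / 32, then splits it into a per-slot OWord offset (dword_index / 4) and a channel mask (1 << (dword_index % 4)). A quick standalone evaluation of those formulas, for the one-bit-per-vertex cut-bit case and the two-bit stream-ID case:

#include <stdio.h>

/* For the vertex whose (1-based) number is 'vertex_count' and
 * 'bits_per_vertex' control data bits, compute which DWORD / OWord / channel
 * of the control data header receives its bits, mirroring the formulas in
 * vec4_gs_visitor::emit_control_data_bits(). */
static void locate(unsigned vertex_count, unsigned bits_per_vertex)
{
   unsigned dword_index = (vertex_count - 1) * bits_per_vertex / 32;
   unsigned per_slot_offset = dword_index / 4;       /* OWord within header */
   unsigned channel_mask = 1u << (dword_index % 4);  /* DWORD within OWord  */
   printf("v=%u bpv=%u -> dword %u, oword %u, mask 0x%x\n",
          vertex_count, bits_per_vertex, dword_index,
          per_slot_offset, channel_mask);
}

int main(void)
{
   locate(40, 1);   /* cut bits:   39/32  -> dword 1,  oword 0, mask 0x2 */
   locate(40, 2);   /* stream IDs: 78/32  -> dword 2,  oword 0, mask 0x4 */
   locate(200, 2);  /*             398/32 -> dword 12, oword 3, mask 0x1 */
   return 0;
}
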
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
index e693c56b58f..0e8fefabecc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
@@ -37,7 +37,6 @@
*/
struct brw_gs_compile
{
- struct brw_vec4_compile base;
struct brw_gs_prog_key key;
struct brw_gs_prog_data prog_data;
struct brw_vue_map input_vue_map;
@@ -69,14 +68,19 @@ class vec4_gs_visitor : public vec4_visitor
{
public:
vec4_gs_visitor(const struct brw_compiler *compiler,
+ void *log_data,
struct brw_gs_compile *c,
struct gl_shader_program *prog,
void *mem_ctx,
bool no_spills,
int shader_time_index);
+ virtual void nir_setup_inputs(nir_shader *shader);
+ virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
+
protected:
- virtual dst_reg *make_reg_for_system_value(ir_variable *ir);
+ virtual dst_reg *make_reg_for_system_value(int location,
+ const glsl_type *type);
virtual void setup_payload();
virtual void emit_prolog();
virtual void emit_program_code();
@@ -86,6 +90,9 @@ protected:
virtual int compute_array_stride(ir_dereference_array *ir);
virtual void visit(ir_emit_vertex *);
virtual void visit(ir_end_primitive *);
+ virtual void gs_emit_vertex(int stream_id);
+ virtual void gs_end_primitive();
+ virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
protected:
int setup_varying_inputs(int payload_reg, int *attribute_map,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
index 95b9d9017e2..cc688ef8083 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
@@ -96,7 +96,8 @@ vec4_live_variables::setup_def_use()
* are the things that screen off preceding definitions of a
* variable, and thus qualify for being in def[].
*/
- if (inst->dst.file == GRF && !inst->predicate) {
+ if (inst->dst.file == GRF &&
+ (!inst->predicate || inst->opcode == BRW_OPCODE_SEL)) {
for (unsigned i = 0; i < inst->regs_written; i++) {
for (int c = 0; c < 4; c++) {
if (inst->dst.writemask & (1 << c)) {
@@ -133,27 +134,9 @@ vec4_live_variables::compute_live_variables()
while (cont) {
cont = false;
- foreach_block (block, cfg) {
+ foreach_block_reverse (block, cfg) {
struct block_data *bd = &block_data[block->num];
- /* Update livein */
- for (int i = 0; i < bitset_words; i++) {
- BITSET_WORD new_livein = (bd->use[i] |
- (bd->liveout[i] &
- ~bd->def[i]));
- if (new_livein & ~bd->livein[i]) {
- bd->livein[i] |= new_livein;
- cont = true;
- }
- }
- BITSET_WORD new_livein = (bd->flag_use[0] |
- (bd->flag_liveout[0] &
- ~bd->flag_def[0]));
- if (new_livein & ~bd->flag_livein[0]) {
- bd->flag_livein[0] |= new_livein;
- cont = true;
- }
-
/* Update liveout */
foreach_list_typed(bblock_link, child_link, link, &block->children) {
struct block_data *child_bd = &block_data[child_link->block->num];
@@ -173,6 +156,24 @@ vec4_live_variables::compute_live_variables()
cont = true;
}
}
+
+ /* Update livein */
+ for (int i = 0; i < bitset_words; i++) {
+ BITSET_WORD new_livein = (bd->use[i] |
+ (bd->liveout[i] &
+ ~bd->def[i]));
+ if (new_livein & ~bd->livein[i]) {
+ bd->livein[i] |= new_livein;
+ cont = true;
+ }
+ }
+ BITSET_WORD new_livein = (bd->flag_use[0] |
+ (bd->flag_liveout[0] &
+ ~bd->flag_def[0]));
+ if (new_livein & ~bd->flag_livein[0]) {
+ bd->flag_livein[0] |= new_livein;
+ cont = true;
+ }
}
}
}
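
The brw_vec4_live_variables.cpp change above keeps the usual backward dataflow equations (livein = use | (liveout & ~def), liveout = union of the successors' livein) but walks the blocks in reverse and updates liveout before livein, so each block can consume livein values its successors produced in the same sweep, which generally reaches the fixed point in fewer iterations. A toy version of that iteration order on a two-block CFG, with a single 32-bit word standing in for the real BITSET_WORD arrays:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Toy backward liveness over a straight-line CFG of two blocks, one 32-bit
 * bitset per block.  Same equations as the driver pass, just scalar-sized. */
struct block { uint32_t use, def, livein, liveout; int succ; /* -1 = none */ };

static void compute_live(struct block *b, int n)
{
   bool cont = true;
   while (cont) {
      cont = false;
      for (int i = n - 1; i >= 0; i--) {            /* reverse block order */
         /* liveout first, taken from the successor's livein ... */
         uint32_t out = b[i].succ >= 0 ? b[b[i].succ].livein : 0;
         /* ... then livein from this block's freshly updated liveout. */
         uint32_t in = b[i].use | (out & ~b[i].def);
         if (out != b[i].liveout || in != b[i].livein) {
            b[i].liveout = out;
            b[i].livein = in;
            cont = true;
         }
      }
   }
}

int main(void)
{
   /* Block 0 defines variable bit 0 and uses bit 1; block 1 uses bit 0. */
   struct block b[2] = {
      { .use = 0x2, .def = 0x1, .succ = 1 },
      { .use = 0x1, .def = 0x0, .succ = -1 },
   };
   compute_live(b, 2);
   printf("b0 livein=0x%x liveout=0x%x\n", b[0].livein, b[0].liveout);
   printf("b1 livein=0x%x liveout=0x%x\n", b[1].livein, b[1].liveout);
   /* b0: livein=0x2 liveout=0x1; b1: livein=0x1 liveout=0x0, reached in a
    * single backward sweep plus one confirming pass. */
   return 0;
}
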
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
new file mode 100644
index 00000000000..923e2d30a4c
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -0,0 +1,1548 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_nir.h"
+#include "brw_vec4.h"
+#include "glsl/ir_uniform.h"
+
+namespace brw {
+
+void
+vec4_visitor::emit_nir_code()
+{
+ nir_shader *nir = prog->nir;
+
+ if (nir->num_inputs > 0)
+ nir_setup_inputs(nir);
+
+ if (nir->num_uniforms > 0)
+ nir_setup_uniforms(nir);
+
+ nir_setup_system_values(nir);
+
+ /* get the main function and emit it */
+ nir_foreach_overload(nir, overload) {
+ assert(strcmp(overload->function->name, "main") == 0);
+ assert(overload->impl);
+ nir_emit_impl(overload->impl);
+ }
+}
+
+void
+vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
+{
+ dst_reg *reg;
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_vertex_id:
+ unreachable("should be lowered by lower_vertex_id().");
+
+ case nir_intrinsic_load_vertex_id_zero_base:
+ reg = &this->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
+ if (reg->file == BAD_FILE)
+ *reg =
+ *this->make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
+ glsl_type::int_type);
+ break;
+
+ case nir_intrinsic_load_base_vertex:
+ reg = &this->nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
+ if (reg->file == BAD_FILE)
+ *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX,
+ glsl_type::int_type);
+ break;
+
+ case nir_intrinsic_load_instance_id:
+ reg = &this->nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
+ if (reg->file == BAD_FILE)
+ *reg = *this->make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID,
+ glsl_type::int_type);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static bool
+setup_system_values_block(nir_block *block, void *void_visitor)
+{
+ vec4_visitor *v = (vec4_visitor *)void_visitor;
+
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ v->nir_setup_system_value_intrinsic(intrin);
+ }
+
+ return true;
+}
+
+void
+vec4_visitor::nir_setup_system_values(nir_shader *shader)
+{
+ nir_system_values = ralloc_array(mem_ctx, dst_reg, SYSTEM_VALUE_MAX);
+
+ nir_foreach_overload(shader, overload) {
+ assert(strcmp(overload->function->name, "main") == 0);
+ assert(overload->impl);
+ nir_foreach_block(overload->impl, setup_system_values_block, this);
+ }
+}
+
+void
+vec4_visitor::nir_setup_inputs(nir_shader *shader)
+{
+ nir_inputs = ralloc_array(mem_ctx, src_reg, shader->num_inputs);
+
+ foreach_list_typed(nir_variable, var, node, &shader->inputs) {
+ int offset = var->data.driver_location;
+ unsigned size = type_size(var->type);
+ for (unsigned i = 0; i < size; i++) {
+ src_reg src = src_reg(ATTR, var->data.location + i, var->type);
+ nir_inputs[offset + i] = src;
+ }
+ }
+}
+
+void
+vec4_visitor::nir_setup_uniforms(nir_shader *shader)
+{
+ uniforms = 0;
+
+ nir_uniform_driver_location =
+ rzalloc_array(mem_ctx, unsigned, this->uniform_array_size);
+
+ if (shader_prog) {
+ foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
+ /* UBOs, atomics and samplers don't take up space in the
+ uniform file */
+ if (var->interface_type != NULL || var->type->contains_atomic() ||
+ type_size(var->type) == 0) {
+ continue;
+ }
+
+ assert(uniforms < uniform_array_size);
+ this->uniform_size[uniforms] = type_size(var->type);
+
+ if (strncmp(var->name, "gl_", 3) == 0)
+ nir_setup_builtin_uniform(var);
+ else
+ nir_setup_uniform(var);
+ }
+ } else {
+ /* For ARB_vertex_program, only a single "parameters" variable is
+ * generated to support uniform data.
+ */
+ nir_variable *var = (nir_variable *) shader->uniforms.get_head();
+ assert(shader->uniforms.length() == 1 &&
+ strcmp(var->name, "parameters") == 0);
+
+ assert(uniforms < uniform_array_size);
+ this->uniform_size[uniforms] = type_size(var->type);
+
+ struct gl_program_parameter_list *plist = prog->Parameters;
+ for (unsigned p = 0; p < plist->NumParameters; p++) {
+ uniform_vector_size[uniforms] = plist->Parameters[p].Size;
+
+ /* Parameters should be either vec4 uniforms or single component
+ * constants; matrices and other larger types should have been broken
+ * down earlier.
+ */
+ assert(uniform_vector_size[uniforms] <= 4);
+
+ int i;
+ for (i = 0; i < uniform_vector_size[uniforms]; i++) {
+ stage_prog_data->param[uniforms * 4 + i] = &plist->ParameterValues[p][i];
+ }
+ for (; i < 4; i++) {
+ static const gl_constant_value zero = { 0.0 };
+ stage_prog_data->param[uniforms * 4 + i] = &zero;
+ }
+
+ nir_uniform_driver_location[uniforms] = var->data.driver_location;
+ uniforms++;
+ }
+ }
+}
+
+void
+vec4_visitor::nir_setup_uniform(nir_variable *var)
+{
+ int namelen = strlen(var->name);
+
+ /* The data for our (non-builtin) uniforms is stored in a series of
+ * gl_uniform_driver_storage structs for each subcomponent that
+ * glGetUniformLocation() could name. We know it's been set up in the same
+ * order we'd walk the type, so walk the list of storage and find anything
+ * with our name, or the prefix of a component that starts with our name.
+ */
+ for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
+ struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
+
+ if (storage->builtin)
+ continue;
+
+ if (strncmp(var->name, storage->name, namelen) != 0 ||
+ (storage->name[namelen] != 0 &&
+ storage->name[namelen] != '.' &&
+ storage->name[namelen] != '[')) {
+ continue;
+ }
+
+ gl_constant_value *components = storage->storage;
+ unsigned vector_count = (MAX2(storage->array_elements, 1) *
+ storage->type->matrix_columns);
+
+ for (unsigned s = 0; s < vector_count; s++) {
+ assert(uniforms < uniform_array_size);
+ uniform_vector_size[uniforms] = storage->type->vector_elements;
+
+ int i;
+ for (i = 0; i < uniform_vector_size[uniforms]; i++) {
+ stage_prog_data->param[uniforms * 4 + i] = components;
+ components++;
+ }
+ for (; i < 4; i++) {
+ static const gl_constant_value zero = { 0.0 };
+ stage_prog_data->param[uniforms * 4 + i] = &zero;
+ }
+
+ nir_uniform_driver_location[uniforms] = var->data.driver_location;
+ uniforms++;
+ }
+ }
+}
+
+void
+vec4_visitor::nir_setup_builtin_uniform(nir_variable *var)
+{
+ const nir_state_slot *const slots = var->state_slots;
+ assert(var->state_slots != NULL);
+
+ for (unsigned int i = 0; i < var->num_state_slots; i++) {
+      /* This state reference has already been set up by ir_to_mesa,
+ * but we'll get the same index back here. We can reference
+ * ParameterValues directly, since unlike brw_fs.cpp, we never
+ * add new state references during compile.
+ */
+ int index = _mesa_add_state_reference(this->prog->Parameters,
+ (gl_state_index *)slots[i].tokens);
+ gl_constant_value *values =
+ &this->prog->Parameters->ParameterValues[index][0];
+
+ assert(uniforms < uniform_array_size);
+
+ for (unsigned j = 0; j < 4; j++)
+ stage_prog_data->param[uniforms * 4 + j] =
+ &values[GET_SWZ(slots[i].swizzle, j)];
+
+ this->uniform_vector_size[uniforms] =
+ (var->type->is_scalar() || var->type->is_vector() ||
+ var->type->is_matrix() ? var->type->vector_elements : 4);
+
+ nir_uniform_driver_location[uniforms] = var->data.driver_location;
+ uniforms++;
+ }
+}
+
+void
+vec4_visitor::nir_emit_impl(nir_function_impl *impl)
+{
+ nir_locals = ralloc_array(mem_ctx, dst_reg, impl->reg_alloc);
+
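+   /* Allocate a GRF for every local NIR register, sized to cover all of
+    * its array elements.
+    */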
+ foreach_list_typed(nir_register, reg, node, &impl->registers) {
+ unsigned array_elems =
+ reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
+
+ nir_locals[reg->index] = dst_reg(GRF, alloc.allocate(array_elems));
+ }
+
+ nir_ssa_values = ralloc_array(mem_ctx, dst_reg, impl->ssa_alloc);
+
+ nir_emit_cf_list(&impl->body);
+}
+
+void
+vec4_visitor::nir_emit_cf_list(exec_list *list)
+{
+ exec_list_validate(list);
+ foreach_list_typed(nir_cf_node, node, node, list) {
+ switch (node->type) {
+ case nir_cf_node_if:
+ nir_emit_if(nir_cf_node_as_if(node));
+ break;
+
+ case nir_cf_node_loop:
+ nir_emit_loop(nir_cf_node_as_loop(node));
+ break;
+
+ case nir_cf_node_block:
+ nir_emit_block(nir_cf_node_as_block(node));
+ break;
+
+ default:
+ unreachable("Invalid CFG node block");
+ }
+ }
+}
+
+void
+vec4_visitor::nir_emit_if(nir_if *if_stmt)
+{
+ /* First, put the condition in f0 */
+ src_reg condition = get_nir_src(if_stmt->condition, BRW_REGISTER_TYPE_D, 1);
+ vec4_instruction *inst = emit(MOV(dst_null_d(), condition));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+ emit(IF(BRW_PREDICATE_NORMAL));
+
+ nir_emit_cf_list(&if_stmt->then_list);
+
+ /* note: if the else is empty, dead CF elimination will remove it */
+ emit(BRW_OPCODE_ELSE);
+
+ nir_emit_cf_list(&if_stmt->else_list);
+
+ emit(BRW_OPCODE_ENDIF);
+}
+
+void
+vec4_visitor::nir_emit_loop(nir_loop *loop)
+{
+ emit(BRW_OPCODE_DO);
+
+ nir_emit_cf_list(&loop->body);
+
+ emit(BRW_OPCODE_WHILE);
+}
+
+void
+vec4_visitor::nir_emit_block(nir_block *block)
+{
+ nir_foreach_instr(block, instr) {
+ nir_emit_instr(instr);
+ }
+}
+
+void
+vec4_visitor::nir_emit_instr(nir_instr *instr)
+{
+ this->base_ir = instr;
+
+ switch (instr->type) {
+ case nir_instr_type_load_const:
+ nir_emit_load_const(nir_instr_as_load_const(instr));
+ break;
+
+ case nir_instr_type_intrinsic:
+ nir_emit_intrinsic(nir_instr_as_intrinsic(instr));
+ break;
+
+ case nir_instr_type_alu:
+ nir_emit_alu(nir_instr_as_alu(instr));
+ break;
+
+ case nir_instr_type_jump:
+ nir_emit_jump(nir_instr_as_jump(instr));
+ break;
+
+ case nir_instr_type_tex:
+ nir_emit_texture(nir_instr_as_tex(instr));
+ break;
+
+ default:
+ fprintf(stderr, "VS instruction not yet implemented by NIR->vec4\n");
+ break;
+ }
+}
+
+static dst_reg
+dst_reg_for_nir_reg(vec4_visitor *v, nir_register *nir_reg,
+ unsigned base_offset, nir_src *indirect)
+{
+ dst_reg reg;
+
+ reg = v->nir_locals[nir_reg->index];
+ reg = offset(reg, base_offset);
+ if (indirect) {
+ reg.reladdr =
+ new(v->mem_ctx) src_reg(v->get_nir_src(*indirect,
+ BRW_REGISTER_TYPE_D,
+ 1));
+ }
+ return reg;
+}
+
+dst_reg
+vec4_visitor::get_nir_dest(nir_dest dest)
+{
+ assert(!dest.is_ssa);
+ return dst_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
+ dest.reg.indirect);
+}
+
+dst_reg
+vec4_visitor::get_nir_dest(nir_dest dest, enum brw_reg_type type)
+{
+ return retype(get_nir_dest(dest), type);
+}
+
+dst_reg
+vec4_visitor::get_nir_dest(nir_dest dest, nir_alu_type type)
+{
+ return get_nir_dest(dest, brw_type_for_nir_type(type));
+}
+
+src_reg
+vec4_visitor::get_nir_src(nir_src src, enum brw_reg_type type,
+ unsigned num_components)
+{
+ dst_reg reg;
+
+ if (src.is_ssa) {
+ assert(src.ssa != NULL);
+ reg = nir_ssa_values[src.ssa->index];
+ }
+ else {
+ reg = dst_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset,
+ src.reg.indirect);
+ }
+
+ reg = retype(reg, type);
+
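+   /* Restrict the swizzle to the number of components the caller actually
+    * reads.
+    */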
+ src_reg reg_as_src = src_reg(reg);
+ reg_as_src.swizzle = brw_swizzle_for_size(num_components);
+ return reg_as_src;
+}
+
+src_reg
+vec4_visitor::get_nir_src(nir_src src, nir_alu_type type,
+ unsigned num_components)
+{
+ return get_nir_src(src, brw_type_for_nir_type(type), num_components);
+}
+
+src_reg
+vec4_visitor::get_nir_src(nir_src src, unsigned num_components)
+{
+ /* if type is not specified, default to signed int */
+ return get_nir_src(src, nir_type_int, num_components);
+}
+
+void
+vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
+{
+ dst_reg reg = dst_reg(GRF, alloc.allocate(1));
+ reg.type = BRW_REGISTER_TYPE_F;
+
+ /* @FIXME: consider emitting vector operations to save some MOVs in
+ * cases where the components are representable in 8 bits.
+    * For now, we emit a MOV for each component.
+ */
+ for (unsigned i = 0; i < instr->def.num_components; ++i) {
+ reg.writemask = 1 << i;
+ emit(MOV(reg, src_reg(instr->value.f[i])));
+ }
+
+ /* Set final writemask */
+ reg.writemask = brw_writemask_for_size(instr->def.num_components);
+
+ nir_ssa_values[instr->def.index] = reg;
+}
+
+void
+vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
+{
+ dst_reg dest;
+ src_reg src;
+
+ bool has_indirect = false;
+
+ switch (instr->intrinsic) {
+
+ case nir_intrinsic_load_input_indirect:
+ has_indirect = true;
+ /* fallthrough */
+ case nir_intrinsic_load_input: {
+ int offset = instr->const_index[0];
+ src = nir_inputs[offset];
+
+ if (has_indirect) {
+         src.reladdr = new(mem_ctx) src_reg(get_nir_src(instr->src[0],
+                                                        BRW_REGISTER_TYPE_D,
+                                                        1));
+ }
+ dest = get_nir_dest(instr->dest, src.type);
+ dest.writemask = brw_writemask_for_size(instr->num_components);
+
+ emit(MOV(dest, src));
+ break;
+ }
+
+ case nir_intrinsic_store_output_indirect:
+ has_indirect = true;
+ /* fallthrough */
+ case nir_intrinsic_store_output: {
+ int varying = instr->const_index[0];
+
+ src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
+ instr->num_components);
+ dest = dst_reg(src);
+
+ if (has_indirect) {
+ dest.reladdr = new(mem_ctx) src_reg(get_nir_src(instr->src[1],
+ BRW_REGISTER_TYPE_D,
+ 1));
+ }
+ output_reg[varying] = dest;
+ break;
+ }
+
+ case nir_intrinsic_load_vertex_id:
+ unreachable("should be lowered by lower_vertex_id()");
+
+ case nir_intrinsic_load_vertex_id_zero_base: {
+ src_reg vertex_id =
+ src_reg(nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]);
+ assert(vertex_id.file != BAD_FILE);
+ dest = get_nir_dest(instr->dest, vertex_id.type);
+ emit(MOV(dest, vertex_id));
+ break;
+ }
+
+ case nir_intrinsic_load_base_vertex: {
+ src_reg base_vertex =
+ src_reg(nir_system_values[SYSTEM_VALUE_BASE_VERTEX]);
+ assert(base_vertex.file != BAD_FILE);
+ dest = get_nir_dest(instr->dest, base_vertex.type);
+ emit(MOV(dest, base_vertex));
+ break;
+ }
+
+ case nir_intrinsic_load_instance_id: {
+ src_reg instance_id =
+ src_reg(nir_system_values[SYSTEM_VALUE_INSTANCE_ID]);
+ assert(instance_id.file != BAD_FILE);
+ dest = get_nir_dest(instr->dest, instance_id.type);
+ emit(MOV(dest, instance_id));
+ break;
+ }
+
+ case nir_intrinsic_load_uniform_indirect:
+ has_indirect = true;
+ /* fallthrough */
+ case nir_intrinsic_load_uniform: {
+ int uniform = instr->const_index[0];
+
+ dest = get_nir_dest(instr->dest);
+
+ if (has_indirect) {
+ /* Split addressing into uniform and offset */
+ int offset = uniform - nir_uniform_driver_location[uniform];
+ assert(offset >= 0);
+
+ uniform -= offset;
+ assert(uniform >= 0);
+
+ src = src_reg(dst_reg(UNIFORM, uniform));
+ src.reg_offset = offset;
+ src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1);
+ src.reladdr = new(mem_ctx) src_reg(tmp);
+ } else {
+ src = src_reg(dst_reg(UNIFORM, uniform));
+ }
+
+ emit(MOV(dest, src));
+ break;
+ }
+
+ case nir_intrinsic_atomic_counter_read:
+ case nir_intrinsic_atomic_counter_inc:
+ case nir_intrinsic_atomic_counter_dec: {
+ unsigned surf_index = prog_data->base.binding_table.abo_start +
+ (unsigned) instr->const_index[0];
+ src_reg offset = get_nir_src(instr->src[0], nir_type_int,
+ instr->num_components);
+ dest = get_nir_dest(instr->dest);
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_atomic_counter_inc:
+ emit_untyped_atomic(BRW_AOP_INC, surf_index, dest, offset,
+ src_reg(), src_reg());
+ break;
+ case nir_intrinsic_atomic_counter_dec:
+ emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dest, offset,
+ src_reg(), src_reg());
+ break;
+ case nir_intrinsic_atomic_counter_read:
+ emit_untyped_surface_read(surf_index, dest, offset);
+ break;
+ default:
+ unreachable("Unreachable");
+ }
+
+ brw_mark_surface_used(stage_prog_data, surf_index);
+ break;
+ }
+
+ case nir_intrinsic_load_ubo_indirect:
+ has_indirect = true;
+ /* fallthrough */
+ case nir_intrinsic_load_ubo: {
+ nir_const_value *const_block_index = nir_src_as_const_value(instr->src[0]);
+ src_reg surf_index;
+
+ dest = get_nir_dest(instr->dest);
+
+ if (const_block_index) {
+ /* The block index is a constant, so just emit the binding table entry
+ * as an immediate.
+ */
+ surf_index = src_reg(prog_data->base.binding_table.ubo_start +
+ const_block_index->u[0]);
+ } else {
+ /* The block index is not a constant. Evaluate the index expression
+ * per-channel and add the base UBO index; we have to select a value
+ * from any live channel.
+ */
+ surf_index = src_reg(this, glsl_type::uint_type);
+ emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], nir_type_int,
+ instr->num_components),
+ src_reg(prog_data->base.binding_table.ubo_start)));
+ surf_index = emit_uniformize(surf_index);
+
+ /* Assume this may touch any UBO. It would be nice to provide
+ * a tighter bound, but the array information is already lowered away.
+ */
+ brw_mark_surface_used(&prog_data->base,
+ prog_data->base.binding_table.ubo_start +
+ shader_prog->NumUniformBlocks - 1);
+ }
+
+ unsigned const_offset = instr->const_index[0];
+ src_reg offset;
+
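+      /* The pull constant load takes its offset in 16-byte (vec4) units,
+       * so convert the byte offset accordingly.
+       */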
+ if (!has_indirect) {
+ offset = src_reg(const_offset / 16);
+ } else {
+ offset = src_reg(this, glsl_type::uint_type);
+ emit(SHR(dst_reg(offset), get_nir_src(instr->src[1], nir_type_int, 1),
+ src_reg(4u)));
+ }
+
+ src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
+ packed_consts.type = dest.type;
+
+ emit_pull_constant_load_reg(dst_reg(packed_consts),
+ surf_index,
+ offset,
+ NULL, NULL /* before_block/inst */);
+
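+      /* The load returns a full 16-byte block; offset each swizzle
+       * component by the dword index of const_offset within that block so
+       * we read the requested components.
+       */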
+ packed_consts.swizzle = brw_swizzle_for_size(instr->num_components);
+ packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
+ const_offset % 16 / 4,
+ const_offset % 16 / 4,
+ const_offset % 16 / 4);
+
+ emit(MOV(dest, packed_consts));
+ break;
+ }
+
+ default:
+ unreachable("Unknown intrinsic");
+ }
+}
+
+static unsigned
+brw_swizzle_for_nir_swizzle(uint8_t swizzle[4])
+{
+ return BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+}
+
+static enum brw_conditional_mod
+brw_conditional_for_nir_comparison(nir_op op)
+{
+ switch (op) {
+ case nir_op_flt:
+ case nir_op_ilt:
+ case nir_op_ult:
+ return BRW_CONDITIONAL_L;
+
+ case nir_op_fge:
+ case nir_op_ige:
+ case nir_op_uge:
+ return BRW_CONDITIONAL_GE;
+
+ case nir_op_feq:
+ case nir_op_ieq:
+ case nir_op_ball_fequal2:
+ case nir_op_ball_iequal2:
+ case nir_op_ball_fequal3:
+ case nir_op_ball_iequal3:
+ case nir_op_ball_fequal4:
+ case nir_op_ball_iequal4:
+ return BRW_CONDITIONAL_Z;
+
+ case nir_op_fne:
+ case nir_op_ine:
+ case nir_op_bany_fnequal2:
+ case nir_op_bany_inequal2:
+ case nir_op_bany_fnequal3:
+ case nir_op_bany_inequal3:
+ case nir_op_bany_fnequal4:
+ case nir_op_bany_inequal4:
+ return BRW_CONDITIONAL_NZ;
+
+ default:
+ unreachable("not reached: bad operation for comparison");
+ }
+}
+
+void
+vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
+{
+ vec4_instruction *inst;
+
+ dst_reg dst = get_nir_dest(instr->dest.dest,
+ nir_op_infos[instr->op].output_type);
+ dst.writemask = instr->dest.write_mask;
+
+ src_reg op[4];
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+ op[i] = get_nir_src(instr->src[i].src,
+ nir_op_infos[instr->op].input_types[i], 4);
+ op[i].swizzle = brw_swizzle_for_nir_swizzle(instr->src[i].swizzle);
+ op[i].abs = instr->src[i].abs;
+ op[i].negate = instr->src[i].negate;
+ }
+
+ switch (instr->op) {
+ case nir_op_imov:
+ case nir_op_fmov:
+ inst = emit(MOV(dst, op[0]));
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_vec2:
+ case nir_op_vec3:
+ case nir_op_vec4:
+ unreachable("not reached: should be handled by lower_vec_to_movs()");
+
+ case nir_op_i2f:
+ case nir_op_u2f:
+ inst = emit(MOV(dst, op[0]));
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_f2i:
+ case nir_op_f2u:
+ inst = emit(MOV(dst, op[0]));
+ break;
+
+ case nir_op_fadd:
+ /* fall through */
+ case nir_op_iadd:
+ inst = emit(ADD(dst, op[0], op[1]));
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_fmul:
+ inst = emit(MUL(dst, op[0], op[1]));
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_imul: {
+ if (devinfo->gen < 8) {
+ nir_const_value *value0 = nir_src_as_const_value(instr->src[0].src);
+ nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
+
+ /* For integer multiplication, the MUL uses the low 16 bits of one of
+ * the operands (src0 through SNB, src1 on IVB and later). The MACH
+          * accumulates the contribution of the upper 16 bits of that
+ * operand. If we can determine that one of the args is in the low
+ * 16 bits, though, we can just emit a single MUL.
+ */
+ if (value0 && value0->u[0] < (1 << 16)) {
+ if (devinfo->gen < 7)
+ emit(MUL(dst, op[0], op[1]));
+ else
+ emit(MUL(dst, op[1], op[0]));
+ } else if (value1 && value1->u[0] < (1 << 16)) {
+ if (devinfo->gen < 7)
+ emit(MUL(dst, op[1], op[0]));
+ else
+ emit(MUL(dst, op[0], op[1]));
+ } else {
+ struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
+
+ emit(MUL(acc, op[0], op[1]));
+ emit(MACH(dst_null_d(), op[0], op[1]));
+ emit(MOV(dst, src_reg(acc)));
+ }
+ } else {
+ emit(MUL(dst, op[0], op[1]));
+ }
+ break;
+ }
+
+ case nir_op_imul_high:
+ case nir_op_umul_high: {
+ struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
+
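+      /* MUL and MACH compute the full 64-bit product through the
+       * accumulator; MACH writes the high 32 bits to the destination.
+       */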
+ emit(MUL(acc, op[0], op[1]));
+ emit(MACH(dst, op[0], op[1]));
+ break;
+ }
+
+ case nir_op_frcp:
+ inst = emit_math(SHADER_OPCODE_RCP, dst, op[0]);
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_fexp2:
+ inst = emit_math(SHADER_OPCODE_EXP2, dst, op[0]);
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_flog2:
+ inst = emit_math(SHADER_OPCODE_LOG2, dst, op[0]);
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_fsin:
+ inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_fcos:
+ inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_idiv:
+ case nir_op_udiv:
+ emit_math(SHADER_OPCODE_INT_QUOTIENT, dst, op[0], op[1]);
+ break;
+
+ case nir_op_umod:
+ emit_math(SHADER_OPCODE_INT_REMAINDER, dst, op[0], op[1]);
+ break;
+
+ case nir_op_ldexp:
+ unreachable("not reached: should be handled by ldexp_to_arith()");
+
+ case nir_op_fsqrt:
+ inst = emit_math(SHADER_OPCODE_SQRT, dst, op[0]);
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_frsq:
+ inst = emit_math(SHADER_OPCODE_RSQ, dst, op[0]);
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_fpow:
+ inst = emit_math(SHADER_OPCODE_POW, dst, op[0], op[1]);
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_uadd_carry: {
+ struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
+
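+      /* ADDC leaves the carry bits in the accumulator; copy them out. */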
+ emit(ADDC(dst_null_ud(), op[0], op[1]));
+ emit(MOV(dst, src_reg(acc)));
+ break;
+ }
+
+ case nir_op_usub_borrow: {
+ struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
+
+ emit(SUBB(dst_null_ud(), op[0], op[1]));
+ emit(MOV(dst, src_reg(acc)));
+ break;
+ }
+
+ case nir_op_ftrunc:
+ inst = emit(RNDZ(dst, op[0]));
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_fceil: {
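+      /* Implement ceil(x) as -floor(-x), reusing the RNDD instruction. */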
+ src_reg tmp = src_reg(this, glsl_type::float_type);
+ tmp.swizzle =
+ brw_swizzle_for_size(instr->src[0].src.is_ssa ?
+ instr->src[0].src.ssa->num_components :
+ instr->src[0].src.reg.reg->num_components);
+
+ op[0].negate = !op[0].negate;
+ emit(RNDD(dst_reg(tmp), op[0]));
+ tmp.negate = true;
+ inst = emit(MOV(dst, tmp));
+ inst->saturate = instr->dest.saturate;
+ break;
+ }
+
+ case nir_op_ffloor:
+ inst = emit(RNDD(dst, op[0]));
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_ffract:
+ inst = emit(FRC(dst, op[0]));
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_fround_even:
+ inst = emit(RNDE(dst, op[0]));
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_fmin:
+ case nir_op_imin:
+ case nir_op_umin:
+ inst = emit_minmax(BRW_CONDITIONAL_L, dst, op[0], op[1]);
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_fmax:
+ case nir_op_imax:
+ case nir_op_umax:
+ inst = emit_minmax(BRW_CONDITIONAL_GE, dst, op[0], op[1]);
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_fddx:
+ case nir_op_fddx_coarse:
+ case nir_op_fddx_fine:
+ case nir_op_fddy:
+ case nir_op_fddy_coarse:
+ case nir_op_fddy_fine:
+ unreachable("derivatives are not valid in vertex shaders");
+
+ case nir_op_flt:
+ case nir_op_ilt:
+ case nir_op_ult:
+ case nir_op_fge:
+ case nir_op_ige:
+ case nir_op_uge:
+ case nir_op_feq:
+ case nir_op_ieq:
+ case nir_op_fne:
+ case nir_op_ine:
+ emit(CMP(dst, op[0], op[1],
+ brw_conditional_for_nir_comparison(instr->op)));
+ break;
+
+ case nir_op_ball_fequal2:
+ case nir_op_ball_iequal2:
+ case nir_op_ball_fequal3:
+ case nir_op_ball_iequal3:
+ case nir_op_ball_fequal4:
+ case nir_op_ball_iequal4: {
+ dst_reg tmp = dst_reg(this, glsl_type::bool_type);
+
+ switch (instr->op) {
+ case nir_op_ball_fequal2:
+ case nir_op_ball_iequal2:
+ tmp.writemask = WRITEMASK_XY;
+ break;
+ case nir_op_ball_fequal3:
+ case nir_op_ball_iequal3:
+ tmp.writemask = WRITEMASK_XYZ;
+ break;
+ case nir_op_ball_fequal4:
+ case nir_op_ball_iequal4:
+ tmp.writemask = WRITEMASK_XYZW;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ emit(CMP(tmp, op[0], op[1],
+ brw_conditional_for_nir_comparison(instr->op)));
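+      /* Initialize the result to false, then overwrite it with ~0 (true)
+       * under a predicate requiring all compared channels to have matched.
+       */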
+ emit(MOV(dst, src_reg(0)));
+ inst = emit(MOV(dst, src_reg(~0)));
+ inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
+ break;
+ }
+
+ case nir_op_bany_fnequal2:
+ case nir_op_bany_inequal2:
+ case nir_op_bany_fnequal3:
+ case nir_op_bany_inequal3:
+ case nir_op_bany_fnequal4:
+ case nir_op_bany_inequal4: {
+ dst_reg tmp = dst_reg(this, glsl_type::bool_type);
+
+ switch (instr->op) {
+ case nir_op_bany_fnequal2:
+ case nir_op_bany_inequal2:
+ tmp.writemask = WRITEMASK_XY;
+ break;
+ case nir_op_bany_fnequal3:
+ case nir_op_bany_inequal3:
+ tmp.writemask = WRITEMASK_XYZ;
+ break;
+ case nir_op_bany_fnequal4:
+ case nir_op_bany_inequal4:
+ tmp.writemask = WRITEMASK_XYZW;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ emit(CMP(tmp, op[0], op[1],
+ brw_conditional_for_nir_comparison(instr->op)));
+
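+      /* As above, but predicated on any compared channel having differed. */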
+ emit(MOV(dst, src_reg(0)));
+ inst = emit(MOV(dst, src_reg(~0)));
+ inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+ break;
+ }
+
+ case nir_op_inot:
+ if (devinfo->gen >= 8) {
+ op[0] = resolve_source_modifiers(op[0]);
+ }
+ emit(NOT(dst, op[0]));
+ break;
+
+ case nir_op_ixor:
+ if (devinfo->gen >= 8) {
+ op[0] = resolve_source_modifiers(op[0]);
+ op[1] = resolve_source_modifiers(op[1]);
+ }
+ emit(XOR(dst, op[0], op[1]));
+ break;
+
+ case nir_op_ior:
+ if (devinfo->gen >= 8) {
+ op[0] = resolve_source_modifiers(op[0]);
+ op[1] = resolve_source_modifiers(op[1]);
+ }
+ emit(OR(dst, op[0], op[1]));
+ break;
+
+ case nir_op_iand:
+ if (devinfo->gen >= 8) {
+ op[0] = resolve_source_modifiers(op[0]);
+ op[1] = resolve_source_modifiers(op[1]);
+ }
+ emit(AND(dst, op[0], op[1]));
+ break;
+
+ case nir_op_b2i:
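+      /* Mask off everything but the low bit so the boolean becomes 0/1. */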
+ emit(AND(dst, op[0], src_reg(1)));
+ break;
+
+ case nir_op_b2f:
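+      /* A true boolean is ~0, so ANDing with the bit pattern of 1.0f
+       * (0x3f800000) yields 0.0f or 1.0f.
+       */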
+ op[0].type = BRW_REGISTER_TYPE_D;
+ dst.type = BRW_REGISTER_TYPE_D;
+ emit(AND(dst, op[0], src_reg(0x3f800000u)));
+ dst.type = BRW_REGISTER_TYPE_F;
+ break;
+
+ case nir_op_f2b:
+ emit(CMP(dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
+ break;
+
+ case nir_op_i2b:
+ emit(CMP(dst, op[0], src_reg(0), BRW_CONDITIONAL_NZ));
+ break;
+
+ case nir_op_fnoise1_1:
+ case nir_op_fnoise1_2:
+ case nir_op_fnoise1_3:
+ case nir_op_fnoise1_4:
+ case nir_op_fnoise2_1:
+ case nir_op_fnoise2_2:
+ case nir_op_fnoise2_3:
+ case nir_op_fnoise2_4:
+ case nir_op_fnoise3_1:
+ case nir_op_fnoise3_2:
+ case nir_op_fnoise3_3:
+ case nir_op_fnoise3_4:
+ case nir_op_fnoise4_1:
+ case nir_op_fnoise4_2:
+ case nir_op_fnoise4_3:
+ case nir_op_fnoise4_4:
+ unreachable("not reached: should be handled by lower_noise");
+
+ case nir_op_unpack_half_2x16_split_x:
+ case nir_op_unpack_half_2x16_split_y:
+ case nir_op_pack_half_2x16_split:
+ unreachable("not reached: should not occur in vertex shader");
+
+ case nir_op_unpack_snorm_2x16:
+ case nir_op_unpack_unorm_2x16:
+ case nir_op_pack_snorm_2x16:
+ case nir_op_pack_unorm_2x16:
+ unreachable("not reached: should be handled by lower_packing_builtins");
+
+ case nir_op_unpack_half_2x16:
+      /* NIR does not guarantee a meaningful swizzle beyond the bounds of a
+       * vector, and emit_unpack_half_2x16 uses the source operand in an
+       * operation with WRITEMASK_Y even though our source has only one
+       * component, so it would read incorrect data, which caused
+       * regressions in Piglit. Replicate the swizzle of the first component
+       * across the remaining components to avoid this. The vec4_visitor IR
+       * code path does not need this because the operand already has the
+       * correct swizzle.
+       */
+ op[0].swizzle = brw_compose_swizzle(BRW_SWIZZLE_XXXX, op[0].swizzle);
+ emit_unpack_half_2x16(dst, op[0]);
+ break;
+
+ case nir_op_pack_half_2x16:
+ emit_pack_half_2x16(dst, op[0]);
+ break;
+
+ case nir_op_unpack_unorm_4x8:
+ emit_unpack_unorm_4x8(dst, op[0]);
+ break;
+
+ case nir_op_pack_unorm_4x8:
+ emit_pack_unorm_4x8(dst, op[0]);
+ break;
+
+ case nir_op_unpack_snorm_4x8:
+ emit_unpack_snorm_4x8(dst, op[0]);
+ break;
+
+ case nir_op_pack_snorm_4x8:
+ emit_pack_snorm_4x8(dst, op[0]);
+ break;
+
+ case nir_op_bitfield_reverse:
+ emit(BFREV(dst, op[0]));
+ break;
+
+ case nir_op_bit_count:
+ emit(CBIT(dst, op[0]));
+ break;
+
+ case nir_op_ufind_msb:
+ case nir_op_ifind_msb: {
+ src_reg temp = src_reg(this, glsl_type::uint_type);
+
+ inst = emit(FBH(dst_reg(temp), op[0]));
+ inst->dst.writemask = WRITEMASK_XYZW;
+
+ /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
+ * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
+ * subtract the result from 31 to convert the MSB count into an LSB count.
+ */
+
+ /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
+ temp.swizzle = BRW_SWIZZLE_NOOP;
+ emit(MOV(dst, temp));
+
+ src_reg src_tmp = src_reg(dst);
+ emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ));
+
+ src_tmp.negate = true;
+ inst = emit(ADD(dst, src_tmp, src_reg(31)));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ break;
+ }
+
+ case nir_op_find_lsb:
+ emit(FBL(dst, op[0]));
+ break;
+
+ case nir_op_ubitfield_extract:
+ case nir_op_ibitfield_extract:
+ op[0] = fix_3src_operand(op[0]);
+ op[1] = fix_3src_operand(op[1]);
+ op[2] = fix_3src_operand(op[2]);
+
+ emit(BFE(dst, op[2], op[1], op[0]));
+ break;
+
+ case nir_op_bfm:
+ emit(BFI1(dst, op[0], op[1]));
+ break;
+
+ case nir_op_bfi:
+ op[0] = fix_3src_operand(op[0]);
+ op[1] = fix_3src_operand(op[1]);
+ op[2] = fix_3src_operand(op[2]);
+
+ emit(BFI2(dst, op[0], op[1], op[2]));
+ break;
+
+ case nir_op_bitfield_insert:
+ unreachable("not reached: should be handled by "
+ "lower_instructions::bitfield_insert_to_bfm_bfi");
+
+ case nir_op_fsign:
+ /* AND(val, 0x80000000) gives the sign bit.
+ *
+ * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
+ * zero.
+ */
+ emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
+
+ op[0].type = BRW_REGISTER_TYPE_UD;
+ dst.type = BRW_REGISTER_TYPE_UD;
+ emit(AND(dst, op[0], src_reg(0x80000000u)));
+
+ inst = emit(OR(dst, src_reg(dst), src_reg(0x3f800000u)));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ dst.type = BRW_REGISTER_TYPE_F;
+
+ if (instr->dest.saturate) {
+ inst = emit(MOV(dst, src_reg(dst)));
+ inst->saturate = true;
+ }
+ break;
+
+ case nir_op_isign:
+ /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
+ * -> non-negative val generates 0x00000000.
+ * Predicated OR sets 1 if val is positive.
+ */
+ emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G));
+ emit(ASR(dst, op[0], src_reg(31)));
+ inst = emit(OR(dst, src_reg(dst), src_reg(1)));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ break;
+
+ case nir_op_ishl:
+ emit(SHL(dst, op[0], op[1]));
+ break;
+
+ case nir_op_ishr:
+ emit(ASR(dst, op[0], op[1]));
+ break;
+
+ case nir_op_ushr:
+ emit(SHR(dst, op[0], op[1]));
+ break;
+
+ case nir_op_ffma:
+ op[0] = fix_3src_operand(op[0]);
+ op[1] = fix_3src_operand(op[1]);
+ op[2] = fix_3src_operand(op[2]);
+
+ inst = emit(MAD(dst, op[2], op[1], op[0]));
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_flrp:
+ inst = emit_lrp(dst, op[0], op[1], op[2]);
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_bcsel:
+ emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
+ inst = emit(BRW_OPCODE_SEL, dst, op[1], op[2]);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ break;
+
+ case nir_op_fdot2:
+ inst = emit(BRW_OPCODE_DP2, dst, op[0], op[1]);
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_fdot3:
+ inst = emit(BRW_OPCODE_DP3, dst, op[0], op[1]);
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_fdot4:
+ inst = emit(BRW_OPCODE_DP4, dst, op[0], op[1]);
+ inst->saturate = instr->dest.saturate;
+ break;
+
+ case nir_op_bany2:
+ case nir_op_bany3:
+ case nir_op_bany4: {
+ dst_reg tmp = dst_reg(this, glsl_type::bool_type);
+ tmp.writemask = brw_writemask_for_size(nir_op_infos[instr->op].input_sizes[0]);
+
+ emit(CMP(tmp, op[0], src_reg(0), BRW_CONDITIONAL_NZ));
+
+ emit(MOV(dst, src_reg(0)));
+ inst = emit(MOV(dst, src_reg(~0)));
+ inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+ break;
+ }
+
+ case nir_op_fabs:
+ case nir_op_iabs:
+ case nir_op_fneg:
+ case nir_op_ineg:
+ case nir_op_fsat:
+ unreachable("not reached: should be lowered by lower_source mods");
+
+ case nir_op_fdiv:
+ unreachable("not reached: should be lowered by DIV_TO_MUL_RCP in the compiler");
+
+ case nir_op_fmod:
+ unreachable("not reached: should be lowered by MOD_TO_FLOOR in the compiler");
+
+ case nir_op_fsub:
+ case nir_op_isub:
+ unreachable("not reached: should be handled by ir_sub_to_add_neg");
+
+ default:
+ unreachable("Unimplemented ALU operation");
+ }
+
+ /* If we need to do a boolean resolve, replace the result with -(x & 1)
+ * to sign extend the low bit to 0/~0
+ */
+ if (devinfo->gen <= 5 &&
+ (instr->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) ==
+ BRW_NIR_BOOLEAN_NEEDS_RESOLVE) {
+ dst_reg masked = dst_reg(this, glsl_type::int_type);
+ masked.writemask = dst.writemask;
+ emit(AND(masked, src_reg(dst), src_reg(1)));
+ src_reg masked_neg = src_reg(masked);
+ masked_neg.negate = true;
+ emit(MOV(retype(dst, BRW_REGISTER_TYPE_D), masked_neg));
+ }
+}
+
+void
+vec4_visitor::nir_emit_jump(nir_jump_instr *instr)
+{
+ switch (instr->type) {
+ case nir_jump_break:
+ emit(BRW_OPCODE_BREAK);
+ break;
+
+ case nir_jump_continue:
+ emit(BRW_OPCODE_CONTINUE);
+ break;
+
+ case nir_jump_return:
+ /* fall through */
+ default:
+ unreachable("unknown jump");
+ }
+}
+
+enum ir_texture_opcode
+ir_texture_opcode_for_nir_texop(nir_texop texop)
+{
+ enum ir_texture_opcode op;
+
+ switch (texop) {
+ case nir_texop_lod: op = ir_lod; break;
+ case nir_texop_query_levels: op = ir_query_levels; break;
+ case nir_texop_tex: op = ir_tex; break;
+ case nir_texop_tg4: op = ir_tg4; break;
+ case nir_texop_txb: op = ir_txb; break;
+ case nir_texop_txd: op = ir_txd; break;
+ case nir_texop_txf: op = ir_txf; break;
+ case nir_texop_txf_ms: op = ir_txf_ms; break;
+ case nir_texop_txl: op = ir_txl; break;
+ case nir_texop_txs: op = ir_txs; break;
+ default:
+ unreachable("unknown texture opcode");
+ }
+
+ return op;
+}
+
+const glsl_type *
+glsl_type_for_nir_alu_type(nir_alu_type alu_type,
+ unsigned components)
+{
+ switch (alu_type) {
+ case nir_type_float:
+ return glsl_type::vec(components);
+ case nir_type_int:
+ return glsl_type::ivec(components);
+ case nir_type_unsigned:
+ return glsl_type::uvec(components);
+ case nir_type_bool:
+ return glsl_type::bvec(components);
+ default:
+ return glsl_type::error_type;
+ }
+
+ return glsl_type::error_type;
+}
+
+void
+vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
+{
+ unsigned sampler = instr->sampler_index;
+ src_reg sampler_reg = src_reg(sampler);
+ src_reg coordinate;
+ const glsl_type *coord_type = NULL;
+ src_reg shadow_comparitor;
+ src_reg offset_value;
+ src_reg lod, lod2;
+ src_reg sample_index;
+ src_reg mcs;
+
+ const glsl_type *dest_type =
+ glsl_type_for_nir_alu_type(instr->dest_type,
+ nir_tex_instr_dest_size(instr));
+ dst_reg dest = get_nir_dest(instr->dest, instr->dest_type);
+
+ /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
+ * emitting anything other than setting up the constant result.
+ */
+ if (instr->op == nir_texop_tg4) {
+ int swiz = GET_SWZ(key->tex.swizzles[sampler], instr->component);
+ if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
+ emit(MOV(dest, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
+ return;
+ }
+ }
+
+ /* Load the texture operation sources */
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ switch (instr->src[i].src_type) {
+ case nir_tex_src_comparitor:
+ shadow_comparitor = get_nir_src(instr->src[i].src,
+ BRW_REGISTER_TYPE_F, 1);
+ break;
+
+ case nir_tex_src_coord: {
+ unsigned src_size = nir_tex_instr_src_size(instr, i);
+
+ switch (instr->op) {
+ case nir_texop_txf:
+ case nir_texop_txf_ms:
+ coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D,
+ src_size);
+ coord_type = glsl_type::ivec(src_size);
+ break;
+
+ default:
+ coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F,
+ src_size);
+ coord_type = glsl_type::vec(src_size);
+ break;
+ }
+ break;
+ }
+
+ case nir_tex_src_ddx:
+ lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F,
+ nir_tex_instr_src_size(instr, i));
+ break;
+
+ case nir_tex_src_ddy:
+ lod2 = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F,
+ nir_tex_instr_src_size(instr, i));
+ break;
+
+ case nir_tex_src_lod:
+ switch (instr->op) {
+ case nir_texop_txs:
+ case nir_texop_txf:
+ lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
+ break;
+
+ default:
+ lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F, 1);
+ break;
+ }
+ break;
+
+ case nir_tex_src_ms_index: {
+ sample_index = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
+ assert(coord_type != NULL);
+ if (devinfo->gen >= 7 &&
+ key->tex.compressed_multisample_layout_mask & (1<<sampler)) {
+ mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg);
+ } else {
+ mcs = src_reg(0u);
+ }
+ mcs = retype(mcs, BRW_REGISTER_TYPE_UD);
+ break;
+ }
+
+ case nir_tex_src_offset:
+ offset_value = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 2);
+ break;
+
+ case nir_tex_src_sampler_offset: {
+ /* The highest sampler which may be used by this operation is
+ * the last element of the array. Mark it here, because the generator
+ * doesn't have enough information to determine the bound.
+ */
+ uint32_t array_size = instr->sampler_array_size;
+ uint32_t max_used = sampler + array_size - 1;
+ if (instr->op == nir_texop_tg4) {
+ max_used += prog_data->base.binding_table.gather_texture_start;
+ } else {
+ max_used += prog_data->base.binding_table.texture_start;
+ }
+
+ brw_mark_surface_used(&prog_data->base, max_used);
+
+ /* Emit code to evaluate the actual indexing expression */
+ src_reg src = get_nir_src(instr->src[i].src, 1);
+ src_reg temp(this, glsl_type::uint_type);
+ emit(ADD(dst_reg(temp), src, src_reg(sampler)));
+ sampler_reg = emit_uniformize(temp);
+ break;
+ }
+
+ case nir_tex_src_projector:
+ unreachable("Should be lowered by do_lower_texture_projection");
+
+ case nir_tex_src_bias:
+ unreachable("LOD bias is not valid for vertex shaders.\n");
+
+ default:
+ unreachable("unknown texture source");
+ }
+ }
+
+ uint32_t constant_offset = 0;
+ for (unsigned i = 0; i < 3; i++) {
+ if (instr->const_offset[i] != 0) {
+ constant_offset = brw_texture_offset(instr->const_offset, 3);
+ break;
+ }
+ }
+
+ /* Stuff the channel select bits in the top of the texture offset */
+ if (instr->op == nir_texop_tg4)
+ constant_offset |= gather_channel(instr->component, sampler) << 16;
+
+ ir_texture_opcode op = ir_texture_opcode_for_nir_texop(instr->op);
+
+ bool is_cube_array =
+ instr->op == nir_texop_txs &&
+ instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
+ instr->is_array;
+
+ emit_texture(op, dest, dest_type, coordinate, instr->coord_components,
+ shadow_comparitor,
+ lod, lod2, sample_index,
+ constant_offset, offset_value,
+ mcs, is_cube_array, sampler, sampler_reg);
+}
+
+}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index 555c42e2f24..617c9889cad 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -280,15 +280,15 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
*/
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
for (unsigned int i = 0; i < 3; i++) {
- if (inst->src[i].file == GRF) {
- spill_costs[inst->src[i].reg] += loop_scale;
+ if (inst->src[i].file == GRF) {
+ spill_costs[inst->src[i].reg] += loop_scale;
if (inst->src[i].reladdr)
no_spill[inst->src[i].reg] = true;
- }
+ }
}
if (inst->dst.file == GRF) {
- spill_costs[inst->dst.reg] += loop_scale;
+ spill_costs[inst->dst.reg] += loop_scale;
if (inst->dst.reladdr)
no_spill[inst->dst.reg] = true;
}
@@ -296,12 +296,12 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
switch (inst->opcode) {
case BRW_OPCODE_DO:
- loop_scale *= 10;
- break;
+ loop_scale *= 10;
+ break;
case BRW_OPCODE_WHILE:
- loop_scale /= 10;
- break;
+ loop_scale /= 10;
+ break;
case SHADER_OPCODE_GEN4_SCRATCH_READ:
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
@@ -309,12 +309,12 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
if (inst->src[i].file == GRF)
no_spill[inst->src[i].reg] = true;
}
- if (inst->dst.file == GRF)
- no_spill[inst->dst.reg] = true;
- break;
+ if (inst->dst.file == GRF)
+ no_spill[inst->dst.reg] = true;
+ break;
default:
- break;
+ break;
}
}
}
@@ -339,7 +339,7 @@ void
vec4_visitor::spill_reg(int spill_reg_nr)
{
assert(alloc.sizes[spill_reg_nr] == 1);
- unsigned int spill_offset = c->last_scratch++;
+ unsigned int spill_offset = last_scratch++;
/* Generate spill/unspill instructions for the objects being spilled. */
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 236fa51f92c..20b628e9192 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -287,7 +287,7 @@ vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements
}
src_reg
-vec4_visitor::fix_3src_operand(src_reg src)
+vec4_visitor::fix_3src_operand(const src_reg &src)
{
/* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
* able to use vertical stride of zero to replicate the vec4 uniform, like
@@ -313,7 +313,20 @@ vec4_visitor::fix_3src_operand(src_reg src)
}
src_reg
-vec4_visitor::fix_math_operand(src_reg src)
+vec4_visitor::resolve_source_modifiers(const src_reg &src)
+{
+ if (!src.abs && !src.negate)
+ return src;
+
+ dst_reg resolved = dst_reg(this, glsl_type::ivec4_type);
+ resolved.type = src.type;
+ emit(MOV(resolved, src));
+
+ return src_reg(resolved);
+}
+
+src_reg
+vec4_visitor::fix_math_operand(const src_reg &src)
{
if (devinfo->gen < 6 || devinfo->gen >= 8 || src.file == BAD_FILE)
return src;
@@ -338,7 +351,7 @@ vec4_visitor::fix_math_operand(src_reg src)
return src_reg(expanded);
}
-void
+vec4_instruction *
vec4_visitor::emit_math(enum opcode opcode,
const dst_reg &dst,
const src_reg &src0, const src_reg &src1)
@@ -350,11 +363,13 @@ vec4_visitor::emit_math(enum opcode opcode,
/* MATH on Gen6 must be align1, so we can't do writemasks. */
math->dst = dst_reg(this, glsl_type::vec4_type);
math->dst.type = dst.type;
- emit(MOV(dst, src_reg(math->dst)));
+ math = emit(MOV(dst, src_reg(math->dst)));
} else if (devinfo->gen < 6) {
math->base_mrf = 1;
math->mlen = src1.file == BAD_FILE ? 1 : 2;
}
+
+ return math;
}
void
@@ -572,9 +587,18 @@ vec4_visitor::visit_instructions(const exec_list *list)
}
}
-
-static int
-type_size(const struct glsl_type *type)
+/**
+ * Returns the minimum number of vec4 elements needed to pack a type.
+ *
+ * For simple types, it will return 1 (a single vec4); for matrices, the
+ * number of columns; for arrays and structs, the sum of the vec4_size of
+ * each of their elements; and for samplers and atomics, zero.
+ *
+ * This method is useful to calculate how much register space is needed to
+ * store a particular type.
+ */
+int
+vec4_visitor::type_size(const struct glsl_type *type)
{
unsigned int i;
int size;
@@ -603,6 +627,9 @@ type_size(const struct glsl_type *type)
size += type_size(type->fields.structure[i].type);
}
return size;
+ case GLSL_TYPE_SUBROUTINE:
+ return 1;
+
case GLSL_TYPE_SAMPLER:
/* Samplers take up no register space, since they're baked in at
* link time.
@@ -611,6 +638,7 @@ type_size(const struct glsl_type *type)
case GLSL_TYPE_ATOMIC_UINT:
return 0;
case GLSL_TYPE_IMAGE:
+ return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
case GLSL_TYPE_VOID:
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_ERROR:
@@ -627,7 +655,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
init();
this->file = GRF;
- this->reg = v->alloc.allocate(type_size(type));
+ this->reg = v->alloc.allocate(v->type_size(type));
if (type->is_array() || type->is_record()) {
this->swizzle = BRW_SWIZZLE_NOOP;
@@ -645,7 +673,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
init();
this->file = GRF;
- this->reg = v->alloc.allocate(type_size(type) * size);
+ this->reg = v->alloc.allocate(v->type_size(type) * size);
this->swizzle = BRW_SWIZZLE_NOOP;
@@ -657,7 +685,7 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
init();
this->file = GRF;
- this->reg = v->alloc.allocate(type_size(type));
+ this->reg = v->alloc.allocate(v->type_size(type));
if (type->is_array() || type->is_record()) {
this->writemask = WRITEMASK_XYZW;
@@ -668,6 +696,21 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
this->type = brw_type_for_base_type(type);
}
+void
+vec4_visitor::setup_vector_uniform_values(const gl_constant_value *values,
+ unsigned n)
+{
+ static const gl_constant_value zero = { 0 };
+
+ for (unsigned i = 0; i < n; ++i)
+ stage_prog_data->param[4 * uniforms + i] = &values[i];
+
+ for (unsigned i = n; i < 4; ++i)
+ stage_prog_data->param[4 * uniforms + i] = &zero;
+
+ uniform_vector_size[uniforms++] = n;
+}
+
/* Our support for uniforms is piggy-backed on the struct
* gl_fragment_program, because that's where the values actually
* get stored, rather than in some global gl_shader_program uniform
@@ -697,26 +740,13 @@ vec4_visitor::setup_uniform_values(ir_variable *ir)
continue;
}
- gl_constant_value *components = storage->storage;
- unsigned vector_count = (MAX2(storage->array_elements, 1) *
- storage->type->matrix_columns);
-
- for (unsigned s = 0; s < vector_count; s++) {
- assert(uniforms < uniform_array_size);
- uniform_vector_size[uniforms] = storage->type->vector_elements;
-
- int i;
- for (i = 0; i < uniform_vector_size[uniforms]; i++) {
- stage_prog_data->param[uniforms * 4 + i] = components;
- components++;
- }
- for (; i < 4; i++) {
- static gl_constant_value zero = { 0.0 };
- stage_prog_data->param[uniforms * 4 + i] = &zero;
- }
+ const unsigned vector_count = (MAX2(storage->array_elements, 1) *
+ storage->type->matrix_columns);
+ const unsigned vector_size = storage->type->vector_elements;
- uniforms++;
- }
+ for (unsigned s = 0; s < vector_count; s++)
+ setup_vector_uniform_values(&storage->storage[s * vector_size],
+ vector_size);
}
}
@@ -1043,8 +1073,6 @@ vec4_visitor::visit(ir_variable *ir)
for (int i = 0; i < type_size(ir->type); i++) {
output_reg[ir->data.location + i] = *reg;
output_reg[ir->data.location + i].reg_offset = i;
- output_reg[ir->data.location + i].type =
- brw_type_for_base_type(ir->type->get_scalar_type());
output_reg_annotation[ir->data.location + i] = ir->name;
}
break;
@@ -1064,7 +1092,7 @@ vec4_visitor::visit(ir_variable *ir)
* Some uniforms, such as samplers and atomic counters, have no actual
* storage, so we should ignore them.
*/
- if (ir->is_in_uniform_block() || type_size(ir->type) == 0)
+ if (ir->is_in_buffer_block() || type_size(ir->type) == 0)
return;
/* Track how big the whole uniform variable is, in case we need to put a
@@ -1081,7 +1109,7 @@ vec4_visitor::visit(ir_variable *ir)
break;
case ir_var_system_value:
- reg = make_reg_for_system_value(ir);
+ reg = make_reg_for_system_value(ir->data.location, ir->type);
break;
default:
@@ -1253,7 +1281,7 @@ vec4_visitor::try_emit_b2f_of_compare(ir_expression *ir)
return true;
}
-void
+vec4_instruction *
vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
src_reg src0, src_reg src1)
{
@@ -1268,9 +1296,11 @@ vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
inst->predicate = BRW_PREDICATE_NORMAL;
}
+
+ return inst;
}
-void
+vec4_instruction *
vec4_visitor::emit_lrp(const dst_reg &dst,
const src_reg &x, const src_reg &y, const src_reg &a)
{
@@ -1278,8 +1308,8 @@ vec4_visitor::emit_lrp(const dst_reg &dst,
/* Note that the instruction's argument order is reversed from GLSL
* and the IR.
*/
- emit(LRP(dst,
- fix_3src_operand(a), fix_3src_operand(y), fix_3src_operand(x)));
+ return emit(LRP(dst, fix_3src_operand(a), fix_3src_operand(y),
+ fix_3src_operand(x)));
} else {
/* Earlier generations don't support three source operations, so we
* need to emit x*(1-a) + y*a.
@@ -1294,7 +1324,7 @@ vec4_visitor::emit_lrp(const dst_reg &dst,
emit(MUL(y_times_a, y, a));
emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
- emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
+ return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
}
}
@@ -1375,15 +1405,19 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
emit(pull);
}
-void
-vec4_visitor::emit_uniformize(const dst_reg &dst, const src_reg &src)
+src_reg
+vec4_visitor::emit_uniformize(const src_reg &src)
{
const src_reg chan_index(this, glsl_type::uint_type);
+ const dst_reg dst = retype(dst_reg(this, glsl_type::uint_type),
+ src.type);
emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index))
->force_writemask_all = true;
emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index)
->force_writemask_all = true;
+
+ return src_reg(dst);
}
void
@@ -1555,6 +1589,10 @@ vec4_visitor::visit(ir_expression *ir)
case ir_unop_noise:
unreachable("not reached: should be handled by lower_noise");
+ case ir_unop_subroutine_to_int:
+ emit(MOV(result_dst, op[0]));
+ break;
+
case ir_binop_add:
emit(ADD(result_dst, op[0], op[1]));
break;
@@ -1602,20 +1640,13 @@ vec4_visitor::visit(ir_expression *ir)
assert(ir->type->is_integer());
emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
break;
- case ir_binop_carry: {
- struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
- emit(ADDC(dst_null_ud(), op[0], op[1]));
- emit(MOV(result_dst, src_reg(acc)));
- break;
- }
- case ir_binop_borrow: {
- struct brw_reg acc = retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD);
+ case ir_binop_carry:
+ unreachable("Should have been lowered by carry_to_arith().");
+
+ case ir_binop_borrow:
+ unreachable("Should have been lowered by borrow_to_arith().");
- emit(SUBB(dst_null_ud(), op[0], op[1]));
- emit(MOV(result_dst, src_reg(acc)));
- break;
- }
case ir_binop_mod:
/* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
assert(ir->type->is_integer());
@@ -1734,16 +1765,11 @@ vec4_visitor::visit(ir_expression *ir)
emit(MOV(result_dst, op[0]));
break;
case ir_unop_b2i:
- emit(AND(result_dst, op[0], src_reg(1)));
- break;
case ir_unop_b2f:
if (devinfo->gen <= 5) {
resolve_bool_comparison(ir->operands[0], &op[0]);
}
- op[0].type = BRW_REGISTER_TYPE_D;
- result_dst.type = BRW_REGISTER_TYPE_D;
- emit(AND(result_dst, op[0], src_reg(0x3f800000u)));
- result_dst.type = BRW_REGISTER_TYPE_F;
+ emit(MOV(result_dst, negate(op[0])));
break;
case ir_unop_f2b:
emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
@@ -1839,7 +1865,7 @@ vec4_visitor::visit(ir_expression *ir)
surf_index = src_reg(this, glsl_type::uint_type);
emit(ADD(dst_reg(surf_index), op[0],
src_reg(prog_data->base.binding_table.ubo_start)));
- emit_uniformize(dst_reg(surf_index), surf_index);
+ surf_index = emit_uniformize(surf_index);
/* Assume this may touch any UBO. It would be nice to provide
* a tighter bound, but the array information is already lowered away.
@@ -2439,6 +2465,8 @@ vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
src_reg(), src_reg());
}
+
+ brw_mark_surface_used(stage_prog_data, surf_index);
}
void
@@ -2456,7 +2484,8 @@ vec4_visitor::visit(ir_call *ir)
}
src_reg
-vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler)
+vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
+ src_reg coordinate, src_reg sampler)
{
vec4_instruction *inst =
new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS,
@@ -2483,21 +2512,21 @@ vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler
}
/* parameters are: u, v, r, lod; lod will always be zero due to api restrictions */
- int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1;
+ int coord_mask = (1 << coordinate_type->vector_elements) - 1;
int zero_mask = 0xf & ~coord_mask;
- emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
+ emit(MOV(dst_reg(MRF, param_base, coordinate_type, coord_mask),
coordinate));
- emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
+ emit(MOV(dst_reg(MRF, param_base, coordinate_type, zero_mask),
src_reg(0)));
emit(inst);
return src_reg(inst->dst);
}
-static bool
-is_high_sampler(const struct brw_device_info *devinfo, src_reg sampler)
+bool
+vec4_visitor::is_high_sampler(src_reg sampler)
{
if (devinfo->gen < 8 && !devinfo->is_haswell)
return false;
@@ -2506,6 +2535,183 @@ is_high_sampler(const struct brw_device_info *devinfo, src_reg sampler)
}
void
+vec4_visitor::emit_texture(ir_texture_opcode op,
+ dst_reg dest,
+ const glsl_type *dest_type,
+ src_reg coordinate,
+ int coord_components,
+ src_reg shadow_comparitor,
+ src_reg lod, src_reg lod2,
+ src_reg sample_index,
+ uint32_t constant_offset,
+ src_reg offset_value,
+ src_reg mcs,
+ bool is_cube_array,
+ uint32_t sampler,
+ src_reg sampler_reg)
+{
+ enum opcode opcode;
+ switch (op) {
+ case ir_tex: opcode = SHADER_OPCODE_TXL; break;
+ case ir_txl: opcode = SHADER_OPCODE_TXL; break;
+ case ir_txd: opcode = SHADER_OPCODE_TXD; break;
+ case ir_txf: opcode = SHADER_OPCODE_TXF; break;
+ case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
+ case ir_txs: opcode = SHADER_OPCODE_TXS; break;
+ case ir_tg4: opcode = offset_value.file != BAD_FILE
+ ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
+ case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
+ case ir_txb:
+ unreachable("TXB is not valid for vertex shaders.");
+ case ir_lod:
+ unreachable("LOD is not valid for vertex shaders.");
+ default:
+ unreachable("Unrecognized tex op");
+ }
+
+ vec4_instruction *inst = new(mem_ctx) vec4_instruction(
+ opcode, dst_reg(this, dest_type));
+
+ inst->offset = constant_offset;
+
+ /* The message header is necessary for:
+ * - Gen4 (always)
+ * - Gen9+ for selecting SIMD4x2
+ * - Texel offsets
+ * - Gather channel selection
+ * - Sampler indices too large to fit in a 4-bit value.
+ */
+ inst->header_size =
+ (devinfo->gen < 5 || devinfo->gen >= 9 ||
+ inst->offset != 0 || op == ir_tg4 ||
+ is_high_sampler(sampler_reg)) ? 1 : 0;
+ inst->base_mrf = 2;
+ inst->mlen = inst->header_size + 1; /* always at least one */
+ inst->dst.writemask = WRITEMASK_XYZW;
+ inst->shadow_compare = shadow_comparitor.file != BAD_FILE;
+
+ inst->src[1] = sampler_reg;
+
+ /* MRF for the first parameter */
+ int param_base = inst->base_mrf + inst->header_size;
+
+ if (op == ir_txs || op == ir_query_levels) {
+ int writemask = devinfo->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
+ emit(MOV(dst_reg(MRF, param_base, lod.type, writemask), lod));
+ } else {
+ /* Load the coordinate */
+ /* FINISHME: gl_clamp_mask and saturate */
+ int coord_mask = (1 << coord_components) - 1;
+ int zero_mask = 0xf & ~coord_mask;
+
+ emit(MOV(dst_reg(MRF, param_base, coordinate.type, coord_mask),
+ coordinate));
+
+ if (zero_mask != 0) {
+ emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask),
+ src_reg(0)));
+ }
+ /* Load the shadow comparitor */
+ if (shadow_comparitor.file != BAD_FILE && op != ir_txd && (op != ir_tg4 || offset_value.file == BAD_FILE)) {
+ emit(MOV(dst_reg(MRF, param_base + 1, shadow_comparitor.type,
+ WRITEMASK_X),
+ shadow_comparitor));
+ inst->mlen++;
+ }
+
+ /* Load the LOD info */
+ if (op == ir_tex || op == ir_txl) {
+ int mrf, writemask;
+ if (devinfo->gen >= 5) {
+ mrf = param_base + 1;
+ if (shadow_comparitor.file != BAD_FILE) {
+ writemask = WRITEMASK_Y;
+ /* mlen already incremented */
+ } else {
+ writemask = WRITEMASK_X;
+ inst->mlen++;
+ }
+ } else /* devinfo->gen == 4 */ {
+ mrf = param_base;
+ writemask = WRITEMASK_W;
+ }
+ lod.swizzle = BRW_SWIZZLE_XXXX;
+ emit(MOV(dst_reg(MRF, mrf, lod.type, writemask), lod));
+ } else if (op == ir_txf) {
+ emit(MOV(dst_reg(MRF, param_base, lod.type, WRITEMASK_W), lod));
+ } else if (op == ir_txf_ms) {
+ emit(MOV(dst_reg(MRF, param_base + 1, sample_index.type, WRITEMASK_X),
+ sample_index));
+ if (devinfo->gen >= 7) {
+ /* MCS data is in the first channel of `mcs`, but we need to get it into
+ * the .y channel of the second vec4 of params, so replicate .x across
+ * the whole vec4 and then mask off everything except .y
+ */
+ mcs.swizzle = BRW_SWIZZLE_XXXX;
+ emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type, WRITEMASK_Y),
+ mcs));
+ }
+ inst->mlen++;
+ } else if (op == ir_txd) {
+ const brw_reg_type type = lod.type;
+
+ if (devinfo->gen >= 5) {
+ lod.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
+ lod2.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
+ emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), lod));
+ emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), lod2));
+ inst->mlen++;
+
+ if (dest_type->vector_elements == 3 || shadow_comparitor.file != BAD_FILE) {
+ lod.swizzle = BRW_SWIZZLE_ZZZZ;
+ lod2.swizzle = BRW_SWIZZLE_ZZZZ;
+ emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), lod));
+ emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), lod2));
+ inst->mlen++;
+
+ if (shadow_comparitor.file != BAD_FILE) {
+ emit(MOV(dst_reg(MRF, param_base + 2,
+ shadow_comparitor.type, WRITEMASK_Z),
+ shadow_comparitor));
+ }
+ }
+ } else /* devinfo->gen == 4 */ {
+ emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), lod));
+ emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), lod2));
+ inst->mlen += 2;
+ }
+ } else if (op == ir_tg4 && offset_value.file != BAD_FILE) {
+ if (shadow_comparitor.file != BAD_FILE) {
+ emit(MOV(dst_reg(MRF, param_base, shadow_comparitor.type, WRITEMASK_W),
+ shadow_comparitor));
+ }
+
+ emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::ivec2_type, WRITEMASK_XY),
+ offset_value));
+ inst->mlen++;
+ }
+ }
+
+ emit(inst);
+
+ /* fixup num layers (z) for cube arrays: hardware returns faces * layers;
+ * spec requires layers.
+ */
+ if (op == ir_txs && is_cube_array) {
+ emit_math(SHADER_OPCODE_INT_QUOTIENT,
+ writemask(inst->dst, WRITEMASK_Z),
+ src_reg(inst->dst), src_reg(6));
+ }
+
+ if (devinfo->gen == 6 && op == ir_tg4) {
+ emit_gen6_gather_wa(key->tex.gen6_gather_wa[sampler], inst->dst);
+ }
+
+ swizzle_result(op, dest,
+ src_reg(inst->dst), sampler, dest_type);
+}
+
+void
vec4_visitor::visit(ir_texture *ir)
{
uint32_t sampler =
@@ -2535,11 +2741,9 @@ vec4_visitor::visit(ir_texture *ir)
/* Emit code to evaluate the actual indexing expression */
nonconst_sampler_index->accept(this);
- dst_reg temp(this, glsl_type::uint_type);
- emit(ADD(temp, this->result, src_reg(sampler)));
- emit_uniformize(temp, src_reg(temp));
-
- sampler_reg = src_reg(temp);
+ src_reg temp(this, glsl_type::uint_type);
+ emit(ADD(dst_reg(temp), this->result, src_reg(sampler)));
+ sampler_reg = emit_uniformize(temp);
} else {
/* Single sampler, or constant array index; the indexing expression
* is just an immediate.
@@ -2572,7 +2776,9 @@ vec4_visitor::visit(ir_texture *ir)
* generating these values may involve SEND messages that need the MRFs.
*/
src_reg coordinate;
+ int coord_components = 0;
if (ir->coordinate) {
+ coord_components = ir->coordinate->type->vector_elements;
ir->coordinate->accept(this);
coordinate = this->result;
}
@@ -2590,42 +2796,35 @@ vec4_visitor::visit(ir_texture *ir)
offset_value = src_reg(this->result);
}
- const glsl_type *lod_type = NULL, *sample_index_type = NULL;
- src_reg lod, dPdx, dPdy, sample_index, mcs;
+ src_reg lod, lod2, sample_index, mcs;
switch (ir->op) {
case ir_tex:
lod = src_reg(0.0f);
- lod_type = glsl_type::float_type;
break;
case ir_txf:
case ir_txl:
case ir_txs:
ir->lod_info.lod->accept(this);
lod = this->result;
- lod_type = ir->lod_info.lod->type;
break;
case ir_query_levels:
lod = src_reg(0);
- lod_type = glsl_type::int_type;
break;
case ir_txf_ms:
ir->lod_info.sample_index->accept(this);
sample_index = this->result;
- sample_index_type = ir->lod_info.sample_index->type;
if (devinfo->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
- mcs = emit_mcs_fetch(ir, coordinate, sampler_reg);
+ mcs = emit_mcs_fetch(ir->coordinate->type, coordinate, sampler_reg);
else
mcs = src_reg(0u);
break;
case ir_txd:
ir->lod_info.grad.dPdx->accept(this);
- dPdx = this->result;
+ lod = this->result;
ir->lod_info.grad.dPdy->accept(this);
- dPdy = this->result;
-
- lod_type = ir->lod_info.grad.dPdx->type;
+ lod2 = this->result;
break;
case ir_txb:
case ir_lod:
@@ -2633,175 +2832,31 @@ vec4_visitor::visit(ir_texture *ir)
break;
}
- enum opcode opcode;
- switch (ir->op) {
- case ir_tex: opcode = SHADER_OPCODE_TXL; break;
- case ir_txl: opcode = SHADER_OPCODE_TXL; break;
- case ir_txd: opcode = SHADER_OPCODE_TXD; break;
- case ir_txf: opcode = SHADER_OPCODE_TXF; break;
- case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
- case ir_txs: opcode = SHADER_OPCODE_TXS; break;
- case ir_tg4: opcode = has_nonconstant_offset
- ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
- case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
- case ir_txb:
- unreachable("TXB is not valid for vertex shaders.");
- case ir_lod:
- unreachable("LOD is not valid for vertex shaders.");
- default:
- unreachable("Unrecognized tex op");
- }
-
- vec4_instruction *inst = new(mem_ctx) vec4_instruction(
- opcode, dst_reg(this, ir->type));
-
+ uint32_t constant_offset = 0;
if (ir->offset != NULL && !has_nonconstant_offset) {
- inst->offset =
+ constant_offset =
brw_texture_offset(ir->offset->as_constant()->value.i,
ir->offset->type->vector_elements);
}
/* Stuff the channel select bits in the top of the texture offset */
if (ir->op == ir_tg4)
- inst->offset |= gather_channel(ir, sampler) << 16;
-
- /* The message header is necessary for:
- * - Gen4 (always)
- * - Gen9+ for selecting SIMD4x2
- * - Texel offsets
- * - Gather channel selection
- * - Sampler indices too large to fit in a 4-bit value.
- */
- inst->header_size =
- (devinfo->gen < 5 || devinfo->gen >= 9 ||
- inst->offset != 0 || ir->op == ir_tg4 ||
- is_high_sampler(devinfo, sampler_reg)) ? 1 : 0;
- inst->base_mrf = 2;
- inst->mlen = inst->header_size + 1; /* always at least one */
- inst->dst.writemask = WRITEMASK_XYZW;
- inst->shadow_compare = ir->shadow_comparitor != NULL;
-
- inst->src[1] = sampler_reg;
-
- /* MRF for the first parameter */
- int param_base = inst->base_mrf + inst->header_size;
-
- if (ir->op == ir_txs || ir->op == ir_query_levels) {
- int writemask = devinfo->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
- emit(MOV(dst_reg(MRF, param_base, lod_type, writemask), lod));
- } else {
- /* Load the coordinate */
- /* FINISHME: gl_clamp_mask and saturate */
- int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1;
- int zero_mask = 0xf & ~coord_mask;
-
- emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
- coordinate));
-
- if (zero_mask != 0) {
- emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
- src_reg(0)));
- }
- /* Load the shadow comparitor */
- if (ir->shadow_comparitor && ir->op != ir_txd && (ir->op != ir_tg4 || !has_nonconstant_offset)) {
- emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
- WRITEMASK_X),
- shadow_comparitor));
- inst->mlen++;
- }
+ constant_offset |=
+ gather_channel(ir->lod_info.component->as_constant()->value.i[0],
+ sampler) << 16;
- /* Load the LOD info */
- if (ir->op == ir_tex || ir->op == ir_txl) {
- int mrf, writemask;
- if (devinfo->gen >= 5) {
- mrf = param_base + 1;
- if (ir->shadow_comparitor) {
- writemask = WRITEMASK_Y;
- /* mlen already incremented */
- } else {
- writemask = WRITEMASK_X;
- inst->mlen++;
- }
- } else /* devinfo->gen == 4 */ {
- mrf = param_base;
- writemask = WRITEMASK_W;
- }
- emit(MOV(dst_reg(MRF, mrf, lod_type, writemask), lod));
- } else if (ir->op == ir_txf) {
- emit(MOV(dst_reg(MRF, param_base, lod_type, WRITEMASK_W), lod));
- } else if (ir->op == ir_txf_ms) {
- emit(MOV(dst_reg(MRF, param_base + 1, sample_index_type, WRITEMASK_X),
- sample_index));
- if (devinfo->gen >= 7) {
- /* MCS data is in the first channel of `mcs`, but we need to get it into
- * the .y channel of the second vec4 of params, so replicate .x across
- * the whole vec4 and then mask off everything except .y
- */
- mcs.swizzle = BRW_SWIZZLE_XXXX;
- emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type, WRITEMASK_Y),
- mcs));
- }
- inst->mlen++;
- } else if (ir->op == ir_txd) {
- const glsl_type *type = lod_type;
+ glsl_type const *type = ir->sampler->type;
+ bool is_cube_array = type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
+ type->sampler_array;
- if (devinfo->gen >= 5) {
- dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
- dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
- emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx));
- emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
- inst->mlen++;
-
- if (ir->type->vector_elements == 3 || ir->shadow_comparitor) {
- dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
- dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
- emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
- emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
- inst->mlen++;
-
- if (ir->shadow_comparitor) {
- emit(MOV(dst_reg(MRF, param_base + 2,
- ir->shadow_comparitor->type, WRITEMASK_Z),
- shadow_comparitor));
- }
- }
- } else /* devinfo->gen == 4 */ {
- emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
- emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
- inst->mlen += 2;
- }
- } else if (ir->op == ir_tg4 && has_nonconstant_offset) {
- if (ir->shadow_comparitor) {
- emit(MOV(dst_reg(MRF, param_base, ir->shadow_comparitor->type, WRITEMASK_W),
- shadow_comparitor));
- }
-
- emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::ivec2_type, WRITEMASK_XY),
- offset_value));
- inst->mlen++;
- }
- }
-
- emit(inst);
-
- /* fixup num layers (z) for cube arrays: hardware returns faces * layers;
- * spec requires layers.
- */
- if (ir->op == ir_txs) {
- glsl_type const *type = ir->sampler->type;
- if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
- type->sampler_array) {
- emit_math(SHADER_OPCODE_INT_QUOTIENT,
- writemask(inst->dst, WRITEMASK_Z),
- src_reg(inst->dst), src_reg(6));
- }
- }
-
- if (devinfo->gen == 6 && ir->op == ir_tg4) {
- emit_gen6_gather_wa(key->tex.gen6_gather_wa[sampler], inst->dst);
- }
+ this->result = src_reg(this, ir->type);
+ dst_reg dest = dst_reg(this->result);
- swizzle_result(ir, src_reg(inst->dst), sampler);
+ emit_texture(ir->op, dest, ir->type, coordinate, coord_components,
+ shadow_comparitor,
+ lod, lod2, sample_index,
+ constant_offset, offset_value,
+ mcs, is_cube_array, sampler, sampler_reg);
}
/**
@@ -2835,10 +2890,9 @@ vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
* Set up the gather channel based on the swizzle, for gather4.
*/
uint32_t
-vec4_visitor::gather_channel(ir_texture *ir, uint32_t sampler)
+vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler)
{
- ir_constant *chan = ir->lod_info.component->as_constant();
- int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]);
+ int swiz = GET_SWZ(key->tex.swizzles[sampler], gather_component);
switch (swiz) {
case SWIZZLE_X: return 0;
case SWIZZLE_Y:
@@ -2856,22 +2910,23 @@ vec4_visitor::gather_channel(ir_texture *ir, uint32_t sampler)
}
void
-vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, uint32_t sampler)
+vec4_visitor::swizzle_result(ir_texture_opcode op, dst_reg dest,
+ src_reg orig_val, uint32_t sampler,
+ const glsl_type *dest_type)
{
int s = key->tex.swizzles[sampler];
- this->result = src_reg(this, ir->type);
- dst_reg swizzled_result(this->result);
+ dst_reg swizzled_result = dest;
- if (ir->op == ir_query_levels) {
+ if (op == ir_query_levels) {
/* # levels is in .w */
orig_val.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
emit(MOV(swizzled_result, orig_val));
return;
}
- if (ir->op == ir_txs || ir->type == glsl_type::float_type
- || s == SWIZZLE_NOOP || ir->op == ir_tg4) {
+ if (op == ir_txs || dest_type == glsl_type::float_type
+ || s == SWIZZLE_NOOP || op == ir_tg4) {
emit(MOV(swizzled_result, orig_val));
return;
}
@@ -2954,12 +3009,25 @@ vec4_visitor::visit(ir_if *ir)
}
void
+vec4_visitor::gs_emit_vertex(int stream_id)
+{
+ unreachable("not reached");
+}
+
+void
vec4_visitor::visit(ir_emit_vertex *)
{
unreachable("not reached");
}
void
+vec4_visitor::gs_end_primitive()
+{
+ unreachable("not reached");
+}
+
+
+void
vec4_visitor::visit(ir_end_primitive *)
{
unreachable("not reached");
@@ -3094,6 +3162,7 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
vec4_instruction *inst;
inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6)));
inst->predicate = BRW_PREDICATE_NORMAL;
+ output_reg[BRW_VARYING_SLOT_NDC].type = BRW_REGISTER_TYPE_F;
inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)));
inst->predicate = BRW_PREDICATE_NORMAL;
}
@@ -3106,18 +3175,23 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg)
if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
dst_reg reg_w = reg;
reg_w.writemask = WRITEMASK_W;
- emit(MOV(reg_w, src_reg(output_reg[VARYING_SLOT_PSIZ])));
+ src_reg reg_as_src = src_reg(output_reg[VARYING_SLOT_PSIZ]);
+ reg_as_src.type = reg_w.type;
+ reg_as_src.swizzle = brw_swizzle_for_size(1);
+ emit(MOV(reg_w, reg_as_src));
}
if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) {
dst_reg reg_y = reg;
reg_y.writemask = WRITEMASK_Y;
reg_y.type = BRW_REGISTER_TYPE_D;
+ output_reg[VARYING_SLOT_LAYER].type = reg_y.type;
emit(MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER])));
}
if (prog_data->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
dst_reg reg_z = reg;
reg_z.writemask = WRITEMASK_Z;
reg_z.type = BRW_REGISTER_TYPE_D;
+ output_reg[VARYING_SLOT_VIEWPORT].type = reg_z.type;
emit(MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT])));
}
}
@@ -3155,8 +3229,8 @@ vec4_visitor::emit_clip_distances(dst_reg reg, int offset)
vec4_instruction *
vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
{
- assert (varying < VARYING_SLOT_MAX);
- reg.type = output_reg[varying].type;
+ assert(varying < VARYING_SLOT_MAX);
+ assert(output_reg[varying].type == reg.type);
current_annotation = output_reg_annotation[varying];
/* Copy the register, saturating if necessary */
return emit(MOV(reg, src_reg(output_reg[varying])));
@@ -3166,6 +3240,7 @@ void
vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
{
reg.type = BRW_REGISTER_TYPE_F;
+ output_reg[varying].type = reg.type;
switch (varying) {
case VARYING_SLOT_PSIZ:
@@ -3422,7 +3497,8 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst,
dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
inst->dst.writemask));
vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
- write->predicate = inst->predicate;
+ if (inst->opcode != BRW_OPCODE_SEL)
+ write->predicate = inst->predicate;
write->ir = inst->ir;
write->annotation = inst->annotation;
inst->insert_after(block, write);
@@ -3485,16 +3561,16 @@ vec4_visitor::move_grf_array_access_to_scratch()
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
if (inst->dst.file == GRF && inst->dst.reladdr) {
if (scratch_loc[inst->dst.reg] == -1) {
- scratch_loc[inst->dst.reg] = c->last_scratch;
- c->last_scratch += this->alloc.sizes[inst->dst.reg];
+ scratch_loc[inst->dst.reg] = last_scratch;
+ last_scratch += this->alloc.sizes[inst->dst.reg];
}
for (src_reg *iter = inst->dst.reladdr;
iter->reladdr;
iter = iter->reladdr) {
if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
- scratch_loc[iter->reg] = c->last_scratch;
- c->last_scratch += this->alloc.sizes[iter->reg];
+ scratch_loc[iter->reg] = last_scratch;
+ last_scratch += this->alloc.sizes[iter->reg];
}
}
}
@@ -3504,8 +3580,8 @@ vec4_visitor::move_grf_array_access_to_scratch()
iter->reladdr;
iter = iter->reladdr) {
if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
- scratch_loc[iter->reg] = c->last_scratch;
- c->last_scratch += this->alloc.sizes[iter->reg];
+ scratch_loc[iter->reg] = last_scratch;
+ last_scratch += this->alloc.sizes[iter->reg];
}
}
}
@@ -3679,7 +3755,7 @@ vec4_visitor::resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg)
}
vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
- struct brw_vec4_compile *c,
+ void *log_data,
struct gl_program *prog,
const struct brw_vue_prog_key *key,
struct brw_vue_prog_data *prog_data,
@@ -3688,9 +3764,8 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
void *mem_ctx,
bool no_spills,
int shader_time_index)
- : backend_shader(compiler, NULL, mem_ctx,
+ : backend_shader(compiler, log_data, mem_ctx,
shader_prog, prog, &prog_data->base, stage),
- c(c),
key(key),
prog_data(prog_data),
sanity_param_count(0),
@@ -3698,7 +3773,8 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
first_non_payload_grf(0),
need_all_constants_in_pull_buffer(false),
no_spills(no_spills),
- shader_time_index(shader_time_index)
+ shader_time_index(shader_time_index),
+ last_scratch(0)
{
this->failed = false;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
index dcbd2405078..d1a72d787e7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
@@ -394,8 +394,7 @@ vec4_vs_visitor::emit_program_code()
* pull constants. Do that now.
*/
if (this->need_all_constants_in_pull_buffer) {
- const struct gl_program_parameter_list *params =
- vs_compile->vp->program.Base.Parameters;
+ const struct gl_program_parameter_list *params = vp->Base.Parameters;
unsigned i;
for (i = 0; i < params->NumParameters * 4; i++) {
stage_prog_data->pull_param[i] =
@@ -415,8 +414,7 @@ vec4_vs_visitor::setup_vp_regs()
vp_temp_regs[i] = src_reg(this, glsl_type::vec4_type);
/* PROGRAM_STATE_VAR etc. */
- struct gl_program_parameter_list *plist =
- vs_compile->vp->program.Base.Parameters;
+ struct gl_program_parameter_list *plist = vp->Base.Parameters;
for (unsigned p = 0; p < plist->NumParameters; p++) {
unsigned components = plist->Parameters[p].Size;
@@ -486,8 +484,7 @@ vec4_vs_visitor::get_vp_dst_reg(const prog_dst_register &dst)
src_reg
vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
{
- struct gl_program_parameter_list *plist =
- vs_compile->vp->program.Base.Parameters;
+ struct gl_program_parameter_list *plist = vp->Base.Parameters;
src_reg result;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
index f93062b46d0..620f652d6dc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
@@ -36,7 +36,7 @@ vec4_vs_visitor::emit_prolog()
for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
if (vs_prog_data->inputs_read & BITFIELD64_BIT(i)) {
- uint8_t wa_flags = vs_compile->key.gl_attrib_wa_flags[i];
+ uint8_t wa_flags = key->gl_attrib_wa_flags[i];
dst_reg reg(ATTR, i);
dst_reg reg_d = reg;
reg_d.type = BRW_REGISTER_TYPE_D;
@@ -143,7 +143,8 @@ vec4_vs_visitor::emit_prolog()
dst_reg *
-vec4_vs_visitor::make_reg_for_system_value(ir_variable *ir)
+vec4_vs_visitor::make_reg_for_system_value(int location,
+ const glsl_type *type)
{
/* VertexID is stored by the VF as the last vertex element, but
* we don't represent it with a flag in inputs_read, so we call
@@ -151,7 +152,7 @@ vec4_vs_visitor::make_reg_for_system_value(ir_variable *ir)
*/
dst_reg *reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
- switch (ir->data.location) {
+ switch (location) {
case SYSTEM_VALUE_BASE_VERTEX:
reg->writemask = WRITEMASK_X;
vs_prog_data->uses_vertexid = true;
@@ -212,19 +213,22 @@ vec4_vs_visitor::emit_thread_end()
vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler,
- struct brw_vs_compile *vs_compile,
+ void *log_data,
+ const struct brw_vs_prog_key *key,
struct brw_vs_prog_data *vs_prog_data,
+ struct gl_vertex_program *vp,
struct gl_shader_program *prog,
void *mem_ctx,
int shader_time_index,
bool use_legacy_snorm_formula)
- : vec4_visitor(compiler, &vs_compile->base, &vs_compile->vp->program.Base,
- &vs_compile->key.base, &vs_prog_data->base, prog,
+ : vec4_visitor(compiler, log_data,
+ &vp->Base, &key->base, &vs_prog_data->base, prog,
MESA_SHADER_VERTEX,
mem_ctx, false /* no_spills */,
shader_time_index),
- vs_compile(vs_compile),
+ key(key),
vs_prog_data(vs_prog_data),
+ vp(vp),
use_legacy_snorm_formula(use_legacy_snorm_formula)
{
}
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 6e9848fb1e9..c53cb49b612 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -94,7 +94,6 @@ brw_codegen_vs_prog(struct brw_context *brw,
{
GLuint program_size;
const GLuint *program;
- struct brw_vs_compile c;
struct brw_vs_prog_data prog_data;
struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base;
void *mem_ctx;
@@ -104,8 +103,6 @@ brw_codegen_vs_prog(struct brw_context *brw,
if (prog)
vs = prog->_LinkedShaders[MESA_SHADER_VERTEX];
- memset(&c, 0, sizeof(c));
- memcpy(&c.key, key, sizeof(*key));
memset(&prog_data, 0, sizeof(prog_data));
/* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
@@ -114,8 +111,6 @@ brw_codegen_vs_prog(struct brw_context *brw,
mem_ctx = ralloc_context(NULL);
- c.vp = vp;
-
/* Allocate the references to the uniforms that will end up in the
* prog_data associated with the compiled program, and which will be freed
* by the state cache.
@@ -126,26 +121,30 @@ brw_codegen_vs_prog(struct brw_context *brw,
* case being a float value that gets blown up to a vec4, so be
* conservative here.
*/
- param_count = vs->num_uniform_components * 4;
-
+ param_count = vs->num_uniform_components * 4 +
+ vs->NumImages * BRW_IMAGE_PARAM_SIZE;
+ stage_prog_data->nr_image_params = vs->NumImages;
} else {
param_count = vp->program.Base.Parameters->NumParameters * 4;
}
/* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
* planes as uniforms.
*/
- param_count += c.key.base.nr_userclip_plane_consts * 4;
+ param_count += key->base.nr_userclip_plane_consts * 4;
stage_prog_data->param =
rzalloc_array(NULL, const gl_constant_value *, param_count);
stage_prog_data->pull_param =
rzalloc_array(NULL, const gl_constant_value *, param_count);
+ stage_prog_data->image_param =
+ rzalloc_array(NULL, struct brw_image_param,
+ stage_prog_data->nr_image_params);
stage_prog_data->nr_params = param_count;
GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
prog_data.inputs_read = vp->program.Base.InputsRead;
- if (c.key.copy_edgeflag) {
+ if (key->copy_edgeflag) {
outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
prog_data.inputs_read |= VERT_BIT_EDGEFLAG;
}
@@ -158,7 +157,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
* coords, which would be a pain to handle.
*/
for (i = 0; i < 8; i++) {
- if (c.key.point_coord_replace & (1 << i))
+ if (key->point_coord_replace & (1 << i))
outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
}
@@ -173,7 +172,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
* distance varying slots whenever clipping is enabled, even if the vertex
* shader doesn't write to gl_ClipDistance.
*/
- if (c.key.base.userclip_active) {
+ if (key->base.userclip_active) {
outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
}
@@ -182,34 +181,28 @@ brw_codegen_vs_prog(struct brw_context *brw,
&prog_data.base.vue_map, outputs_written);
if (0) {
- _mesa_fprint_program_opt(stderr, &c.vp->program.Base, PROG_PRINT_DEBUG,
+ _mesa_fprint_program_opt(stderr, &vp->program.Base, PROG_PRINT_DEBUG,
true);
}
/* Emit GEN4 code.
*/
- program = brw_vs_emit(brw, prog, &c, &prog_data, mem_ctx, &program_size);
+ program = brw_vs_emit(brw, mem_ctx, key, &prog_data,
+ &vp->program, prog, &program_size);
if (program == NULL) {
ralloc_free(mem_ctx);
return false;
}
/* Scratch space is used for register spilling */
- if (c.base.last_scratch) {
- perf_debug("Vertex shader triggered register spilling. "
- "Try reducing the number of live vec4 values to "
- "improve performance.\n");
-
- prog_data.base.base.total_scratch
- = brw_get_scratch_size(c.base.last_scratch*REG_SIZE);
-
+ if (prog_data.base.base.total_scratch) {
brw_get_scratch_bo(brw, &brw->vs.base.scratch_bo,
prog_data.base.base.total_scratch *
brw->max_vs_threads);
}
brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
- &c.key, sizeof(c.key),
+ key, sizeof(struct brw_vs_prog_key),
program, program_size,
&prog_data, sizeof(prog_data),
&brw->vs.base.prog_offset, &brw->vs.prog_data);
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 61f9b006a58..1d9bee11c56 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -50,22 +50,16 @@
#define BRW_ATTRIB_WA_SIGN 32 /* interpret as signed in shader */
#define BRW_ATTRIB_WA_SCALE 64 /* interpret as scaled in shader */
-struct brw_vs_compile {
- struct brw_vec4_compile base;
- struct brw_vs_prog_key key;
-
- struct brw_vertex_program *vp;
-};
-
#ifdef __cplusplus
extern "C" {
#endif
const unsigned *brw_vs_emit(struct brw_context *brw,
- struct gl_shader_program *prog,
- struct brw_vs_compile *c,
- struct brw_vs_prog_data *prog_data,
void *mem_ctx,
+ const struct brw_vs_prog_key *key,
+ struct brw_vs_prog_data *prog_data,
+ struct gl_vertex_program *vp,
+ struct gl_shader_program *shader_prog,
unsigned *program_size);
void brw_vs_debug_recompile(struct brw_context *brw,
struct gl_shader_program *prog,
@@ -91,15 +85,18 @@ class vec4_vs_visitor : public vec4_visitor
{
public:
vec4_vs_visitor(const struct brw_compiler *compiler,
- struct brw_vs_compile *vs_compile,
+ void *log_data,
+ const struct brw_vs_prog_key *key,
struct brw_vs_prog_data *vs_prog_data,
+ struct gl_vertex_program *vp,
struct gl_shader_program *prog,
void *mem_ctx,
int shader_time_index,
bool use_legacy_snorm_formula);
protected:
- virtual dst_reg *make_reg_for_system_value(ir_variable *ir);
+ virtual dst_reg *make_reg_for_system_value(int location,
+ const glsl_type *type);
virtual void setup_payload();
virtual void emit_prolog();
virtual void emit_program_code();
@@ -113,8 +110,9 @@ private:
dst_reg get_vp_dst_reg(const prog_dst_register &dst);
src_reg get_vp_src_reg(const prog_src_register &src);
- struct brw_vs_compile * const vs_compile;
+ const struct brw_vs_prog_key *const key;
struct brw_vs_prog_data * const vs_prog_data;
+ struct gl_vertex_program *const vp;
src_reg *vp_temp_regs;
src_reg vp_addr_reg;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index b2f91bd412b..72e37d4b467 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -191,3 +191,28 @@ const struct brw_tracked_state brw_vs_abo_surfaces = {
},
.emit = brw_upload_vs_abo_surfaces,
};
+
+static void
+brw_upload_vs_image_surfaces(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+ /* BRW_NEW_VERTEX_PROGRAM */
+ struct gl_shader_program *prog =
+ ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
+
+ if (prog) {
+ /* BRW_NEW_VS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
+ brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX],
+ &brw->vs.base, &brw->vs.prog_data->base.base);
+ }
+}
+
+const struct brw_tracked_state brw_vs_image_surfaces = {
+ .dirty = {
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_IMAGE_UNITS |
+ BRW_NEW_VERTEX_PROGRAM |
+ BRW_NEW_VS_PROG_DATA,
+ },
+ .emit = brw_upload_vs_image_surfaces,
+};
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 4619ce1080d..41266f57560 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -1,34 +1,28 @@
/*
- Copyright (C) Intel Corp. 2006. All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
- * Authors:
- * Keith Whitwell <[email protected]>
- */
-
+ * Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ * Intel funded Tungsten Graphics to
+ * develop this 3D driver.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
#include "brw_context.h"
#include "brw_wm.h"
#include "brw_state.h"
@@ -181,9 +175,12 @@ brw_codegen_wm_prog(struct brw_context *brw,
* so the shader definitely kills pixels.
*/
prog_data.uses_kill = fp->program.UsesKill || key->alpha_test_func;
-
+ prog_data.uses_omask =
+ fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
prog_data.computed_depth_mode = computed_depth_mode(&fp->program);
+ prog_data.early_fragment_tests = fs && fs->EarlyFragmentTests;
+
/* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
if (!prog)
prog_data.base.use_alt_mode = true;
@@ -194,7 +191,9 @@ brw_codegen_wm_prog(struct brw_context *brw,
*/
int param_count;
if (fs) {
- param_count = fs->num_uniform_components;
+ param_count = fs->num_uniform_components +
+ fs->NumImages * BRW_IMAGE_PARAM_SIZE;
+ prog_data.base.nr_image_params = fs->NumImages;
} else {
param_count = fp->program.Base.Parameters->NumParameters * 4;
}
@@ -204,6 +203,9 @@ brw_codegen_wm_prog(struct brw_context *brw,
rzalloc_array(NULL, const gl_constant_value *, param_count);
prog_data.base.pull_param =
rzalloc_array(NULL, const gl_constant_value *, param_count);
+ prog_data.base.image_param =
+ rzalloc_array(NULL, struct brw_image_param,
+ prog_data.base.nr_image_params);
prog_data.base.nr_params = param_count;
prog_data.barycentric_interp_modes =
@@ -349,13 +351,15 @@ static uint8_t
gen6_gather_workaround(GLenum internalformat)
{
switch (internalformat) {
- case GL_R8I: return WA_SIGN | WA_8BIT;
- case GL_R8UI: return WA_8BIT;
- case GL_R16I: return WA_SIGN | WA_16BIT;
- case GL_R16UI: return WA_16BIT;
- /* note that even though GL_R32I and GL_R32UI have format overrides
- * in the surface state, there is no shader w/a required */
- default: return 0;
+ case GL_R8I: return WA_SIGN | WA_8BIT;
+ case GL_R8UI: return WA_8BIT;
+ case GL_R16I: return WA_SIGN | WA_16BIT;
+ case GL_R16UI: return WA_16BIT;
+ default:
+ /* Note that even though GL_R32I and GL_R32UI have format overrides in
+ * the surface state, there is no shader w/a required.
+ */
+ return 0;
}
}
@@ -402,8 +406,9 @@ brw_populate_sampler_prog_key_data(struct gl_context *ctx,
key->gl_clamp_mask[2] |= 1 << s;
}
- /* gather4's channel select for green from RG32F is broken;
- * requires a shader w/a on IVB; fixable with just SCS on HSW. */
+ /* gather4's channel select for green from RG32F is broken; requires
+ * a shader w/a on IVB; fixable with just SCS on HSW.
+ */
if (brw->gen == 7 && !brw->is_haswell && prog->UsesGather) {
if (img->InternalFormat == GL_RG32F)
key->gather_channel_quirk_mask |= 1 << s;
@@ -452,13 +457,13 @@ brw_wm_state_dirty (struct brw_context *brw)
BRW_NEW_VUE_MAP_GEOM_OUT);
}
-static void brw_wm_populate_key( struct brw_context *brw,
- struct brw_wm_prog_key *key )
+static void
+brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key)
{
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_FRAGMENT_PROGRAM */
const struct brw_fragment_program *fp =
- (struct brw_fragment_program *)brw->fragment_program;
+ (struct brw_fragment_program *) brw->fragment_program;
const struct gl_program *prog = (struct gl_program *) brw->fragment_program;
GLuint lookup = 0;
GLuint line_aa;
@@ -604,7 +609,8 @@ static void brw_wm_populate_key( struct brw_context *brw,
* like GL requires. Fix that by building the alpha test into the
* shader, and we'll skip enabling the fixed function alpha test.
*/
- if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && ctx->Color.AlphaEnabled) {
+ if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
+ ctx->Color.AlphaEnabled) {
key->alpha_test_func = ctx->Color.AlphaFunc;
key->alpha_test_ref = ctx->Color.AlphaRef;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 72aad96bb6a..f13a97ce2b0 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1024,6 +1024,257 @@ const struct brw_tracked_state brw_cs_abo_surfaces = {
.emit = brw_upload_cs_abo_surfaces,
};
+static void
+brw_upload_cs_image_surfaces(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+ /* _NEW_PROGRAM */
+ struct gl_shader_program *prog =
+ ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+
+ if (prog) {
+ /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
+ brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
+ &brw->cs.base, &brw->cs.prog_data->base);
+ }
+}
+
+const struct brw_tracked_state brw_cs_image_surfaces = {
+ .dirty = {
+ .mesa = _NEW_PROGRAM,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_CS_PROG_DATA |
+ BRW_NEW_IMAGE_UNITS
+ },
+ .emit = brw_upload_cs_image_surfaces,
+};
+
+static uint32_t
+get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
+{
+ if (access == GL_WRITE_ONLY) {
+ return brw_format_for_mesa_format(format);
+ } else {
+ /* Typed surface reads support a very limited subset of the shader
+ * image formats. Translate the format into the closest one the
+ * hardware supports.
+ */
+ if ((_mesa_get_format_bytes(format) >= 16 && brw->gen <= 8) ||
+ (_mesa_get_format_bytes(format) >= 8 &&
+ (brw->gen == 7 && !brw->is_haswell)))
+ return BRW_SURFACEFORMAT_RAW;
+ else
+ return brw_format_for_mesa_format(
+ brw_lower_mesa_image_format(brw->intelScreen->devinfo, format));
+ }
+}
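A standalone restatement of the fallback condition above, shown only as a sketch; the helper name and parameters are made up for illustration, and the real code relies on brw_format_for_mesa_format() and brw_lower_mesa_image_format() instead:

    #include <stdbool.h>

    /* True when a typed surface read of the given format cannot be used and
     * the surface must be bound as BRW_SURFACEFORMAT_RAW instead: 128-bit
     * formats up to Gen8, and 64-bit formats on Ivybridge (Gen7, non-Haswell).
     */
    static bool
    image_read_needs_raw(unsigned format_bytes, int gen, bool is_haswell)
    {
       return (format_bytes >= 16 && gen <= 8) ||
              (format_bytes >= 8 && gen == 7 && !is_haswell);
    }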
+
+static void
+update_default_image_param(struct brw_context *brw,
+ struct gl_image_unit *u,
+ unsigned surface_idx,
+ struct brw_image_param *param)
+{
+ memset(param, 0, sizeof(*param));
+ param->surface_idx = surface_idx;
+ /* Set the swizzling shifts to all-ones to effectively disable swizzling;
+ * see emit_address_calculation() in brw_fs_surface_builder.cpp for a more
+ * detailed explanation of these parameters.
+ */
+ param->swizzling[0] = 0xff;
+ param->swizzling[1] = 0xff;
+}
+
+static void
+update_buffer_image_param(struct brw_context *brw,
+ struct gl_image_unit *u,
+ unsigned surface_idx,
+ struct brw_image_param *param)
+{
+ struct gl_buffer_object *obj = u->TexObj->BufferObject;
+
+ update_default_image_param(brw, u, surface_idx, param);
+
+ param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
+ param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
+}
+
+static void
+update_texture_image_param(struct brw_context *brw,
+ struct gl_image_unit *u,
+ unsigned surface_idx,
+ struct brw_image_param *param)
+{
+ struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
+
+ update_default_image_param(brw, u, surface_idx, param);
+
+ param->size[0] = minify(mt->logical_width0, u->Level);
+ param->size[1] = minify(mt->logical_height0, u->Level);
+ param->size[2] = (!u->Layered ? 1 :
+ u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
+ u->TexObj->Target == GL_TEXTURE_3D ?
+ minify(mt->logical_depth0, u->Level) :
+ mt->logical_depth0);
+
+ intel_miptree_get_image_offset(mt, u->Level, u->Layer,
+ &param->offset[0],
+ &param->offset[1]);
+
+ param->stride[0] = mt->cpp;
+ param->stride[1] = mt->pitch / mt->cpp;
+ param->stride[2] =
+ brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
+ param->stride[3] =
+ brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
+
+ if (mt->tiling == I915_TILING_X) {
+ /* An X tile is a rectangular block of 512x8 bytes. */
+ param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
+ param->tiling[1] = _mesa_logbase2(8);
+
+ if (brw->has_swizzling) {
+ /* Right shifts required to swizzle bits 9 and 10 of the memory
+ * address with bit 6.
+ */
+ param->swizzling[0] = 3;
+ param->swizzling[1] = 4;
+ }
+ } else if (mt->tiling == I915_TILING_Y) {
+ /* The layout of a Y-tiled surface in memory isn't fundamentally different
+ * from that of an X-tiled surface; we simply pretend that the surface is
+ * broken up into a number of smaller 16Bx32 tiles, each one arranged in
+ * X-major order just as in the X-tiled case.
+ */
+ param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
+ param->tiling[1] = _mesa_logbase2(32);
+
+ if (brw->has_swizzling) {
+ /* Right shift required to swizzle bit 9 of the memory address with
+ * bit 6.
+ */
+ param->swizzling[0] = 3;
+ }
+ }
+
+ /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
+ * address calculation algorithm (emit_address_calculation() in
+ * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
+ * modulus equal to the LOD.
+ */
+ param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
+ 0);
+}
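A sketch (not part of the patch) of what the swizzling[] shifts above encode for the X-tiled case, where shifts of 3 and 4 fold bits 9 and 10 of the byte address into bit 6; the function name is invented, and the 0xff "disabled" value from update_default_image_param() is not modelled here:

    #include <stdint.h>

    /* Bring bit (6 + shift) of the address down to bit position 6 and XOR it
     * in, i.e. new bit 6 = bit 6 ^ bit 9 ^ bit 10 for shifts of 3 and 4.
     */
    static inline uint32_t
    apply_bit6_swizzle(uint32_t addr, unsigned shift0, unsigned shift1)
    {
       const uint32_t bit6 = ((addr >> shift0) ^ (addr >> shift1)) & (1u << 6);
       return addr ^ bit6;
    }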
+
+static void
+update_image_surface(struct brw_context *brw,
+ struct gl_image_unit *u,
+ GLenum access,
+ unsigned surface_idx,
+ uint32_t *surf_offset,
+ struct brw_image_param *param)
+{
+ if (u->_Valid) {
+ struct gl_texture_object *obj = u->TexObj;
+ const unsigned format = get_image_format(brw, u->_ActualFormat, access);
+
+ if (obj->Target == GL_TEXTURE_BUFFER) {
+ struct intel_buffer_object *intel_obj =
+ intel_buffer_object(obj->BufferObject);
+ const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
+ _mesa_get_format_bytes(u->_ActualFormat));
+
+ brw->vtbl.emit_buffer_surface_state(
+ brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
+ format, intel_obj->Base.Size / texel_size, texel_size,
+ access != GL_READ_ONLY);
+
+ update_buffer_image_param(brw, u, surface_idx, param);
+
+ } else {
+ struct intel_texture_object *intel_obj = intel_texture_object(obj);
+ struct intel_mipmap_tree *mt = intel_obj->mt;
+
+ if (format == BRW_SURFACEFORMAT_RAW) {
+ brw->vtbl.emit_buffer_surface_state(
+ brw, surf_offset, mt->bo, mt->offset,
+ format, mt->bo->size - mt->offset, 1 /* pitch */,
+ access != GL_READ_ONLY);
+
+ } else {
+ const unsigned min_layer = obj->MinLayer + u->Layer;
+ const unsigned min_level = obj->MinLevel + u->Level;
+ const unsigned num_layers = (!u->Layered ? 1 :
+ obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
+ mt->logical_depth0);
+ const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
+ obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
+ GL_TEXTURE_2D_ARRAY : obj->Target);
+
+ brw->vtbl.emit_texture_surface_state(
+ brw, mt, target,
+ min_layer, min_layer + num_layers,
+ min_level, min_level + 1,
+ format, SWIZZLE_XYZW,
+ surf_offset, access != GL_READ_ONLY, false);
+ }
+
+ update_texture_image_param(brw, u, surface_idx, param);
+ }
+
+ } else {
+ brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
+ update_default_image_param(brw, u, surface_idx, param);
+ }
+}
+
+void
+brw_upload_image_surfaces(struct brw_context *brw,
+ struct gl_shader *shader,
+ struct brw_stage_state *stage_state,
+ struct brw_stage_prog_data *prog_data)
+{
+ struct gl_context *ctx = &brw->ctx;
+
+ if (shader && shader->NumImages) {
+ for (unsigned i = 0; i < shader->NumImages; i++) {
+ struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
+ const unsigned surf_idx = prog_data->binding_table.image_start + i;
+
+ update_image_surface(brw, u, shader->ImageAccess[i],
+ surf_idx,
+ &stage_state->surf_offset[surf_idx],
+ &prog_data->image_param[i]);
+ }
+
+ brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
+ }
+}
+
+static void
+brw_upload_wm_image_surfaces(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
+
+ if (prog) {
+ /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
+ brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
+ &brw->wm.base, &brw->wm.prog_data->base);
+ }
+}
+
+const struct brw_tracked_state brw_wm_image_surfaces = {
+ .dirty = {
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_FRAGMENT_PROGRAM |
+ BRW_NEW_FS_PROG_DATA |
+ BRW_NEW_IMAGE_UNITS
+ },
+ .emit = brw_upload_wm_image_surfaces,
+};
+
void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index b6a3d78d849..54c4a6dfdd8 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -821,7 +821,7 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context *brw,
/* 3DSTATE_DEPTH_BUFFER */
{
- intel_emit_depth_stall_flushes(brw);
+ brw_emit_depth_stall_flushes(brw);
BEGIN_BATCH(7);
/* 3DSTATE_DEPTH_BUFFER dw0 */
@@ -896,7 +896,7 @@ static void
gen6_blorp_emit_depth_disable(struct brw_context *brw,
const brw_blorp_params *params)
{
- intel_emit_depth_stall_flushes(brw);
+ brw_emit_depth_stall_flushes(brw);
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
@@ -1021,7 +1021,7 @@ gen6_blorp_exec(struct brw_context *brw,
uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
/* Emit workaround flushes when we switch from drawing to blorping. */
- intel_emit_post_sync_nonzero_flush(brw);
+ brw_emit_post_sync_nonzero_flush(brw);
gen6_emit_3dstate_multisample(brw, params->dst.num_samples);
gen6_emit_3dstate_sample_mask(brw,
diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c
index 2bfa271b527..3bab8f46ae8 100644
--- a/src/mesa/drivers/dri/i965/gen6_cc.c
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@@ -51,7 +51,7 @@ gen6_upload_blend_state(struct brw_context *brw)
* with render target 0, which will reference BLEND_STATE[0] for
* alpha test enable.
*/
- if (nr_draw_buffers == 0 && ctx->Color.AlphaEnabled)
+ if (nr_draw_buffers == 0)
nr_draw_buffers = 1;
size = sizeof(*blend) * nr_draw_buffers;
@@ -97,8 +97,8 @@ gen6_upload_blend_state(struct brw_context *brw)
rb_type != GL_UNSIGNED_NORMALIZED &&
rb_type != GL_FLOAT, "Ignoring %s logic op on %s "
"renderbuffer\n",
- _mesa_lookup_enum_by_nr(ctx->Color.LogicOp),
- _mesa_lookup_enum_by_nr(rb_type));
+ _mesa_enum_to_string(ctx->Color.LogicOp),
+ _mesa_enum_to_string(rb_type));
if (rb_type == GL_UNSIGNED_NORMALIZED) {
blend[b].blend1.logic_op_enable = 1;
blend[b].blend1.logic_op_func =
diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c b/src/mesa/drivers/dri/i965/gen6_depth_state.c
index 1df0bd47571..febd4781100 100644
--- a/src/mesa/drivers/dri/i965/gen6_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c
@@ -65,7 +65,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
*/
bool enable_hiz_ss = hiz || separate_stencil;
- intel_emit_depth_stall_flushes(brw);
+ brw_emit_depth_stall_flushes(brw);
irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
if (!irb)
@@ -73,7 +73,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
rb = (struct gl_renderbuffer*) irb;
if (rb) {
- depth = MAX2(rb->Depth, 1);
+ depth = MAX2(irb->layer_count, 1);
if (rb->TexImage)
gl_target = rb->TexImage->TexObject->Target;
}
@@ -89,6 +89,10 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
surftype = BRW_SURFACE_2D;
depth *= 6;
break;
+ case GL_TEXTURE_3D:
+ assert(mt);
+ depth = MAX2(mt->logical_depth0, 1);
+ /* fallthrough */
default:
surftype = translate_tex_target(gl_target);
break;
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
index 782687aac57..68e443d38a5 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
@@ -147,7 +147,12 @@ gen6_gs_visitor::emit_prolog()
}
void
-gen6_gs_visitor::visit(ir_emit_vertex *)
+gen6_gs_visitor::visit(ir_emit_vertex *ir)
+{
+ gs_emit_vertex(ir->stream_id());
+}
+void
+gen6_gs_visitor::gs_emit_vertex(int stream_id)
{
this->current_annotation = "gen6 emit vertex";
/* Honor max_vertex layout indication in geometry shader by ignoring any
@@ -224,6 +229,12 @@ gen6_gs_visitor::visit(ir_emit_vertex *)
void
gen6_gs_visitor::visit(ir_end_primitive *)
{
+ gs_end_primitive();
+}
+
+void
+gen6_gs_visitor::gs_end_primitive()
+{
this->current_annotation = "gen6 end primitive";
/* Calling EndPrimitive() is optional for point output. In this case we set
* the PrimEnd flag when we process EmitVertex().
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
index 27254ebb727..4cf94893261 100644
--- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
@@ -36,12 +36,14 @@ class gen6_gs_visitor : public vec4_gs_visitor
{
public:
gen6_gs_visitor(const struct brw_compiler *comp,
+ void *log_data,
struct brw_gs_compile *c,
struct gl_shader_program *prog,
void *mem_ctx,
bool no_spills,
int shader_time_index) :
- vec4_gs_visitor(comp, c, prog, mem_ctx, no_spills, shader_time_index) {}
+ vec4_gs_visitor(comp, log_data, c, prog, mem_ctx, no_spills,
+ shader_time_index) {}
protected:
virtual void assign_binding_table_offsets();
@@ -49,6 +51,8 @@ protected:
virtual void emit_thread_end();
virtual void visit(ir_emit_vertex *);
virtual void visit(ir_end_primitive *);
+ virtual void gs_emit_vertex(int stream_id);
+ virtual void gs_end_primitive();
virtual void emit_urb_write_header(int mrf);
virtual void emit_urb_write_opcode(bool complete,
int base_mrf,
diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
index 36734f598fe..8444c0c9bae 100644
--- a/src/mesa/drivers/dri/i965/gen6_multisample_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
@@ -143,12 +143,11 @@ gen6_emit_3dstate_multisample(struct brw_context *brw,
ADVANCE_BATCH();
}
-
unsigned
gen6_determine_sample_mask(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
- float coverage = 1.0;
+ float coverage = 1.0f;
float coverage_invert = false;
unsigned sample_mask = ~0u;
@@ -166,7 +165,7 @@ gen6_determine_sample_mask(struct brw_context *brw)
}
if (num_samples > 1) {
- int coverage_int = (int) (num_samples * coverage + 0.5);
+ int coverage_int = (int) (num_samples * coverage + 0.5f);
uint32_t coverage_bits = (1 << coverage_int) - 1;
if (coverage_invert)
coverage_bits ^= (1 << num_samples) - 1;
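A worked example of the coverage-to-mask conversion above, using hypothetical state (4 samples, GL_SAMPLE_COVERAGE value 0.6, invert enabled); not part of the patch:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
       const unsigned num_samples = 4;
       const float coverage = 0.6f;
       const int coverage_invert = 1;

       int coverage_int = (int) (num_samples * coverage + 0.5f);   /* 2 */
       uint32_t coverage_bits = (1u << coverage_int) - 1;          /* 0x3 */
       if (coverage_invert)
          coverage_bits ^= (1u << num_samples) - 1;                /* 0xc */

       printf("coverage bits: 0x%x\n", coverage_bits);
       return 0;
    }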
@@ -176,7 +175,6 @@ gen6_determine_sample_mask(struct brw_context *brw)
}
}
-
/**
* 3DSTATE_SAMPLE_MASK
*/
@@ -189,15 +187,14 @@ gen6_emit_3dstate_sample_mask(struct brw_context *brw, unsigned mask)
ADVANCE_BATCH();
}
-
-static void upload_multisample_state(struct brw_context *brw)
+static void
+upload_multisample_state(struct brw_context *brw)
{
/* BRW_NEW_NUM_SAMPLES */
gen6_emit_3dstate_multisample(brw, brw->num_samples);
gen6_emit_3dstate_sample_mask(brw, gen6_determine_sample_mask(brw));
}
-
const struct brw_tracked_state gen6_multisample_state = {
.dirty = {
.mesa = _NEW_MULTISAMPLE,
diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c
index ba5c944fb3d..9f4a5db3592 100644
--- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
+++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
@@ -86,7 +86,7 @@ static void
write_primitives_generated(struct brw_context *brw,
drm_intel_bo *query_bo, int stream, int idx)
{
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
if (brw->gen >= 7 && stream > 0) {
brw_store_register_mem64(brw, query_bo,
@@ -100,7 +100,7 @@ static void
write_xfb_primitives_written(struct brw_context *brw,
drm_intel_bo *bo, int stream, int idx)
{
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
if (brw->gen >= 7) {
brw_store_register_mem64(brw, bo, GEN7_SO_NUM_PRIMS_WRITTEN(stream), idx);
@@ -157,7 +157,7 @@ emit_pipeline_stat(struct brw_context *brw, drm_intel_bo *bo,
/* Emit a flush to make sure various parts of the pipeline are complete and
* we get an accurate value
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
brw_store_register_mem64(brw, bo, reg, idx);
}
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index b00517ed81e..4068f2844a2 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -383,7 +383,7 @@ upload_sf_state(struct brw_context *brw)
point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
/* Clamp to the hardware limits and convert to fixed point */
- dw4 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);
+ dw4 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3);
/*
* Window coordinates in an FBO are inverted, which means point
diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c
index be80d7bdfc5..3899ce9451f 100644
--- a/src/mesa/drivers/dri/i965/gen6_sol.c
+++ b/src/mesa/drivers/dri/i965/gen6_sol.c
@@ -292,5 +292,5 @@ brw_end_transform_feedback(struct gl_context *ctx,
* simplicity, just do a full flush.
*/
struct brw_context *brw = brw_context(ctx);
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
}
diff --git a/src/mesa/drivers/dri/i965/gen6_surface_state.c b/src/mesa/drivers/dri/i965/gen6_surface_state.c
index 03e913a0a76..39de62f2304 100644
--- a/src/mesa/drivers/dri/i965/gen6_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_surface_state.c
@@ -88,7 +88,8 @@ gen6_update_renderbuffer_surface(struct brw_context *brw,
break;
}
- const int min_array_element = layered ? 0 : irb->mt_layer;
+ const int min_array_element = irb->mt_layer;
+ assert(!layered || irb->mt_layer == 0);
surf[0] = SET_FIELD(surftype, BRW_SURFACE_TYPE) |
SET_FIELD(format, BRW_SURFACE_FORMAT);
diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c
index 107a4f24fa6..c7311fd0b03 100644
--- a/src/mesa/drivers/dri/i965/gen6_urb.c
+++ b/src/mesa/drivers/dri/i965/gen6_urb.c
@@ -120,7 +120,7 @@ gen6_upload_urb( struct brw_context *brw )
* a workaround.
*/
if (brw->urb.gs_present && !gs_present)
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
brw->urb.gs_present = gs_present;
}
diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c
index 7c8d8849f4e..11b9a360ced 100644
--- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c
@@ -101,7 +101,7 @@ gen6_upload_sf_vp(struct brw_context *brw)
}
for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) {
- double scale[3], translate[3];
+ float scale[3], translate[3];
/* _NEW_VIEWPORT */
_mesa_get_viewport_xform(ctx, i, scale, translate);
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
index 2bdc82bc895..9822dc1fe79 100644
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -645,7 +645,7 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
/* 3DSTATE_DEPTH_BUFFER */
{
- intel_emit_depth_stall_flushes(brw);
+ brw_emit_depth_stall_flushes(brw);
BEGIN_BATCH(7);
OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
@@ -696,7 +696,7 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
static void
gen7_blorp_emit_depth_disable(struct brw_context *brw)
{
- intel_emit_depth_stall_flushes(brw);
+ brw_emit_depth_stall_flushes(brw);
BEGIN_BATCH(7);
OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
@@ -794,6 +794,8 @@ gen7_blorp_exec(struct brw_context *brw,
}
depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params);
gen7_blorp_emit_depth_stencil_state_pointers(brw, depthstencil_offset);
+ if (brw->use_resource_streamer)
+ gen7_disable_hw_binding_tables(brw);
if (params->use_wm_prog) {
uint32_t wm_surf_offset_renderbuffer;
uint32_t wm_surf_offset_texture = 0;
diff --git a/src/mesa/drivers/dri/i965/gen7_disable.c b/src/mesa/drivers/dri/i965/gen7_disable.c
index 2c43cd77f07..bb509696d72 100644
--- a/src/mesa/drivers/dri/i965/gen7_disable.c
+++ b/src/mesa/drivers/dri/i965/gen7_disable.c
@@ -52,7 +52,7 @@ disable_stages(struct brw_context *brw)
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2));
- OUT_BATCH(0);
+ OUT_BATCH(brw->hw_bt_pool.next_offset);
ADVANCE_BATCH();
/* Disable the TE */
@@ -85,7 +85,7 @@ disable_stages(struct brw_context *brw)
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2));
- OUT_BATCH(0);
+ OUT_BATCH(brw->hw_bt_pool.next_offset);
ADVANCE_BATCH();
}
diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c
index 8d6d3fe1d34..497ecec8e45 100644
--- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c
@@ -59,7 +59,9 @@ upload_gs_state(struct brw_context *brw)
OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) <<
GEN6_GS_SAMPLER_COUNT_SHIFT) |
((brw->gs.prog_data->base.base.binding_table.size_bytes / 4) <<
- GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+ (brw->is_haswell && prog_data->base.nr_image_params ?
+ HSW_GS_UAV_ACCESS_ENABLE : 0));
if (brw->gs.prog_data->base.base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c
index f4f665219d6..a14d4a0c50d 100644
--- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
@@ -57,7 +57,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
return;
}
- intel_emit_depth_stall_flushes(brw);
+ brw_emit_depth_stall_flushes(brw);
irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
if (!irb)
diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c
index 4fa46a8eb97..698b3d491bc 100644
--- a/src/mesa/drivers/dri/i965/gen7_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c
@@ -220,7 +220,7 @@ upload_sf_state(struct brw_context *brw)
point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
/* Clamp to the hardware limits and convert to fixed point */
- dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);
+ dw3 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3);
/* _NEW_LIGHT */
if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c
index aec4f44bb73..41573a80a52 100644
--- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
@@ -365,7 +365,7 @@ gen7_save_primitives_written_counters(struct brw_context *brw,
}
/* Flush any drawing so that the counters have the right values. */
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
/* Emit MI_STORE_REGISTER_MEM commands to write the values. */
for (int i = 0; i < streams; i++) {
@@ -502,7 +502,7 @@ gen7_pause_transform_feedback(struct gl_context *ctx,
(struct brw_transform_feedback_object *) obj;
/* Flush any drawing so that the counters have the right values. */
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
/* Save the SOL buffer offset register values. */
if (brw->gen < 8) {
diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c
index d371c193577..69162171c4e 100644
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -228,7 +228,7 @@ gen7_upload_urb(struct brw_context *brw)
remaining_space = total_wants;
if (remaining_space > 0) {
unsigned vs_additional = (unsigned)
- round(vs_wants * (((double) remaining_space) / total_wants));
+ roundf(vs_wants * (((float) remaining_space) / total_wants));
vs_chunks += vs_additional;
remaining_space -= vs_additional;
gs_chunks += remaining_space;
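A worked example of the proportional split above, with hypothetical chunk counts; not part of the patch (compile with -lm):

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
       /* Hypothetical leftover URB space and per-stage wants, in chunks. */
       const unsigned remaining_space = 5;
       const unsigned vs_wants = 3, gs_wants = 7;
       const unsigned total_wants = vs_wants + gs_wants;

       const unsigned vs_additional = (unsigned)
          roundf(vs_wants * (((float) remaining_space) / total_wants));  /* 2 */
       const unsigned gs_additional = remaining_space - vs_additional;   /* 3 */

       printf("VS +%u chunks, GS +%u chunks\n", vs_additional, gs_additional);
       return 0;
    }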
diff --git a/src/mesa/drivers/dri/i965/gen7_viewport_state.c b/src/mesa/drivers/dri/i965/gen7_viewport_state.c
index b655205ec35..c75dc9964bf 100644
--- a/src/mesa/drivers/dri/i965/gen7_viewport_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_viewport_state.c
@@ -53,7 +53,7 @@ gen7_upload_sf_clip_viewport(struct brw_context *brw)
}
for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) {
- double scale[3], translate[3];
+ float scale[3], translate[3];
_mesa_get_viewport_xform(ctx, i, scale, translate);
/* According to the "Vertex X,Y Clamping and Quantization" section of
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index 4b17d06fa83..b7e48585482 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -62,6 +62,7 @@ gen7_upload_constant_state(struct brw_context *brw,
OUT_BATCH(active ? stage_state->push_const_size : 0);
OUT_BATCH(0);
}
+
/* Pointer to the constant buffer. Covered by the set of state flags
* from gen6_prepare_wm_contants
*/
@@ -95,15 +96,14 @@ gen7_upload_constant_state(struct brw_context *brw,
ADVANCE_BATCH();
- /* On SKL+ the new constants don't take effect until the next corresponding
- * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed so we need to ensure
- * that is sent
- */
+ /* On SKL+ the new constants don't take effect until the next corresponding
+ * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed, so we need to ensure
+ * that one is sent.
+ */
if (brw->gen >= 9)
brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}
-
static void
upload_vs_state(struct brw_context *brw)
{
@@ -111,6 +111,7 @@ upload_vs_state(struct brw_context *brw)
uint32_t floating_point_mode = 0;
const int max_threads_shift = brw->is_haswell ?
HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT;
+ const struct brw_vue_prog_data *prog_data = &brw->vs.prog_data->base;
if (!brw->is_haswell && !brw->is_baytrail)
gen7_emit_vs_workaround_flush(brw);
@@ -125,19 +126,21 @@ upload_vs_state(struct brw_context *brw)
((ALIGN(stage_state->sampler_count, 4)/4) <<
GEN6_VS_SAMPLER_COUNT_SHIFT) |
((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) <<
- GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+ (brw->is_haswell && prog_data->base.nr_image_params ?
+ HSW_VS_UAV_ACCESS_ENABLE : 0));
- if (brw->vs.prog_data->base.base.total_scratch) {
+ if (prog_data->base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(brw->vs.prog_data->base.base.total_scratch) - 11);
+ ffs(prog_data->base.total_scratch) - 11);
} else {
OUT_BATCH(0);
}
- OUT_BATCH((brw->vs.prog_data->base.base.dispatch_grf_start_reg <<
+ OUT_BATCH((prog_data->base.dispatch_grf_start_reg <<
GEN6_VS_DISPATCH_START_GRF_SHIFT) |
- (brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
+ (prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
OUT_BATCH(((brw->max_vs_threads - 1) << max_threads_shift) |
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index ea11ae845e3..fd6dab5be8b 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -83,6 +83,7 @@ upload_wm_state(struct brw_context *brw)
/* _NEW_BUFFERS | _NEW_COLOR */
if (brw_color_buffer_write_enabled(brw) || writes_depth ||
+ prog_data->base.nr_image_params ||
dw1 & GEN7_WM_KILL_ENABLE) {
dw1 |= GEN7_WM_DISPATCH_ENABLE;
}
@@ -106,6 +107,18 @@ upload_wm_state(struct brw_context *brw)
dw1 |= GEN7_WM_USES_INPUT_COVERAGE_MASK;
}
+ /* BRW_NEW_FS_PROG_DATA */
+ if (prog_data->early_fragment_tests)
+ dw1 |= GEN7_WM_EARLY_DS_CONTROL_PREPS;
+ else if (prog_data->base.nr_image_params)
+ dw1 |= GEN7_WM_EARLY_DS_CONTROL_PSEXEC;
+
+ /* _NEW_BUFFERS | _NEW_COLOR */
+ if (brw->is_haswell &&
+ !(brw_color_buffer_write_enabled(brw) || writes_depth) &&
+ prog_data->base.nr_image_params)
+ dw2 |= HSW_WM_UAV_ONLY;
+
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
OUT_BATCH(dw1);
@@ -127,7 +140,7 @@ const struct brw_tracked_state gen7_wm_state = {
.emit = upload_wm_state,
};
-void
+static void
gen7_upload_ps_state(struct brw_context *brw,
const struct gl_fragment_program *fp,
const struct brw_stage_state *stage_state,
@@ -208,6 +221,9 @@ gen7_upload_ps_state(struct brw_context *brw,
_mesa_get_min_invocations_per_fragment(ctx, fp, false);
assert(min_inv_per_frag >= 1);
+ if (brw->is_haswell && prog_data->base.nr_image_params)
+ dw4 |= HSW_PS_UAV_ACCESS_ENABLE;
+
if (prog_data->prog_offset_16 || prog_data->no_8) {
dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
if (!prog_data->no_8 && min_inv_per_frag == 1) {
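
For readers following the image load/store plumbing, the 3DSTATE_WM and 3DSTATE_PS changes in this file reduce to a small decision: force early depth/stencil when the shader declares early_fragment_tests, otherwise keep the PS executing when images are bound, and on Haswell flag UAV access (and UAV-only dispatch when nothing else is written). A condensed restatement with locally defined stand-in bits; the real GEN7_WM_*/HSW_* values live in brw_defines.h, and in the actual packets the UAV-only bit lands in a different dword than the early-DS control:

    #include <stdbool.h>
    #include <stdint.h>

    /* Stand-in bit positions for illustration only. */
    #define WM_EARLY_DS_PREPS   (2u << 21)
    #define WM_EARLY_DS_PSEXEC  (1u << 21)
    #define WM_UAV_ONLY         (1u << 30)

    struct fs_info {
       bool early_fragment_tests;    /* GLSL layout(early_fragment_tests) */
       unsigned nr_image_params;     /* number of image uniforms */
       bool color_or_depth_writes;   /* any RT or depth write enabled */
       bool is_haswell;
    };

    uint32_t
    wm_image_bits(const struct fs_info *fs)
    {
       uint32_t bits = 0;

       /* Shader-requested early tests win; otherwise run the depth test
        * early but still execute the PS so image stores happen. */
       if (fs->early_fragment_tests)
          bits |= WM_EARLY_DS_PREPS;
       else if (fs->nr_image_params)
          bits |= WM_EARLY_DS_PSEXEC;

       /* Haswell: a shader with only image side effects must still be
        * dispatched, so mark it UAV-only. */
       if (fs->is_haswell && !fs->color_or_depth_writes && fs->nr_image_params)
          bits |= WM_UAV_ONLY;

       return bits;
    }
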
diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c
index 12ac97a5d14..93100a0708f 100644
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -41,7 +41,6 @@ emit_depth_packets(struct brw_context *brw,
bool depth_writable,
struct intel_mipmap_tree *stencil_mt,
bool stencil_writable,
- uint32_t stencil_offset,
bool hiz,
uint32_t width,
uint32_t height,
@@ -57,7 +56,7 @@ emit_depth_packets(struct brw_context *brw,
return;
}
- intel_emit_depth_stall_flushes(brw);
+ brw_emit_depth_stall_flushes(brw);
/* _NEW_BUFFERS, _NEW_DEPTH, _NEW_STENCIL */
BEGIN_BATCH(8);
@@ -100,7 +99,7 @@ emit_depth_packets(struct brw_context *brw,
}
if (stencil_mt == NULL) {
- BEGIN_BATCH(5);
+ BEGIN_BATCH(5);
OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
@@ -127,8 +126,7 @@ emit_depth_packets(struct brw_context *brw,
OUT_BATCH(HSW_STENCIL_ENABLED | mocs_wb << 22 |
(2 * stencil_mt->pitch - 1));
OUT_RELOC64(stencil_mt->bo,
- I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- stencil_offset);
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
OUT_BATCH(stencil_mt ? stencil_mt->qpitch >> 2 : 0);
ADVANCE_BATCH();
}
@@ -220,7 +218,6 @@ gen8_emit_depth_stencil_hiz(struct brw_context *brw,
emit_depth_packets(brw, depth_mt, brw_depthbuffer_format(brw), surftype,
ctx->Depth.Mask != 0,
stencil_mt, ctx->Stencil._WriteEnabled,
- brw->depthstencil.stencil_offset,
hiz, width, height, depth, lod, min_array_element);
}
@@ -253,10 +250,10 @@ pma_fix_enable(const struct brw_context *brw)
*/
const bool hiz_enabled = depth_irb && intel_renderbuffer_has_hiz(depth_irb);
- /* 3DSTATE_WM::Early Depth/Stencil Control != EDSC_PREPS (2).
- * We always leave this set to EDSC_NORMAL (0).
+ /* BRW_NEW_FS_PROG_DATA:
+ * 3DSTATE_WM::Early Depth/Stencil Control != EDSC_PREPS (2).
*/
- const bool edsc_not_preps = true;
+ const bool edsc_not_preps = !brw->wm.prog_data->early_fragment_tests;
/* 3DSTATE_PS_EXTRA::PixelShaderValid is always true. */
const bool pixel_shader_valid = true;
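
The edsc_not_preps change above is the subtle one: Broadwell's PMA depth-stall fix is only allowed while 3DSTATE_WM's Early Depth/Stencil Control is not PREPS, and with image load/store the driver can now program PREPS whenever the shader requests early fragment tests. Reduced to that single term (the argument stands in for brw->wm.prog_data->early_fragment_tests):

    #include <stdbool.h>

    /* One term of the larger pma_fix_enable() conjunction; previously this
     * was hard-coded to true because EDSC was always left at NORMAL. */
    bool
    edsc_not_preps(bool early_fragment_tests)
    {
       return !early_fragment_tests;
    }
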
@@ -439,7 +436,7 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
brw_depth_format(brw, mt->format),
BRW_SURFACE_2D,
true, /* depth writes */
- NULL, false, 0, /* no stencil for now */
+ NULL, false, /* no stencil for now */
true, /* hiz */
surface_width,
surface_height,
@@ -499,7 +496,7 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
*/
brw_emit_pipe_control_write(brw,
PIPE_CONTROL_WRITE_IMMEDIATE,
- brw->batch.workaround_bo, 0, 0, 0);
+ brw->workaround_bo, 0, 0, 0);
/* Emit 3DSTATE_WM_HZ_OP again to disable the state overrides. */
BEGIN_BATCH(5);
diff --git a/src/mesa/drivers/dri/i965/gen8_disable.c b/src/mesa/drivers/dri/i965/gen8_disable.c
index da0d4a5fe7a..32508e377c9 100644
--- a/src/mesa/drivers/dri/i965/gen8_disable.c
+++ b/src/mesa/drivers/dri/i965/gen8_disable.c
@@ -66,7 +66,7 @@ disable_stages(struct brw_context *brw)
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2));
- OUT_BATCH(0);
+ OUT_BATCH(brw->hw_bt_pool.next_offset);
ADVANCE_BATCH();
/* Disable the TE */
@@ -101,7 +101,7 @@ disable_stages(struct brw_context *brw)
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2));
- OUT_BATCH(0);
+ OUT_BATCH(brw->hw_bt_pool.next_offset);
ADVANCE_BATCH();
BEGIN_BATCH(2);
diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c
index 26a02d3b045..81bd3b21778 100644
--- a/src/mesa/drivers/dri/i965/gen8_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c
@@ -52,7 +52,9 @@ gen8_upload_gs_state(struct brw_context *brw)
((ALIGN(stage_state->sampler_count, 4)/4) <<
GEN6_GS_SAMPLER_COUNT_SHIFT) |
((prog_data->base.binding_table.size_bytes / 4) <<
- GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+ (prog_data->base.nr_image_params ?
+ HSW_GS_UAV_ACCESS_ENABLE : 0));
if (brw->gs.prog_data->base.base.total_scratch) {
OUT_RELOC64(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index a88f109c691..ae18f0f162c 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -58,7 +58,11 @@ gen8_upload_ps_extra(struct brw_context *brw,
if (prog_data->uses_omask)
dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET;
- if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx))
+ if (brw->gen >= 9 && prog_data->pulls_bary)
+ dw1 |= GEN9_PSX_SHADER_PULLS_BARY;
+
+ if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx) ||
+ prog_data->base.nr_image_params)
dw1 |= GEN8_PSX_SHADER_HAS_UAV;
BEGIN_BATCH(2);
@@ -115,6 +119,12 @@ upload_wm_state(struct brw_context *brw)
dw1 |= brw->wm.prog_data->barycentric_interp_modes <<
GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
+ /* BRW_NEW_FS_PROG_DATA */
+ if (brw->wm.prog_data->early_fragment_tests)
+ dw1 |= GEN7_WM_EARLY_DS_CONTROL_PREPS;
+ else if (brw->wm.prog_data->base.nr_image_params)
+ dw1 |= GEN7_WM_EARLY_DS_CONTROL_PSEXEC;
+
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_WM << 16 | (2 - 2));
OUT_BATCH(dw1);
diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c
index c2b585d0001..6b655ee493e 100644
--- a/src/mesa/drivers/dri/i965/gen8_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c
@@ -169,7 +169,7 @@ upload_sf(struct brw_context *brw)
point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
/* Clamp to the hardware limits and convert to fixed point */
- dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);
+ dw3 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3);
/* _NEW_PROGRAM | _NEW_POINT */
if (!(ctx->VertexProgram.PointSizeEnabled || ctx->Point._Attenuated))
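
The tightened clamp above exists because the point width field is unsigned fixed point with 3 fractional bits, so 0.125 and 255.875 are exactly the smallest and largest representable widths. A quick standalone check of the conversion; CLAMP and U_FIXED are re-created here for illustration rather than taken from the Mesa headers:

    #include <stdio.h>

    #define CLAMP(x, lo, hi) ((x) < (lo) ? (lo) : ((x) > (hi) ? (hi) : (x)))
    /* Same idea as Mesa's U_FIXED: scale by 2^frac_bits and truncate. */
    #define U_FIXED(value, frac_bits) ((unsigned)((value) * (1u << (frac_bits))))

    int main(void)
    {
       const float sizes[] = { 0.0f, 1.0f, 1.9f, 300.0f };

       for (int i = 0; i < 4; i++) {
          float ps = CLAMP(sizes[i], 0.125f, 255.875f);
          /* 0.125 -> 1, 1.0 -> 8, 1.9 -> 15, 300 clamps to 255.875 -> 2047 */
          printf("%7.3f -> %u\n", ps, U_FIXED(ps, 3));
       }
       return 0;
    }
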
diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index b2d1a579815..6c4d3e197a5 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -88,12 +88,12 @@ vertical_alignment(const struct brw_context *brw,
uint32_t surf_type)
{
/* On Gen9+ vertical alignment is ignored for 1D surfaces and when
- * tr_mode is not TRMODE_NONE.
+ * tr_mode is not TRMODE_NONE. Set to an arbitrary non-reserved value.
*/
if (brw->gen > 8 &&
(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE ||
surf_type == BRW_SURFACE_1D))
- return 0;
+ return GEN8_SURFACE_VALIGN_4;
switch (mt->align_h) {
case 4:
@@ -113,12 +113,12 @@ horizontal_alignment(const struct brw_context *brw,
uint32_t surf_type)
{
/* On Gen9+ horizontal alignment is ignored when tr_mode is not
- * TRMODE_NONE.
+ * TRMODE_NONE. Set to an arbitrary non-reserved value.
*/
if (brw->gen > 8 &&
(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE ||
gen9_use_linear_1d_layout(brw, mt)))
- return 0;
+ return GEN8_SURFACE_HALIGN_4;
switch (mt->align_w) {
case 4:
@@ -401,8 +401,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
irb->mt_layer : (irb->mt_layer / MAX2(mt->num_samples, 1));
GLenum gl_target =
rb->TexImage ? rb->TexImage->TexObject->Target : GL_TEXTURE_2D;
- /* FINISHME: Use PTE MOCS on Skylake. */
- uint32_t mocs = brw->gen >= 9 ? SKL_MOCS_WT : BDW_MOCS_PTE;
+ const uint32_t mocs = brw->gen >= 9 ? SKL_MOCS_PTE : BDW_MOCS_PTE;
intel_miptree_used_for_rendering(mt);
diff --git a/src/mesa/drivers/dri/i965/gen8_viewport_state.c b/src/mesa/drivers/dri/i965/gen8_viewport_state.c
index 2d8eeb1f10f..2692ad55999 100644
--- a/src/mesa/drivers/dri/i965/gen8_viewport_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_viewport_state.c
@@ -53,7 +53,7 @@ gen8_upload_sf_clip_viewport(struct brw_context *brw)
}
for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) {
- double scale[3], translate[3];
+ float scale[3], translate[3];
_mesa_get_viewport_xform(ctx, i, scale, translate);
/* _NEW_VIEWPORT: Viewport Matrix Elements */
diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c
index 28f5adddf14..8b5048bee7e 100644
--- a/src/mesa/drivers/dri/i965/gen8_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c
@@ -53,7 +53,9 @@ upload_vs_state(struct brw_context *brw)
((ALIGN(stage_state->sampler_count, 4) / 4) <<
GEN6_VS_SAMPLER_COUNT_SHIFT) |
((prog_data->base.binding_table.size_bytes / 4) <<
- GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+ (prog_data->base.nr_image_params ?
+ HSW_VS_UAV_ACCESS_ENABLE : 0));
if (prog_data->base.total_scratch) {
OUT_RELOC64(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index ed659ed625e..85f20a05729 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -32,6 +32,8 @@
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
#include <xf86drm.h>
#include <i915_drm.h>
@@ -44,19 +46,10 @@ intel_batchbuffer_init(struct brw_context *brw)
{
intel_batchbuffer_reset(brw);
- if (brw->gen >= 6) {
- /* We can't just use brw_state_batch to get a chunk of space for
- * the gen6 workaround because it involves actually writing to
- * the buffer, and the kernel doesn't let us write to the batch.
- */
- brw->batch.workaround_bo = drm_intel_bo_alloc(brw->bufmgr,
- "pipe_control workaround",
- 4096, 4096);
- }
-
if (!brw->has_llc) {
brw->batch.cpu_map = malloc(BATCH_SZ);
brw->batch.map = brw->batch.cpu_map;
+ brw->batch.map_next = brw->batch.cpu_map;
}
}
@@ -77,12 +70,11 @@ intel_batchbuffer_reset(struct brw_context *brw)
drm_intel_bo_map(brw->batch.bo, true);
brw->batch.map = brw->batch.bo->virtual;
}
+ brw->batch.map_next = brw->batch.map;
brw->batch.reserved_space = BATCH_RESERVED;
brw->batch.state_batch_offset = brw->batch.bo->size;
- brw->batch.used = 0;
brw->batch.needs_sol_reset = false;
- brw->batch.pipe_controls_since_last_cs_stall = 0;
/* We don't know what ring the new batch will be sent to until we see the
* first BEGIN_BATCH or BEGIN_BATCH_BLT. Mark it as unknown.
@@ -93,7 +85,7 @@ intel_batchbuffer_reset(struct brw_context *brw)
void
intel_batchbuffer_save_state(struct brw_context *brw)
{
- brw->batch.saved.used = brw->batch.used;
+ brw->batch.saved.map_next = brw->batch.map_next;
brw->batch.saved.reloc_count =
drm_intel_gem_bo_get_reloc_count(brw->batch.bo);
}
@@ -103,8 +95,8 @@ intel_batchbuffer_reset_to_saved(struct brw_context *brw)
{
drm_intel_gem_bo_clear_relocs(brw->batch.bo, brw->batch.saved.reloc_count);
- brw->batch.used = brw->batch.saved.used;
- if (brw->batch.used == 0)
+ brw->batch.map_next = brw->batch.saved.map_next;
+ if (USED_BATCH(brw->batch) == 0)
brw->batch.ring = UNKNOWN_RING;
}
@@ -114,7 +106,6 @@ intel_batchbuffer_free(struct brw_context *brw)
free(brw->batch.cpu_map);
drm_intel_bo_unreference(brw->batch.last_bo);
drm_intel_bo_unreference(brw->batch.bo);
- drm_intel_bo_unreference(brw->batch.workaround_bo);
}
static void
@@ -133,7 +124,7 @@ do_batch_dump(struct brw_context *brw)
drm_intel_decode_set_batch_pointer(decode,
batch->bo->virtual,
batch->bo->offset64,
- batch->used);
+ USED_BATCH(*batch));
} else {
fprintf(stderr,
"WARNING: failed to map batchbuffer (%s), "
@@ -142,7 +133,7 @@ do_batch_dump(struct brw_context *brw)
drm_intel_decode_set_batch_pointer(decode,
batch->map,
batch->bo->offset64,
- batch->used);
+ USED_BATCH(*batch));
}
drm_intel_decode_set_output_file(decode, stderr);
@@ -218,10 +209,32 @@ brw_finish_batch(struct brw_context *brw)
*/
brw_emit_query_end(brw);
- /* We may also need to snapshot and disable OA counters. */
- if (brw->batch.ring == RENDER_RING)
+ if (brw->batch.ring == RENDER_RING) {
+ /* We may also need to snapshot and disable OA counters. */
brw_perf_monitor_finish_batch(brw);
+ if (brw->is_haswell) {
+ /* From the Haswell PRM, Volume 2b, Command Reference: Instructions,
+ * 3DSTATE_CC_STATE_POINTERS > "Note":
+ *
+ * "SW must program 3DSTATE_CC_STATE_POINTERS command at the end of every
+ * 3D batch buffer followed by a PIPE_CONTROL with RC flush and CS stall."
+ *
+ * From the example in the docs, it seems to expect a regular pipe control
+ * flush here as well. We may have done it already, but meh.
+ *
+ * See also WaAvoidRCZCounterRollover.
+ */
+ brw_emit_mi_flush(brw);
+ BEGIN_BATCH(2);
+ OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
+ OUT_BATCH(brw->cc.state_offset | 1);
+ ADVANCE_BATCH();
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH |
+ PIPE_CONTROL_CS_STALL);
+ }
+ }
+
/* Mark that the current program cache BO has been used by the GPU.
* It will be reallocated if we need to put new programs in for the
* next batch.
@@ -267,6 +280,11 @@ throttle(struct brw_context *brw)
}
}
+/* Drop when RS headers get pulled to libdrm */
+#ifndef I915_EXEC_RESOURCE_STREAMER
+#define I915_EXEC_RESOURCE_STREAMER (1<<15)
+#endif
+
/* TODO: Push this whole function into bufmgr.
*/
static int
@@ -278,7 +296,7 @@ do_flush_locked(struct brw_context *brw)
if (brw->has_llc) {
drm_intel_bo_unmap(batch->bo);
} else {
- ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
+ ret = drm_intel_bo_subdata(batch->bo, 0, 4 * USED_BATCH(*batch), batch->map);
if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
ret = drm_intel_bo_subdata(batch->bo,
batch->state_batch_offset,
@@ -293,7 +311,8 @@ do_flush_locked(struct brw_context *brw)
if (brw->gen >= 6 && batch->ring == BLT_RING) {
flags = I915_EXEC_BLT;
} else {
- flags = I915_EXEC_RENDER;
+ flags = I915_EXEC_RENDER |
+ (brw->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0);
}
if (batch->needs_sol_reset)
flags |= I915_EXEC_GEN7_SOL_RESET;
@@ -303,11 +322,11 @@ do_flush_locked(struct brw_context *brw)
brw_annotate_aub(brw);
if (brw->hw_ctx == NULL || batch->ring != RENDER_RING) {
- ret = drm_intel_bo_mrb_exec(batch->bo, 4 * batch->used, NULL, 0, 0,
- flags);
+ ret = drm_intel_bo_mrb_exec(batch->bo, 4 * USED_BATCH(*batch),
+ NULL, 0, 0, flags);
} else {
ret = drm_intel_gem_bo_context_exec(batch->bo, brw->hw_ctx,
- 4 * batch->used, flags);
+ 4 * USED_BATCH(*batch), flags);
}
}
@@ -331,7 +350,7 @@ _intel_batchbuffer_flush(struct brw_context *brw,
{
int ret;
- if (brw->batch.used == 0)
+ if (USED_BATCH(brw->batch) == 0)
return 0;
if (brw->throttle_batch[0] == NULL) {
@@ -340,7 +359,7 @@ _intel_batchbuffer_flush(struct brw_context *brw,
}
if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
- int bytes_for_commands = 4 * brw->batch.used;
+ int bytes_for_commands = 4 * USED_BATCH(brw->batch);
int bytes_for_state = brw->batch.bo->size - brw->batch.state_batch_offset;
int total_bytes = bytes_for_commands + bytes_for_state;
fprintf(stderr, "%s:%d: Batchbuffer flush with %4db (pkt) + "
@@ -356,7 +375,7 @@ _intel_batchbuffer_flush(struct brw_context *brw,
/* Mark the end of the buffer. */
intel_batchbuffer_emit_dword(brw, MI_BATCH_BUFFER_END);
- if (brw->batch.used & 1) {
+ if (USED_BATCH(brw->batch) & 1) {
/* Round batchbuffer usage to 2 DWORDs. */
intel_batchbuffer_emit_dword(brw, MI_NOOP);
}
@@ -373,6 +392,9 @@ _intel_batchbuffer_flush(struct brw_context *brw,
drm_intel_bo_wait_rendering(brw->batch.bo);
}
+ if (brw->use_resource_streamer)
+ gen7_reset_hw_bt_pool_offsets(brw);
+
/* Start a new batch buffer. */
brw_new_batch(brw);
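
A note on the resource-streamer pieces that appear in this flush path and earlier in gen8_disable.c: gen7_reset_hw_bt_pool_offsets() together with the hw_bt_pool.next_offset writes suggests binding tables are carved out of a hardware-visible pool by bumping an offset, which is rewound once the batch that referenced them has been flushed. A toy model of that allocation pattern, a sketch under stated assumptions; the pool size, alignment step and function names here are made up:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Toy bump allocator over a fixed-size binding table pool.  The real
     * pool is a GPU buffer set up for the resource streamer; the 64 KB size
     * and 64-byte step are illustrative assumptions, not driver values. */
    struct bt_pool {
       uint32_t next_offset;
       uint32_t size;
    };

    static uint32_t
    bt_pool_alloc(struct bt_pool *pool, uint32_t bytes)
    {
       uint32_t offset = pool->next_offset;
       assert(offset + bytes <= pool->size);
       pool->next_offset += (bytes + 63) & ~63u;   /* keep entries aligned */
       return offset;
    }

    static void
    bt_pool_reset(struct bt_pool *pool)
    {
       /* Analogous to resetting offsets after a batch flush: nothing queued
        * in the new batch refers to the old tables any more. */
       pool->next_offset = 0;
    }

    int main(void)
    {
       struct bt_pool pool = { .next_offset = 0, .size = 64 * 1024 };

       printf("table A at %u\n", (unsigned) bt_pool_alloc(&pool, 8 * 4));
       printf("table B at %u\n", (unsigned) bt_pool_alloc(&pool, 2 * 4));
       bt_pool_reset(&pool);
       printf("after reset: %u\n", (unsigned) pool.next_offset);
       return 0;
    }
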
@@ -382,15 +404,15 @@ _intel_batchbuffer_flush(struct brw_context *brw,
/* This is the only way buffers get added to the validate list.
*/
-bool
-intel_batchbuffer_emit_reloc(struct brw_context *brw,
- drm_intel_bo *buffer,
- uint32_t read_domains, uint32_t write_domain,
- uint32_t delta)
+uint32_t
+intel_batchbuffer_reloc(struct brw_context *brw,
+ drm_intel_bo *buffer, uint32_t offset,
+ uint32_t read_domains, uint32_t write_domain,
+ uint32_t delta)
{
int ret;
- ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used,
+ ret = drm_intel_bo_emit_reloc(brw->batch.bo, offset,
buffer, delta,
read_domains, write_domain);
assert(ret == 0);
@@ -400,18 +422,16 @@ intel_batchbuffer_emit_reloc(struct brw_context *brw,
* case the buffer doesn't move and we can short-circuit the relocation
* processing in the kernel
*/
- intel_batchbuffer_emit_dword(brw, buffer->offset64 + delta);
-
- return true;
+ return buffer->offset64 + delta;
}
-bool
-intel_batchbuffer_emit_reloc64(struct brw_context *brw,
- drm_intel_bo *buffer,
- uint32_t read_domains, uint32_t write_domain,
- uint32_t delta)
+uint64_t
+intel_batchbuffer_reloc64(struct brw_context *brw,
+ drm_intel_bo *buffer, uint32_t offset,
+ uint32_t read_domains, uint32_t write_domain,
+ uint32_t delta)
{
- int ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used,
+ int ret = drm_intel_bo_emit_reloc(brw->batch.bo, offset,
buffer, delta,
read_domains, write_domain);
assert(ret == 0);
@@ -421,11 +441,7 @@ intel_batchbuffer_emit_reloc64(struct brw_context *brw,
* case the buffer doesn't move and we can short-circuit the relocation
* processing in the kernel
*/
- uint64_t offset = buffer->offset64 + delta;
- intel_batchbuffer_emit_dword(brw, offset);
- intel_batchbuffer_emit_dword(brw, offset >> 32);
-
- return true;
+ return buffer->offset64 + delta;
}
@@ -435,312 +451,8 @@ intel_batchbuffer_data(struct brw_context *brw,
{
assert((bytes & 3) == 0);
intel_batchbuffer_require_space(brw, bytes, ring);
- memcpy(brw->batch.map + brw->batch.used, data, bytes);
- brw->batch.used += bytes >> 2;
-}
-
-/**
- * According to the latest documentation, any PIPE_CONTROL with the
- * "Command Streamer Stall" bit set must also have another bit set,
- * with five different options:
- *
- * - Render Target Cache Flush
- * - Depth Cache Flush
- * - Stall at Pixel Scoreboard
- * - Post-Sync Operation
- * - Depth Stall
- *
- * I chose "Stall at Pixel Scoreboard" since we've used it effectively
- * in the past, but the choice is fairly arbitrary.
- */
-static void
-gen8_add_cs_stall_workaround_bits(uint32_t *flags)
-{
- uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_WRITE_IMMEDIATE |
- PIPE_CONTROL_WRITE_DEPTH_COUNT |
- PIPE_CONTROL_WRITE_TIMESTAMP |
- PIPE_CONTROL_STALL_AT_SCOREBOARD |
- PIPE_CONTROL_DEPTH_STALL;
-
- /* If we're doing a CS stall, and don't already have one of the
- * workaround bits set, add "Stall at Pixel Scoreboard."
- */
- if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0)
- *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
-}
-
-/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
- *
- * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
- * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
- *
- * Note that the kernel does CS stalls between batches, so we only need
- * to count them within a batch.
- */
-static uint32_t
-gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
-{
- if (brw->gen == 7 && !brw->is_haswell) {
- if (flags & PIPE_CONTROL_CS_STALL) {
- /* If we're doing a CS stall, reset the counter and carry on. */
- brw->batch.pipe_controls_since_last_cs_stall = 0;
- return 0;
- }
-
- /* If this is the fourth pipe control without a CS stall, do one now. */
- if (++brw->batch.pipe_controls_since_last_cs_stall == 4) {
- brw->batch.pipe_controls_since_last_cs_stall = 0;
- return PIPE_CONTROL_CS_STALL;
- }
- }
- return 0;
-}
-
-/**
- * Emit a PIPE_CONTROL with various flushing flags.
- *
- * The caller is responsible for deciding what flags are appropriate for the
- * given generation.
- */
-void
-brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
-{
- if (brw->gen >= 8) {
- gen8_add_cs_stall_workaround_bits(&flags);
-
- BEGIN_BATCH(6);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
- OUT_BATCH(flags);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
- } else if (brw->gen >= 6) {
- flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
-
- BEGIN_BATCH(5);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
- OUT_BATCH(flags);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
- } else {
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
- }
-}
-
-/**
- * Emit a PIPE_CONTROL that writes to a buffer object.
- *
- * \p flags should contain one of the following items:
- * - PIPE_CONTROL_WRITE_IMMEDIATE
- * - PIPE_CONTROL_WRITE_TIMESTAMP
- * - PIPE_CONTROL_WRITE_DEPTH_COUNT
- */
-void
-brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
- drm_intel_bo *bo, uint32_t offset,
- uint32_t imm_lower, uint32_t imm_upper)
-{
- if (brw->gen >= 8) {
- gen8_add_cs_stall_workaround_bits(&flags);
-
- BEGIN_BATCH(6);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
- OUT_BATCH(flags);
- OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- offset);
- OUT_BATCH(imm_lower);
- OUT_BATCH(imm_upper);
- ADVANCE_BATCH();
- } else if (brw->gen >= 6) {
- flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
-
- /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24
- * on later platforms. We always use PPGTT on Gen7+.
- */
- unsigned gen6_gtt = brw->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0;
-
- BEGIN_BATCH(5);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
- OUT_BATCH(flags);
- OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- gen6_gtt | offset);
- OUT_BATCH(imm_lower);
- OUT_BATCH(imm_upper);
- ADVANCE_BATCH();
- } else {
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
- OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- PIPE_CONTROL_GLOBAL_GTT_WRITE | offset);
- OUT_BATCH(imm_lower);
- OUT_BATCH(imm_upper);
- ADVANCE_BATCH();
- }
-}
-
-/**
- * Restriction [DevSNB, DevIVB]:
- *
- * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
- * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
- * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
- * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
- * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
- * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
- * unless SW can otherwise guarantee that the pipeline from WM onwards is
- * already flushed (e.g., via a preceding MI_FLUSH).
- */
-void
-intel_emit_depth_stall_flushes(struct brw_context *brw)
-{
- assert(brw->gen >= 6 && brw->gen <= 9);
-
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_CACHE_FLUSH);
- brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
-}
-
-/**
- * From the Ivybridge PRM, Volume 2 Part 1, Section 3.2 (VS Stage Input):
- * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
- * stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
- * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
- * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL needs
- * to be sent before any combination of VS associated 3DSTATE."
- */
-void
-gen7_emit_vs_workaround_flush(struct brw_context *brw)
-{
- assert(brw->gen == 7);
- brw_emit_pipe_control_write(brw,
- PIPE_CONTROL_WRITE_IMMEDIATE
- | PIPE_CONTROL_DEPTH_STALL,
- brw->batch.workaround_bo, 0,
- 0, 0);
-}
-
-
-/**
- * Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set.
- */
-void
-gen7_emit_cs_stall_flush(struct brw_context *brw)
-{
- brw_emit_pipe_control_write(brw,
- PIPE_CONTROL_CS_STALL
- | PIPE_CONTROL_WRITE_IMMEDIATE,
- brw->batch.workaround_bo, 0,
- 0, 0);
-}
-
-
-/**
- * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
- * implementing two workarounds on gen6. From section 1.4.7.1
- * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
- *
- * [DevSNB-C+{W/A}] Before any depth stall flush (including those
- * produced by non-pipelined state commands), software needs to first
- * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
- * 0.
- *
- * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
- * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
- *
- * And the workaround for these two requires this workaround first:
- *
- * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
- * BEFORE the pipe-control with a post-sync op and no write-cache
- * flushes.
- *
- * And this last workaround is tricky because of the requirements on
- * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
- * volume 2 part 1:
- *
- * "1 of the following must also be set:
- * - Render Target Cache Flush Enable ([12] of DW1)
- * - Depth Cache Flush Enable ([0] of DW1)
- * - Stall at Pixel Scoreboard ([1] of DW1)
- * - Depth Stall ([13] of DW1)
- * - Post-Sync Operation ([13] of DW1)
- * - Notify Enable ([8] of DW1)"
- *
- * The cache flushes require the workaround flush that triggered this
- * one, so we can't use it. Depth stall would trigger the same.
- * Post-sync nonzero is what triggered this second workaround, so we
- * can't use that one either. Notify enable is IRQs, which aren't
- * really our business. That leaves only stall at scoreboard.
- */
-void
-intel_emit_post_sync_nonzero_flush(struct brw_context *brw)
-{
- brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_STALL_AT_SCOREBOARD);
-
- brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE,
- brw->batch.workaround_bo, 0, 0, 0);
-}
-
-/* Emit a pipelined flush to either flush render and texture cache for
- * reading from a FBO-drawn texture, or flush so that frontbuffer
- * render appears on the screen in DRI1.
- *
- * This is also used for the always_flush_cache driconf debug option.
- */
-void
-intel_batchbuffer_emit_mi_flush(struct brw_context *brw)
-{
- if (brw->batch.ring == BLT_RING && brw->gen >= 6) {
- BEGIN_BATCH_BLT(4);
- OUT_BATCH(MI_FLUSH_DW);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
- } else {
- int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH;
- if (brw->gen >= 6) {
- if (brw->gen == 9) {
- /* Hardware workaround: SKL
- *
- * Emit Pipe Control with all bits set to zero before emitting
- * a Pipe Control with VF Cache Invalidate set.
- */
- brw_emit_pipe_control_flush(brw, 0);
- }
-
- flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_VF_CACHE_INVALIDATE |
- PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
- PIPE_CONTROL_CS_STALL;
-
- if (brw->gen == 6) {
- /* Hardware workaround: SNB B-Spec says:
- *
- * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
- * Flush Enable =1, a PIPE_CONTROL with any non-zero
- * post-sync-op is required.
- */
- intel_emit_post_sync_nonzero_flush(brw);
- }
- }
- brw_emit_pipe_control_flush(brw, flags);
- }
-
- brw_render_cache_set_clear(brw);
+ memcpy(brw->batch.map_next, data, bytes);
+ brw->batch.map_next += bytes >> 2;
}
static void
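
The batchbuffer rework in this file replaces the dword counter brw->batch.used with a write pointer, map_next, so the amount emitted is derived from pointer arithmetic rather than tracked separately. A self-contained model of that accounting, with the struct and names simplified from the driver:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BATCH_DWORDS 8192

    /* Simplified stand-in for struct intel_batchbuffer: just the two fields
     * the pointer-based accounting needs. */
    struct toy_batch {
       uint32_t map[BATCH_DWORDS];
       uint32_t *map_next;
    };

    /* Same shape as the new USED_BATCH() macro: the emitted dword count
     * falls out of pointer subtraction instead of a separate counter. */
    #define USED_BATCH(b) ((uintptr_t)((b).map_next - (b).map))

    static void
    emit_dword(struct toy_batch *b, uint32_t dw)
    {
       assert(USED_BATCH(*b) < BATCH_DWORDS);
       *b->map_next++ = dw;
    }

    int main(void)
    {
       static struct toy_batch b;
       b.map_next = b.map;           /* what intel_batchbuffer_reset() now does */

       emit_dword(&b, 0x12345678);   /* arbitrary command dwords */
       emit_dword(&b, 0xdeadbeef);
       printf("used %zu dwords, %zu bytes\n",
              (size_t) USED_BATCH(b), (size_t) USED_BATCH(b) * 4);
       return 0;
    }
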
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
index 7bdd8364346..84add927c9a 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -22,12 +22,16 @@ extern "C" {
* - Disabling OA counters on Gen6+ (3 DWords = 12 bytes)
* - Ending MI_REPORT_PERF_COUNT on Gen5+, plus associated PIPE_CONTROLs:
* - Two sets of PIPE_CONTROLs, which become 3 PIPE_CONTROLs each on SNB,
- * which are 4 DWords each ==> 2 * 3 * 4 * 4 = 96 bytes
+ * which are 5 DWords each ==> 2 * 3 * 5 * 4 = 120 bytes
* - 3 DWords for MI_REPORT_PERF_COUNT itself on Gen6+. ==> 12 bytes.
* On Ironlake, it's 6 DWords, but we have some slack due to the lack of
* Sandybridge PIPE_CONTROL madness.
+ * - CC_STATE workaround on HSW (12 * 4 = 48 bytes)
+ * - 5 dwords for initial mi_flush
+ * - 2 dwords for CC state setup
+ * - 5 dwords for the required pipe control at the end
*/
-#define BATCH_RESERVED 146
+#define BATCH_RESERVED 152
struct intel_batchbuffer;
@@ -53,25 +57,20 @@ void intel_batchbuffer_data(struct brw_context *brw,
const void *data, GLuint bytes,
enum brw_gpu_ring ring);
-bool intel_batchbuffer_emit_reloc(struct brw_context *brw,
- drm_intel_bo *buffer,
- uint32_t read_domains,
- uint32_t write_domain,
- uint32_t offset);
-bool intel_batchbuffer_emit_reloc64(struct brw_context *brw,
- drm_intel_bo *buffer,
- uint32_t read_domains,
- uint32_t write_domain,
- uint32_t offset);
-void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags);
-void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
- drm_intel_bo *bo, uint32_t offset,
- uint32_t imm_lower, uint32_t imm_upper);
-void intel_batchbuffer_emit_mi_flush(struct brw_context *brw);
-void intel_emit_post_sync_nonzero_flush(struct brw_context *brw);
-void intel_emit_depth_stall_flushes(struct brw_context *brw);
-void gen7_emit_vs_workaround_flush(struct brw_context *brw);
-void gen7_emit_cs_stall_flush(struct brw_context *brw);
+uint32_t intel_batchbuffer_reloc(struct brw_context *brw,
+ drm_intel_bo *buffer,
+ uint32_t offset,
+ uint32_t read_domains,
+ uint32_t write_domain,
+ uint32_t delta);
+uint64_t intel_batchbuffer_reloc64(struct brw_context *brw,
+ drm_intel_bo *buffer,
+ uint32_t offset,
+ uint32_t read_domains,
+ uint32_t write_domain,
+ uint32_t delta);
+
+#define USED_BATCH(batch) ((uintptr_t)((batch).map_next - (batch).map))
static inline uint32_t float_as_int(float f)
{
@@ -93,7 +92,7 @@ static inline unsigned
intel_batchbuffer_space(struct brw_context *brw)
{
return (brw->batch.state_batch_offset - brw->batch.reserved_space)
- - brw->batch.used*4;
+ - USED_BATCH(brw->batch) * 4;
}
@@ -103,7 +102,7 @@ intel_batchbuffer_emit_dword(struct brw_context *brw, GLuint dword)
#ifdef DEBUG
assert(intel_batchbuffer_space(brw) >= 4);
#endif
- brw->batch.map[brw->batch.used++] = dword;
+ *brw->batch.map_next++ = dword;
assert(brw->batch.ring != UNKNOWN_RING);
}
@@ -144,8 +143,8 @@ intel_batchbuffer_begin(struct brw_context *brw, int n, enum brw_gpu_ring ring)
{
intel_batchbuffer_require_space(brw, n * 4, ring);
- brw->batch.emit = brw->batch.used;
#ifdef DEBUG
+ brw->batch.emit = USED_BATCH(brw->batch);
brw->batch.total = n;
#endif
}
@@ -155,7 +154,7 @@ intel_batchbuffer_advance(struct brw_context *brw)
{
#ifdef DEBUG
struct intel_batchbuffer *batch = &brw->batch;
- unsigned int _n = batch->used - batch->emit;
+ unsigned int _n = USED_BATCH(*batch) - batch->emit;
assert(batch->total != 0);
if (_n != batch->total) {
fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n",
@@ -166,21 +165,42 @@ intel_batchbuffer_advance(struct brw_context *brw)
#endif
}
-#define BEGIN_BATCH(n) intel_batchbuffer_begin(brw, n, RENDER_RING)
-#define BEGIN_BATCH_BLT(n) intel_batchbuffer_begin(brw, n, BLT_RING)
-#define OUT_BATCH(d) intel_batchbuffer_emit_dword(brw, d)
-#define OUT_BATCH_F(f) intel_batchbuffer_emit_float(brw, f)
-#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \
- intel_batchbuffer_emit_reloc(brw, buf, \
- read_domains, write_domain, delta); \
+#define BEGIN_BATCH(n) do { \
+ intel_batchbuffer_begin(brw, (n), RENDER_RING); \
+ uint32_t *__map = brw->batch.map_next; \
+ brw->batch.map_next += (n)
+
+#define BEGIN_BATCH_BLT(n) do { \
+ intel_batchbuffer_begin(brw, (n), BLT_RING); \
+ uint32_t *__map = brw->batch.map_next; \
+ brw->batch.map_next += (n)
+
+#define OUT_BATCH(d) *__map++ = (d)
+#define OUT_BATCH_F(f) OUT_BATCH(float_as_int((f)))
+
+#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \
+ uint32_t __offset = (__map - brw->batch.map) * 4; \
+ OUT_BATCH(intel_batchbuffer_reloc(brw, (buf), __offset, \
+ (read_domains), \
+ (write_domain), \
+ (delta))); \
} while (0)
/* Handle 48-bit address relocations for Gen8+ */
-#define OUT_RELOC64(buf, read_domains, write_domain, delta) do { \
- intel_batchbuffer_emit_reloc64(brw, buf, read_domains, write_domain, delta); \
+#define OUT_RELOC64(buf, read_domains, write_domain, delta) do { \
+ uint32_t __offset = (__map - brw->batch.map) * 4; \
+ uint64_t reloc64 = intel_batchbuffer_reloc64(brw, (buf), __offset, \
+ (read_domains), \
+ (write_domain), \
+ (delta)); \
+ OUT_BATCH(reloc64); \
+ OUT_BATCH(reloc64 >> 32); \
} while (0)
-#define ADVANCE_BATCH() intel_batchbuffer_advance(brw);
+#define ADVANCE_BATCH() \
+ assert(__map == brw->batch.map_next); \
+ intel_batchbuffer_advance(brw); \
+} while (0)
#ifdef __cplusplus
}
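
The macro rewrite above replaces global dword bookkeeping with a block-local cursor: BEGIN_BATCH opens a do-block, copies map_next into __map, advances map_next by the whole packet length immediately, and ADVANCE_BATCH asserts that the two agree. A stripped-down emulation of that pattern outside the driver, with toy names and no relocation or ring handling:

    #include <assert.h>
    #include <stdint.h>

    struct toy_batch {
       uint32_t map[256];
       uint32_t *map_next;
    };

    /* Reserve n dwords up front and write through a local cursor, so a
     * packet that emits too few or too many dwords trips the assert in
     * TOY_ADVANCE(), just like the reworked BEGIN_BATCH/ADVANCE_BATCH. */
    #define TOY_BEGIN(b, n) do {                  \
          uint32_t *__map = (b)->map_next;        \
          (b)->map_next += (n)

    #define TOY_OUT(d) (*__map++ = (d))

    #define TOY_ADVANCE(b)                        \
          assert(__map == (b)->map_next);         \
       } while (0)

    static void
    emit_two_dword_packet(struct toy_batch *b, uint32_t header, uint32_t payload)
    {
       TOY_BEGIN(b, 2);
       TOY_OUT(header);
       TOY_OUT(payload);
       TOY_ADVANCE(b);    /* asserts if 1 or 3 dwords had been written */
    }

    int main(void)
    {
       struct toy_batch b;
       b.map_next = b.map;
       emit_two_dword_packet(&b, 0x1, 0x2);
       return (b.map_next - b.map) == 2 ? 0 : 1;
    }

Reserving the space up front is also what lets the new OUT_RELOC compute each dword's byte offset from __map for drm_intel_bo_emit_reloc without ever touching a shared counter.
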
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
index d3ab769356c..6d92580e725 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -27,6 +27,7 @@
#include "main/mtypes.h"
+#include "main/blit.h"
#include "main/context.h"
#include "main/enums.h"
#include "main/colormac.h"
@@ -43,6 +44,23 @@
#define FILE_DEBUG_FLAG DEBUG_BLIT
+#define SET_TILING_XY_FAST_COPY_BLT(tiling, tr_mode, type) \
+({ \
+ switch (tiling) { \
+ case I915_TILING_X: \
+ CMD |= type ## _TILED_X; \
+ break; \
+ case I915_TILING_Y: \
+ if (tr_mode == INTEL_MIPTREE_TRMODE_YS) \
+ CMD |= type ## _TILED_64K; \
+ else \
+ CMD |= type ## _TILED_Y; \
+ break; \
+ default: \
+ unreachable("not reached"); \
+ } \
+})
+
static void
intel_miptree_set_alpha_to_one(struct brw_context *brw,
struct intel_mipmap_tree *mt,
@@ -75,6 +93,10 @@ static uint32_t
br13_for_cpp(int cpp)
{
switch (cpp) {
+ case 16:
+ return BR13_32323232;
+ case 8:
+ return BR13_16161616;
case 4:
return BR13_8888;
case 2:
@@ -86,6 +108,64 @@ br13_for_cpp(int cpp)
}
}
+static uint32_t
+get_tr_horizontal_align(uint32_t tr_mode, uint32_t cpp, bool is_src) {
+ /* Alignment tables for YF/YS tiled surfaces. */
+ const uint32_t align_2d_yf[] = {64, 64, 32, 32, 16};
+ const uint32_t bpp = cpp * 8;
+ const uint32_t shift = is_src ? 17 : 10;
+ uint32_t align;
+ int i = 0;
+
+ if (tr_mode == INTEL_MIPTREE_TRMODE_NONE)
+ return 0;
+
+ /* Compute array index. */
+ assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp));
+ i = ffs(bpp / 8) - 1;
+
+ align = tr_mode == INTEL_MIPTREE_TRMODE_YF ?
+ align_2d_yf[i] :
+ 4 * align_2d_yf[i];
+
+ assert(_mesa_is_pow_two(align));
+
+ /* XY_FAST_COPY_BLT doesn't support horizontal alignment of 16. */
+ if (align == 16)
+ align = 32;
+
+ return (ffs(align) - 6) << shift;
+}
+
+static uint32_t
+get_tr_vertical_align(uint32_t tr_mode, uint32_t cpp, bool is_src) {
+ /* Vertical alignment tables for YF/YS tiled surfaces. */
+ const unsigned align_2d_yf[] = {64, 32, 32, 16, 16};
+ const uint32_t bpp = cpp * 8;
+ const uint32_t shift = is_src ? 15 : 8;
+ uint32_t align;
+ int i = 0;
+
+ if (tr_mode == INTEL_MIPTREE_TRMODE_NONE)
+ return 0;
+
+ /* Compute array index. */
+ assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp));
+ i = ffs(bpp / 8) - 1;
+
+ align = tr_mode == INTEL_MIPTREE_TRMODE_YF ?
+ align_2d_yf[i] :
+ 4 * align_2d_yf[i];
+
+ assert(_mesa_is_pow_two(align));
+
+ /* XY_FAST_COPY_BLT doesn't support vertical alignments of 16 and 32. */
+ if (align == 16 || align == 32)
+ align = 64;
+
+ return (ffs(align) - 7) << shift;
+}
+
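
get_tr_horizontal_align() and get_tr_vertical_align() above boil down to: look up the YF alignment for the surface's bpp, quadruple it for YS, bump values the blitter cannot encode up to the next supported one, then store ffs(align) minus a bias at the field's bit position (bits 17 and 15 for the source surface). A worked example for a 32 bpp source surface, using the same tables as the hunk, standalone and with the YF/YS choice as a plain loop variable:

    #include <stdio.h>
    #include <strings.h>   /* ffs() */

    int main(void)
    {
       const unsigned h_align_yf[] = {64, 64, 32, 32, 16};
       const unsigned v_align_yf[] = {64, 32, 32, 16, 16};
       const unsigned cpp = 4, bpp = cpp * 8;
       const int i = ffs(bpp / 8) - 1;      /* index 2 for 32 bpp */

       for (int ys = 0; ys <= 1; ys++) {
          unsigned h = ys ? 4 * h_align_yf[i] : h_align_yf[i];
          unsigned v = ys ? 4 * v_align_yf[i] : v_align_yf[i];

          if (h == 16)
             h = 32;                /* 16 is not encodable horizontally */
          if (v == 16 || v == 32)
             v = 64;                /* 16 and 32 are not encodable vertically */

          /* Source surface: horizontal field at bit 17, vertical at bit 15. */
          unsigned field = ((ffs(h) - 6) << 17) | ((ffs(v) - 7) << 15);
          printf("%s: halign %3u valign %3u -> 0x%08x\n",
                 ys ? "YS" : "YF", h, v, field);
       }
       return 0;
    }
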
/**
* Emits the packet for switching the blitter from X to Y tiled or back.
*
@@ -96,9 +176,10 @@ br13_for_cpp(int cpp)
* tiling state would leak into other unsuspecting applications (like the X
* server).
*/
-static void
+static uint32_t *
set_blitter_tiling(struct brw_context *brw,
- bool dst_y_tiled, bool src_y_tiled)
+ bool dst_y_tiled, bool src_y_tiled,
+ uint32_t *__map)
{
assert(brw->gen >= 6);
@@ -113,19 +194,19 @@ set_blitter_tiling(struct brw_context *brw,
OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16 |
(dst_y_tiled ? BCS_SWCTRL_DST_Y : 0) |
(src_y_tiled ? BCS_SWCTRL_SRC_Y : 0));
+ return __map;
}
+#define SET_BLITTER_TILING(...) __map = set_blitter_tiling(__VA_ARGS__, __map)
-#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled) do { \
+#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled) \
BEGIN_BATCH_BLT(n + ((dst_y_tiled || src_y_tiled) ? 14 : 0)); \
if (dst_y_tiled || src_y_tiled) \
- set_blitter_tiling(brw, dst_y_tiled, src_y_tiled); \
- } while (0)
+ SET_BLITTER_TILING(brw, dst_y_tiled, src_y_tiled)
-#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) do { \
+#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) \
if (dst_y_tiled || src_y_tiled) \
- set_blitter_tiling(brw, false, false); \
- ADVANCE_BATCH(); \
- } while (0)
+ SET_BLITTER_TILING(brw, false, false); \
+ ADVANCE_BATCH()
static int
blt_pitch(struct intel_mipmap_tree *mt)
@@ -278,9 +359,11 @@ intel_miptree_blit(struct brw_context *brw,
src_pitch,
src_mt->bo, src_mt->offset,
src_mt->tiling,
+ src_mt->tr_mode,
dst_mt->pitch,
dst_mt->bo, dst_mt->offset,
dst_mt->tiling,
+ dst_mt->tr_mode,
src_x, src_y,
dst_x, dst_y,
width, height,
@@ -313,6 +396,112 @@ alignment_valid(struct brw_context *brw, unsigned offset, uint32_t tiling)
return true;
}
+static bool
+can_fast_copy_blit(struct brw_context *brw,
+ drm_intel_bo *src_buffer,
+ int16_t src_x, int16_t src_y,
+ uintptr_t src_offset, uint32_t src_pitch,
+ uint32_t src_tiling, uint32_t src_tr_mode,
+ drm_intel_bo *dst_buffer,
+ int16_t dst_x, int16_t dst_y,
+ uintptr_t dst_offset, uint32_t dst_pitch,
+ uint32_t dst_tiling, uint32_t dst_tr_mode,
+ int16_t w, int16_t h, uint32_t cpp)
+{
+ const bool dst_tiling_none = dst_tiling == I915_TILING_NONE;
+ const bool src_tiling_none = src_tiling == I915_TILING_NONE;
+
+ if (brw->gen < 9)
+ return false;
+
+ if (src_buffer->handle == dst_buffer->handle &&
+ _mesa_regions_overlap(src_x, src_y, src_x + w, src_y + h,
+ dst_x, dst_y, dst_x + w, dst_y + h))
+ return false;
+
+ /* Enable fast copy blit only if the surfaces are Yf/Ys tiled.
+ * FIXME: Based on performance data, remove this condition later to
+ * enable for all types of surfaces.
+ */
+ if (src_tr_mode == INTEL_MIPTREE_TRMODE_NONE &&
+ dst_tr_mode == INTEL_MIPTREE_TRMODE_NONE)
+ return false;
+
+ /* For all surface types buffers must be cacheline-aligned. */
+ if ((dst_offset | src_offset) & 63)
+ return false;
+
+ /* Color depth greater than 128 bits not supported. */
+ if (cpp > 16)
+ return false;
+
+ /* For Fast Copy Blits the pitch cannot be a negative number. So, bit 15
+ * of the destination pitch must be zero.
+ */
+ if ((src_pitch >> 15 & 1) != 0 || (dst_pitch >> 15 & 1) != 0)
+ return false;
+
+ /* For Linear surfaces, the pitch has to be an OWord (16byte) multiple. */
+ if ((src_tiling_none && src_pitch % 16 != 0) ||
+ (dst_tiling_none && dst_pitch % 16 != 0))
+ return false;
+
+ /* For Tiled surfaces, the pitch has to be a multiple of the Tile width
+ * (X direction width of the Tile). This means the pitch value will
+ * always be Cache Line aligned (64byte multiple).
+ */
+ if ((!dst_tiling_none && dst_pitch % 64 != 0) ||
+ (!src_tiling_none && src_pitch % 64 != 0))
+ return false;
+
+ return true;
+}
+
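
Condensed, the checks in can_fast_copy_blit() say: Gen9+, no self-overlap, at least one Yf/Ys surface (for now), cache-line aligned offsets, cpp at most 16, non-negative pitches, and pitch granularity of 16 bytes for linear or 64 bytes for tiled surfaces. A standalone version of just the numeric constraints, handy for experimenting with candidate layouts; the surface parameters below are example values, not taken from a real miptree:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Only the arithmetic constraints from can_fast_copy_blit(); the
     * overlap, generation and tiling-mode checks are left out. */
    static bool
    fast_copy_pitch_offset_ok(uintptr_t offset, uint32_t pitch, bool tiled,
                              uint32_t cpp)
    {
       if (offset & 63)                     /* cache-line aligned start */
          return false;
       if (cpp > 16)                        /* at most 128 bpp */
          return false;
       if ((pitch >> 15) & 1)               /* pitch sign bit must be clear */
          return false;
       if (!tiled && pitch % 16 != 0)       /* linear: OWord-multiple pitch */
          return false;
       if (tiled && pitch % 64 != 0)        /* tiled: tile-width multiple */
          return false;
       return true;
    }

    int main(void)
    {
       /* Example: 1920x1080 RGBA8, tiled, pitch padded to 7680 bytes. */
       printf("tiled 7680: %d\n", fast_copy_pitch_offset_ok(0, 7680, true, 4));
       /* A linear surface with a 100-byte pitch fails the OWord rule. */
       printf("linear 100: %d\n", fast_copy_pitch_offset_ok(0, 100, false, 4));
       return 0;
    }
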
+static uint32_t
+xy_blit_cmd(uint32_t src_tiling, uint32_t src_tr_mode,
+ uint32_t dst_tiling, uint32_t dst_tr_mode,
+ uint32_t cpp, bool use_fast_copy_blit)
+{
+ uint32_t CMD = 0;
+
+ if (use_fast_copy_blit) {
+ CMD = XY_FAST_COPY_BLT_CMD;
+
+ if (dst_tiling != I915_TILING_NONE)
+ SET_TILING_XY_FAST_COPY_BLT(dst_tiling, dst_tr_mode, XY_FAST_DST);
+
+ if (src_tiling != I915_TILING_NONE)
+ SET_TILING_XY_FAST_COPY_BLT(src_tiling, src_tr_mode, XY_FAST_SRC);
+
+ CMD |= get_tr_horizontal_align(src_tr_mode, cpp, true /* is_src */);
+ CMD |= get_tr_vertical_align(src_tr_mode, cpp, true /* is_src */);
+
+ CMD |= get_tr_horizontal_align(dst_tr_mode, cpp, false /* is_src */);
+ CMD |= get_tr_vertical_align(dst_tr_mode, cpp, false /* is_src */);
+
+ } else {
+ assert(cpp <= 4);
+ switch (cpp) {
+ case 1:
+ case 2:
+ CMD = XY_SRC_COPY_BLT_CMD;
+ break;
+ case 4:
+ CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ if (dst_tiling != I915_TILING_NONE)
+ CMD |= XY_DST_TILED;
+
+ if (src_tiling != I915_TILING_NONE)
+ CMD |= XY_SRC_TILED;
+ }
+ return CMD;
+}
+
/* Copy BitBlt
*/
bool
@@ -322,10 +511,12 @@ intelEmitCopyBlit(struct brw_context *brw,
drm_intel_bo *src_buffer,
GLuint src_offset,
uint32_t src_tiling,
+ uint32_t src_tr_mode,
GLshort dst_pitch,
drm_intel_bo *dst_buffer,
GLuint dst_offset,
uint32_t dst_tiling,
+ uint32_t dst_tr_mode,
GLshort src_x, GLshort src_y,
GLshort dst_x, GLshort dst_y,
GLshort w, GLshort h,
@@ -337,18 +528,11 @@ intelEmitCopyBlit(struct brw_context *brw,
drm_intel_bo *aper_array[3];
bool dst_y_tiled = dst_tiling == I915_TILING_Y;
bool src_y_tiled = src_tiling == I915_TILING_Y;
-
- if (!alignment_valid(brw, dst_offset, dst_tiling))
- return false;
- if (!alignment_valid(brw, src_offset, src_tiling))
- return false;
+ bool use_fast_copy_blit = false;
if ((dst_y_tiled || src_y_tiled) && brw->gen < 6)
return false;
- assert(!dst_y_tiled || (dst_pitch % 128) == 0);
- assert(!src_y_tiled || (src_pitch % 128) == 0);
-
/* do space check before going any further */
do {
aper_array[0] = brw->batch.bo;
@@ -373,52 +557,98 @@ intelEmitCopyBlit(struct brw_context *brw,
src_buffer, src_pitch, src_offset, src_x, src_y,
dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
- /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop
- * the low bits. Offsets must be naturally aligned.
- */
- if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||
- dst_pitch % 4 != 0 || dst_offset % cpp != 0)
- return false;
+ use_fast_copy_blit = can_fast_copy_blit(brw,
+ src_buffer,
+ src_x, src_y,
+ src_offset, src_pitch,
+ src_tiling, src_tr_mode,
+ dst_buffer,
+ dst_x, dst_y,
+ dst_offset, dst_pitch,
+ dst_tiling, dst_tr_mode,
+ w, h, cpp);
+ assert(use_fast_copy_blit ||
+ (src_tr_mode == INTEL_MIPTREE_TRMODE_NONE &&
+ dst_tr_mode == INTEL_MIPTREE_TRMODE_NONE));
+
+ if (use_fast_copy_blit) {
+ /* When two sequential fast copy blits have different source surfaces,
+ * but their destinations refer to the same destination surface and
+ * therefore overlap, it is imperative that a flush be inserted between
+ * the two blits.
+ *
+ * FIXME: Figure out a way to avoid flushing when not required.
+ */
+ brw_emit_mi_flush(brw);
+
+ assert(cpp <= 16);
+ BR13 = br13_for_cpp(cpp);
+
+ if (src_tr_mode == INTEL_MIPTREE_TRMODE_YF)
+ BR13 |= XY_FAST_SRC_TRMODE_YF;
+
+ if (dst_tr_mode == INTEL_MIPTREE_TRMODE_YF)
+ BR13 |= XY_FAST_DST_TRMODE_YF;
+
+ CMD = xy_blit_cmd(src_tiling, src_tr_mode,
+ dst_tiling, dst_tr_mode,
+ cpp, use_fast_copy_blit);
+
+ /* For tiled source and destination, pitch value should be specified
+ * as a number of Dwords.
+ */
+ if (dst_tiling != I915_TILING_NONE)
+ dst_pitch /= 4;
+
+ if (src_tiling != I915_TILING_NONE)
+ src_pitch /= 4;
- /* For big formats (such as floating point), do the copy using 16 or 32bpp
- * and multiply the coordinates.
- */
- if (cpp > 4) {
- if (cpp % 4 == 2) {
- dst_x *= cpp / 2;
- dst_x2 *= cpp / 2;
- src_x *= cpp / 2;
- cpp = 2;
- } else {
- assert(cpp % 4 == 0);
- dst_x *= cpp / 4;
- dst_x2 *= cpp / 4;
- src_x *= cpp / 4;
- cpp = 4;
+ } else {
+ assert(!dst_y_tiled || (dst_pitch % 128) == 0);
+ assert(!src_y_tiled || (src_pitch % 128) == 0);
+
+ /* For big formats (such as floating point), do the copy using 16 or
+ * 32bpp and multiply the coordinates.
+ */
+ if (cpp > 4) {
+ if (cpp % 4 == 2) {
+ dst_x *= cpp / 2;
+ dst_x2 *= cpp / 2;
+ src_x *= cpp / 2;
+ cpp = 2;
+ } else {
+ assert(cpp % 4 == 0);
+ dst_x *= cpp / 4;
+ dst_x2 *= cpp / 4;
+ src_x *= cpp / 4;
+ cpp = 4;
+ }
}
- }
- BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;
+ if (!alignment_valid(brw, dst_offset, dst_tiling))
+ return false;
+ if (!alignment_valid(brw, src_offset, src_tiling))
+ return false;
- switch (cpp) {
- case 1:
- case 2:
- CMD = XY_SRC_COPY_BLT_CMD;
- break;
- case 4:
- CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
- break;
- default:
- return false;
- }
+ /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop
+ * the low bits. Offsets must be naturally aligned.
+ */
+ if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||
+ dst_pitch % 4 != 0 || dst_offset % cpp != 0)
+ return false;
- if (dst_tiling != I915_TILING_NONE) {
- CMD |= XY_DST_TILED;
- dst_pitch /= 4;
- }
- if (src_tiling != I915_TILING_NONE) {
- CMD |= XY_SRC_TILED;
- src_pitch /= 4;
+ assert(cpp <= 4);
+ BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;
+
+ CMD = xy_blit_cmd(src_tiling, src_tr_mode,
+ dst_tiling, dst_tr_mode,
+ cpp, use_fast_copy_blit);
+
+ if (dst_tiling != I915_TILING_NONE)
+ dst_pitch /= 4;
+
+ if (src_tiling != I915_TILING_NONE)
+ src_pitch /= 4;
}
if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
@@ -460,7 +690,7 @@ intelEmitCopyBlit(struct brw_context *brw,
ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled);
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
return true;
}
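
The non-fast-copy branch of intelEmitCopyBlit above keeps the old trick for formats wider than 32 bpp: reinterpret the surface as 16- or 32-bit pixels and scale the X coordinates so the blitter still copies the same bytes per row. A worked example of that reinterpretation; the RGBA32F format and coordinates are just example values:

    #include <stdio.h>

    /* Reinterpret a wide-pixel blit as a narrow-pixel blit, as the legacy
     * XY_SRC_COPY_BLT path does for cpp > 4. */
    int main(void)
    {
       unsigned cpp = 16;            /* e.g. RGBA32F: 16 bytes per pixel */
       unsigned src_x = 10, dst_x = 20, dst_x2 = 20 + 64;  /* 64 px wide */

       if (cpp > 4) {
          if (cpp % 4 == 2) {        /* e.g. 3 x 16-bit channels: copy as 16 bpp */
             src_x *= cpp / 2;  dst_x *= cpp / 2;  dst_x2 *= cpp / 2;  cpp = 2;
          } else {                   /* multiples of 4 bytes: copy as 32 bpp */
             src_x *= cpp / 4;  dst_x *= cpp / 4;  dst_x2 *= cpp / 4;  cpp = 4;
          }
       }

       /* 16-byte pixels become 4 dwords each: the x coordinates scale by 4
        * while the copied byte count per row (width * cpp) stays the same. */
       printf("cpp=%u src_x=%u dst_x=%u width=%u px\n",
              cpp, src_x, dst_x, dst_x2 - dst_x);
       return 0;
    }
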
@@ -544,7 +774,7 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw,
intel_batchbuffer_data(brw, src_bits, dwords * 4, BLT_RING);
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
return true;
}
@@ -576,7 +806,9 @@ intel_emit_linear_blit(struct brw_context *brw,
dst_x = dst_offset % 64;
ok = intelEmitCopyBlit(brw, 1,
pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
+ INTEL_MIPTREE_TRMODE_NONE,
pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
+ INTEL_MIPTREE_TRMODE_NONE,
src_x, 0, /* src x/y */
dst_x, 0, /* dst x/y */
pitch, height, /* w, h */
@@ -595,7 +827,9 @@ intel_emit_linear_blit(struct brw_context *brw,
if (size != 0) {
ok = intelEmitCopyBlit(brw, 1,
pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
+ INTEL_MIPTREE_TRMODE_NONE,
pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
+ INTEL_MIPTREE_TRMODE_NONE,
src_x, 0, /* src x/y */
dst_x, 0, /* dst x/y */
size, 1, /* w, h */
@@ -667,5 +901,5 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
OUT_BATCH(0xffffffff); /* white, but only alpha gets written */
ADVANCE_BATCH_TILED(dst_y_tiled, false);
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
}
diff --git a/src/mesa/drivers/dri/i965/intel_blit.h b/src/mesa/drivers/dri/i965/intel_blit.h
index 2287c379c4e..c3d19a5a20e 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.h
+++ b/src/mesa/drivers/dri/i965/intel_blit.h
@@ -32,19 +32,21 @@
bool
intelEmitCopyBlit(struct brw_context *brw,
- GLuint cpp,
- GLshort src_pitch,
- drm_intel_bo *src_buffer,
- GLuint src_offset,
- uint32_t src_tiling,
- GLshort dst_pitch,
- drm_intel_bo *dst_buffer,
- GLuint dst_offset,
- uint32_t dst_tiling,
- GLshort srcx, GLshort srcy,
- GLshort dstx, GLshort dsty,
- GLshort w, GLshort h,
- GLenum logicop );
+ GLuint cpp,
+ GLshort src_pitch,
+ drm_intel_bo *src_buffer,
+ GLuint src_offset,
+ uint32_t src_tiling,
+ uint32_t src_tr_mode,
+ GLshort dst_pitch,
+ drm_intel_bo *dst_buffer,
+ GLuint dst_offset,
+ uint32_t dst_tiling,
+ uint32_t dst_tr_mode,
+ GLshort srcx, GLshort srcy,
+ GLshort dstx, GLshort dsty,
+ GLshort w, GLshort h,
+ GLenum logicop);
bool intel_miptree_blit_compatible_formats(mesa_format src, mesa_format dst);
diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
index 627c487f0e7..ff05b5cd0e7 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -560,7 +560,7 @@ brw_unmap_buffer(struct gl_context *ctx,
* flush. Once again, we wish for a domain tracker in libdrm to cover
* usage inside of a batchbuffer.
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
drm_intel_bo_unreference(intel_obj->range_map_bo[index]);
intel_obj->range_map_bo[index] = NULL;
@@ -632,7 +632,7 @@ brw_copy_buffer_subdata(struct gl_context *ctx,
* flush. Once again, we wish for a domain tracker in libdrm to cover
* usage inside of a batchbuffer.
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
}
void
diff --git a/src/mesa/drivers/dri/i965/intel_copy_image.c b/src/mesa/drivers/dri/i965/intel_copy_image.c
index f4c7eff2904..3706704bf1a 100644
--- a/src/mesa/drivers/dri/i965/intel_copy_image.c
+++ b/src/mesa/drivers/dri/i965/intel_copy_image.c
@@ -126,9 +126,11 @@ copy_image_with_blitter(struct brw_context *brw,
src_mt->pitch,
src_mt->bo, src_mt->offset,
src_mt->tiling,
+ src_mt->tr_mode,
dst_mt->pitch,
dst_mt->bo, dst_mt->offset,
dst_mt->tiling,
+ dst_mt->tr_mode,
src_x, src_y,
dst_x, dst_y,
src_width, src_height,
diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c
index 75cf7854eff..58f41bfd55d 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.c
+++ b/src/mesa/drivers/dri/i965/intel_debug.c
@@ -79,11 +79,13 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage)
{
uint64_t flags[] = {
[MESA_SHADER_VERTEX] = DEBUG_VS,
+ [MESA_SHADER_TESS_CTRL] = 0,
+ [MESA_SHADER_TESS_EVAL] = 0,
[MESA_SHADER_GEOMETRY] = DEBUG_GS,
[MESA_SHADER_FRAGMENT] = DEBUG_WM,
[MESA_SHADER_COMPUTE] = DEBUG_CS,
};
- STATIC_ASSERT(MESA_SHADER_STAGES == 4);
+ STATIC_ASSERT(MESA_SHADER_STAGES == 6);
return flags[stage];
}
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index c99677c7197..3bc28a12026 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -64,10 +64,10 @@ can_do_pipelined_register_writes(struct brw_context *brw)
/* Set a value in a BO to a known quantity. The workaround BO already
* exists and doesn't contain anything important, so we may as well use it.
*/
- drm_intel_bo_map(brw->batch.workaround_bo, true);
- data = brw->batch.workaround_bo->virtual;
+ drm_intel_bo_map(brw->workaround_bo, true);
+ data = brw->workaround_bo->virtual;
data[offset] = 0xffffffff;
- drm_intel_bo_unmap(brw->batch.workaround_bo);
+ drm_intel_bo_unmap(brw->workaround_bo);
/* Write the register. */
BEGIN_BATCH(3);
@@ -76,13 +76,13 @@ can_do_pipelined_register_writes(struct brw_context *brw)
OUT_BATCH(expected_value);
ADVANCE_BATCH();
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
/* Save the register's value back to the buffer. */
BEGIN_BATCH(3);
OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
OUT_BATCH(reg);
- OUT_RELOC(brw->batch.workaround_bo,
+ OUT_RELOC(brw->workaround_bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
offset * sizeof(uint32_t));
ADVANCE_BATCH();
@@ -90,10 +90,10 @@ can_do_pipelined_register_writes(struct brw_context *brw)
intel_batchbuffer_flush(brw);
/* Check whether the value got written. */
- drm_intel_bo_map(brw->batch.workaround_bo, false);
- data = brw->batch.workaround_bo->virtual;
+ drm_intel_bo_map(brw->workaround_bo, false);
+ data = brw->workaround_bo->virtual;
bool success = data[offset] == expected_value;
- drm_intel_bo_unmap(brw->batch.workaround_bo);
+ drm_intel_bo_unmap(brw->workaround_bo);
result = success;
@@ -120,10 +120,10 @@ can_write_oacontrol(struct brw_context *brw)
/* Set a value in a BO to a known quantity. The workaround BO already
* exists and doesn't contain anything important, so we may as well use it.
*/
- drm_intel_bo_map(brw->batch.workaround_bo, true);
- data = brw->batch.workaround_bo->virtual;
+ drm_intel_bo_map(brw->workaround_bo, true);
+ data = brw->workaround_bo->virtual;
data[offset] = 0xffffffff;
- drm_intel_bo_unmap(brw->batch.workaround_bo);
+ drm_intel_bo_unmap(brw->workaround_bo);
/* Write OACONTROL. */
BEGIN_BATCH(3);
@@ -132,18 +132,18 @@ can_write_oacontrol(struct brw_context *brw)
OUT_BATCH(expected_value);
ADVANCE_BATCH();
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
/* Save the register's value back to the buffer. */
BEGIN_BATCH(3);
OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
OUT_BATCH(OACONTROL);
- OUT_RELOC(brw->batch.workaround_bo,
+ OUT_RELOC(brw->workaround_bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
offset * sizeof(uint32_t));
ADVANCE_BATCH();
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
/* Set OACONTROL back to zero (everything off). */
BEGIN_BATCH(3);
@@ -155,10 +155,10 @@ can_write_oacontrol(struct brw_context *brw)
intel_batchbuffer_flush(brw);
/* Check whether the value got written. */
- drm_intel_bo_map(brw->batch.workaround_bo, false);
- data = brw->batch.workaround_bo->virtual;
+ drm_intel_bo_map(brw->workaround_bo, false);
+ data = brw->workaround_bo->virtual;
bool success = data[offset] == expected_value;
- drm_intel_bo_unmap(brw->batch.workaround_bo);
+ drm_intel_bo_unmap(brw->workaround_bo);
result = success;
@@ -284,8 +284,6 @@ intelInitExtensions(struct gl_context *ctx)
}
if (brw->gen >= 6) {
- uint64_t dummy;
-
ctx->Extensions.ARB_blend_func_extended =
brw->optionCache.info == NULL ||
!driQueryOptionb(&brw->optionCache, "disable_blend_func_extended");
@@ -311,13 +309,14 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.OES_depth_texture_cube_map = true;
/* Test if the kernel has the ioctl. */
- if (brw->bufmgr && drm_intel_reg_read(brw->bufmgr, TIMESTAMP, &dummy) == 0)
+ if (brw->intelScreen->hw_has_timestamp)
ctx->Extensions.ARB_timer_query = true;
/* Only enable this in core profile because other parts of Mesa behave
* slightly differently when the extension is enabled.
*/
if (ctx->API == API_OPENGL_CORE) {
+ ctx->Extensions.ARB_shader_subroutine = true;
ctx->Extensions.ARB_viewport_array = true;
ctx->Extensions.AMD_vertex_shader_viewport_index = true;
}
@@ -331,6 +330,7 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.ARB_framebuffer_no_attachments = true;
ctx->Extensions.ARB_gpu_shader5 = true;
ctx->Extensions.ARB_shader_atomic_counters = true;
+ ctx->Extensions.ARB_shader_image_load_store = true;
ctx->Extensions.ARB_texture_compression_bptc = true;
ctx->Extensions.ARB_texture_view = true;
@@ -351,6 +351,7 @@ intelInitExtensions(struct gl_context *ctx)
if (ctx->API == API_OPENGL_CORE) {
ctx->Extensions.ARB_viewport_array = true;
ctx->Extensions.AMD_vertex_shader_viewport_index = true;
+ ctx->Extensions.ARB_shader_subroutine = true;
}
}
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index 1b3a72f3ec2..72648b01e33 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -310,7 +310,7 @@ intel_alloc_private_renderbuffer_storage(struct gl_context * ctx, struct gl_rend
intel_miptree_release(&irb->mt);
DBG("%s: %s: %s (%dx%d)\n", __func__,
- _mesa_lookup_enum_by_nr(internalFormat),
+ _mesa_enum_to_string(internalFormat),
_mesa_get_format_name(rb->Format), width, height);
if (width == 0 || height == 0)
@@ -551,10 +551,12 @@ intel_renderbuffer_update_wrapper(struct brw_context *brw,
irb->mt_layer = layer_multiplier * layer;
- if (layered) {
- irb->layer_count = image->TexObject->NumLayers ?: mt->level[level].depth / layer_multiplier;
- } else {
+ if (!layered) {
irb->layer_count = 1;
+ } else if (image->TexObject->NumLayers > 0) {
+ irb->layer_count = image->TexObject->NumLayers;
+ } else {
+ irb->layer_count = mt->level[level].depth / layer_multiplier;
}
intel_miptree_reference(&irb->mt, mt);
@@ -1020,6 +1022,9 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
struct intel_mipmap_tree *new_mt;
int width, height, depth;
+ uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
+ MIPTREE_LAYOUT_TILING_ANY;
+
intel_miptree_get_dimensions_for_image(rb->TexImage, &width, &height, &depth);
new_mt = intel_miptree_create(brw, rb->TexImage->TexObject->Target,
@@ -1028,8 +1033,7 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
intel_image->base.Base.Level,
width, height, depth,
irb->mt->num_samples,
- INTEL_MIPTREE_TILING_ANY,
- MIPTREE_LAYOUT_ACCELERATED_UPLOAD);
+ layout_flags);
if (intel_miptree_wants_hiz_buffer(brw, new_mt)) {
intel_miptree_alloc_hiz(brw, new_mt);
@@ -1076,7 +1080,7 @@ brw_render_cache_set_check_flush(struct brw_context *brw, drm_intel_bo *bo)
if (!_mesa_set_search(brw->render_cache, bo))
return;
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
}
/**
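The intel_renderbuffer_update_wrapper() hunk above replaces the GNU "?:" shorthand with explicit branches for layer_count. A minimal standalone restatement of that rule, with plain parameters standing in for the renderbuffer, texture object and miptree fields (the function itself is illustrative, not part of the patch):

#include <stdbool.h>

/* The layer_count rule from intel_renderbuffer_update_wrapper() above,
 * restated with stand-in parameters; illustrative only. */
static unsigned
compute_layer_count(bool layered, unsigned tex_num_layers,
                    unsigned level_depth, unsigned layer_multiplier)
{
   if (!layered)
      return 1;                  /* non-layered attachment: a single layer */
   if (tex_num_layers > 0)
      return tex_num_layers;     /* explicit layer count on the texture object */
   return level_depth / layer_multiplier;
}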
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 6aa969a4930..e85c3f00c7b 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -272,7 +272,6 @@ intel_miptree_create_layout(struct brw_context *brw,
GLuint height0,
GLuint depth0,
GLuint num_samples,
- enum intel_miptree_tiling_mode requested,
uint32_t layout_flags)
{
struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
@@ -280,7 +279,7 @@ intel_miptree_create_layout(struct brw_context *brw,
return NULL;
DBG("%s target %s format %s level %d..%d slices %d <-- %p\n", __func__,
- _mesa_lookup_enum_by_nr(target),
+ _mesa_enum_to_string(target),
_mesa_get_format_name(format),
first_level, last_level, depth0, mt);
@@ -454,8 +453,10 @@ intel_miptree_create_layout(struct brw_context *brw,
(brw->has_separate_stencil &&
intel_miptree_wants_hiz_buffer(brw, mt)))) {
uint32_t stencil_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
- if (brw->gen == 6)
- stencil_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD;
+ if (brw->gen == 6) {
+ stencil_flags |= MIPTREE_LAYOUT_FORCE_ALL_SLICE_AT_LOD |
+ MIPTREE_LAYOUT_TILING_ANY;
+ }
mt->stencil_mt = intel_miptree_create(brw,
mt->target,
@@ -466,7 +467,6 @@ intel_miptree_create_layout(struct brw_context *brw,
mt->logical_height0,
mt->logical_depth0,
num_samples,
- INTEL_MIPTREE_TILING_ANY,
stencil_flags);
if (!mt->stencil_mt) {
@@ -510,7 +510,7 @@ intel_miptree_create_layout(struct brw_context *brw,
assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
}
- brw_miptree_layout(brw, mt, requested, layout_flags);
+ brw_miptree_layout(brw, mt, layout_flags);
if (mt->disable_aux_buffers)
assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS);
@@ -558,6 +558,53 @@ intel_lower_compressed_format(struct brw_context *brw, mesa_format format)
}
}
+/* This function computes Yf/Ys tiled bo size, alignment and pitch. */
+static unsigned long
+intel_get_yf_ys_bo_size(struct intel_mipmap_tree *mt, unsigned *alignment,
+ unsigned long *pitch)
+{
+ const uint32_t bpp = mt->cpp * 8;
+ const uint32_t aspect_ratio = (bpp == 16 || bpp == 64) ? 2 : 1;
+ uint32_t tile_width, tile_height;
+ unsigned long stride, size, aligned_y;
+
+ assert(mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE);
+
+ switch (bpp) {
+ case 8:
+ tile_height = 64;
+ break;
+ case 16:
+ case 32:
+ tile_height = 32;
+ break;
+ case 64:
+ case 128:
+ tile_height = 16;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YS)
+ tile_height *= 4;
+
+ aligned_y = ALIGN(mt->total_height, tile_height);
+ stride = mt->total_width * mt->cpp;
+ tile_width = tile_height * mt->cpp * aspect_ratio;
+ stride = ALIGN(stride, tile_width);
+ size = stride * aligned_y;
+
+ if (mt->tr_mode == INTEL_MIPTREE_TRMODE_YF) {
+ assert(size % 4096 == 0);
+ *alignment = 4096;
+ } else {
+ assert(size % (64 * 1024) == 0);
+ *alignment = 64 * 1024;
+ }
+ *pitch = stride;
+ return size;
+}
struct intel_mipmap_tree *
intel_miptree_create(struct brw_context *brw,
@@ -569,7 +616,6 @@ intel_miptree_create(struct brw_context *brw,
GLuint height0,
GLuint depth0,
GLuint num_samples,
- enum intel_miptree_tiling_mode requested_tiling,
uint32_t layout_flags)
{
struct intel_mipmap_tree *mt;
@@ -587,7 +633,7 @@ intel_miptree_create(struct brw_context *brw,
mt = intel_miptree_create_layout(brw, target, format,
first_level, last_level, width0,
height0, depth0, num_samples,
- requested_tiling, layout_flags);
+ layout_flags);
/*
* pitch == 0 || height == 0 indicates the null texture
*/
@@ -616,10 +662,22 @@ intel_miptree_create(struct brw_context *brw,
alloc_flags |= BO_ALLOC_FOR_RENDER;
unsigned long pitch;
- mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree", total_width,
- total_height, mt->cpp, &mt->tiling,
- &pitch, alloc_flags);
mt->etc_format = etc_format;
+
+ if (mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
+ unsigned alignment = 0;
+ unsigned long size;
+ size = intel_get_yf_ys_bo_size(mt, &alignment, &pitch);
+ assert(size);
+ mt->bo = drm_intel_bo_alloc_for_render(brw->bufmgr, "miptree",
+ size, alignment);
+ } else {
+ mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree",
+ total_width, total_height, mt->cpp,
+ &mt->tiling, &pitch,
+ alloc_flags);
+ }
+
mt->pitch = pitch;
/* If the BO is too large to fit in the aperture, we need to use the
@@ -698,17 +756,16 @@ intel_miptree_create_for_bo(struct brw_context *brw,
target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
- /* 'requested' parameter of intel_miptree_create_layout() is relevant
- * only for non bo miptree. Tiling for bo is already computed above.
- * So, the tiling requested (INTEL_MIPTREE_TILING_ANY) below is
- * just a place holder and will not make any change to the miptree
- * tiling format.
+ /* The BO already has a tiling format and we shouldn't confuse the lower
+ * layers by making it try to find a tiling format again.
*/
+ assert((layout_flags & MIPTREE_LAYOUT_TILING_ANY) == 0);
+ assert((layout_flags & MIPTREE_LAYOUT_TILING_NONE) == 0);
+
layout_flags |= MIPTREE_LAYOUT_FOR_BO;
mt = intel_miptree_create_layout(brw, target, format,
0, 0,
width, height, depth, 0,
- INTEL_MIPTREE_TILING_ANY,
layout_flags);
if (!mt)
return NULL;
@@ -816,11 +873,13 @@ intel_miptree_create_for_renderbuffer(struct brw_context *brw,
uint32_t depth = 1;
bool ok;
GLenum target = num_samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
+ const uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
+ MIPTREE_LAYOUT_TILING_ANY;
+
mt = intel_miptree_create(brw, target, format, 0, 0,
width, height, depth, num_samples,
- INTEL_MIPTREE_TILING_ANY,
- MIPTREE_LAYOUT_ACCELERATED_UPLOAD);
+ layout_flags);
if (!mt)
goto fail;
@@ -1325,6 +1384,8 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
*
* "The MCS surface must be stored as Tile Y."
*/
+ const uint32_t mcs_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
+ MIPTREE_LAYOUT_TILING_Y;
mt->mcs_mt = intel_miptree_create(brw,
mt->target,
format,
@@ -1334,8 +1395,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
mt->logical_height0,
mt->logical_depth0,
0 /* num_samples */,
- INTEL_MIPTREE_TILING_Y,
- MIPTREE_LAYOUT_ACCELERATED_UPLOAD);
+ mcs_flags);
/* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
*
@@ -1383,9 +1443,11 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
unsigned mcs_height =
ALIGN(mt->logical_height0, height_divisor) / height_divisor;
assert(mt->logical_depth0 == 1);
- uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
- if (brw->gen >= 8)
+ uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
+ MIPTREE_LAYOUT_TILING_Y;
+ if (brw->gen >= 8) {
layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
+ }
mt->mcs_mt = intel_miptree_create(brw,
mt->target,
format,
@@ -1395,7 +1457,6 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
mcs_height,
mt->logical_depth0,
0 /* num_samples */,
- INTEL_MIPTREE_TILING_Y,
layout_flags);
return mt->mcs_mt;
@@ -1456,21 +1517,23 @@ intel_gen7_hiz_buf_create(struct brw_context *brw,
/* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
* adjustments required for Z_Height and Z_Width based on multisampling.
*/
- switch (mt->num_samples) {
- case 0:
- case 1:
- break;
- case 2:
- case 4:
- z_width *= 2;
- z_height *= 2;
- break;
- case 8:
- z_width *= 4;
- z_height *= 2;
- break;
- default:
- unreachable("unsupported sample count");
+ if (brw->gen < 9) {
+ switch (mt->num_samples) {
+ case 0:
+ case 1:
+ break;
+ case 2:
+ case 4:
+ z_width *= 2;
+ z_height *= 2;
+ break;
+ case 8:
+ z_width *= 4;
+ z_height *= 2;
+ break;
+ default:
+ unreachable("unsupported sample count");
+ }
}
const unsigned vertical_align = 8; /* 'j' in the docs */
@@ -1646,6 +1709,7 @@ intel_hiz_miptree_buf_create(struct brw_context *brw,
if (!buf)
return NULL;
+ layout_flags |= MIPTREE_LAYOUT_TILING_ANY;
buf->mt = intel_miptree_create(brw,
mt->target,
mt->format,
@@ -1655,7 +1719,6 @@ intel_hiz_miptree_buf_create(struct brw_context *brw,
mt->logical_height0,
mt->logical_depth0,
mt->num_samples,
- INTEL_MIPTREE_TILING_ANY,
layout_flags);
if (!buf->mt) {
free(buf);
@@ -2086,7 +2149,7 @@ intel_miptree_map_blit(struct brw_context *brw,
map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
0, 0,
map->w, map->h, 1,
- 0, INTEL_MIPTREE_TILING_NONE, 0);
+ 0, MIPTREE_LAYOUT_TILING_NONE);
if (!map->mt) {
fprintf(stderr, "Failed to allocate blit temporary\n");
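The intel_get_yf_ys_bo_size() helper added above computes buffer size, alignment and pitch for Yf/Ys-tiled miptrees. Below is a self-contained restatement of the same arithmetic, a sketch for illustration rather than driver code, with plain parameters replacing the miptree fields and a concrete 32bpp Yf case worked through:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define ALIGN(v, a) (((v) + (a) - 1) & ~((a) - 1))   /* power-of-two align */

/* Same math as intel_get_yf_ys_bo_size() above: cpp, total_width,
 * total_height and is_ys stand in for the miptree fields. */
static unsigned long
yf_ys_bo_size(unsigned cpp, unsigned total_width, unsigned total_height,
              int is_ys, unsigned *alignment, unsigned long *pitch)
{
   const uint32_t bpp = cpp * 8;
   const uint32_t aspect_ratio = (bpp == 16 || bpp == 64) ? 2 : 1;
   uint32_t tile_height;
   unsigned long stride, size, aligned_y;

   switch (bpp) {
   case 8:            tile_height = 64; break;
   case 16: case 32:  tile_height = 32; break;
   case 64: case 128: tile_height = 16; break;
   default:           assert(!"unsupported bpp"); return 0;
   }

   if (is_ys)
      tile_height *= 4;          /* Ys tiles are four times taller than Yf */

   aligned_y = ALIGN(total_height, tile_height);
   stride = ALIGN(total_width * cpp, tile_height * cpp * aspect_ratio);
   size = stride * aligned_y;

   *alignment = is_ys ? 64 * 1024 : 4096;
   *pitch = stride;
   return size;
}

int main(void)
{
   unsigned bo_align;
   unsigned long pitch;
   /* 32bpp (cpp = 4), 1024x768, Yf tiling */
   unsigned long size = yf_ys_bo_size(4, 1024, 768, 0, &bo_align, &pitch);
   printf("size=%lu pitch=%lu alignment=%u\n", size, pitch, bo_align);
   return 0;
}

For this case the tile height is 32 rows and the pitch alignment 128 bytes, so the result is a 4096-byte-aligned buffer of 4096 * 768 = 3 MiB with a 4096-byte pitch, consistent with the YF assertion above that the size is a multiple of 4096.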
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index bde6daa4e2d..790d3129207 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -516,12 +516,6 @@ struct intel_mipmap_tree
GLuint refcount;
};
-enum intel_miptree_tiling_mode {
- INTEL_MIPTREE_TILING_ANY,
- INTEL_MIPTREE_TILING_Y,
- INTEL_MIPTREE_TILING_NONE,
-};
-
void
intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
struct intel_mipmap_tree *mt,
@@ -541,6 +535,11 @@ enum {
MIPTREE_LAYOUT_FOR_BO = 1 << 2,
MIPTREE_LAYOUT_DISABLE_AUX = 1 << 3,
MIPTREE_LAYOUT_FORCE_HALIGN16 = 1 << 4,
+
+ MIPTREE_LAYOUT_TILING_Y = 1 << 5,
+ MIPTREE_LAYOUT_TILING_NONE = 1 << 6,
+ MIPTREE_LAYOUT_TILING_ANY = MIPTREE_LAYOUT_TILING_Y |
+ MIPTREE_LAYOUT_TILING_NONE,
};
struct intel_mipmap_tree *intel_miptree_create(struct brw_context *brw,
@@ -552,7 +551,6 @@ struct intel_mipmap_tree *intel_miptree_create(struct brw_context *brw,
GLuint height0,
GLuint depth0,
GLuint num_samples,
- enum intel_miptree_tiling_mode,
uint32_t flags);
struct intel_mipmap_tree *
@@ -771,7 +769,6 @@ brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw,
void
brw_miptree_layout(struct brw_context *brw,
struct intel_mipmap_tree *mt,
- enum intel_miptree_tiling_mode requested,
uint32_t layout_flags);
void *intel_miptree_map_raw(struct brw_context *brw,
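The header changes above retire enum intel_miptree_tiling_mode in favour of the MIPTREE_LAYOUT_TILING_* bits. As a hedged sketch of how the old enumerants map onto the new flags (values copied from the hunk; the helper itself is hypothetical and only meant to aid porting out-of-tree callers):

/* Old enum (removed above) and new flag values (added above). */
enum old_tiling_mode {           /* was enum intel_miptree_tiling_mode */
   OLD_TILING_ANY,
   OLD_TILING_Y,
   OLD_TILING_NONE,
};

#define MIPTREE_LAYOUT_TILING_Y     (1 << 5)
#define MIPTREE_LAYOUT_TILING_NONE  (1 << 6)
#define MIPTREE_LAYOUT_TILING_ANY   (MIPTREE_LAYOUT_TILING_Y | \
                                     MIPTREE_LAYOUT_TILING_NONE)

static unsigned
tiling_mode_to_layout_flags(enum old_tiling_mode mode)
{
   switch (mode) {
   case OLD_TILING_Y:    return MIPTREE_LAYOUT_TILING_Y;
   case OLD_TILING_NONE: return MIPTREE_LAYOUT_TILING_NONE;
   case OLD_TILING_ANY:
   default:              return MIPTREE_LAYOUT_TILING_ANY;
   }
}

Callers now simply OR the tiling request into the layout_flags argument of intel_miptree_create(), as the converted call sites in the preceding hunks show.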
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index 30380570d62..3fe506e3cf1 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -247,7 +247,7 @@ intelReadPixels(struct gl_context * ctx,
* rendered to via a PBO at any point, so it seems better to just
* flush here unconditionally.
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
return;
}
diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h
index bd14e189da3..b4283da9633 100644
--- a/src/mesa/drivers/dri/i965/intel_reg.h
+++ b/src/mesa/drivers/dri/i965/intel_reg.h
@@ -47,6 +47,9 @@
/* Load a value from memory into a register. Only available on Gen7+. */
#define GEN7_MI_LOAD_REGISTER_MEM (CMD_MI | (0x29 << 23))
# define MI_LOAD_REGISTER_MEM_USE_GGTT (1 << 22)
+/* Haswell RS control */
+#define MI_RS_CONTROL (CMD_MI | (0x6 << 23))
+#define MI_RS_STORE_DATA_IMM (CMD_MI | (0x2b << 23))
/* Manipulate the predicate bit based on some register values. Only on Gen7+ */
#define GEN7_MI_PREDICATE (CMD_MI | (0xC << 23))
@@ -102,6 +105,8 @@
#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22))
+#define XY_FAST_COPY_BLT_CMD (CMD_2D | (0x42 << 22))
+
#define XY_TEXT_IMMEDIATE_BLIT_CMD (CMD_2D | (0x31 << 22))
# define XY_TEXT_BYTE_PACKED (1 << 16)
@@ -111,10 +116,24 @@
#define XY_SRC_TILED (1 << 15)
#define XY_DST_TILED (1 << 11)
+/* BR00 */
+#define XY_FAST_SRC_TILED_64K (3 << 20)
+#define XY_FAST_SRC_TILED_Y (2 << 20)
+#define XY_FAST_SRC_TILED_X (1 << 20)
+
+#define XY_FAST_DST_TILED_64K (3 << 13)
+#define XY_FAST_DST_TILED_Y (2 << 13)
+#define XY_FAST_DST_TILED_X (1 << 13)
+
/* BR13 */
#define BR13_8 (0x0 << 24)
#define BR13_565 (0x1 << 24)
#define BR13_8888 (0x3 << 24)
+#define BR13_16161616 (0x4 << 24)
+#define BR13_32323232 (0x5 << 24)
+
+#define XY_FAST_SRC_TRMODE_YF (1 << 31)
+#define XY_FAST_DST_TRMODE_YF (1 << 30)
/* Pipeline Statistics Counter Registers */
#define IA_VERTICES_COUNT 0x2310
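The intel_reg.h additions above introduce the XY_FAST_COPY_BLT command together with its BR00 tiling fields and the wider BR13 colour-depth values. The sketch below only combines those defines for a Y-tiled, 32bpp copy; CMD_2D is an assumption (the usual 2D client bits), and the rest of the packet (length, pitches, coordinates, addresses) is outside this hunk and deliberately omitted:

#include <stdint.h>
#include <stdio.h>

#define CMD_2D                (0x2 << 29)    /* assumed, as for other XY_* blits */
#define XY_FAST_COPY_BLT_CMD  (CMD_2D | (0x42 << 22))
#define XY_FAST_SRC_TILED_Y   (2 << 20)      /* BR00 */
#define XY_FAST_DST_TILED_Y   (2 << 13)      /* BR00 */
#define BR13_8888             (0x3 << 24)    /* BR13, 32bpp */

int main(void)
{
   uint32_t br00 = XY_FAST_COPY_BLT_CMD |
                   XY_FAST_SRC_TILED_Y | XY_FAST_DST_TILED_Y;
   uint32_t br13 = BR13_8888;
   printf("BR00=0x%08x BR13=0x%08x\n", br00, br13);
   return 0;
}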
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index de14696bd76..a164c6985dc 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -229,6 +229,12 @@ static struct intel_image_format intel_image_formats[] = {
{ __DRI_IMAGE_FOURCC_RGB565, __DRI_IMAGE_COMPONENTS_RGB, 1,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_RGB565, 2 } } },
+ { __DRI_IMAGE_FOURCC_R8, __DRI_IMAGE_COMPONENTS_R, 1,
+ { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, } },
+
+ { __DRI_IMAGE_FOURCC_GR88, __DRI_IMAGE_COMPONENTS_RG, 1,
+ { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 }, } },
+
{ __DRI_IMAGE_FOURCC_YUV410, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 1, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 },
@@ -1123,6 +1129,50 @@ intel_detect_swizzling(struct intel_screen *screen)
return true;
}
+static int
+intel_detect_timestamp(struct intel_screen *screen)
+{
+ uint64_t dummy = 0, last = 0;
+ int upper, lower, loops;
+
+ /* On 64bit systems, some old kernels trigger a hw bug resulting in the
+ * TIMESTAMP register being shifted and the low 32bits always zero.
+ *
+ * More recent kernels offer an interface to read the full 36bits
+ * everywhere.
+ */
+ if (drm_intel_reg_read(screen->bufmgr, TIMESTAMP | 1, &dummy) == 0)
+ return 3;
+
+ /* Determine if we have a 32bit or 64bit kernel by inspecting the
+ * upper 32bits for a rapidly changing timestamp.
+ */
+ if (drm_intel_reg_read(screen->bufmgr, TIMESTAMP, &last))
+ return 0;
+
+ upper = lower = 0;
+ for (loops = 0; loops < 10; loops++) {
+ /* The TIMESTAMP should change every 80ns, so several round trips
+ * through the kernel should be enough to advance it.
+ */
+ if (drm_intel_reg_read(screen->bufmgr, TIMESTAMP, &dummy))
+ return 0;
+
+ upper += (dummy >> 32) != (last >> 32);
+ if (upper > 1) /* beware 32bit counter overflow */
+ return 2; /* upper dword holds the low 32bits of the timestamp */
+
+ lower += (dummy & 0xffffffff) != (last & 0xffffffff);
+ if (lower > 1)
+ return 1; /* timestamp is unshifted */
+
+ last = dummy;
+ }
+
+ /* No advancement? No timestamp! */
+ return 0;
+}
+
/**
* Return array of MSAA modes supported by the hardware. The array is
* zero-terminated and sorted in decreasing order.
@@ -1309,11 +1359,6 @@ set_max_gl_versions(struct intel_screen *screen)
}
}
-/* drop when libdrm 2.4.61 is released */
-#ifndef I915_PARAM_REVISION
-#define I915_PARAM_REVISION 32
-#endif
-
static int
brw_get_revision(int fd)
{
@@ -1332,6 +1377,11 @@ brw_get_revision(int fd)
return revision;
}
+/* Drop when RS headers get pulled to libdrm */
+#ifndef I915_PARAM_HAS_RESOURCE_STREAMER
+#define I915_PARAM_HAS_RESOURCE_STREAMER 36
+#endif
+
/**
* This is the driver specific part of the createNewScreen entry point.
* Called when using DRI2.
@@ -1378,6 +1428,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
intelScreen->hw_must_use_separate_stencil = intelScreen->devinfo->gen >= 7;
intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen);
+ intelScreen->hw_has_timestamp = intel_detect_timestamp(intelScreen);
const char *force_msaa = getenv("INTEL_FORCE_MSAA");
if (force_msaa) {
@@ -1423,6 +1474,15 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
intelScreen->compiler = brw_compiler_create(intelScreen,
intelScreen->devinfo);
+ if (intelScreen->devinfo->has_resource_streamer) {
+ int val = -1;
+ getparam.param = I915_PARAM_HAS_RESOURCE_STREAMER;
+ getparam.value = &val;
+
+ drmIoctl(psp->fd, DRM_IOCTL_I915_GETPARAM, &getparam);
+ intelScreen->has_resource_streamer = val > 0;
+ }
+
return (const __DRIconfig**) intel_screen_make_configs(psp);
}
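intel_detect_timestamp() above classifies the kernel's TIMESTAMP register behaviour into the codes 0-3 described in its comments. A small, self-contained sketch of how a consumer might interpret a raw 64-bit readback according to that code (the function and its names are hypothetical; how `raw` is obtained is omitted):

#include <stdbool.h>
#include <stdint.h>

static bool
decode_timestamp(int hw_has_timestamp, uint64_t raw, uint64_t *ts)
{
   switch (hw_has_timestamp) {
   case 3:  /* kernel can return the full 36-bit counter */
      *ts = raw;
      return true;
   case 2:  /* buggy 64-bit path: upper dword holds the low 32 bits */
      *ts = raw >> 32;
      return true;
   case 1:  /* unshifted: the low dword is the counter */
      *ts = raw & 0xffffffffu;
      return true;
   default: /* 0: no usable timestamp, so ARB_timer_query stays off */
      return false;
   }
}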
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h
index 742b3d30eee..fd5143eecba 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -52,6 +52,13 @@ struct intel_screen
bool hw_has_swizzling;
+ int hw_has_timestamp;
+
+ /**
+ * Does the kernel support resource streamer?
+ */
+ bool has_resource_streamer;
+
/**
* Does the kernel support context reset notifications?
*/
diff --git a/src/mesa/drivers/dri/i965/intel_syncobj.c b/src/mesa/drivers/dri/i965/intel_syncobj.c
index 3cfa7e593ab..c44c4beceef 100644
--- a/src/mesa/drivers/dri/i965/intel_syncobj.c
+++ b/src/mesa/drivers/dri/i965/intel_syncobj.c
@@ -69,7 +69,7 @@ brw_fence_insert(struct brw_context *brw, struct brw_fence *fence)
assert(!fence->batch_bo);
assert(!fence->signalled);
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
fence->batch_bo = brw->batch.bo;
drm_intel_bo_reference(fence->batch_bo);
intel_batchbuffer_flush(brw);
diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c
index b0181ad1d75..e16b0def0d4 100644
--- a/src/mesa/drivers/dri/i965/intel_tex.c
+++ b/src/mesa/drivers/dri/i965/intel_tex.c
@@ -145,7 +145,7 @@ intel_alloc_texture_storage(struct gl_context *ctx,
0, levels - 1,
width, height, depth,
num_samples,
- INTEL_MIPTREE_TILING_ANY, 0);
+ MIPTREE_LAYOUT_TILING_ANY);
if (intel_texobj->mt == NULL) {
return false;
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
index ebe84b664d4..93a8cdee0cb 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -80,8 +80,7 @@ intel_miptree_create_for_teximage(struct brw_context *brw,
height,
depth,
intelImage->base.Base.NumSamples,
- INTEL_MIPTREE_TILING_ANY,
- layout_flags);
+ layout_flags | MIPTREE_LAYOUT_TILING_ANY);
}
static void
@@ -98,8 +97,8 @@ intelTexImage(struct gl_context * ctx,
DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n",
__func__, _mesa_get_format_name(texImage->TexFormat),
- _mesa_lookup_enum_by_nr(texImage->TexObject->Target),
- _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type),
+ _mesa_enum_to_string(texImage->TexObject->Target),
+ _mesa_enum_to_string(format), _mesa_enum_to_string(type),
texImage->Level, texImage->Width, texImage->Height, texImage->Depth);
/* Allocate storage for texture data. */
@@ -472,39 +471,44 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
}
static void
-intel_get_tex_image(struct gl_context *ctx,
- GLenum format, GLenum type, GLvoid *pixels,
- struct gl_texture_image *texImage) {
+intel_get_tex_sub_image(struct gl_context *ctx,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLint depth,
+ GLenum format, GLenum type, GLvoid *pixels,
+ struct gl_texture_image *texImage)
+{
struct brw_context *brw = brw_context(ctx);
bool ok;
DBG("%s\n", __func__);
if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
- if (_mesa_meta_pbo_GetTexSubImage(ctx, 3, texImage, 0, 0, 0,
- texImage->Width, texImage->Height,
- texImage->Depth, format, type,
+ if (_mesa_meta_pbo_GetTexSubImage(ctx, 3, texImage,
+ xoffset, yoffset, zoffset,
+ width, height, depth, format, type,
pixels, &ctx->Pack)) {
/* Flush to guarantee coherency between the render cache and other
* caches the PBO could potentially be bound to after this point.
* See the related comment in intelReadPixels() for a more detailed
* explanation.
*/
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
return;
}
perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
}
- ok = intel_gettexsubimage_tiled_memcpy(ctx, texImage, 0, 0,
- texImage->Width, texImage->Height,
+ ok = intel_gettexsubimage_tiled_memcpy(ctx, texImage, xoffset, yoffset,
+ width, height,
format, type, pixels, &ctx->Pack);
if(ok)
return;
- _mesa_meta_GetTexImage(ctx, format, type, pixels, texImage);
+ _mesa_meta_GetTexSubImage(ctx, xoffset, yoffset, zoffset,
+ width, height, depth,
+ format, type, pixels, texImage);
DBG("%s - DONE\n", __func__);
}
@@ -515,5 +519,5 @@ intelInitTextureImageFuncs(struct dd_function_table *functions)
functions->TexImage = intelTexImage;
functions->EGLImageTargetTexture2D = intel_image_target_texture_2d;
functions->BindRenderbufferTexImage = intel_bind_renderbuffer_tex_image;
- functions->GetTexImage = intel_get_tex_image;
+ functions->GetTexSubImage = intel_get_tex_sub_image;
}
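The hook change above switches the driver from GetTexImage to GetTexSubImage. A hedged sketch of the relationship: a whole-image download is just the sub-image path with zero offsets and the full mip dimensions, exactly the arguments the old intel_get_tex_image() wrapper passed. The struct and function-pointer type below are simplified stand-ins, not Mesa's real types:

/* Whole-image download expressed through a sub-image hook; the types
 * are illustrative stand-ins for gl_texture_image and the dd hook. */
struct tex_image { int Width, Height, Depth; };

typedef void (*get_tex_sub_image_fn)(int xoffset, int yoffset, int zoffset,
                                     int width, int height, int depth,
                                     void *pixels, struct tex_image *img);

static void
get_whole_image(get_tex_sub_image_fn get_sub, void *pixels,
                struct tex_image *img)
{
   get_sub(0, 0, 0, img->Width, img->Height, img->Depth, pixels, img);
}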
diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
index 7507f7669a0..31e511f0b7b 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -206,8 +206,8 @@ intelTexSubImage(struct gl_context * ctx,
DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n",
__func__, _mesa_get_format_name(texImage->TexFormat),
- _mesa_lookup_enum_by_nr(texImage->TexObject->Target),
- _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type),
+ _mesa_enum_to_string(texImage->TexObject->Target),
+ _mesa_enum_to_string(format), _mesa_enum_to_string(type),
texImage->Level, texImage->Width, texImage->Height, texImage->Depth);
ok = _mesa_meta_pbo_TexSubImage(ctx, dims, texImage,
diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c
index 4991c2997ef..d3fb252b5d5 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c
@@ -136,6 +136,8 @@ intel_finalize_mipmap_tree(struct brw_context *brw, GLuint unit)
_mesa_get_format_name(firstImage->base.Base.TexFormat),
width, height, depth, validate_last_level + 1);
+ const uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
+ MIPTREE_LAYOUT_TILING_ANY;
intelObj->mt = intel_miptree_create(brw,
intelObj->base.Target,
firstImage->base.Base.TexFormat,
@@ -145,8 +147,7 @@ intel_finalize_mipmap_tree(struct brw_context *brw, GLuint unit)
height,
depth,
0 /* num_samples */,
- INTEL_MIPTREE_TILING_ANY,
- MIPTREE_LAYOUT_ACCELERATED_UPLOAD);
+ layout_flags);
if (!intelObj->mt)
return false;
}
diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
index 8010fb4f610..ba67bc59e19 100644
--- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
@@ -283,10 +283,10 @@ TEST_F(cmod_propagation_test, intervening_dest_write)
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg src2 = v->vgrf(glsl_type::vec2_type);
fs_reg zero(0.0f);
- bld.ADD(offset(dest, 2), src0, src1);
+ bld.ADD(offset(dest, bld, 2), src0, src1);
bld.emit(SHADER_OPCODE_TEX, dest, src2)
->regs_written = 4;
- bld.CMP(bld.null_reg_f(), offset(dest, 2), zero, BRW_CONDITIONAL_GE);
+ bld.CMP(bld.null_reg_f(), offset(dest, bld, 2), zero, BRW_CONDITIONAL_GE);
/* = Before =
*
diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
index 3ef0cb319eb..1caa0b50ec6 100644
--- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp
@@ -367,10 +367,10 @@ TEST_F(saturate_propagation_test, intervening_dest_write)
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg src2 = v->vgrf(glsl_type::vec2_type);
- bld.ADD(offset(dst0, 2), src0, src1);
+ bld.ADD(offset(dst0, bld, 2), src0, src1);
bld.emit(SHADER_OPCODE_TEX, dst0, src2)
->regs_written = 4;
- set_saturate(true, bld.MOV(dst1, offset(dst0, 2)));
+ set_saturate(true, bld.MOV(dst1, offset(dst0, bld, 2)));
/* = Before =
*
diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
index 84e43fa75cd..fbd9fa8f19b 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
@@ -53,7 +53,8 @@ public:
}
protected:
- virtual dst_reg *make_reg_for_system_value(ir_variable *ir)
+ virtual dst_reg *make_reg_for_system_value(int location,
+ const glsl_type *type)
{
unreachable("Not reached");
}
diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
index de2afd39cfe..a3055fcc851 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
@@ -56,7 +56,8 @@ public:
}
protected:
- virtual dst_reg *make_reg_for_system_value(ir_variable *ir)
+ virtual dst_reg *make_reg_for_system_value(int location,
+ const glsl_type *type)
{
unreachable("Not reached");
}
diff --git a/src/mesa/drivers/dri/nouveau/Makefile.am b/src/mesa/drivers/dri/nouveau/Makefile.am
index 61af95a7dbc..01e34a8e3c3 100644
--- a/src/mesa/drivers/dri/nouveau/Makefile.am
+++ b/src/mesa/drivers/dri/nouveau/Makefile.am
@@ -38,8 +38,8 @@ AM_CFLAGS = \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
$(DEFINES) \
$(VISIBILITY_CFLAGS) \
- $(NOUVEAU_CFLAGS)
+ $(NVVIEUX_CFLAGS)
noinst_LTLIBRARIES = libnouveau_dri.la
libnouveau_dri_la_SOURCES = $(NOUVEAU_C_FILES)
-libnouveau_dri_la_LIBADD = $(NOUVEAU_LIBS)
+libnouveau_dri_la_LIBADD = $(NVVIEUX_LIBS)
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_swtnl_t.c b/src/mesa/drivers/dri/nouveau/nouveau_swtnl_t.c
index 0753c3a0019..755de2c4b68 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_swtnl_t.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_swtnl_t.c
@@ -338,7 +338,6 @@ TAG(swtnl_init)(struct gl_context *ctx)
NUM_VERTEX_ATTRS * 4 * sizeof(GLfloat));
_tnl_need_projected_coords(ctx, GL_FALSE);
_tnl_allow_vertex_fog(ctx, GL_FALSE);
- _tnl_wakeup(ctx);
swtnl_alloc_vertices(ctx);
}
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c
index c85acec1268..a3fbad07e66 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c
@@ -223,6 +223,7 @@ TAG(vbo_render_prims)(struct gl_context *ctx,
GLboolean index_bounds_valid,
GLuint min_index, GLuint max_index,
struct gl_transform_feedback_object *tfb_vertcount,
+ unsigned stream,
struct gl_buffer_object *indirect);
static GLboolean
@@ -455,6 +456,7 @@ TAG(vbo_render_prims)(struct gl_context *ctx,
GLboolean index_bounds_valid,
GLuint min_index, GLuint max_index,
struct gl_transform_feedback_object *tfb_vertcount,
+ unsigned stream,
struct gl_buffer_object *indirect)
{
struct nouveau_render_state *render = to_render_state(ctx);
@@ -492,6 +494,7 @@ TAG(vbo_check_render_prims)(struct gl_context *ctx,
GLboolean index_bounds_valid,
GLuint min_index, GLuint max_index,
struct gl_transform_feedback_object *tfb_vertcount,
+ unsigned stream,
struct gl_buffer_object *indirect)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
@@ -501,12 +504,12 @@ TAG(vbo_check_render_prims)(struct gl_context *ctx,
if (nctx->fallback == HWTNL)
TAG(vbo_render_prims)(ctx, prims, nr_prims, ib,
index_bounds_valid, min_index, max_index,
- tfb_vertcount, indirect);
+ tfb_vertcount, stream, indirect);
if (nctx->fallback == SWTNL)
_tnl_draw_prims(ctx, prims, nr_prims, ib,
index_bounds_valid, min_index, max_index,
- tfb_vertcount, indirect);
+ tfb_vertcount, stream, indirect);
}
void
diff --git a/src/mesa/drivers/dri/nouveau/nv04_render.c b/src/mesa/drivers/dri/nouveau/nv04_render.c
index 30e9f9aad96..3b7f7829044 100644
--- a/src/mesa/drivers/dri/nouveau/nv04_render.c
+++ b/src/mesa/drivers/dri/nouveau/nv04_render.c
@@ -285,7 +285,6 @@ nv04_render_init(struct gl_context *ctx)
_tnl_init_vertices(ctx, tnl->vb.Size,
NUM_VERTEX_ATTRS * 4 * sizeof(GLfloat));
_tnl_allow_pixel_fog(ctx, GL_FALSE);
- _tnl_wakeup(ctx);
}
void
diff --git a/src/mesa/drivers/dri/r200/r200_blit.c b/src/mesa/drivers/dri/r200/r200_blit.c
index 3adc69423cd..d68a53e67f7 100644
--- a/src/mesa/drivers/dri/r200/r200_blit.c
+++ b/src/mesa/drivers/dri/r200/r200_blit.c
@@ -28,6 +28,7 @@
#include "radeon_common.h"
#include "r200_context.h"
#include "r200_blit.h"
+#include "r200_tex.h"
static inline uint32_t cmdpacket0(struct radeon_screen *rscrn,
int reg, int count)
@@ -40,22 +41,42 @@ static inline uint32_t cmdpacket0(struct radeon_screen *rscrn,
/* common formats supported as both textures and render targets */
unsigned r200_check_blit(mesa_format mesa_format, uint32_t dst_pitch)
{
- /* XXX others? BE/LE? */
- switch (mesa_format) {
- case MESA_FORMAT_B8G8R8A8_UNORM:
- case MESA_FORMAT_B8G8R8X8_UNORM:
- case MESA_FORMAT_B5G6R5_UNORM:
- case MESA_FORMAT_B4G4R4A4_UNORM:
- case MESA_FORMAT_B5G5R5A1_UNORM:
- case MESA_FORMAT_A_UNORM8:
- case MESA_FORMAT_L_UNORM8:
- case MESA_FORMAT_I_UNORM8:
- /* swizzled */
- case MESA_FORMAT_A8B8G8R8_UNORM:
- case MESA_FORMAT_R8G8B8A8_UNORM:
+ /* XXX others? */
+ if (_mesa_little_endian()) {
+ switch (mesa_format) {
+ case MESA_FORMAT_B8G8R8A8_UNORM:
+ case MESA_FORMAT_B8G8R8X8_UNORM:
+ case MESA_FORMAT_B5G6R5_UNORM:
+ case MESA_FORMAT_B4G4R4A4_UNORM:
+ case MESA_FORMAT_B5G5R5A1_UNORM:
+ case MESA_FORMAT_A_UNORM8:
+ case MESA_FORMAT_L_UNORM8:
+ case MESA_FORMAT_I_UNORM8:
+ /* swizzled - probably can't happen with the disabled Choose8888TexFormat code */
+ case MESA_FORMAT_A8B8G8R8_UNORM:
+ case MESA_FORMAT_R8G8B8A8_UNORM:
break;
- default:
+ default:
return 0;
+ }
+ }
+ else {
+ switch (mesa_format) {
+ case MESA_FORMAT_A8R8G8B8_UNORM:
+ case MESA_FORMAT_X8R8G8B8_UNORM:
+ case MESA_FORMAT_R5G6B5_UNORM:
+ case MESA_FORMAT_A4R4G4B4_UNORM:
+ case MESA_FORMAT_A1R5G5B5_UNORM:
+ case MESA_FORMAT_A_UNORM8:
+ case MESA_FORMAT_L_UNORM8:
+ case MESA_FORMAT_I_UNORM8:
+ /* swizzled - probably can't happen with the disabled Choose8888TexFormat code */
+ case MESA_FORMAT_R8G8B8A8_UNORM:
+ case MESA_FORMAT_A8B8G8R8_UNORM:
+ break;
+ default:
+ return 0;
+ }
}
/* Rendering to small buffer doesn't work.
@@ -112,41 +133,11 @@ static void inline emit_tx_setup(struct r200_context *r200,
assert(height <= 2048);
assert(offset % 32 == 0);
- /* XXX others? BE/LE? */
- switch (src_mesa_format) {
- case MESA_FORMAT_B8G8R8A8_UNORM:
- txformat |= R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP;
- break;
- case MESA_FORMAT_A8B8G8R8_UNORM:
- txformat |= R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP;
- break;
- case MESA_FORMAT_R8G8B8A8_UNORM:
- txformat |= R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP;
- break;
- case MESA_FORMAT_B8G8R8X8_UNORM:
- txformat |= R200_TXFORMAT_ARGB8888;
- break;
- case MESA_FORMAT_B5G6R5_UNORM:
- txformat |= R200_TXFORMAT_RGB565;
- break;
- case MESA_FORMAT_B4G4R4A4_UNORM:
- txformat |= R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP;
- break;
- case MESA_FORMAT_B5G5R5A1_UNORM:
- txformat |= R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP;
- break;
- case MESA_FORMAT_A_UNORM8:
- case MESA_FORMAT_I_UNORM8:
- txformat |= R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP;
- break;
- case MESA_FORMAT_L_UNORM8:
- txformat |= R200_TXFORMAT_I8;
- break;
- case MESA_FORMAT_L8A8_UNORM:
- txformat |= R200_TXFORMAT_AI88 | R200_TXFORMAT_ALPHA_IN_MAP;
- break;
- default:
- break;
+ if (_mesa_little_endian()) {
+ txformat |= tx_table_le[src_mesa_format].format;
+ }
+ else {
+ txformat |= tx_table_be[src_mesa_format].format;
}
if (bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
@@ -155,11 +146,19 @@ static void inline emit_tx_setup(struct r200_context *r200,
offset |= R200_TXO_MICRO_TILE;
switch (dst_mesa_format) {
+ /* le */
case MESA_FORMAT_B8G8R8A8_UNORM:
case MESA_FORMAT_B8G8R8X8_UNORM:
case MESA_FORMAT_B5G6R5_UNORM:
case MESA_FORMAT_B4G4R4A4_UNORM:
case MESA_FORMAT_B5G5R5A1_UNORM:
+ /* be */
+ case MESA_FORMAT_A8R8G8B8_UNORM:
+ case MESA_FORMAT_X8R8G8B8_UNORM:
+ case MESA_FORMAT_R5G6B5_UNORM:
+ case MESA_FORMAT_A4R4G4B4_UNORM:
+ case MESA_FORMAT_A1R5G5B5_UNORM:
+ /* little and big */
case MESA_FORMAT_A_UNORM8:
case MESA_FORMAT_L_UNORM8:
case MESA_FORMAT_I_UNORM8:
@@ -183,6 +182,9 @@ static void inline emit_tx_setup(struct r200_context *r200,
END_BATCH();
break;
case MESA_FORMAT_A8B8G8R8_UNORM:
+ case MESA_FORMAT_R8G8B8A8_UNORM:
+ if ((dst_mesa_format == MESA_FORMAT_A8B8G8R8_UNORM && _mesa_little_endian()) ||
+ (dst_mesa_format == MESA_FORMAT_R8G8B8A8_UNORM && !_mesa_little_endian())) {
BEGIN_BATCH(10);
OUT_BATCH_REGVAL(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE |
RADEON_TEX_BLEND_0_ENABLE));
@@ -190,6 +192,8 @@ static void inline emit_tx_setup(struct r200_context *r200,
R200_TXC_ARG_B_ZERO |
R200_TXC_ARG_C_R0_COLOR |
R200_TXC_OP_MADD));
+ /* XXX I don't think this can work. This is output rotation, and alpha contains
+ * red, not alpha (we'd write gbrr). */
OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, (R200_TXC_CLAMP_0_1 |
R200_TXC_OUTPUT_ROTATE_GBA |
R200_TXC_OUTPUT_REG_R0));
@@ -201,8 +205,16 @@ static void inline emit_tx_setup(struct r200_context *r200,
(R200_TXA_REPL_RED << R200_TXA_REPL_ARG_C_SHIFT) |
R200_TXA_OUTPUT_REG_R0));
END_BATCH();
- break;
- case MESA_FORMAT_R8G8B8A8_UNORM:
+ }
+ else {
+ /* XXX pretty sure could do this with just 2 instead of 4 instructions.
+ * Like so:
+ * 1st: use RGA output rotation, rgb arg replicate b, a arg r, write mask rb.
+ * That's just one instruction in fact but I'm not entirely sure it works
+ * if some of those incoming r0 components are never written (due to mask)
+ * in the shader itself to r0.
+ * In any case this case (and the one above) may not be reachable with
+ * disabled Choose8888TexFormat code. */
BEGIN_BATCH(34);
OUT_BATCH_REGVAL(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE |
RADEON_TEX_BLEND_0_ENABLE |
@@ -272,7 +284,8 @@ static void inline emit_tx_setup(struct r200_context *r200,
OUT_BATCH_REGVAL(R200_PP_TXABLEND2_3, (R200_TXA_CLAMP_0_1 |
R200_TXA_OUTPUT_REG_R0));
END_BATCH();
- break;
+ }
+ break;
}
BEGIN_BATCH(18);
@@ -306,21 +319,27 @@ static inline void emit_cb_setup(struct r200_context *r200,
uint32_t dst_format = 0;
BATCH_LOCALS(&r200->radeon);
- /* XXX others? BE/LE? */
switch (mesa_format) {
+ /* The first of each pair is for little, the second for big endian */
case MESA_FORMAT_B8G8R8A8_UNORM:
+ case MESA_FORMAT_A8R8G8B8_UNORM:
case MESA_FORMAT_B8G8R8X8_UNORM:
+ case MESA_FORMAT_X8R8G8B8_UNORM:
+ /* These two are valid both for little and big endian (swizzled) */
case MESA_FORMAT_A8B8G8R8_UNORM:
case MESA_FORMAT_R8G8B8A8_UNORM:
dst_format = RADEON_COLOR_FORMAT_ARGB8888;
break;
case MESA_FORMAT_B5G6R5_UNORM:
+ case MESA_FORMAT_R5G6B5_UNORM:
dst_format = RADEON_COLOR_FORMAT_RGB565;
break;
case MESA_FORMAT_B4G4R4A4_UNORM:
+ case MESA_FORMAT_A4R4G4B4_UNORM:
dst_format = RADEON_COLOR_FORMAT_ARGB4444;
break;
case MESA_FORMAT_B5G5R5A1_UNORM:
+ case MESA_FORMAT_A1R5G5B5_UNORM:
dst_format = RADEON_COLOR_FORMAT_ARGB1555;
break;
case MESA_FORMAT_A_UNORM8:
@@ -547,5 +566,21 @@ unsigned r200_blit(struct gl_context *ctx,
radeonFlush(ctx);
+ /* We submitted those packets outside our state atom mechanism. Thus
+ * make sure the atoms are resubmitted the next time. */
+ r200->hw.cst.dirty = GL_TRUE;
+ r200->hw.ctx.dirty = GL_TRUE;
+ r200->hw.vap.dirty = GL_TRUE;
+ r200->hw.msk.dirty = GL_TRUE;
+ r200->hw.pix[0].dirty = GL_TRUE;
+ r200->hw.pix[1].dirty = GL_TRUE;
+ r200->hw.pix[2].dirty = GL_TRUE;
+ r200->hw.pix[3].dirty = GL_TRUE;
+ r200->hw.sci.dirty = GL_TRUE;
+ r200->hw.set.dirty = GL_TRUE;
+ r200->hw.tex[0].dirty = GL_TRUE;
+ r200->hw.vte.dirty = GL_TRUE;
+ r200->hw.vtx.dirty = GL_TRUE;
+
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c
index fb15082114f..2a42ab3f4c8 100644
--- a/src/mesa/drivers/dri/r200/r200_context.c
+++ b/src/mesa/drivers/dri/r200/r200_context.c
@@ -225,18 +225,9 @@ GLboolean r200CreateContext( gl_api api,
rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
"def_max_anisotropy");
- if ( sPriv->drm_version.major == 1
- && driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
- if ( sPriv->drm_version.minor < 13 )
- fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
- "disabling.\n", sPriv->drm_version.minor );
- else
- rmesa->using_hyperz = GL_TRUE;
- }
+ if (driQueryOptionb( &rmesa->radeon.optionCache, "hyperz"))
+ rmesa->using_hyperz = GL_TRUE;
- if ( sPriv->drm_version.minor >= 15 )
- rmesa->texmicrotile = GL_TRUE;
-
/* Init default driver functions then plug in our R200-specific functions
* (the texture functions are especially important)
*/
diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h
index eb498f7406b..c02a4f399ee 100644
--- a/src/mesa/drivers/dri/r200/r200_context.h
+++ b/src/mesa/drivers/dri/r200/r200_context.h
@@ -109,7 +109,6 @@ struct r200_texture_state {
#define CTX_RB3D_COLOROFFSET 11
#define CTX_CMD_2 12 /* why */
#define CTX_RB3D_COLORPITCH 13 /* why */
-#define CTX_STATE_SIZE_OLDDRM 14
#define CTX_CMD_3 14
#define CTX_RB3D_BLENDCOLOR 15
#define CTX_RB3D_ABLENDCNTL 16
@@ -167,9 +166,6 @@ struct r200_texture_state {
#define TEX_PP_TXSIZE 4 /*2c0c*/
#define TEX_PP_TXPITCH 5 /*2c10*/
#define TEX_PP_BORDER_COLOR 6 /*2c14*/
-#define TEX_CMD_1_OLDDRM 7
-#define TEX_PP_TXOFFSET_OLDDRM 8 /*2d00 */
-#define TEX_STATE_SIZE_OLDDRM 9
#define TEX_PP_CUBIC_FACES 7
#define TEX_PP_TXMULTI_CTL 8
#define TEX_CMD_1_NEWDRM 9
@@ -618,7 +614,6 @@ struct r200_context {
struct r200_swtcl_info swtcl;
GLboolean using_hyperz;
- GLboolean texmicrotile;
struct ati_fragment_shader *afs_loaded;
};
diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c
index 6fe70b5c9d0..cca176d7f9b 100644
--- a/src/mesa/drivers/dri/r200/r200_state.c
+++ b/src/mesa/drivers/dri/r200/r200_state.c
@@ -1546,7 +1546,7 @@ void r200UpdateWindow( struct gl_context *ctx )
GLfloat xoffset = 0;
GLfloat yoffset = dPriv ? (GLfloat) dPriv->h : 0;
const GLboolean render_to_fbo = (ctx->DrawBuffer ? _mesa_is_user_fbo(ctx->DrawBuffer) : 0);
- double scale[3], translate[3];
+ float scale[3], translate[3];
GLfloat y_scale, y_bias;
if (render_to_fbo) {
@@ -1669,7 +1669,7 @@ static void r200Enable( struct gl_context *ctx, GLenum cap, GLboolean state )
if ( R200_DEBUG & RADEON_STATE )
fprintf( stderr, "%s( %s = %s )\n", __func__,
- _mesa_lookup_enum_by_nr( cap ),
+ _mesa_enum_to_string( cap ),
state ? "GL_TRUE" : "GL_FALSE" );
switch ( cap ) {
diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c
index d9d1a0ed227..ad64f788b9f 100644
--- a/src/mesa/drivers/dri/r200/r200_state_init.c
+++ b/src/mesa/drivers/dri/r200/r200_state_init.c
@@ -254,7 +254,7 @@ CHECK( never, GL_FALSE, 0 )
CHECK( tex_any, ctx->Texture._MaxEnabledTexImageUnit != -1, 0 )
CHECK( tf, (ctx->Texture._MaxEnabledTexImageUnit != -1 && !ctx->ATIFragmentShader._Enabled), 0 );
CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled, 0 )
- CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled), 0 )
+CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled), 0 )
CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)), 0 )
CHECK( afs, ctx->ATIFragmentShader._Enabled, 0 )
CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT, 3 + 3*5 - CUBE_STATE_SIZE )
@@ -453,12 +453,15 @@ static void ctx_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom)
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
else switch (rrb->base.Base.Format) {
case MESA_FORMAT_B5G6R5_UNORM:
+ case MESA_FORMAT_R5G6B5_UNORM:
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
break;
case MESA_FORMAT_B4G4R4A4_UNORM:
+ case MESA_FORMAT_A4R4G4B4_UNORM:
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444;
break;
case MESA_FORMAT_B5G5R5A1_UNORM:
+ case MESA_FORMAT_A1R5G5B5_UNORM:
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555;
break;
default:
diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c
index 083a1840d9e..feee0b2ba3f 100644
--- a/src/mesa/drivers/dri/r200/r200_tex.c
+++ b/src/mesa/drivers/dri/r200/r200_tex.c
@@ -68,9 +68,9 @@ static void r200SetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap, GLenu
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s(tex %p) sw %s, tw %s, rw %s\n",
__func__, t,
- _mesa_lookup_enum_by_nr(swrap),
- _mesa_lookup_enum_by_nr(twrap),
- _mesa_lookup_enum_by_nr(rwrap));
+ _mesa_enum_to_string(swrap),
+ _mesa_enum_to_string(twrap),
+ _mesa_enum_to_string(rwrap));
t->pp_txfilter &= ~(R200_CLAMP_S_MASK | R200_CLAMP_T_MASK | R200_BORDER_MODE_D3D);
@@ -225,8 +225,8 @@ static void r200SetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf )
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s(tex %p) minf %s, maxf %s, anisotropy %d.\n",
__func__, t,
- _mesa_lookup_enum_by_nr(minf),
- _mesa_lookup_enum_by_nr(magf),
+ _mesa_enum_to_string(minf),
+ _mesa_enum_to_string(magf),
anisotropy);
if ( anisotropy == R200_MAX_ANISO_1_TO_1 ) {
@@ -302,7 +302,7 @@ static void r200TexEnv( struct gl_context *ctx, GLenum target,
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_VERBOSE, "%s( %s )\n",
- __func__, _mesa_lookup_enum_by_nr( pname ) );
+ __func__, _mesa_enum_to_string( pname ) );
/* This is incorrect: Need to maintain this data for each of
* GL_TEXTURE_{123}D, GL_TEXTURE_RECTANGLE_NV, etc, and switch
@@ -384,7 +384,7 @@ static void r200TexParameter( struct gl_context *ctx,
radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_VERBOSE,
"%s(%p, tex %p) pname %s\n",
__func__, ctx, texObj,
- _mesa_lookup_enum_by_nr( pname ) );
+ _mesa_enum_to_string( pname ) );
switch ( pname ) {
case GL_TEXTURE_MIN_FILTER:
@@ -415,7 +415,7 @@ static void r200DeleteTexture(struct gl_context * ctx, struct gl_texture_object
radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_NORMAL,
"%s( %p (target = %s) )\n", __func__,
(void *)texObj,
- _mesa_lookup_enum_by_nr(texObj->Target));
+ _mesa_enum_to_string(texObj->Target));
if (rmesa) {
int i;
@@ -473,7 +473,7 @@ static struct gl_texture_object *r200NewTextureObject(struct gl_context * ctx,
radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_NORMAL,
"%s(%p) target %s, new texture %p.\n",
__func__, ctx,
- _mesa_lookup_enum_by_nr(target), t);
+ _mesa_enum_to_string(target), t);
_mesa_initialize_texture_object(ctx, &t->base, name, target);
t->base.Sampler.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
diff --git a/src/mesa/drivers/dri/r200/r200_tex.h b/src/mesa/drivers/dri/r200/r200_tex.h
index d7e91d1a0c8..a8c31b741ed 100644
--- a/src/mesa/drivers/dri/r200/r200_tex.h
+++ b/src/mesa/drivers/dri/r200/r200_tex.h
@@ -52,4 +52,68 @@ extern void r200TexUpdateParameters(struct gl_context *ctx, GLuint unit);
extern void set_re_cntl_d3d( struct gl_context *ctx, int unit, GLboolean use_d3d );
+struct tx_table {
+ GLuint format, filter;
+};
+
+/* Note the tables (have to) contain invalid entries (if they are only valid
+ * for either be/le) */
+static const struct tx_table tx_table_be[] =
+{
+ [ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_R8G8B8A8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_BGR_UNORM8 ] = { 0xffffffff, 0 },
+ [ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
+ [ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
+ [ MESA_FORMAT_B4G4R4A4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_A4R4G4B4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_B5G5R5A1_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_A1R5G5B5_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_L8A8_UNORM ] = { R200_TXFORMAT_AI88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_A8L8_UNORM ] = { R200_TXFORMAT_AI88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_A_UNORM8 ] = { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_L_UNORM8 ] = { R200_TXFORMAT_I8, 0 },
+ [ MESA_FORMAT_I_UNORM8 ] = { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_YCBCR ] = { R200_TXFORMAT_YVYU422, R200_YUV_TO_RGB },
+ [ MESA_FORMAT_YCBCR_REV ] = { R200_TXFORMAT_VYUY422, R200_YUV_TO_RGB },
+ [ MESA_FORMAT_RGB_FXT1 ] = { 0xffffffff, 0 },
+ [ MESA_FORMAT_RGBA_FXT1 ] = { 0xffffffff, 0 },
+ [ MESA_FORMAT_RGB_DXT1 ] = { R200_TXFORMAT_DXT1, 0 },
+ [ MESA_FORMAT_RGBA_DXT1 ] = { R200_TXFORMAT_DXT1 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_RGBA_DXT3 ] = { R200_TXFORMAT_DXT23 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_RGBA_DXT5 ] = { R200_TXFORMAT_DXT45 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+};
+
+static const struct tx_table tx_table_le[] =
+{
+ [ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_R8G8B8A8_UNORM ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_BGR_UNORM8 ] = { R200_TXFORMAT_ARGB8888, 0 },
+ [ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
+ [ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
+ [ MESA_FORMAT_B4G4R4A4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_A4R4G4B4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_B5G5R5A1_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_A1R5G5B5_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_L8A8_UNORM ] = { R200_TXFORMAT_AI88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_A8L8_UNORM ] = { R200_TXFORMAT_AI88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_A_UNORM8 ] = { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_L_UNORM8 ] = { R200_TXFORMAT_I8, 0 },
+ [ MESA_FORMAT_I_UNORM8 ] = { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_YCBCR ] = { R200_TXFORMAT_YVYU422, R200_YUV_TO_RGB },
+ [ MESA_FORMAT_YCBCR_REV ] = { R200_TXFORMAT_VYUY422, R200_YUV_TO_RGB },
+ [ MESA_FORMAT_RGB_FXT1 ] = { 0xffffffff, 0 },
+ [ MESA_FORMAT_RGBA_FXT1 ] = { 0xffffffff, 0 },
+ [ MESA_FORMAT_RGB_DXT1 ] = { R200_TXFORMAT_DXT1, 0 },
+ [ MESA_FORMAT_RGBA_DXT1 ] = { R200_TXFORMAT_DXT1 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_RGBA_DXT3 ] = { R200_TXFORMAT_DXT23 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_RGBA_DXT5 ] = { R200_TXFORMAT_DXT45 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+};
+
+
+
#endif /* __R200_TEX_H__ */
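The tx_table_be/tx_table_le tables moved into this header are indexed by mesa_format and selected by host endianness, as the emit_tx_setup() hunk in r200_blit.c shows. Below is a hypothetical lookup helper along those lines, applying to the selected table the 0xffffffff sentinel test that VALID_FORMAT() in r200_texstate.c applies to the big-endian table; it assumes r200_tex.h and Mesa's _mesa_little_endian() are in scope, as they are in r200_blit.c:

/* Hypothetical helper: pick the endian-appropriate table and reject
 * entries marked invalid with the 0xffffffff sentinel. */
static GLboolean
r200_lookup_txformat(mesa_format fmt, GLuint *txformat, GLuint *filter)
{
   const struct tx_table *table =
      _mesa_little_endian() ? tx_table_le : tx_table_be;

   if (fmt > MESA_FORMAT_RGBA_DXT5 || table[fmt].format == 0xffffffff)
      return GL_FALSE;

   *txformat = table[fmt].format;
   *filter   = table[fmt].filter;
   return GL_TRUE;
}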
diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c
index ab84d1752ba..441ac730d4c 100644
--- a/src/mesa/drivers/dri/r200/r200_texstate.c
+++ b/src/mesa/drivers/dri/r200/r200_texstate.c
@@ -49,80 +49,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r200_tex.h"
#include "r200_tcl.h"
-
-#define R200_TXFORMAT_A8 R200_TXFORMAT_I8
-#define R200_TXFORMAT_L8 R200_TXFORMAT_I8
-#define R200_TXFORMAT_AL88 R200_TXFORMAT_AI88
-#define R200_TXFORMAT_YCBCR R200_TXFORMAT_YVYU422
-#define R200_TXFORMAT_YCBCR_REV R200_TXFORMAT_VYUY422
-#define R200_TXFORMAT_RGB_DXT1 R200_TXFORMAT_DXT1
-#define R200_TXFORMAT_RGBA_DXT1 R200_TXFORMAT_DXT1
-#define R200_TXFORMAT_RGBA_DXT3 R200_TXFORMAT_DXT23
-#define R200_TXFORMAT_RGBA_DXT5 R200_TXFORMAT_DXT45
-
#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
&& (tx_table_be[f].format != 0xffffffff) )
-struct tx_table {
- GLuint format, filter;
-};
-
-static const struct tx_table tx_table_be[] =
-{
- [ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_R8G8B8A8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_BGR_UNORM8 ] = { 0xffffffff, 0 },
- [ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
- [ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
- [ MESA_FORMAT_B4G4R4A4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_A4R4G4B4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_B5G5R5A1_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_A1R5G5B5_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_L8A8_UNORM ] = { R200_TXFORMAT_AL88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_A8L8_UNORM ] = { R200_TXFORMAT_AL88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_A_UNORM8 ] = { R200_TXFORMAT_A8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_L_UNORM8 ] = { R200_TXFORMAT_L8, 0 },
- [ MESA_FORMAT_I_UNORM8 ] = { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_YCBCR ] = { R200_TXFORMAT_YCBCR, R200_YUV_TO_RGB },
- [ MESA_FORMAT_YCBCR_REV ] = { R200_TXFORMAT_YCBCR_REV, R200_YUV_TO_RGB },
- [ MESA_FORMAT_RGB_FXT1 ] = { 0xffffffff, 0 },
- [ MESA_FORMAT_RGBA_FXT1 ] = { 0xffffffff, 0 },
- [ MESA_FORMAT_RGB_DXT1 ] = { R200_TXFORMAT_RGB_DXT1, 0 },
- [ MESA_FORMAT_RGBA_DXT1 ] = { R200_TXFORMAT_RGBA_DXT1 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_RGBA_DXT3 ] = { R200_TXFORMAT_RGBA_DXT3 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_RGBA_DXT5 ] = { R200_TXFORMAT_RGBA_DXT5 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-};
-
-static const struct tx_table tx_table_le[] =
-{
- [ MESA_FORMAT_A8B8G8R8_UNORM ] = { R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_R8G8B8A8_UNORM ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_B8G8R8A8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_A8R8G8B8_UNORM ] = { R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_BGR_UNORM8 ] = { R200_TXFORMAT_ARGB8888, 0 },
- [ MESA_FORMAT_B5G6R5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
- [ MESA_FORMAT_R5G6B5_UNORM ] = { R200_TXFORMAT_RGB565, 0 },
- [ MESA_FORMAT_B4G4R4A4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_A4R4G4B4_UNORM ] = { R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_B5G5R5A1_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_A1R5G5B5_UNORM ] = { R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_L8A8_UNORM ] = { R200_TXFORMAT_AL88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_A8L8_UNORM ] = { R200_TXFORMAT_AL88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_A_UNORM8 ] = { R200_TXFORMAT_A8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_L_UNORM8 ] = { R200_TXFORMAT_L8, 0 },
- [ MESA_FORMAT_I_UNORM8 ] = { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_YCBCR ] = { R200_TXFORMAT_YCBCR, R200_YUV_TO_RGB },
- [ MESA_FORMAT_YCBCR_REV ] = { R200_TXFORMAT_YCBCR_REV, R200_YUV_TO_RGB },
- [ MESA_FORMAT_RGB_FXT1 ] = { 0xffffffff, 0 },
- [ MESA_FORMAT_RGBA_FXT1 ] = { 0xffffffff, 0 },
- [ MESA_FORMAT_RGB_DXT1 ] = { R200_TXFORMAT_RGB_DXT1, 0 },
- [ MESA_FORMAT_RGBA_DXT1 ] = { R200_TXFORMAT_RGBA_DXT1 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_RGBA_DXT3 ] = { R200_TXFORMAT_RGBA_DXT3 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_RGBA_DXT5 ] = { R200_TXFORMAT_RGBA_DXT5 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-};
-
/* ================================================================
* Texture combine functions
*/
diff --git a/src/mesa/drivers/dri/radeon/radeon_blit.c b/src/mesa/drivers/dri/radeon/radeon_blit.c
index 0de17514e05..0b0f06f0edb 100644
--- a/src/mesa/drivers/dri/radeon/radeon_blit.c
+++ b/src/mesa/drivers/dri/radeon/radeon_blit.c
@@ -28,6 +28,7 @@
#include "radeon_common.h"
#include "radeon_context.h"
#include "radeon_blit.h"
+#include "radeon_tex.h"
static inline uint32_t cmdpacket0(struct radeon_screen *rscrn,
int reg, int count)
@@ -40,19 +41,36 @@ static inline uint32_t cmdpacket0(struct radeon_screen *rscrn,
/* common formats supported as both textures and render targets */
unsigned r100_check_blit(mesa_format mesa_format, uint32_t dst_pitch)
{
- /* XXX others? BE/LE? */
- switch (mesa_format) {
- case MESA_FORMAT_B8G8R8A8_UNORM:
- case MESA_FORMAT_B8G8R8X8_UNORM:
- case MESA_FORMAT_B5G6R5_UNORM:
- case MESA_FORMAT_B4G4R4A4_UNORM:
- case MESA_FORMAT_B5G5R5A1_UNORM:
- case MESA_FORMAT_A_UNORM8:
- case MESA_FORMAT_L_UNORM8:
- case MESA_FORMAT_I_UNORM8:
+ /* XXX others? */
+ if (_mesa_little_endian()) {
+ switch (mesa_format) {
+ case MESA_FORMAT_B8G8R8A8_UNORM:
+ case MESA_FORMAT_B8G8R8X8_UNORM:
+ case MESA_FORMAT_B5G6R5_UNORM:
+ case MESA_FORMAT_B4G4R4A4_UNORM:
+ case MESA_FORMAT_B5G5R5A1_UNORM:
+ case MESA_FORMAT_A_UNORM8:
+ case MESA_FORMAT_L_UNORM8:
+ case MESA_FORMAT_I_UNORM8:
break;
- default:
+ default:
+ return 0;
+ }
+ }
+ else {
+ switch (mesa_format) {
+ case MESA_FORMAT_A8R8G8B8_UNORM:
+ case MESA_FORMAT_X8R8G8B8_UNORM:
+ case MESA_FORMAT_R5G6B5_UNORM:
+ case MESA_FORMAT_A4R4G4B4_UNORM:
+ case MESA_FORMAT_A1R5G5B5_UNORM:
+ case MESA_FORMAT_A_UNORM8:
+ case MESA_FORMAT_L_UNORM8:
+ case MESA_FORMAT_I_UNORM8:
+ break;
+ default:
return 0;
+ }
}
/* Rendering to small buffer doesn't work.
@@ -106,40 +124,8 @@ static void inline emit_tx_setup(struct r100_context *r100,
assert(height <= 2048);
assert(offset % 32 == 0);
- /* XXX others? BE/LE? */
- switch (mesa_format) {
- case MESA_FORMAT_B8G8R8A8_UNORM:
- txformat |= RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP;
- break;
- case MESA_FORMAT_A8B8G8R8_UNORM:
- txformat |= RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP;
- break;
- case MESA_FORMAT_B8G8R8X8_UNORM:
- txformat |= RADEON_TXFORMAT_ARGB8888;
- break;
- case MESA_FORMAT_B5G6R5_UNORM:
- txformat |= RADEON_TXFORMAT_RGB565;
- break;
- case MESA_FORMAT_B4G4R4A4_UNORM:
- txformat |= RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP;
- break;
- case MESA_FORMAT_B5G5R5A1_UNORM:
- txformat |= RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP;
- break;
- case MESA_FORMAT_A_UNORM8:
- case MESA_FORMAT_I_UNORM8:
- txformat |= RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP;
- break;
- case MESA_FORMAT_L_UNORM8:
- txformat |= RADEON_TXFORMAT_I8;
- break;
- case MESA_FORMAT_L8A8_UNORM:
- txformat |= RADEON_TXFORMAT_AI88 | RADEON_TXFORMAT_ALPHA_IN_MAP;
- break;
- default:
- break;
- }
-
+ txformat |= tx_table[mesa_format].format;
+
if (bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
offset |= RADEON_TXO_MACRO_TILE;
if (bo->flags & RADEON_BO_FLAGS_MICRO_TILE)
@@ -184,19 +170,25 @@ static inline void emit_cb_setup(struct r100_context *r100,
uint32_t dst_format = 0;
BATCH_LOCALS(&r100->radeon);
- /* XXX others? BE/LE? */
+ /* XXX others? */
switch (mesa_format) {
+ /* The first of each pair is for little, the second for big endian. */
case MESA_FORMAT_B8G8R8A8_UNORM:
+ case MESA_FORMAT_A8R8G8B8_UNORM:
case MESA_FORMAT_B8G8R8X8_UNORM:
+ case MESA_FORMAT_X8R8G8B8_UNORM:
dst_format = RADEON_COLOR_FORMAT_ARGB8888;
break;
case MESA_FORMAT_B5G6R5_UNORM:
+ case MESA_FORMAT_R5G6B5_UNORM:
dst_format = RADEON_COLOR_FORMAT_RGB565;
break;
case MESA_FORMAT_B4G4R4A4_UNORM:
+ case MESA_FORMAT_A4R4G4B4_UNORM:
dst_format = RADEON_COLOR_FORMAT_ARGB4444;
break;
case MESA_FORMAT_B5G5R5A1_UNORM:
+ case MESA_FORMAT_A1R5G5B5_UNORM:
dst_format = RADEON_COLOR_FORMAT_ARGB1555;
break;
case MESA_FORMAT_A_UNORM8:
@@ -425,5 +417,13 @@ unsigned r100_blit(struct gl_context *ctx,
radeonFlush(ctx);
+ /* We submitted those packets outside our state atom mechanism. Thus
+ * make sure they are all resubmitted the next time. */
+ r100->hw.ctx.dirty = GL_TRUE;
+ r100->hw.msk.dirty = GL_TRUE;
+ r100->hw.set.dirty = GL_TRUE;
+ r100->hw.tex[0].dirty = GL_TRUE;
+ r100->hw.txr[0].dirty = GL_TRUE;
+
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c
index 2a8bd6c9edc..fde89214ed2 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common.c
@@ -164,7 +164,7 @@ uint32_t radeonGetAge(radeonContextPtr radeon)
gp.param = RADEON_PARAM_LAST_CLEAR;
gp.value = (int *)&age;
- ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
+ ret = drmCommandWriteRead(radeon->radeonScreen->driScreen->fd, DRM_RADEON_GETPARAM,
&gp, sizeof(gp));
if (ret) {
fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __func__,
@@ -343,7 +343,7 @@ void radeonDrawBuffer( struct gl_context *ctx, GLenum mode )
{
if (RADEON_DEBUG & RADEON_DRI)
fprintf(stderr, "%s %s\n", __func__,
- _mesa_lookup_enum_by_nr( mode ));
+ _mesa_enum_to_string( mode ));
if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
@@ -358,8 +358,8 @@ void radeonDrawBuffer( struct gl_context *ctx, GLenum mode )
* that the front-buffer has actually been allocated.
*/
if (!was_front_buffer_rendering && radeon->is_front_buffer_rendering) {
- radeon_update_renderbuffers(radeon->dri.context,
- radeon->dri.context->driDrawablePriv, GL_FALSE);
+ radeon_update_renderbuffers(radeon->driContext,
+ radeon->driContext->driDrawablePriv, GL_FALSE);
}
}
@@ -375,8 +375,8 @@ void radeonReadBuffer( struct gl_context *ctx, GLenum mode )
|| (mode == GL_FRONT);
if (!was_front_buffer_reading && rmesa->is_front_buffer_reading) {
- radeon_update_renderbuffers(rmesa->dri.context,
- rmesa->dri.context->driReadablePriv, GL_FALSE);
+ radeon_update_renderbuffers(rmesa->driContext,
+ rmesa->driContext->driReadablePriv, GL_FALSE);
}
}
/* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
@@ -399,7 +399,7 @@ void radeon_window_moved(radeonContextPtr radeon)
void radeon_viewport(struct gl_context *ctx)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
- __DRIcontext *driContext = radeon->dri.context;
+ __DRIcontext *driContext = radeon->driContext;
void (*old_viewport)(struct gl_context *ctx);
if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
@@ -693,6 +693,7 @@ void rcommonInitCmdBuf(radeonContextPtr rmesa)
{
GLuint size;
struct drm_radeon_gem_info mminfo = { 0 };
+ int fd = rmesa->radeonScreen->driScreen->fd;
/* Initialize command buffer */
size = 256 * driQueryOptioni(&rmesa->optionCache,
@@ -711,8 +712,7 @@ void rcommonInitCmdBuf(radeonContextPtr rmesa)
"Allocating %d bytes command buffer (max state is %d bytes)\n",
size * 4, rmesa->hw.max_state_size * 4);
- rmesa->cmdbuf.csm =
- radeon_cs_manager_gem_ctor(rmesa->radeonScreen->driScreen->fd);
+ rmesa->cmdbuf.csm = radeon_cs_manager_gem_ctor(fd);
if (rmesa->cmdbuf.csm == NULL) {
/* FIXME: fatal error */
return;
@@ -725,7 +725,7 @@ void rcommonInitCmdBuf(radeonContextPtr rmesa)
(void (*)(void *))rmesa->glCtx.Driver.Flush, &rmesa->glCtx);
- if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO,
+ if (!drmCommandWriteRead(fd, DRM_RADEON_GEM_INFO,
&mminfo, sizeof(mminfo))) {
radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM,
mminfo.vram_visible);
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index 9699dcbfcdc..4660d98c9a2 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -162,10 +162,7 @@ GLboolean radeonInitContext(radeonContextPtr radeon,
_mesa_meta_init(ctx);
/* DRI fields */
- radeon->dri.context = driContextPriv;
- radeon->dri.screen = sPriv;
- radeon->dri.fd = sPriv->fd;
- radeon->dri.drmMinor = sPriv->drm_version.minor;
+ radeon->driContext = driContextPriv;
/* Setup IRQs */
fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode");
@@ -194,6 +191,29 @@ GLboolean radeonInitContext(radeonContextPtr radeon,
radeon_init_dma(radeon);
+ /* _mesa_initialize_context calls _mesa_init_queryobj which
+ * initializes all of the counter sizes to 64. The counters on r100
+ * and r200 are only 32-bits for occlusion queries. Those are the
+ * only counters, so set the other sizes to zero.
+ */
+ radeon->glCtx.Const.QueryCounterBits.SamplesPassed = 32;
+
+ radeon->glCtx.Const.QueryCounterBits.TimeElapsed = 0;
+ radeon->glCtx.Const.QueryCounterBits.Timestamp = 0;
+ radeon->glCtx.Const.QueryCounterBits.PrimitivesGenerated = 0;
+ radeon->glCtx.Const.QueryCounterBits.PrimitivesWritten = 0;
+ radeon->glCtx.Const.QueryCounterBits.VerticesSubmitted = 0;
+ radeon->glCtx.Const.QueryCounterBits.PrimitivesSubmitted = 0;
+ radeon->glCtx.Const.QueryCounterBits.VsInvocations = 0;
+ radeon->glCtx.Const.QueryCounterBits.TessPatches = 0;
+ radeon->glCtx.Const.QueryCounterBits.TessInvocations = 0;
+ radeon->glCtx.Const.QueryCounterBits.GsInvocations = 0;
+ radeon->glCtx.Const.QueryCounterBits.GsPrimitives = 0;
+ radeon->glCtx.Const.QueryCounterBits.FsInvocations = 0;
+ radeon->glCtx.Const.QueryCounterBits.ComputeInvocations = 0;
+ radeon->glCtx.Const.QueryCounterBits.ClInPrimitives = 0;
+ radeon->glCtx.Const.QueryCounterBits.ClOutPrimitives = 0;
+
return GL_TRUE;
}
@@ -302,7 +322,7 @@ radeon_bits_per_pixel(const struct radeon_renderbuffer *rb)
*/
void radeon_prepare_render(radeonContextPtr radeon)
{
- __DRIcontext *driContext = radeon->dri.context;
+ __DRIcontext *driContext = radeon->driContext;
__DRIdrawable *drawable;
__DRIscreen *screen;
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h
index dc72592b90c..d142a871b40 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h
@@ -342,17 +342,6 @@ struct radeon_store {
int elts_start;
};
-struct radeon_dri_mirror {
- __DRIcontext *context; /* DRI context */
- __DRIscreen *screen; /* DRI screen */
-
- drm_context_t hwContext;
- drm_hw_lock_t *hwLock;
- int hwLockCount;
- int fd;
- int drmMinor;
-};
-
typedef void (*radeon_tri_func) (radeonContextPtr,
radeonVertex *,
radeonVertex *, radeonVertex *);
@@ -385,6 +374,7 @@ struct radeon_cmdbuf {
struct radeon_context {
struct gl_context glCtx; /**< base class, must be first */
+ __DRIcontext *driContext; /* DRI context */
radeonScreenPtr radeonScreen; /* Screen private DRI data */
/* Texture object bookkeeping
@@ -407,9 +397,6 @@ struct radeon_context {
/* Drawable information */
unsigned int lastStamp;
- /* Mirrors of some DRI state */
- struct radeon_dri_mirror dri;
-
/* Busy waiting */
GLuint do_usleeps;
GLuint do_irqs;
@@ -502,12 +489,12 @@ static inline radeonContextPtr RADEON_CONTEXT(struct gl_context *ctx)
static inline __DRIdrawable* radeon_get_drawable(radeonContextPtr radeon)
{
- return radeon->dri.context->driDrawablePriv;
+ return radeon->driContext->driDrawablePriv;
}
static inline __DRIdrawable* radeon_get_readable(radeonContextPtr radeon)
{
- return radeon->dri.context->driReadablePriv;
+ return radeon->driContext->driReadablePriv;
}
extern const char const *radeonVendorString;
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c
index d4d19354b6d..a9e2ab563d3 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
@@ -191,16 +191,8 @@ r100CreateContext( gl_api api,
rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
"def_max_anisotropy");
- if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
- if ( sPriv->drm_version.minor < 13 )
- fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
- "disabling.\n", sPriv->drm_version.minor );
- else
- rmesa->using_hyperz = GL_TRUE;
- }
-
- if ( sPriv->drm_version.minor >= 15 )
- rmesa->texmicrotile = GL_TRUE;
+ if (driQueryOptionb(&rmesa->radeon.optionCache, "hyperz"))
+ rmesa->using_hyperz = GL_TRUE;
/* Init default driver functions then plug in our Radeon-specific functions
* (the texture functions are especially important)
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h
index 40325327813..badabd9508c 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_context.h
@@ -426,7 +426,6 @@ struct r100_context {
struct r100_swtcl_info swtcl;
GLboolean using_hyperz;
- GLboolean texmicrotile;
/* Performance counters
*/
diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c
index ef62d097bae..5eece518c95 100644
--- a/src/mesa/drivers/dri/radeon/radeon_fbo.c
+++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c
@@ -169,6 +169,7 @@ radeon_map_renderbuffer_s8z24(struct gl_context *ctx,
rrb->map_buffer = malloc(w * h * 4);
ret = radeon_bo_map(rrb->bo, !!(mode & GL_MAP_WRITE_BIT));
assert(!ret);
+ (void) ret;
untiled_s8z24_map = rrb->map_buffer;
tiled_s8z24_map = rrb->bo->ptr;
@@ -207,6 +208,7 @@ radeon_map_renderbuffer_z16(struct gl_context *ctx,
rrb->map_buffer = malloc(w * h * 2);
ret = radeon_bo_map(rrb->bo, !!(mode & GL_MAP_WRITE_BIT));
assert(!ret);
+ (void) ret;
untiled_z16_map = rrb->map_buffer;
tiled_z16_map = rrb->bo->ptr;
@@ -324,6 +326,7 @@ radeon_map_renderbuffer(struct gl_context *ctx,
ret = radeon_bo_map(rrb->bo, !!(mode & GL_MAP_WRITE_BIT));
assert(!ret);
+ (void) ret;
map = rrb->bo->ptr;
stride = rrb->map_pitch;
@@ -416,7 +419,6 @@ radeon_unmap_renderbuffer(struct gl_context *ctx,
{
struct radeon_context *const rmesa = RADEON_CONTEXT(ctx);
struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
- GLboolean ok;
if ((rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_DEPTH_ALWAYS_TILED) && !rrb->has_surface) {
if (rb->Format == MESA_FORMAT_Z24_UNORM_S8_UINT || rb->Format == MESA_FORMAT_Z24_UNORM_X8_UINT) {
@@ -438,6 +440,7 @@ radeon_unmap_renderbuffer(struct gl_context *ctx,
radeon_bo_unmap(rrb->map_bo);
if (rrb->map_mode & GL_MAP_WRITE_BIT) {
+ GLboolean ok;
ok = rmesa->vtbl.blit(ctx, rrb->map_bo, 0,
rb->Format, rrb->map_pitch / rrb->cpp,
rrb->map_w, rrb->map_h,
@@ -449,6 +452,7 @@ radeon_unmap_renderbuffer(struct gl_context *ctx,
rrb->map_w, rrb->map_h,
GL_FALSE);
assert(ok);
+ (void) ok;
}
radeon_bo_unref(rrb->map_bo);
@@ -700,7 +704,7 @@ radeon_bind_framebuffer(struct gl_context * ctx, GLenum target,
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s(%p, fb %p, target %s) \n",
__func__, ctx, fb,
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
if (target == GL_FRAMEBUFFER_EXT || target == GL_DRAW_FRAMEBUFFER_EXT) {
radeon_draw_buffer(ctx, fb);
diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
index 28591cad895..c71766d0a3e 100644
--- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
@@ -276,7 +276,7 @@ static void calculate_min_max_lod(struct gl_sampler_object *samp, struct gl_text
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s(%p) target %s, min %d, max %d.\n",
__func__, tObj,
- _mesa_lookup_enum_by_nr(tObj->Target),
+ _mesa_enum_to_string(tObj->Target),
minLod, maxLod);
/* save these values */
diff --git a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
index 6998444fb66..e115b749da5 100644
--- a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
+++ b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
@@ -212,7 +212,7 @@ radeonReadPixels(struct gl_context * ctx,
*/
radeon_print(RADEON_FALLBACKS, RADEON_NORMAL,
"Falling back to sw for ReadPixels (format %s, type %s)\n",
- _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type));
+ _mesa_enum_to_string(format), _mesa_enum_to_string(type));
if (ctx->NewState)
_mesa_update_state(ctx);
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index 45d9b2b8c0b..98b4741b456 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -135,36 +135,26 @@ DRI_CONF_END
static int
radeonGetParam(__DRIscreen *sPriv, int param, void *value)
{
- int ret;
- drm_radeon_getparam_t gp = { 0 };
struct drm_radeon_info info = { 0 };
- if (sPriv->drm_version.major >= 2) {
- info.value = (uint64_t)(uintptr_t)value;
- switch (param) {
- case RADEON_PARAM_DEVICE_ID:
- info.request = RADEON_INFO_DEVICE_ID;
- break;
- case RADEON_PARAM_NUM_GB_PIPES:
- info.request = RADEON_INFO_NUM_GB_PIPES;
- break;
- case RADEON_PARAM_NUM_Z_PIPES:
- info.request = RADEON_INFO_NUM_Z_PIPES;
- break;
- case RADEON_INFO_TILE_CONFIG:
- info.request = RADEON_INFO_TILE_CONFIG;
- break;
- default:
- return -EINVAL;
- }
- ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_INFO, &info, sizeof(info));
- } else {
- gp.param = param;
- gp.value = value;
-
- ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
+ info.value = (uint64_t)(uintptr_t)value;
+ switch (param) {
+ case RADEON_PARAM_DEVICE_ID:
+ info.request = RADEON_INFO_DEVICE_ID;
+ break;
+ case RADEON_PARAM_NUM_GB_PIPES:
+ info.request = RADEON_INFO_NUM_GB_PIPES;
+ break;
+ case RADEON_PARAM_NUM_Z_PIPES:
+ info.request = RADEON_INFO_NUM_Z_PIPES;
+ break;
+ case RADEON_INFO_TILE_CONFIG:
+ info.request = RADEON_INFO_TILE_CONFIG;
+ break;
+ default:
+ return -EINVAL;
}
- return ret;
+ return drmCommandWriteRead(sPriv->fd, DRM_RADEON_INFO, &info, sizeof(info));
}
#if defined(RADEON_R100)
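With the pre-DRM-2 path removed, radeonGetParam() always queries through the DRM_RADEON_INFO ioctl. A sketch of the call shape (illustrative only; the real call sites are elsewhere in radeon_screen.c and may use different types):

   uint32_t device_id = 0;
   /* A non-zero return means the ioctl failed or the param is unknown. */
   if (radeonGetParam(sPriv, RADEON_PARAM_DEVICE_ID, &device_id) != 0)
      fprintf(stderr, "radeon: device id query failed\n");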
diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c
index cba3d9c9689..74c1fc6c902 100644
--- a/src/mesa/drivers/dri/radeon/radeon_state.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state.c
@@ -1354,7 +1354,7 @@ void radeonUpdateWindow( struct gl_context *ctx )
GLfloat xoffset = 0.0;
GLfloat yoffset = dPriv ? (GLfloat) dPriv->h : 0;
const GLboolean render_to_fbo = (ctx->DrawBuffer ? _mesa_is_user_fbo(ctx->DrawBuffer) : 0);
- double scale[3], translate[3];
+ float scale[3], translate[3];
GLfloat y_scale, y_bias;
if (render_to_fbo) {
@@ -1452,7 +1452,7 @@ static void radeonEnable( struct gl_context *ctx, GLenum cap, GLboolean state )
if ( RADEON_DEBUG & RADEON_STATE )
fprintf( stderr, "%s( %s = %s )\n", __func__,
- _mesa_lookup_enum_by_nr( cap ),
+ _mesa_enum_to_string( cap ),
state ? "GL_TRUE" : "GL_FALSE" );
switch ( cap ) {
diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c
index c800edfc7be..5e2f41fdb4a 100644
--- a/src/mesa/drivers/dri/radeon/radeon_state_init.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c
@@ -336,12 +336,15 @@ static void ctx_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom)
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
else switch (rrb->base.Base.Format) {
case MESA_FORMAT_B5G6R5_UNORM:
+ case MESA_FORMAT_R5G6B5_UNORM:
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
break;
case MESA_FORMAT_B4G4R4A4_UNORM:
+ case MESA_FORMAT_A4R4G4B4_UNORM:
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444;
break;
case MESA_FORMAT_B5G5R5A1_UNORM:
+ case MESA_FORMAT_A1R5G5B5_UNORM:
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555;
break;
default:
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
index 8a1fbab39f8..2fbd353297b 100644
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -442,7 +442,7 @@ static GLboolean radeon_run_render( struct gl_context *ctx,
radeon_print(RADEON_SWRENDER, RADEON_NORMAL,
"radeon_render.c: prim %s %d..%d\n",
- _mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK),
+ _mesa_enum_to_string(prim & PRIM_MODE_MASK),
start, start+length);
if (length)
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c
index 353fdb00ec8..0955a135de8 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex.c
@@ -263,7 +263,7 @@ static void radeonTexEnv( struct gl_context *ctx, GLenum target,
if ( RADEON_DEBUG & RADEON_STATE ) {
fprintf( stderr, "%s( %s )\n",
- __func__, _mesa_lookup_enum_by_nr( pname ) );
+ __func__, _mesa_enum_to_string( pname ) );
}
switch ( pname ) {
@@ -335,7 +335,7 @@ static void radeonTexParameter( struct gl_context *ctx,
radeonTexObj* t = radeon_tex_obj(texObj);
radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, "%s( %s )\n", __func__,
- _mesa_lookup_enum_by_nr( pname ) );
+ _mesa_enum_to_string( pname ) );
switch ( pname ) {
case GL_TEXTURE_BASE_LEVEL:
@@ -359,7 +359,7 @@ static void radeonDeleteTexture( struct gl_context *ctx,
radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
"%s( %p (target = %s) )\n", __func__, (void *)texObj,
- _mesa_lookup_enum_by_nr( texObj->Target ) );
+ _mesa_enum_to_string( texObj->Target ) );
if ( rmesa ) {
radeon_firevertices(&rmesa->radeon);
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.h b/src/mesa/drivers/dri/radeon/radeon_tex.h
index fa57c08987d..f8ec432755a 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex.h
+++ b/src/mesa/drivers/dri/radeon/radeon_tex.h
@@ -51,4 +51,39 @@ extern void radeonTexUpdateParameters(struct gl_context *ctx, GLuint unit);
extern void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions );
+struct tx_table {
+ GLuint format, filter;
+};
+
+/* XXX verify this table against MESA_FORMAT_x values */
+static const struct tx_table tx_table[] =
+{
+ [ MESA_FORMAT_NONE ] = { 0xffffffff, 0 },
+ [ MESA_FORMAT_A8B8G8R8_UNORM ] = { RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_R8G8B8A8_UNORM ] = { RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_B8G8R8A8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_A8R8G8B8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_BGR_UNORM8 ] = { RADEON_TXFORMAT_ARGB8888, 0 },
+ [ MESA_FORMAT_B5G6R5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 },
+ [ MESA_FORMAT_R5G6B5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 },
+ [ MESA_FORMAT_B4G4R4A4_UNORM ] = { RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_A4R4G4B4_UNORM ] = { RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_B5G5R5A1_UNORM ] = { RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_A1R5G5B5_UNORM ] = { RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_L8A8_UNORM ] = { RADEON_TXFORMAT_AI88 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_A8L8_UNORM ] = { RADEON_TXFORMAT_AI88 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_A_UNORM8 ] = { RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_L_UNORM8 ] = { RADEON_TXFORMAT_I8, 0 },
+ [ MESA_FORMAT_I_UNORM8 ] = { RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_YCBCR ] = { RADEON_TXFORMAT_YVYU422, RADEON_YUV_TO_RGB },
+ [ MESA_FORMAT_YCBCR_REV ] = { RADEON_TXFORMAT_VYUY422, RADEON_YUV_TO_RGB },
+ [ MESA_FORMAT_RGB_FXT1 ] = { 0xffffffff, 0 },
+ [ MESA_FORMAT_RGBA_FXT1 ] = { 0xffffffff, 0 },
+ [ MESA_FORMAT_RGB_DXT1 ] = { RADEON_TXFORMAT_DXT1, 0 },
+ [ MESA_FORMAT_RGBA_DXT1 ] = { RADEON_TXFORMAT_DXT1 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_RGBA_DXT3 ] = { RADEON_TXFORMAT_DXT23 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+ [ MESA_FORMAT_RGBA_DXT5 ] = { RADEON_TXFORMAT_DXT45 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
+};
+
+
#endif /* __RADEON_TEX_H__ */
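The tx_table that used to live in radeon_texstate.c is now shared through radeon_tex.h, so the blit path and the texture-state path index the same data. A minimal sketch of how a caller can combine it with the 0xffffffff "unsupported" sentinel (the helper name is hypothetical, not part of the patch):

   /* Hypothetical helper mirroring the VALID_FORMAT() check: returns false
    * for formats the hardware cannot sample, otherwise ORs in the txformat
    * and filter bits from the shared table. */
   static GLboolean
   lookup_txformat(mesa_format fmt, GLuint *txformat, GLuint *txfilter)
   {
      if (fmt > MESA_FORMAT_RGBA_DXT5 || tx_table[fmt].format == 0xffffffff)
         return GL_FALSE;
      *txformat |= tx_table[fmt].format;
      *txfilter |= tx_table[fmt].filter; /* e.g. RADEON_YUV_TO_RGB for YCbCr */
      return GL_TRUE;
   }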
diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
index 45667efb65f..ec835f248eb 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@@ -53,53 +53,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_tcl.h"
-#define RADEON_TXFORMAT_A8 RADEON_TXFORMAT_I8
-#define RADEON_TXFORMAT_L8 RADEON_TXFORMAT_I8
-#define RADEON_TXFORMAT_AL88 RADEON_TXFORMAT_AI88
-#define RADEON_TXFORMAT_YCBCR RADEON_TXFORMAT_YVYU422
-#define RADEON_TXFORMAT_YCBCR_REV RADEON_TXFORMAT_VYUY422
-#define RADEON_TXFORMAT_RGB_DXT1 RADEON_TXFORMAT_DXT1
-#define RADEON_TXFORMAT_RGBA_DXT1 RADEON_TXFORMAT_DXT1
-#define RADEON_TXFORMAT_RGBA_DXT3 RADEON_TXFORMAT_DXT23
-#define RADEON_TXFORMAT_RGBA_DXT5 RADEON_TXFORMAT_DXT45
-
#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
&& (tx_table[f].format != 0xffffffff) )
-struct tx_table {
- GLuint format, filter;
-};
-
-/* XXX verify this table against MESA_FORMAT_x values */
-static const struct tx_table tx_table[] =
-{
- [ MESA_FORMAT_NONE ] = { 0xffffffff, 0 },
- [ MESA_FORMAT_A8B8G8R8_UNORM ] = { RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_R8G8B8A8_UNORM ] = { RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_B8G8R8A8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_A8R8G8B8_UNORM ] = { RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_BGR_UNORM8 ] = { RADEON_TXFORMAT_ARGB8888, 0 },
- [ MESA_FORMAT_B5G6R5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 },
- [ MESA_FORMAT_R5G6B5_UNORM ] = { RADEON_TXFORMAT_RGB565, 0 },
- [ MESA_FORMAT_B4G4R4A4_UNORM ] = { RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_A4R4G4B4_UNORM ] = { RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_B5G5R5A1_UNORM ] = { RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_A1R5G5B5_UNORM ] = { RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_L8A8_UNORM ] = { RADEON_TXFORMAT_AL88 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_A8L8_UNORM ] = { RADEON_TXFORMAT_AL88 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_A_UNORM8 ] = { RADEON_TXFORMAT_A8 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_L_UNORM8 ] = { RADEON_TXFORMAT_L8, 0 },
- [ MESA_FORMAT_I_UNORM8 ] = { RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_YCBCR ] = { RADEON_TXFORMAT_YCBCR, RADEON_YUV_TO_RGB },
- [ MESA_FORMAT_YCBCR_REV ] = { RADEON_TXFORMAT_YCBCR_REV, RADEON_YUV_TO_RGB },
- [ MESA_FORMAT_RGB_FXT1 ] = { 0xffffffff, 0 },
- [ MESA_FORMAT_RGBA_FXT1 ] = { 0xffffffff, 0 },
- [ MESA_FORMAT_RGB_DXT1 ] = { RADEON_TXFORMAT_RGB_DXT1, 0 },
- [ MESA_FORMAT_RGBA_DXT1 ] = { RADEON_TXFORMAT_RGBA_DXT1 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_RGBA_DXT3 ] = { RADEON_TXFORMAT_RGBA_DXT3 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
- [ MESA_FORMAT_RGBA_DXT5 ] = { RADEON_TXFORMAT_RGBA_DXT5 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
-};
-
/* ================================================================
* Texture combine functions
*/
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
index edfd48b283b..4794ddae069 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
@@ -224,7 +224,19 @@ static mesa_format radeonChoose8888TexFormat(radeonContextPtr rmesa,
const GLuint ui = 1;
const GLubyte littleEndian = *((const GLubyte *)&ui);
- if (fbo)
+
+ /* Unfortunately, regardless of the fbo flag, we might still be asked to
+ * attach a texture to an fbo later, which then won't succeed if we chose
+ * a format which isn't renderable. And unlike more exotic formats, apps
+ * aren't really prepared for the incomplete framebuffer this results in
+ * (they'd have to retry with the same internalFormat even, just a
+ * different srcFormat/srcType, which can't really be expected anyway).
+ * Ideally, we'd defer format selection until later (if the texture is
+ * used as a render target, it's likely no data is uploaded to it before
+ * it is attached to an fbo), but this isn't really possible, so for now
+ * just always use a renderable format.
+ */
+ if (1 || fbo)
return _radeon_texformat_argb8888;
if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
@@ -267,8 +279,8 @@ mesa_format radeonChooseTextureFormat(struct gl_context * ctx,
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s InternalFormat=%s(%d) type=%s format=%s\n",
__func__,
- _mesa_lookup_enum_by_nr(internalFormat), internalFormat,
- _mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
+ _mesa_enum_to_string(internalFormat), internalFormat,
+ _mesa_enum_to_string(type), _mesa_enum_to_string(format));
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s do32bpt=%d force16bpt=%d\n",
__func__, do32bpt, force16bpt);
@@ -531,7 +543,7 @@ void radeon_image_target_texture_2d(struct gl_context *ctx, GLenum target,
__DRIscreen *screen;
__DRIimage *image;
- screen = radeon->dri.screen;
+ screen = radeon->radeonScreen->driScreen;
image = screen->dri2.image->lookupEGLImage(screen, image_handle,
screen->loaderPrivate);
if (image == NULL)
diff --git a/src/mesa/drivers/dri/swrast/Makefile.am b/src/mesa/drivers/dri/swrast/Makefile.am
index bfc3c10e334..9d21d9ea4dc 100644
--- a/src/mesa/drivers/dri/swrast/Makefile.am
+++ b/src/mesa/drivers/dri/swrast/Makefile.am
@@ -24,7 +24,6 @@
include Makefile.sources
AM_CFLAGS = \
- -D__NOT_HAVE_DRM_H \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/ \
-I$(top_srcdir)/src/mapi \
@@ -33,6 +32,7 @@ AM_CFLAGS = \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_builddir)/src/mesa/drivers/dri/common \
+ $(LIBDRM_CFLAGS) \
$(DEFINES) \
$(VISIBILITY_CFLAGS)
diff --git a/src/mesa/drivers/osmesa/osmesa.c b/src/mesa/drivers/osmesa/osmesa.c
index 022523eb00b..5c7dcac3841 100644
--- a/src/mesa/drivers/osmesa/osmesa.c
+++ b/src/mesa/drivers/osmesa/osmesa.c
@@ -1124,7 +1124,7 @@ static struct name_function functions[] = {
{ "OSMesaDestroyContext", (OSMESAproc) OSMesaDestroyContext },
{ "OSMesaMakeCurrent", (OSMESAproc) OSMesaMakeCurrent },
{ "OSMesaGetCurrentContext", (OSMESAproc) OSMesaGetCurrentContext },
- { "OSMesaPixelsStore", (OSMESAproc) OSMesaPixelStore },
+ { "OSMesaPixelStore", (OSMESAproc) OSMesaPixelStore },
{ "OSMesaGetIntegerv", (OSMESAproc) OSMesaGetIntegerv },
{ "OSMesaGetDepthBuffer", (OSMESAproc) OSMesaGetDepthBuffer },
{ "OSMesaGetColorBuffer", (OSMESAproc) OSMesaGetColorBuffer },
diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index 9c2e29e6472..53c8fb893b5 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -69,6 +69,25 @@ check_valid_to_render(struct gl_context *ctx, const char *function)
return false;
}
+ /* The spec argues that this is allowed because a tess ctrl shader
+ * without a tess eval shader can be used with transform feedback.
+ * However, glBeginTransformFeedback doesn't allow GL_PATCHES and
+ * therefore doesn't allow tessellation.
+ *
+ * Further investigation showed that this is indeed a spec bug and
+ * a tess ctrl shader without a tess eval shader shouldn't have been
+ * allowed, because there is no API in GL 4.0 that can make use of this
+ * to produce something useful.
+ *
+ * Also, all vendors except one don't support a tess ctrl shader without
+ * a tess eval shader anyway.
+ */
+ if (ctx->TessCtrlProgram._Current && !ctx->TessEvalProgram._Current) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "%s(tess eval shader is missing)", function);
+ return false;
+ }
+
/* Section 7.3 (Program Objects) of the OpenGL 4.5 Core Profile spec
* says:
*
@@ -127,6 +146,9 @@ _mesa_is_valid_prim_mode(struct gl_context *ctx, GLenum mode)
if (mode <= GL_TRIANGLE_STRIP_ADJACENCY)
return _mesa_has_geometry_shaders(ctx);
+ if (mode == GL_PATCHES)
+ return _mesa_has_tessellation(ctx);
+
return false;
}
@@ -136,6 +158,7 @@ _mesa_is_valid_prim_mode(struct gl_context *ctx, GLenum mode)
* etc? Also, do additional checking related to transformation feedback.
* Note: this function cannot be called during glNewList(GL_COMPILE) because
* this code depends on current transform feedback state.
+ * Also, do additional checking related to tessellation shaders.
*/
GLboolean
_mesa_valid_prim_mode(struct gl_context *ctx, GLenum mode, const char *name)
@@ -170,11 +193,29 @@ _mesa_valid_prim_mode(struct gl_context *ctx, GLenum mode, const char *name)
* TRIANGLES_ADJACENCY_ARB and <mode> is not
* TRIANGLES_ADJACENCY_ARB or TRIANGLE_STRIP_ADJACENCY_ARB.
*
+ * The GL spec doesn't mention any interaction with tessellation, which
+ * is clearly a spec bug. The same rule should apply, but instead of
+ * the draw primitive mode, the tessellation evaluation shader primitive
+ * mode should be used for the checking.
*/
if (ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]) {
const GLenum geom_mode =
ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]->Geom.InputType;
- switch (mode) {
+ struct gl_shader_program *tes =
+ ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
+ GLenum mode_before_gs = mode;
+
+ if (tes) {
+ if (tes->TessEval.PointMode)
+ mode_before_gs = GL_POINTS;
+ else if (tes->TessEval.PrimitiveMode == GL_ISOLINES)
+ mode_before_gs = GL_LINES;
+ else
+ /* the GL_QUADS mode generates triangles too */
+ mode_before_gs = GL_TRIANGLES;
+ }
+
+ switch (mode_before_gs) {
case GL_POINTS:
valid_enum = (geom_mode == GL_POINTS);
break;
@@ -209,12 +250,42 @@ _mesa_valid_prim_mode(struct gl_context *ctx, GLenum mode, const char *name)
_mesa_error(ctx, GL_INVALID_OPERATION,
"%s(mode=%s vs geometry shader input %s)",
name,
- _mesa_lookup_prim_by_nr(mode),
+ _mesa_lookup_prim_by_nr(mode_before_gs),
_mesa_lookup_prim_by_nr(geom_mode));
return GL_FALSE;
}
}
+ /* From the OpenGL 4.0 (Core Profile) spec (section 2.12):
+ *
+ * "Tessellation operates only on patch primitives. If tessellation is
+ * active, any command that transfers vertices to the GL will
+ * generate an INVALID_OPERATION error if the primitive mode is not
+ * PATCHES.
+ * Patch primitives are not supported by pipeline stages below the
+ * tessellation evaluation shader. If there is no active program
+ * object or the active program object does not contain a tessellation
+ * evaluation shader, the error INVALID_OPERATION is generated by any
+ * command that transfers vertices to the GL if the primitive mode is
+ * PATCHES."
+ *
+ */
+ if (ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL] ||
+ ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL]) {
+ if (mode != GL_PATCHES) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "only GL_PATCHES valid with tessellation");
+ return GL_FALSE;
+ }
+ }
+ else {
+ if (mode == GL_PATCHES) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "GL_PATCHES only valid with tessellation");
+ return GL_FALSE;
+ }
+ }
+
/* From the GL_EXT_transform_feedback spec:
*
* "The error INVALID_OPERATION is generated if Begin, or any command
@@ -247,6 +318,17 @@ _mesa_valid_prim_mode(struct gl_context *ctx, GLenum mode, const char *name)
pass = GL_FALSE;
}
}
+ else if (ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]) {
+ struct gl_shader_program *tes =
+ ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
+
+ if (tes->TessEval.PointMode)
+ pass = ctx->TransformFeedback.Mode == GL_POINTS;
+ else if (tes->TessEval.PrimitiveMode == GL_ISOLINES)
+ pass = ctx->TransformFeedback.Mode == GL_LINES;
+ else
+ pass = ctx->TransformFeedback.Mode == GL_TRIANGLES;
+ }
else {
switch (mode) {
case GL_POINTS:
@@ -291,7 +373,7 @@ valid_elements_type(struct gl_context *ctx, GLenum type, const char *name)
default:
_mesa_error(ctx, GL_INVALID_ENUM, "%s(type = %s)", name,
- _mesa_lookup_enum_by_nr(type));
+ _mesa_enum_to_string(type));
return false;
}
}
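Taken together, the new checks in check_valid_to_render() and _mesa_valid_prim_mode() tie GL_PATCHES to the presence of a tessellation evaluation shader in both directions. A hedged sketch of what an application now sees (prog_with_tess, prog_without_tess and num_patches are placeholders, not from the patch):

   /* Illustrative GL usage only. */
   glUseProgram(prog_with_tess);                  /* links a TCS and a TES */
   glPatchParameteri(GL_PATCH_VERTICES, 3);
   glDrawArrays(GL_PATCHES, 0, 3 * num_patches);  /* accepted */
   glDrawArrays(GL_TRIANGLES, 0, 3);              /* INVALID_OPERATION: only
                                                   * GL_PATCHES is valid with
                                                   * tessellation */

   glUseProgram(prog_without_tess);
   glDrawArrays(GL_PATCHES, 0, 3);                /* INVALID_OPERATION:
                                                   * GL_PATCHES needs a tess
                                                   * eval shader */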
diff --git a/src/mesa/main/atifragshader.c b/src/mesa/main/atifragshader.c
index 9fc35520a38..935ba05b7cc 100644
--- a/src/mesa/main/atifragshader.c
+++ b/src/mesa/main/atifragshader.c
@@ -132,21 +132,21 @@ static void debug_op(GLint optype, GLuint arg_count, GLenum op, GLuint dst,
op_name = atifs_ops[(arg_count-1)+(optype?3:0)];
- fprintf(stderr, "%s(%s, %s", op_name, _mesa_lookup_enum_by_nr(op),
- _mesa_lookup_enum_by_nr(dst));
+ fprintf(stderr, "%s(%s, %s", op_name, _mesa_enum_to_string(op),
+ _mesa_enum_to_string(dst));
if (!optype)
fprintf(stderr, ", %d", dstMask);
fprintf(stderr, ", %s", create_dst_mod_str(dstMod));
- fprintf(stderr, ", %s, %s, %d", _mesa_lookup_enum_by_nr(arg1),
- _mesa_lookup_enum_by_nr(arg1Rep), arg1Mod);
+ fprintf(stderr, ", %s, %s, %d", _mesa_enum_to_string(arg1),
+ _mesa_enum_to_string(arg1Rep), arg1Mod);
if (arg_count>1)
- fprintf(stderr, ", %s, %s, %d", _mesa_lookup_enum_by_nr(arg2),
- _mesa_lookup_enum_by_nr(arg2Rep), arg2Mod);
+ fprintf(stderr, ", %s, %s, %d", _mesa_enum_to_string(arg2),
+ _mesa_enum_to_string(arg2Rep), arg2Mod);
if (arg_count>2)
- fprintf(stderr, ", %s, %s, %d", _mesa_lookup_enum_by_nr(arg3),
- _mesa_lookup_enum_by_nr(arg3Rep), arg3Mod);
+ fprintf(stderr, ", %s, %s, %d", _mesa_enum_to_string(arg3),
+ _mesa_enum_to_string(arg3Rep), arg3Mod);
fprintf(stderr,")\n");
@@ -383,7 +383,7 @@ _mesa_EndFragmentShaderATI(void)
for (j = 0; j < MAX_NUM_PASSES_ATI; j++) {
for (i = 0; i < MAX_NUM_FRAGMENT_REGISTERS_ATI; i++) {
GLuint op = curProg->SetupInst[j][i].Opcode;
- const char *op_enum = op > 5 ? _mesa_lookup_enum_by_nr(op) : "0";
+ const char *op_enum = op > 5 ? _mesa_enum_to_string(op) : "0";
GLuint src = curProg->SetupInst[j][i].src;
GLuint swizzle = curProg->SetupInst[j][i].swizzle;
fprintf(stderr, "%2d %04X %s %d %04X\n", i, op, op_enum, src,
@@ -392,8 +392,8 @@ _mesa_EndFragmentShaderATI(void)
for (i = 0; i < curProg->numArithInstr[j]; i++) {
GLuint op0 = curProg->Instructions[j][i].Opcode[0];
GLuint op1 = curProg->Instructions[j][i].Opcode[1];
- const char *op0_enum = op0 > 5 ? _mesa_lookup_enum_by_nr(op0) : "0";
- const char *op1_enum = op1 > 5 ? _mesa_lookup_enum_by_nr(op1) : "0";
+ const char *op0_enum = op0 > 5 ? _mesa_enum_to_string(op0) : "0";
+ const char *op1_enum = op1 > 5 ? _mesa_enum_to_string(op1) : "0";
GLuint count0 = curProg->Instructions[j][i].ArgCount[0];
GLuint count1 = curProg->Instructions[j][i].ArgCount[1];
fprintf(stderr, "%2d %04X %s %d %04X %s %d\n", i, op0, op0_enum, count0,
@@ -477,8 +477,8 @@ _mesa_PassTexCoordATI(GLuint dst, GLuint coord, GLenum swizzle)
#if MESA_DEBUG_ATI_FS
_mesa_debug(ctx, "%s(%s, %s, %s)\n", __func__,
- _mesa_lookup_enum_by_nr(dst), _mesa_lookup_enum_by_nr(coord),
- _mesa_lookup_enum_by_nr(swizzle));
+ _mesa_enum_to_string(dst), _mesa_enum_to_string(coord),
+ _mesa_enum_to_string(swizzle));
#endif
}
@@ -550,8 +550,8 @@ _mesa_SampleMapATI(GLuint dst, GLuint interp, GLenum swizzle)
#if MESA_DEBUG_ATI_FS
_mesa_debug(ctx, "%s(%s, %s, %s)\n", __func__,
- _mesa_lookup_enum_by_nr(dst), _mesa_lookup_enum_by_nr(interp),
- _mesa_lookup_enum_by_nr(swizzle));
+ _mesa_enum_to_string(dst), _mesa_enum_to_string(interp),
+ _mesa_enum_to_string(swizzle));
#endif
}
diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c
index 53626e38be9..08f13178f84 100644
--- a/src/mesa/main/attrib.c
+++ b/src/mesa/main/attrib.c
@@ -937,7 +937,7 @@ _mesa_PopAttrib(void)
if (MESA_VERBOSE & VERBOSE_API) {
_mesa_debug(ctx, "glPopAttrib %s\n",
- _mesa_lookup_enum_by_nr(attr->kind));
+ _mesa_enum_to_string(attr->kind));
}
switch (attr->kind) {
diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c
index d869fa2aa09..4fc32962425 100644
--- a/src/mesa/main/blend.c
+++ b/src/mesa/main/blend.c
@@ -128,28 +128,28 @@ validate_blend_factors(struct gl_context *ctx, const char *func,
if (!legal_src_factor(ctx, sfactorRGB)) {
_mesa_error(ctx, GL_INVALID_ENUM,
"%s(sfactorRGB = %s)", func,
- _mesa_lookup_enum_by_nr(sfactorRGB));
+ _mesa_enum_to_string(sfactorRGB));
return GL_FALSE;
}
if (!legal_dst_factor(ctx, dfactorRGB)) {
_mesa_error(ctx, GL_INVALID_ENUM,
"%s(dfactorRGB = %s)", func,
- _mesa_lookup_enum_by_nr(dfactorRGB));
+ _mesa_enum_to_string(dfactorRGB));
return GL_FALSE;
}
if (sfactorA != sfactorRGB && !legal_src_factor(ctx, sfactorA)) {
_mesa_error(ctx, GL_INVALID_ENUM,
"%s(sfactorA = %s)", func,
- _mesa_lookup_enum_by_nr(sfactorA));
+ _mesa_enum_to_string(sfactorA));
return GL_FALSE;
}
if (dfactorA != dfactorRGB && !legal_dst_factor(ctx, dfactorA)) {
_mesa_error(ctx, GL_INVALID_ENUM,
"%s(dfactorA = %s)", func,
- _mesa_lookup_enum_by_nr(dfactorA));
+ _mesa_enum_to_string(dfactorA));
return GL_FALSE;
}
@@ -208,10 +208,10 @@ _mesa_BlendFuncSeparate( GLenum sfactorRGB, GLenum dfactorRGB,
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glBlendFuncSeparate %s %s %s %s\n",
- _mesa_lookup_enum_by_nr(sfactorRGB),
- _mesa_lookup_enum_by_nr(dfactorRGB),
- _mesa_lookup_enum_by_nr(sfactorA),
- _mesa_lookup_enum_by_nr(dfactorA));
+ _mesa_enum_to_string(sfactorRGB),
+ _mesa_enum_to_string(dfactorRGB),
+ _mesa_enum_to_string(sfactorA),
+ _mesa_enum_to_string(dfactorA));
if (!validate_blend_factors(ctx, "glBlendFuncSeparate",
sfactorRGB, dfactorRGB,
@@ -342,7 +342,7 @@ _mesa_BlendEquation( GLenum mode )
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glBlendEquation(%s)\n",
- _mesa_lookup_enum_by_nr(mode));
+ _mesa_enum_to_string(mode));
if (!legal_blend_equation(ctx, mode)) {
_mesa_error(ctx, GL_INVALID_ENUM, "glBlendEquation");
@@ -385,7 +385,7 @@ _mesa_BlendEquationiARB(GLuint buf, GLenum mode)
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glBlendEquationi(%u, %s)\n",
- buf, _mesa_lookup_enum_by_nr(mode));
+ buf, _mesa_enum_to_string(mode));
if (buf >= ctx->Const.MaxDrawBuffers) {
_mesa_error(ctx, GL_INVALID_VALUE, "glBlendFuncSeparatei(buffer=%u)",
@@ -421,8 +421,8 @@ _mesa_BlendEquationSeparate( GLenum modeRGB, GLenum modeA )
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glBlendEquationSeparateEXT(%s %s)\n",
- _mesa_lookup_enum_by_nr(modeRGB),
- _mesa_lookup_enum_by_nr(modeA));
+ _mesa_enum_to_string(modeRGB),
+ _mesa_enum_to_string(modeA));
if ( (modeRGB != modeA) && !ctx->Extensions.EXT_blend_equation_separate ) {
_mesa_error(ctx, GL_INVALID_OPERATION,
@@ -476,8 +476,8 @@ _mesa_BlendEquationSeparateiARB(GLuint buf, GLenum modeRGB, GLenum modeA)
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glBlendEquationSeparatei(%u, %s %s)\n", buf,
- _mesa_lookup_enum_by_nr(modeRGB),
- _mesa_lookup_enum_by_nr(modeA));
+ _mesa_enum_to_string(modeRGB),
+ _mesa_enum_to_string(modeA));
if (buf >= ctx->Const.MaxDrawBuffers) {
_mesa_error(ctx, GL_INVALID_VALUE, "glBlendEquationSeparatei(buffer=%u)",
@@ -567,7 +567,10 @@ _mesa_AlphaFunc( GLenum func, GLclampf ref )
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glAlphaFunc(%s, %f)\n",
- _mesa_lookup_enum_by_nr(func), ref);
+ _mesa_enum_to_string(func), ref);
+
+ if (ctx->Color.AlphaFunc == func && ctx->Color.AlphaRefUnclamped == ref)
+ return; /* no change */
switch (func) {
case GL_NEVER:
@@ -578,9 +581,6 @@ _mesa_AlphaFunc( GLenum func, GLclampf ref )
case GL_NOTEQUAL:
case GL_GEQUAL:
case GL_ALWAYS:
- if (ctx->Color.AlphaFunc == func && ctx->Color.AlphaRefUnclamped == ref)
- return; /* no change */
-
FLUSH_VERTICES(ctx, _NEW_COLOR);
ctx->Color.AlphaFunc = func;
ctx->Color.AlphaRefUnclamped = ref;
@@ -613,7 +613,7 @@ _mesa_LogicOp( GLenum opcode )
GET_CURRENT_CONTEXT(ctx);
if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glLogicOp(%s)\n", _mesa_lookup_enum_by_nr(opcode));
+ _mesa_debug(ctx, "glLogicOp(%s)\n", _mesa_enum_to_string(opcode));
switch (opcode) {
case GL_CLEAR:
@@ -790,7 +790,7 @@ _mesa_ClampColor(GLenum target, GLenum clamp)
invalid_enum:
_mesa_error(ctx, GL_INVALID_ENUM, "glClampColor(%s)",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
}
static GLboolean
@@ -930,12 +930,10 @@ void _mesa_init_color( struct gl_context * ctx )
ctx->Color._ClampFragmentColor = GL_FALSE;
ctx->Color.ClampReadColor = GL_FIXED_ONLY_ARB;
- if (ctx->API == API_OPENGLES2) {
- /* GLES 3 behaves as though GL_FRAMEBUFFER_SRGB is always enabled. */
- ctx->Color.sRGBEnabled = GL_TRUE;
- } else {
- ctx->Color.sRGBEnabled = GL_FALSE;
- }
+ /* GLES 1/2/3 behaves as though GL_FRAMEBUFFER_SRGB is always enabled
+ * if EGL_KHR_gl_colorspace has been used to request sRGB.
+ */
+ ctx->Color.sRGBEnabled = _mesa_is_gles(ctx);
}
/*@}*/
diff --git a/src/mesa/main/blit.c b/src/mesa/main/blit.c
index db8fee5a414..a32f1a42aea 100644
--- a/src/mesa/main/blit.c
+++ b/src/mesa/main/blit.c
@@ -37,6 +37,7 @@
#include "framebuffer.h"
#include "glformats.h"
#include "mtypes.h"
+#include "macros.h"
#include "state.h"
@@ -59,6 +60,31 @@ find_attachment(const struct gl_framebuffer *fb,
/**
+ * \return true if two regions overlap, false otherwise
+ */
+bool
+_mesa_regions_overlap(int srcX0, int srcY0,
+ int srcX1, int srcY1,
+ int dstX0, int dstY0,
+ int dstX1, int dstY1)
+{
+ if (MAX2(srcX0, srcX1) < MIN2(dstX0, dstX1))
+ return false; /* dst completely right of src */
+
+ if (MAX2(dstX0, dstX1) < MIN2(srcX0, srcX1))
+ return false; /* dst completely left of src */
+
+ if (MAX2(srcY0, srcY1) < MIN2(dstY0, dstY1))
+ return false; /* dst completely above src */
+
+ if (MAX2(dstY0, dstY1) < MIN2(srcY0, srcY1))
+ return false; /* dst completely below src */
+
+ return true; /* some overlap */
+}
+
+
+/**
* Helper function for checking if the datatypes of color buffers are
* compatible for glBlitFramebuffer. From the 3.1 spec, page 198:
*
@@ -186,7 +212,7 @@ _mesa_blit_framebuffer(struct gl_context *ctx,
if (!is_valid_blit_filter(ctx, filter)) {
_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid filter %s)", func,
- _mesa_lookup_enum_by_nr(filter));
+ _mesa_enum_to_string(filter));
return;
}
@@ -194,7 +220,7 @@ _mesa_blit_framebuffer(struct gl_context *ctx,
filter == GL_SCALED_RESOLVE_NICEST_EXT) &&
(readFb->Visual.samples == 0 || drawFb->Visual.samples > 0)) {
_mesa_error(ctx, GL_INVALID_OPERATION, "%s(%s: invalid samples)", func,
- _mesa_lookup_enum_by_nr(filter));
+ _mesa_enum_to_string(filter));
return;
}
@@ -522,7 +548,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
" %d, %d, %d, %d, 0x%x, %s)\n",
srcX0, srcY0, srcX1, srcY1,
dstX0, dstY0, dstX1, dstY1,
- mask, _mesa_lookup_enum_by_nr(filter));
+ mask, _mesa_enum_to_string(filter));
_mesa_blit_framebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
srcX0, srcY0, srcX1, srcY1,
@@ -547,7 +573,7 @@ _mesa_BlitNamedFramebuffer(GLuint readFramebuffer, GLuint drawFramebuffer,
readFramebuffer, drawFramebuffer,
srcX0, srcY0, srcX1, srcY1,
dstX0, dstY0, dstX1, dstY1,
- mask, _mesa_lookup_enum_by_nr(filter));
+ mask, _mesa_enum_to_string(filter));
/*
* According to PDF page 533 of the OpenGL 4.5 core spec (30.10.2014,
diff --git a/src/mesa/main/blit.h b/src/mesa/main/blit.h
index 54b946e3192..88dd4a9ec8d 100644
--- a/src/mesa/main/blit.h
+++ b/src/mesa/main/blit.h
@@ -28,6 +28,12 @@
#include "glheader.h"
+extern bool
+_mesa_regions_overlap(int srcX0, int srcY0,
+ int srcX1, int srcY1,
+ int dstX0, int dstY0,
+ int dstX1, int dstY1);
+
extern void
_mesa_blit_framebuffer(struct gl_context *ctx,
struct gl_framebuffer *readFb,
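_mesa_regions_overlap() is exported through blit.h so other parts of Mesa and the drivers can test whether a copy can be done in place. A minimal sketch of a call site (hypothetical, not from the patch):

   /* Fall back to an intermediate copy when the source and destination
    * rectangles of a same-surface blit overlap. */
   if (_mesa_regions_overlap(srcX0, srcY0, srcX1, srcY1,
                             dstX0, dstY0, dstX1, dstY1)) {
      /* blit via a temporary buffer */
   } else {
      /* safe to copy directly */
   }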
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 66dee680258..1cdea937f91 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -91,8 +91,9 @@ get_buffer_target(struct gl_context *ctx, GLenum target)
case GL_COPY_WRITE_BUFFER:
return &ctx->CopyWriteBuffer;
case GL_DRAW_INDIRECT_BUFFER:
- if (ctx->API == API_OPENGL_CORE &&
- ctx->Extensions.ARB_draw_indirect) {
+ if ((ctx->API == API_OPENGL_CORE &&
+ ctx->Extensions.ARB_draw_indirect) ||
+ _mesa_is_gles31(ctx)) {
return &ctx->DrawIndirectBuffer;
}
break;
@@ -112,6 +113,11 @@ get_buffer_target(struct gl_context *ctx, GLenum target)
return &ctx->UniformBuffer;
}
break;
+ case GL_SHADER_STORAGE_BUFFER:
+ if (ctx->Extensions.ARB_shader_storage_buffer_object) {
+ return &ctx->ShaderStorageBuffer;
+ }
+ break;
case GL_ATOMIC_COUNTER_BUFFER:
if (ctx->Extensions.ARB_shader_atomic_counters) {
return &ctx->AtomicBuffer;
@@ -831,6 +837,9 @@ _mesa_init_buffer_objects( struct gl_context *ctx )
_mesa_reference_buffer_object(ctx, &ctx->UniformBuffer,
ctx->Shared->NullBufferObj);
+ _mesa_reference_buffer_object(ctx, &ctx->ShaderStorageBuffer,
+ ctx->Shared->NullBufferObj);
+
_mesa_reference_buffer_object(ctx, &ctx->AtomicBuffer,
ctx->Shared->NullBufferObj);
@@ -845,6 +854,14 @@ _mesa_init_buffer_objects( struct gl_context *ctx )
ctx->UniformBufferBindings[i].Size = -1;
}
+ for (i = 0; i < MAX_COMBINED_SHADER_STORAGE_BUFFERS; i++) {
+ _mesa_reference_buffer_object(ctx,
+ &ctx->ShaderStorageBufferBindings[i].BufferObject,
+ ctx->Shared->NullBufferObj);
+ ctx->ShaderStorageBufferBindings[i].Offset = -1;
+ ctx->ShaderStorageBufferBindings[i].Size = -1;
+ }
+
for (i = 0; i < MAX_COMBINED_ATOMIC_BUFFERS; i++) {
_mesa_reference_buffer_object(ctx,
&ctx->AtomicBufferBindings[i].BufferObject,
@@ -867,6 +884,8 @@ _mesa_free_buffer_objects( struct gl_context *ctx )
_mesa_reference_buffer_object(ctx, &ctx->UniformBuffer, NULL);
+ _mesa_reference_buffer_object(ctx, &ctx->ShaderStorageBuffer, NULL);
+
_mesa_reference_buffer_object(ctx, &ctx->AtomicBuffer, NULL);
_mesa_reference_buffer_object(ctx, &ctx->DrawIndirectBuffer, NULL);
@@ -877,6 +896,12 @@ _mesa_free_buffer_objects( struct gl_context *ctx )
NULL);
}
+ for (i = 0; i < MAX_COMBINED_SHADER_STORAGE_BUFFERS; i++) {
+ _mesa_reference_buffer_object(ctx,
+ &ctx->ShaderStorageBufferBindings[i].BufferObject,
+ NULL);
+ }
+
for (i = 0; i < MAX_COMBINED_ATOMIC_BUFFERS; i++) {
_mesa_reference_buffer_object(ctx,
&ctx->AtomicBufferBindings[i].BufferObject,
@@ -1158,7 +1183,7 @@ _mesa_BindBuffer(GLenum target, GLuint buffer)
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glBindBuffer(%s, %u)\n",
- _mesa_lookup_enum_by_nr(target), buffer);
+ _mesa_enum_to_string(target), buffer);
bind_buffer_object(ctx, target, buffer);
}
@@ -1240,6 +1265,17 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint *ids)
_mesa_BindBuffer( GL_UNIFORM_BUFFER, 0 );
}
+ /* unbind SSBO binding points */
+ for (j = 0; j < ctx->Const.MaxShaderStorageBufferBindings; j++) {
+ if (ctx->ShaderStorageBufferBindings[j].BufferObject == bufObj) {
+ _mesa_BindBufferBase(GL_SHADER_STORAGE_BUFFER, j, 0);
+ }
+ }
+
+ if (ctx->ShaderStorageBuffer == bufObj) {
+ _mesa_BindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
+ }
+
 /* unbind Atomic Buffer binding points */
for (j = 0; j < ctx->Const.MaxAtomicBufferBindings; j++) {
if (ctx->AtomicBufferBindings[j].BufferObject == bufObj) {
@@ -1500,9 +1536,9 @@ _mesa_buffer_data(struct gl_context *ctx, struct gl_buffer_object *bufObj,
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "%s(%s, %ld, %p, %s)\n",
func,
- _mesa_lookup_enum_by_nr(target),
+ _mesa_enum_to_string(target),
(long int) size, data,
- _mesa_lookup_enum_by_nr(usage));
+ _mesa_enum_to_string(usage));
if (size < 0) {
_mesa_error(ctx, GL_INVALID_VALUE, "%s(size < 0)", func);
@@ -1535,7 +1571,7 @@ _mesa_buffer_data(struct gl_context *ctx, struct gl_buffer_object *bufObj,
if (!valid_usage) {
_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid usage: %s)", func,
- _mesa_lookup_enum_by_nr(usage));
+ _mesa_enum_to_string(usage));
return;
}
@@ -1990,7 +2026,7 @@ get_buffer_parameter(struct gl_context *ctx,
invalid_pname:
_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid pname: %s)", func,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
return false;
}
@@ -2337,7 +2373,7 @@ _mesa_map_buffer_range(struct gl_context *ctx,
if (offset + length > bufObj->Size) {
_mesa_error(ctx, GL_INVALID_VALUE,
- "%s(offset %ld + length %ld > buffer_size %ld)", func,
+ "%s(offset %td + length %td > buffer_size %td)", func,
offset, length, bufObj->Size);
return NULL;
}
@@ -2999,6 +3035,33 @@ set_ubo_binding(struct gl_context *ctx,
}
/**
+ * Binds a buffer object to a shader storage buffer binding point.
+ *
+ * The caller is responsible for flushing vertices and updating
+ * NewDriverState.
+ */
+static void
+set_ssbo_binding(struct gl_context *ctx,
+ struct gl_shader_storage_buffer_binding *binding,
+ struct gl_buffer_object *bufObj,
+ GLintptr offset,
+ GLsizeiptr size,
+ GLboolean autoSize)
+{
+ _mesa_reference_buffer_object(ctx, &binding->BufferObject, bufObj);
+
+ binding->Offset = offset;
+ binding->Size = size;
+ binding->AutomaticSize = autoSize;
+
+ /* If this is a real buffer object, mark it as having been used
+ * at some point as an SSBO.
+ */
+ if (size >= 0)
+ bufObj->UsageHistory |= USAGE_SHADER_STORAGE_BUFFER;
+}
+
+/**
* Binds a buffer object to a uniform buffer binding point.
*
* Unlike set_ubo_binding(), this function also flushes vertices
@@ -3030,6 +3093,37 @@ bind_uniform_buffer(struct gl_context *ctx,
}
/**
+ * Binds a buffer object to a shader storage buffer binding point.
+ *
+ * Unlike set_ssbo_binding(), this function also flushes vertices
+ * and updates NewDriverState. It also checks if the binding
+ * has actually changed before updating it.
+ */
+static void
+bind_shader_storage_buffer(struct gl_context *ctx,
+ GLuint index,
+ struct gl_buffer_object *bufObj,
+ GLintptr offset,
+ GLsizeiptr size,
+ GLboolean autoSize)
+{
+ struct gl_shader_storage_buffer_binding *binding =
+ &ctx->ShaderStorageBufferBindings[index];
+
+ if (binding->BufferObject == bufObj &&
+ binding->Offset == offset &&
+ binding->Size == size &&
+ binding->AutomaticSize == autoSize) {
+ return;
+ }
+
+ FLUSH_VERTICES(ctx, 0);
+ ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer;
+
+ set_ssbo_binding(ctx, binding, bufObj, offset, size, autoSize);
+}
+
+/**
* Bind a region of a buffer object to a uniform block binding point.
* \param index the uniform buffer binding point index
* \param bufObj the buffer object
@@ -3064,6 +3158,40 @@ bind_buffer_range_uniform_buffer(struct gl_context *ctx,
bind_uniform_buffer(ctx, index, bufObj, offset, size, GL_FALSE);
}
+/**
+ * Bind a region of a buffer object to a shader storage block binding point.
+ * \param index the shader storage buffer binding point index
+ * \param bufObj the buffer object
+ * \param offset offset to the start of buffer object region
+ * \param size size of the buffer object region
+ */
+static void
+bind_buffer_range_shader_storage_buffer(struct gl_context *ctx,
+ GLuint index,
+ struct gl_buffer_object *bufObj,
+ GLintptr offset,
+ GLsizeiptr size)
+{
+ if (index >= ctx->Const.MaxShaderStorageBufferBindings) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "glBindBufferRange(index=%d)", index);
+ return;
+ }
+
+ if (offset & (ctx->Const.ShaderStorageBufferOffsetAlignment - 1)) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "glBindBufferRange(offset misaligned %d/%d)", (int) offset,
+ ctx->Const.ShaderStorageBufferOffsetAlignment);
+ return;
+ }
+
+ if (bufObj == ctx->Shared->NullBufferObj) {
+ offset = -1;
+ size = -1;
+ }
+
+ _mesa_reference_buffer_object(ctx, &ctx->ShaderStorageBuffer, bufObj);
+ bind_shader_storage_buffer(ctx, index, bufObj, offset, size, GL_FALSE);
+}
/**
* Bind a buffer object to a uniform block binding point.
@@ -3088,6 +3216,28 @@ bind_buffer_base_uniform_buffer(struct gl_context *ctx,
}
/**
+ * Bind a buffer object to a shader storage block binding point.
+ * As above, but offset = 0.
+ */
+static void
+bind_buffer_base_shader_storage_buffer(struct gl_context *ctx,
+ GLuint index,
+ struct gl_buffer_object *bufObj)
+{
+ if (index >= ctx->Const.MaxShaderStorageBufferBindings) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "glBindBufferBase(index=%d)", index);
+ return;
+ }
+
+ _mesa_reference_buffer_object(ctx, &ctx->ShaderStorageBuffer, bufObj);
+
+ if (bufObj == ctx->Shared->NullBufferObj)
+ bind_shader_storage_buffer(ctx, index, bufObj, -1, -1, GL_TRUE);
+ else
+ bind_shader_storage_buffer(ctx, index, bufObj, 0, 0, GL_TRUE);
+}
+
+/**
* Binds a buffer object to an atomic buffer binding point.
*
* The caller is responsible for validating the offset,
@@ -3219,6 +3369,35 @@ error_check_bind_uniform_buffers(struct gl_context *ctx,
return true;
}
+static bool
+error_check_bind_shader_storage_buffers(struct gl_context *ctx,
+ GLuint first, GLsizei count,
+ const char *caller)
+{
+ if (!ctx->Extensions.ARB_shader_storage_buffer_object) {
+ _mesa_error(ctx, GL_INVALID_ENUM,
+ "%s(target=GL_SHADER_STORAGE_BUFFER)", caller);
+ return false;
+ }
+
+ /* The ARB_multi_bind spec says:
+ *
+ * "An INVALID_OPERATION error is generated if <first> + <count> is
+ * greater than the number of target-specific indexed binding points,
+ * as described in section 6.7.1."
+ */
+ if (first + count > ctx->Const.MaxShaderStorageBufferBindings) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "%s(first=%u + count=%d > the value of "
+ "GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS=%u)",
+ caller, first, count,
+ ctx->Const.MaxShaderStorageBufferBindings);
+ return false;
+ }
+
+ return true;
+}
+
/**
* Unbind all uniform buffers in the range
* <first> through <first>+<count>-1
@@ -3234,6 +3413,22 @@ unbind_uniform_buffers(struct gl_context *ctx, GLuint first, GLsizei count)
bufObj, -1, -1, GL_TRUE);
}
+/**
+ * Unbind all shader storage buffers in the range
+ * <first> through <first>+<count>-1
+ */
+static void
+unbind_shader_storage_buffers(struct gl_context *ctx, GLuint first,
+ GLsizei count)
+{
+ struct gl_buffer_object *bufObj = ctx->Shared->NullBufferObj;
+ GLint i;
+
+ for (i = 0; i < count; i++)
+ set_ssbo_binding(ctx, &ctx->ShaderStorageBufferBindings[first + i],
+ bufObj, -1, -1, GL_TRUE);
+}
+
static void
bind_uniform_buffers_base(struct gl_context *ctx, GLuint first, GLsizei count,
const GLuint *buffers)
@@ -3301,6 +3496,73 @@ bind_uniform_buffers_base(struct gl_context *ctx, GLuint first, GLsizei count,
}
static void
+bind_shader_storage_buffers_base(struct gl_context *ctx, GLuint first,
+ GLsizei count, const GLuint *buffers)
+{
+ GLint i;
+
+ if (!error_check_bind_shader_storage_buffers(ctx, first, count,
+ "glBindBuffersBase"))
+ return;
+
+ /* Assume that at least one binding will be changed */
+ FLUSH_VERTICES(ctx, 0);
+ ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer;
+
+ if (!buffers) {
+ /* The ARB_multi_bind spec says:
+ *
+ * "If <buffers> is NULL, all bindings from <first> through
+ * <first>+<count>-1 are reset to their unbound (zero) state."
+ */
+ unbind_shader_storage_buffers(ctx, first, count);
+ return;
+ }
+
+ /* Note that the error semantics for multi-bind commands differ from
+ * those of other GL commands.
+ *
+ * The Issues section in the ARB_multi_bind spec says:
+ *
+ * "(11) Typically, OpenGL specifies that if an error is generated by a
+ * command, that command has no effect. This is somewhat
+ * unfortunate for multi-bind commands, because it would require a
+ * first pass to scan the entire list of bound objects for errors
+ * and then a second pass to actually perform the bindings.
+ * Should we have different error semantics?
+ *
+ * RESOLVED: Yes. In this specification, when the parameters for
+ * one of the <count> binding points are invalid, that binding point
+ * is not updated and an error will be generated. However, other
+ * binding points in the same command will be updated if their
+ * parameters are valid and no other error occurs."
+ */
+
+ _mesa_begin_bufferobj_lookups(ctx);
+
+ for (i = 0; i < count; i++) {
+ struct gl_shader_storage_buffer_binding *binding =
+ &ctx->ShaderStorageBufferBindings[first + i];
+ struct gl_buffer_object *bufObj;
+
+ if (binding->BufferObject && binding->BufferObject->Name == buffers[i])
+ bufObj = binding->BufferObject;
+ else
+ bufObj = _mesa_multi_bind_lookup_bufferobj(ctx, buffers, i,
+ "glBindBuffersBase");
+
+ if (bufObj) {
+ if (bufObj == ctx->Shared->NullBufferObj)
+ set_ssbo_binding(ctx, binding, bufObj, -1, -1, GL_TRUE);
+ else
+ set_ssbo_binding(ctx, binding, bufObj, 0, 0, GL_TRUE);
+ }
+ }
+
+ _mesa_end_bufferobj_lookups(ctx);
+}
+
+static void
bind_uniform_buffers_range(struct gl_context *ctx, GLuint first, GLsizei count,
const GLuint *buffers,
const GLintptr *offsets, const GLsizeiptr *sizes)
@@ -3405,6 +3667,112 @@ bind_uniform_buffers_range(struct gl_context *ctx, GLuint first, GLsizei count,
_mesa_end_bufferobj_lookups(ctx);
}
+static void
+bind_shader_storage_buffers_range(struct gl_context *ctx, GLuint first,
+ GLsizei count, const GLuint *buffers,
+ const GLintptr *offsets,
+ const GLsizeiptr *sizes)
+{
+ GLint i;
+
+ if (!error_check_bind_shader_storage_buffers(ctx, first, count,
+ "glBindBuffersRange"))
+ return;
+
+ /* Assume that at least one binding will be changed */
+ FLUSH_VERTICES(ctx, 0);
+ ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer;
+
+ if (!buffers) {
+ /* The ARB_multi_bind spec says:
+ *
+ * "If <buffers> is NULL, all bindings from <first> through
+ * <first>+<count>-1 are reset to their unbound (zero) state.
+ * In this case, the offsets and sizes associated with the
+ * binding points are set to default values, ignoring
+ * <offsets> and <sizes>."
+ */
+ unbind_shader_storage_buffers(ctx, first, count);
+ return;
+ }
+
+ /* Note that the error semantics for multi-bind commands differ from
+ * those of other GL commands.
+ *
+ * The Issues section in the ARB_multi_bind spec says:
+ *
+ * "(11) Typically, OpenGL specifies that if an error is generated by a
+ * command, that command has no effect. This is somewhat
+ * unfortunate for multi-bind commands, because it would require a
+ * first pass to scan the entire list of bound objects for errors
+ * and then a second pass to actually perform the bindings.
+ * Should we have different error semantics?
+ *
+ * RESOLVED: Yes. In this specification, when the parameters for
+ * one of the <count> binding points are invalid, that binding point
+ * is not updated and an error will be generated. However, other
+ * binding points in the same command will be updated if their
+ * parameters are valid and no other error occurs."
+ */
+
+ _mesa_begin_bufferobj_lookups(ctx);
+
+ for (i = 0; i < count; i++) {
+ struct gl_shader_storage_buffer_binding *binding =
+ &ctx->ShaderStorageBufferBindings[first + i];
+ struct gl_buffer_object *bufObj;
+
+ if (!bind_buffers_check_offset_and_size(ctx, i, offsets, sizes))
+ continue;
+
+ /* The ARB_multi_bind spec says:
+ *
+ * "An INVALID_VALUE error is generated by BindBuffersRange if any
+ * pair of values in <offsets> and <sizes> does not respectively
+ * satisfy the constraints described for those parameters for the
+ * specified target, as described in section 6.7.1 (per binding)."
+ *
+ * Section 6.7.1 refers to table 6.5, which says:
+ *
+ * "┌───────────────────────────────────────────────────────────────┐
+ * │ Shader storage buffer array bindings (see sec. 7.8) │
+ * ├─────────────────────┬─────────────────────────────────────────┤
+ * │ ... │ ... │
+ * │ offset restriction │ multiple of value of SHADER_STORAGE_- │
+ * │ │ BUFFER_OFFSET_ALIGNMENT │
+ * │ ... │ ... │
+ * │ size restriction │ none │
+ * └─────────────────────┴─────────────────────────────────────────┘"
+ */
+ if (offsets[i] & (ctx->Const.ShaderStorageBufferOffsetAlignment - 1)) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "glBindBuffersRange(offsets[%u]=%" PRId64
+ " is misaligned; it must be a multiple of the value of "
+ "GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT=%u when "
+ "target=GL_SHADER_STORAGE_BUFFER)",
+ i, (int64_t) offsets[i],
+ ctx->Const.ShaderStorageBufferOffsetAlignment);
+ continue;
+ }
+
+ if (binding->BufferObject && binding->BufferObject->Name == buffers[i])
+ bufObj = binding->BufferObject;
+ else
+ bufObj = _mesa_multi_bind_lookup_bufferobj(ctx, buffers, i,
+ "glBindBuffersRange");
+
+ if (bufObj) {
+ if (bufObj == ctx->Shared->NullBufferObj)
+ set_ssbo_binding(ctx, binding, bufObj, -1, -1, GL_FALSE);
+ else
+ set_ssbo_binding(ctx, binding, bufObj,
+ offsets[i], sizes[i], GL_FALSE);
+ }
+ }
+
+ _mesa_end_bufferobj_lookups(ctx);
+}
+
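
The offsets[i] & (alignment - 1) test in the loop above relies on GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT being a power of two. A small standalone sketch of the same check, plus a round-up helper an application might use before calling glBindBuffersRange (256 is only an example value; the real limit comes from glGetIntegerv):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Power-of-two alignment check, mirroring offset & (align - 1). */
static int is_aligned(intptr_t offset, intptr_t align)
{
   assert(align > 0 && (align & (align - 1)) == 0);
   return (offset & (align - 1)) == 0;
}

/* Round an offset up to the next aligned value. */
static intptr_t align_up(intptr_t offset, intptr_t align)
{
   return (offset + align - 1) & ~(align - 1);
}

int main(void)
{
   const intptr_t align = 256;  /* example value of the queried limit */
   printf("%d %d\n", is_aligned(512, align), is_aligned(300, align));
   printf("%ld\n", (long) align_up(300, align));   /* prints 512 */
   return 0;
}
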
static bool
error_check_bind_xfb_buffers(struct gl_context *ctx,
struct gl_transform_feedback_object *tfObj,
@@ -3894,6 +4262,9 @@ _mesa_BindBufferRange(GLenum target, GLuint index,
case GL_UNIFORM_BUFFER:
bind_buffer_range_uniform_buffer(ctx, index, bufObj, offset, size);
return;
+ case GL_SHADER_STORAGE_BUFFER:
+ bind_buffer_range_shader_storage_buffer(ctx, index, bufObj, offset, size);
+ return;
case GL_ATOMIC_COUNTER_BUFFER:
bind_atomic_buffer(ctx, index, bufObj, offset, size,
"glBindBufferRange");
@@ -3960,6 +4331,9 @@ _mesa_BindBufferBase(GLenum target, GLuint index, GLuint buffer)
case GL_UNIFORM_BUFFER:
bind_buffer_base_uniform_buffer(ctx, index, bufObj);
return;
+ case GL_SHADER_STORAGE_BUFFER:
+ bind_buffer_base_shader_storage_buffer(ctx, index, bufObj);
+ return;
case GL_ATOMIC_COUNTER_BUFFER:
bind_atomic_buffer(ctx, index, bufObj, 0, 0,
"glBindBufferBase");
@@ -3984,13 +4358,17 @@ _mesa_BindBuffersRange(GLenum target, GLuint first, GLsizei count,
case GL_UNIFORM_BUFFER:
bind_uniform_buffers_range(ctx, first, count, buffers, offsets, sizes);
return;
+ case GL_SHADER_STORAGE_BUFFER:
+ bind_shader_storage_buffers_range(ctx, first, count, buffers, offsets,
+ sizes);
+ return;
case GL_ATOMIC_COUNTER_BUFFER:
bind_atomic_buffers_range(ctx, first, count, buffers,
offsets, sizes);
return;
default:
_mesa_error(ctx, GL_INVALID_ENUM, "glBindBuffersRange(target=%s)",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
break;
}
}
@@ -4008,12 +4386,15 @@ _mesa_BindBuffersBase(GLenum target, GLuint first, GLsizei count,
case GL_UNIFORM_BUFFER:
bind_uniform_buffers_base(ctx, first, count, buffers);
return;
+ case GL_SHADER_STORAGE_BUFFER:
+ bind_shader_storage_buffers_base(ctx, first, count, buffers);
+ return;
case GL_ATOMIC_COUNTER_BUFFER:
bind_atomic_buffers_base(ctx, first, count, buffers);
return;
default:
_mesa_error(ctx, GL_INVALID_ENUM, "glBindBuffersBase(target=%s)",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
break;
}
}
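
From the application side, the new GL_SHADER_STORAGE_BUFFER cases mean the ARB_multi_bind entry points now accept shader storage buffers. A hedged usage fragment, assuming a current GL 4.4 context (or ARB_multi_bind plus ARB_shader_storage_buffer_object) and a loader such as libepoxy:

/* Assumes a current GL 4.4 context; any loader exposing GL 4.4 works. */
#include <epoxy/gl.h>

static void bind_three_ssbos(const GLuint buf[3])
{
   /* Bind buf[0..2] to SSBO binding points 1..3 in a single call. */
   glBindBuffersBase(GL_SHADER_STORAGE_BUFFER, 1, 3, buf);

   /* Passing NULL resets binding points 1..3 to the unbound state,
    * per the ARB_multi_bind language quoted above. */
   glBindBuffersBase(GL_SHADER_STORAGE_BUFFER, 1, 3, NULL);
}
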
diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c
index 0536266d756..93588a2ee18 100644
--- a/src/mesa/main/buffers.c
+++ b/src/mesa/main/buffers.c
@@ -251,7 +251,7 @@ _mesa_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb,
FLUSH_VERTICES(ctx, 0);
if (MESA_VERBOSE & VERBOSE_API) {
- _mesa_debug(ctx, "%s %s\n", caller, _mesa_lookup_enum_by_nr(buffer));
+ _mesa_debug(ctx, "%s %s\n", caller, _mesa_enum_to_string(buffer));
}
if (buffer == GL_NONE) {
@@ -264,14 +264,14 @@ _mesa_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb,
if (destMask == BAD_MASK) {
/* totally bogus buffer */
_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)", caller,
- _mesa_lookup_enum_by_nr(buffer));
+ _mesa_enum_to_string(buffer));
return;
}
destMask &= supportedMask;
if (destMask == 0x0) {
/* none of the named color buffers exist! */
_mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid buffer %s)",
- caller, _mesa_lookup_enum_by_nr(buffer));
+ caller, _mesa_enum_to_string(buffer));
return;
}
}
@@ -411,7 +411,7 @@ _mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb,
*/
if (destMask[output] == BAD_MASK) {
_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)",
- caller, _mesa_lookup_enum_by_nr(buffers[output]));
+ caller, _mesa_enum_to_string(buffers[output]));
return;
}
@@ -427,7 +427,7 @@ _mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb,
*/
if (_mesa_bitcount(destMask[output]) > 1) {
_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)",
- caller, _mesa_lookup_enum_by_nr(buffers[output]));
+ caller, _mesa_enum_to_string(buffers[output]));
return;
}
@@ -445,7 +445,7 @@ _mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb,
if (destMask[output] == 0) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"%s(unsupported buffer %s)",
- caller, _mesa_lookup_enum_by_nr(buffers[output]));
+ caller, _mesa_enum_to_string(buffers[output]));
return;
}
@@ -459,7 +459,7 @@ _mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb,
buffers[output] != GL_COLOR_ATTACHMENT0 + output) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"%s(unsupported buffer %s)",
- caller, _mesa_lookup_enum_by_nr(buffers[output]));
+ caller, _mesa_enum_to_string(buffers[output]));
return;
}
@@ -471,7 +471,7 @@ _mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb,
if (destMask[output] & usedBufferMask) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"%s(duplicated buffer %s)",
- caller, _mesa_lookup_enum_by_nr(buffers[output]));
+ caller, _mesa_enum_to_string(buffers[output]));
return;
}
@@ -700,7 +700,7 @@ _mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb,
FLUSH_VERTICES(ctx, 0);
if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "%s %s\n", caller, _mesa_lookup_enum_by_nr(buffer));
+ _mesa_debug(ctx, "%s %s\n", caller, _mesa_enum_to_string(buffer));
if (buffer == GL_NONE) {
/* This is legal--it means that no buffer should be bound for reading. */
@@ -712,14 +712,14 @@ _mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb,
if (srcBuffer == -1) {
_mesa_error(ctx, GL_INVALID_ENUM,
"%s(invalid buffer %s)", caller,
- _mesa_lookup_enum_by_nr(buffer));
+ _mesa_enum_to_string(buffer));
return;
}
supportedMask = supported_buffer_bitmask(ctx, fb);
if (((1 << srcBuffer) & supportedMask) == 0) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"%s(invalid buffer %s)", caller,
- _mesa_lookup_enum_by_nr(buffer));
+ _mesa_enum_to_string(buffer));
return;
}
}
diff --git a/src/mesa/main/clear.c b/src/mesa/main/clear.c
index 426caea4709..3bfcc5c0e39 100644
--- a/src/mesa/main/clear.c
+++ b/src/mesa/main/clear.c
@@ -325,6 +325,18 @@ _mesa_ClearBufferiv(GLenum buffer, GLint drawbuffer, const GLint *value)
_mesa_update_state( ctx );
}
+ /* Page 498 of the PDF, section '17.4.3.1 Clearing Individual Buffers'
+ * of the OpenGL 4.5 spec states:
+ *
+ * "An INVALID_ENUM error is generated by ClearBufferiv and
+ * ClearNamedFramebufferiv if buffer is not COLOR or STENCIL."
+ */
+ if (buffer == GL_DEPTH || buffer == GL_DEPTH_STENCIL) {
+ _mesa_error(ctx, GL_INVALID_ENUM,
+ "glClearBufferiv(buffer=GL_DEPTH || GL_DEPTH_STENCIL)");
+ return;
+ }
+
switch (buffer) {
case GL_STENCIL:
/* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says:
@@ -395,7 +407,7 @@ _mesa_ClearBufferiv(GLenum buffer, GLint drawbuffer, const GLint *value)
return;
default:
_mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferiv(buffer=%s)",
- _mesa_lookup_enum_by_nr(buffer));
+ _mesa_enum_to_string(buffer));
return;
}
}
@@ -485,7 +497,7 @@ _mesa_ClearBufferuiv(GLenum buffer, GLint drawbuffer, const GLuint *value)
return;
default:
_mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferuiv(buffer=%s)",
- _mesa_lookup_enum_by_nr(buffer));
+ _mesa_enum_to_string(buffer));
return;
}
}
@@ -596,7 +608,7 @@ _mesa_ClearBufferfv(GLenum buffer, GLint drawbuffer, const GLfloat *value)
return;
default:
_mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferfv(buffer=%s)",
- _mesa_lookup_enum_by_nr(buffer));
+ _mesa_enum_to_string(buffer));
return;
}
}
@@ -636,7 +648,7 @@ _mesa_ClearBufferfi(GLenum buffer, GLint drawbuffer,
if (buffer != GL_DEPTH_STENCIL) {
_mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferfi(buffer=%s)",
- _mesa_lookup_enum_by_nr(buffer));
+ _mesa_enum_to_string(buffer));
return;
}
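
The check added above enforces the GL 4.5 rule that glClearBufferiv only accepts COLOR and STENCIL; depth is cleared through the fv/fi entry points instead. A short usage sketch, assuming a current GL 3.0+ context with a bound framebuffer (values are arbitrary):

#include <epoxy/gl.h>   /* assumption: any GL 3.0+ loader works */

static void clear_buffers_example(void)
{
   const GLfloat color[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
   const GLfloat depth = 1.0f;
   const GLint stencil = 0;

   glClearBufferfv(GL_COLOR, 0, color);           /* color drawbuffer 0 */
   glClearBufferiv(GL_STENCIL, 0, &stencil);      /* stencil only */
   glClearBufferfv(GL_DEPTH, 0, &depth);          /* depth only */
   glClearBufferfi(GL_DEPTH_STENCIL, 0, 1.0f, 0); /* both at once */
   /* glClearBufferiv(GL_DEPTH, ...) now raises GL_INVALID_ENUM. */
}
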
diff --git a/src/mesa/main/condrender.c b/src/mesa/main/condrender.c
index 77e4b95ee8f..46c6036d2a5 100644
--- a/src/mesa/main/condrender.c
+++ b/src/mesa/main/condrender.c
@@ -87,7 +87,7 @@ _mesa_BeginConditionalRender(GLuint queryId, GLenum mode)
/* fallthrough - invalid */
default:
_mesa_error(ctx, GL_INVALID_ENUM, "glBeginConditionalRender(mode=%s)",
- _mesa_lookup_enum_by_nr(mode));
+ _mesa_enum_to_string(mode));
return;
}
@@ -184,7 +184,7 @@ _mesa_check_conditional_render(struct gl_context *ctx)
default:
_mesa_problem(ctx, "Bad cond render mode %s in "
" _mesa_check_conditional_render()",
- _mesa_lookup_enum_by_nr(ctx->Query.CondRenderMode));
+ _mesa_enum_to_string(ctx->Query.CondRenderMode));
return GL_TRUE;
}
}
diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
index 9c3baf4c6aa..b35031db3c9 100644
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@@ -171,8 +171,10 @@
#define MAX_PROGRAM_LOCAL_PARAMS 4096
#define MAX_UNIFORMS 4096
#define MAX_UNIFORM_BUFFERS 15 /* + 1 default uniform buffer */
+#define MAX_SHADER_STORAGE_BUFFERS 7 /* + 1 default shader storage buffer */
/* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. */
#define MAX_COMBINED_UNIFORM_BUFFERS (MAX_UNIFORM_BUFFERS * 6)
+#define MAX_COMBINED_SHADER_STORAGE_BUFFERS (MAX_SHADER_STORAGE_BUFFERS * 6)
#define MAX_ATOMIC_COUNTERS 4096
/* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. */
#define MAX_COMBINED_ATOMIC_BUFFERS (MAX_UNIFORM_BUFFERS * 6)
@@ -272,6 +274,12 @@
#define MAX_VERTEX_STREAMS 4
/*@}*/
+/** For GL_ARB_shader_subroutine */
+/*@{*/
+#define MAX_SUBROUTINES 256
+#define MAX_SUBROUTINE_UNIFORM_LOCATIONS 1024
+/*@}*/
+
/** For GL_INTEL_performance_query */
/*@{*/
#define MAX_PERFQUERY_QUERY_NAME_LENGTH 256
@@ -294,6 +302,14 @@
/** For GL_ARB_pipeline_statistics_query */
#define MAX_PIPELINE_STATISTICS 11
+/** For GL_ARB_tessellation_shader */
+/*@{*/
+#define MAX_TESS_GEN_LEVEL 64
+#define MAX_PATCH_VERTICES 32
+#define MAX_TESS_PATCH_COMPONENTS 120
+#define MAX_TESS_CONTROL_TOTAL_OUTPUT_COMPONENTS 4096
+/*@}*/
+
/*
* Color channel component order
*
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index 79fa01849e0..888c461d1c2 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -120,6 +120,7 @@
#include "shaderobj.h"
#include "shaderimage.h"
#include "util/simple_list.h"
+#include "util/strtod.h"
#include "state.h"
#include "stencil.h"
#include "texcompress_s3tc.h"
@@ -338,31 +339,6 @@ _mesa_destroy_visual( struct gl_config *vis )
/**
- * This is lame. gdb only seems to recognize enum types that are
- * actually used somewhere. We want to be able to print/use enum
- * values such as TEXTURE_2D_INDEX in gdb. But we don't actually use
- * the gl_texture_index type anywhere. Thus, this lame function.
- */
-static void
-dummy_enum_func(void)
-{
- gl_buffer_index bi = BUFFER_FRONT_LEFT;
- gl_face_index fi = FACE_POS_X;
- gl_frag_result fr = FRAG_RESULT_DEPTH;
- gl_texture_index ti = TEXTURE_2D_ARRAY_INDEX;
- gl_vert_attrib va = VERT_ATTRIB_POS;
- gl_varying_slot vs = VARYING_SLOT_POS;
-
- (void) bi;
- (void) fi;
- (void) fr;
- (void) ti;
- (void) va;
- (void) vs;
-}
-
-
-/**
* One-time initialization mutex lock.
*
* \sa Used by one_time_init().
@@ -370,6 +346,16 @@ dummy_enum_func(void)
mtx_t OneTimeLock = _MTX_INITIALIZER_NP;
+/**
+ * Calls all the various one-time-fini functions in Mesa
+ */
+
+static void
+one_time_fini(void)
+{
+ _mesa_destroy_shader_compiler();
+ _mesa_locale_fini();
+}
/**
* Calls all the various one-time-init functions in Mesa.
@@ -391,13 +377,14 @@ one_time_init( struct gl_context *ctx )
if (!api_init_mask) {
GLuint i;
- /* do some implementation tests */
- assert( sizeof(GLbyte) == 1 );
- assert( sizeof(GLubyte) == 1 );
- assert( sizeof(GLshort) == 2 );
- assert( sizeof(GLushort) == 2 );
- assert( sizeof(GLint) == 4 );
- assert( sizeof(GLuint) == 4 );
+ STATIC_ASSERT(sizeof(GLbyte) == 1);
+ STATIC_ASSERT(sizeof(GLubyte) == 1);
+ STATIC_ASSERT(sizeof(GLshort) == 2);
+ STATIC_ASSERT(sizeof(GLushort) == 2);
+ STATIC_ASSERT(sizeof(GLint) == 4);
+ STATIC_ASSERT(sizeof(GLuint) == 4);
+
+ _mesa_locale_init();
_mesa_one_time_init_extension_overrides();
@@ -407,6 +394,8 @@ one_time_init( struct gl_context *ctx )
_mesa_ubyte_to_float_color_tab[i] = (float) i / 255.0F;
}
+ atexit(one_time_fini);
+
#if defined(DEBUG) && defined(__DATE__) && defined(__TIME__)
if (MESA_VERBOSE != 0) {
_mesa_debug(ctx, "Mesa %s DEBUG build %s %s\n",
@@ -429,13 +418,6 @@ one_time_init( struct gl_context *ctx )
api_init_mask |= 1 << ctx->API;
mtx_unlock(&OneTimeLock);
-
- /* Hopefully atexit() is widely available. If not, we may need some
- * #ifdef tests here.
- */
- atexit(_mesa_destroy_shader_compiler);
-
- dummy_enum_func();
}
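
The hunks above replace runtime size asserts with compile-time checks and register a single cleanup hook via atexit(). A minimal standalone sketch of both idioms; the STATIC_ASSERT stand-in below uses C11 _Static_assert and is not Mesa's actual macro:

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for a STATIC_ASSERT macro; C11 provides _Static_assert. */
#define STATIC_ASSERT(cond) _Static_assert(cond, #cond)

STATIC_ASSERT(sizeof(int) == 4);   /* fails to compile if untrue */

static void one_time_fini(void)
{
   /* Release process-wide resources exactly once, at process exit. */
   puts("one_time_fini");
}

int main(void)
{
   static int initialized;
   if (!initialized) {
      initialized = 1;
      atexit(one_time_fini);   /* run cleanup when the process exits */
   }
   return 0;
}
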
@@ -496,6 +478,8 @@ init_program_limits(struct gl_constants *consts, gl_shader_stage stage,
prog->MaxInputComponents = 16 * 4; /* old limit not to break tnl and swrast */
prog->MaxOutputComponents = 0; /* value not used */
break;
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
case MESA_SHADER_GEOMETRY:
prog->MaxParameters = MAX_VERTEX_PROGRAM_PARAMS;
prog->MaxAttribs = MAX_VERTEX_GENERIC_ATTRIBS;
@@ -554,6 +538,8 @@ init_program_limits(struct gl_constants *consts, gl_shader_stage stage,
prog->MaxAtomicBuffers = 0;
prog->MaxAtomicCounters = 0;
+
+ prog->MaxShaderStorageBlocks = 8;
}
@@ -615,6 +601,12 @@ _mesa_init_constants(struct gl_constants *consts, gl_api api)
consts->MaxUniformBlockSize = 16384;
consts->UniformBufferOffsetAlignment = 1;
+ /** GL_ARB_shader_storage_buffer_object */
+ consts->MaxCombinedShaderStorageBlocks = 8;
+ consts->MaxShaderStorageBufferBindings = 8;
+ consts->MaxShaderStorageBlockSize = 128 * 1024 * 1024; /* 2^27 */
+ consts->ShaderStorageBufferOffsetAlignment = 256;
+
/* GL_ARB_explicit_uniform_location, GL_MAX_UNIFORM_LOCATIONS */
consts->MaxUserAssignableUniformLocations =
4 * MESA_SHADER_STAGES * MAX_UNIFORMS;
@@ -724,6 +716,14 @@ _mesa_init_constants(struct gl_constants *consts, gl_api api)
/** GL_KHR_context_flush_control */
consts->ContextReleaseBehavior = GL_CONTEXT_RELEASE_BEHAVIOR_FLUSH;
+
+ /** GL_ARB_tessellation_shader */
+ consts->MaxTessGenLevel = MAX_TESS_GEN_LEVEL;
+ consts->MaxPatchVertices = MAX_PATCH_VERTICES;
+ consts->Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits = MAX_TEXTURE_IMAGE_UNITS;
+ consts->Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits = MAX_TEXTURE_IMAGE_UNITS;
+ consts->MaxTessPatchComponents = MAX_TESS_PATCH_COMPONENTS;
+ consts->MaxTessControlTotalOutputComponents = MAX_TESS_CONTROL_TOTAL_OUTPUT_COMPONENTS;
}
@@ -1331,6 +1331,8 @@ _mesa_free_context_data( struct gl_context *ctx )
_mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, NULL);
_mesa_reference_vertprog(ctx, &ctx->VertexProgram._TnlProgram, NULL);
+ _mesa_reference_tesscprog(ctx, &ctx->TessCtrlProgram._Current, NULL);
+ _mesa_reference_tesseprog(ctx, &ctx->TessEvalProgram._Current, NULL);
_mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, NULL);
_mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, NULL);
diff --git a/src/mesa/main/context.h b/src/mesa/main/context.h
index 6f3c941016f..0f7529ad975 100644
--- a/src/mesa/main/context.h
+++ b/src/mesa/main/context.h
@@ -343,6 +343,26 @@ _mesa_has_compute_shaders(const struct gl_context *ctx)
(ctx->API == API_OPENGLES2 && ctx->Version >= 31);
}
+/**
+ * Checks if the context supports shader subroutines.
+ */
+static inline bool
+_mesa_has_shader_subroutine(const struct gl_context *ctx)
+{
+ return ctx->API == API_OPENGL_CORE &&
+ (ctx->Version >= 40 || ctx->Extensions.ARB_shader_subroutine);
+}
+
+/**
+ * Checks if the context supports tessellation.
+ */
+static inline GLboolean
+_mesa_has_tessellation(const struct gl_context *ctx)
+{
+ return ctx->API == API_OPENGL_CORE &&
+ ctx->Extensions.ARB_tessellation_shader;
+}
+
#ifdef __cplusplus
}
diff --git a/src/mesa/main/copyimage.c b/src/mesa/main/copyimage.c
index e8732c6175b..05bc50dd2c6 100644
--- a/src/mesa/main/copyimage.c
+++ b/src/mesa/main/copyimage.c
@@ -93,7 +93,7 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
default:
_mesa_error(ctx, GL_INVALID_ENUM,
"glCopyImageSubData(%sTarget = %s)", dbg_prefix,
- _mesa_lookup_enum_by_nr(*target));
+ _mesa_enum_to_string(*target));
return false;
}
@@ -159,7 +159,7 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
if ((*tex_obj)->Target != *target) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glCopyImageSubData(%sTarget = %s)", dbg_prefix,
- _mesa_lookup_enum_by_nr(*target));
+ _mesa_enum_to_string(*target));
return false;
}
@@ -416,9 +416,9 @@ _mesa_CopyImageSubData(GLuint srcName, GLenum srcTarget, GLint srcLevel,
_mesa_debug(ctx, "glCopyImageSubData(%u, %s, %d, %d, %d, %d, "
"%u, %s, %d, %d, %d, %d, "
"%d, %d, %d)\n",
- srcName, _mesa_lookup_enum_by_nr(srcTarget), srcLevel,
+ srcName, _mesa_enum_to_string(srcTarget), srcLevel,
srcX, srcY, srcZ,
- dstName, _mesa_lookup_enum_by_nr(dstTarget), dstLevel,
+ dstName, _mesa_enum_to_string(dstTarget), dstLevel,
dstX, dstY, dstZ,
srcWidth, srcHeight, srcWidth);
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index d783e34222f..87eb63ea374 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -232,11 +232,13 @@ struct dd_function_table {
/**
- * Called by glGetTexImage().
+ * Called by glGetTexImage(), glGetTextureSubImage().
*/
- void (*GetTexImage)( struct gl_context *ctx,
- GLenum format, GLenum type, GLvoid *pixels,
- struct gl_texture_image *texImage );
+ void (*GetTexSubImage)(struct gl_context *ctx,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLenum format, GLenum type, GLvoid *pixels,
+ struct gl_texture_image *texImage);
/**
* Called by glClearTex[Sub]Image
@@ -326,16 +328,19 @@ struct dd_function_table {
void (*CompressedTexSubImage)(struct gl_context *ctx, GLuint dims,
struct gl_texture_image *texImage,
GLint xoffset, GLint yoffset, GLint zoffset,
- GLsizei width, GLint height, GLint depth,
+ GLsizei width, GLsizei height, GLsizei depth,
GLenum format,
GLsizei imageSize, const GLvoid *data);
/**
* Called by glGetCompressedTexImage.
*/
- void (*GetCompressedTexImage)(struct gl_context *ctx,
- struct gl_texture_image *texImage,
- GLvoid *data);
+ void (*GetCompressedTexSubImage)(struct gl_context *ctx,
+ struct gl_texture_image *texImage,
+ GLint xoffset, GLint yoffset,
+ GLint zoffset, GLsizei width,
+ GLsizei height, GLsizei depth,
+ GLvoid *data);
/*@}*/
/**
diff --git a/src/mesa/main/debug.c b/src/mesa/main/debug.c
index c93e84a04d0..5ca7d5ce500 100644
--- a/src/mesa/main/debug.c
+++ b/src/mesa/main/debug.c
@@ -272,7 +272,9 @@ write_texture_image(struct gl_texture_object *texObj,
store = ctx->Pack; /* save */
ctx->Pack = ctx->DefaultPacking;
- ctx->Driver.GetTexImage(ctx, GL_RGBA, GL_UNSIGNED_BYTE, buffer, img);
+ ctx->Driver.GetTexSubImage(ctx,
+ 0, 0, 0, img->Width, img->Height, img->Depth,
+ GL_RGBA, GL_UNSIGNED_BYTE, buffer, img);
/* make filename */
_mesa_snprintf(s, sizeof(s), "/tmp/tex%u.l%u.f%u.ppm", texObj->Name, level, face);
@@ -411,7 +413,7 @@ dump_renderbuffer(const struct gl_renderbuffer *rb, GLboolean writeImage)
{
printf("Renderbuffer %u: %u x %u IntFormat = %s\n",
rb->Name, rb->Width, rb->Height,
- _mesa_lookup_enum_by_nr(rb->InternalFormat));
+ _mesa_enum_to_string(rb->InternalFormat));
if (writeImage) {
_mesa_write_renderbuffer_image(rb);
}
diff --git a/src/mesa/main/depth.c b/src/mesa/main/depth.c
index bb4591cf152..c3534407599 100644
--- a/src/mesa/main/depth.c
+++ b/src/mesa/main/depth.c
@@ -63,7 +63,7 @@ _mesa_DepthFunc( GLenum func )
GET_CURRENT_CONTEXT(ctx);
if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glDepthFunc %s\n", _mesa_lookup_enum_by_nr(func));
+ _mesa_debug(ctx, "glDepthFunc %s\n", _mesa_enum_to_string(func));
if (ctx->Depth.Func == func)
return;
diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c
index aafe486fb60..5554738d1a3 100644
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -9000,7 +9000,7 @@ _mesa_NewList(GLuint name, GLenum mode)
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glNewList %u %s\n", name,
- _mesa_lookup_enum_by_nr(mode));
+ _mesa_enum_to_string(mode));
if (name == 0) {
_mesa_error(ctx, GL_INVALID_VALUE, "glNewList");
@@ -9688,7 +9688,7 @@ _mesa_initialize_save_table(const struct gl_context *ctx)
static const char *
enum_string(GLenum k)
{
- return _mesa_lookup_enum_by_nr(k);
+ return _mesa_enum_to_string(k);
}
@@ -9827,19 +9827,19 @@ print_list(struct gl_context *ctx, GLuint list, const char *fname)
break;
case OPCODE_BIND_TEXTURE:
fprintf(f, "BindTexture %s %d\n",
- _mesa_lookup_enum_by_nr(n[1].ui), n[2].ui);
+ _mesa_enum_to_string(n[1].ui), n[2].ui);
break;
case OPCODE_SHADE_MODEL:
- fprintf(f, "ShadeModel %s\n", _mesa_lookup_enum_by_nr(n[1].ui));
+ fprintf(f, "ShadeModel %s\n", _mesa_enum_to_string(n[1].ui));
break;
case OPCODE_MAP1:
fprintf(f, "Map1 %s %.3f %.3f %d %d\n",
- _mesa_lookup_enum_by_nr(n[1].ui),
+ _mesa_enum_to_string(n[1].ui),
n[2].f, n[3].f, n[4].i, n[5].i);
break;
case OPCODE_MAP2:
fprintf(f, "Map2 %s %.3f %.3f %.3f %.3f %d %d %d %d\n",
- _mesa_lookup_enum_by_nr(n[1].ui),
+ _mesa_enum_to_string(n[1].ui),
n[2].f, n[3].f, n[4].f, n[5].f,
n[6].i, n[7].i, n[8].i, n[9].i);
break;
@@ -9918,7 +9918,7 @@ print_list(struct gl_context *ctx, GLuint list, const char *fname)
case OPCODE_PROVOKING_VERTEX:
fprintf(f, "ProvokingVertex %s\n",
- _mesa_lookup_enum_by_nr(n[1].ui));
+ _mesa_enum_to_string(n[1].ui));
break;
/*
diff --git a/src/mesa/main/drawpix.c b/src/mesa/main/drawpix.c
index 55035f214b3..720a082ce6d 100644
--- a/src/mesa/main/drawpix.c
+++ b/src/mesa/main/drawpix.c
@@ -53,10 +53,10 @@ _mesa_DrawPixels( GLsizei width, GLsizei height,
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glDrawPixels(%d, %d, %s, %s, %p) // to %s at %d, %d\n",
width, height,
- _mesa_lookup_enum_by_nr(format),
- _mesa_lookup_enum_by_nr(type),
+ _mesa_enum_to_string(format),
+ _mesa_enum_to_string(type),
pixels,
- _mesa_lookup_enum_by_nr(ctx->DrawBuffer->ColorDrawBuffer[0]),
+ _mesa_enum_to_string(ctx->DrawBuffer->ColorDrawBuffer[0]),
IROUND(ctx->Current.RasterPos[0]),
IROUND(ctx->Current.RasterPos[1]));
@@ -96,8 +96,8 @@ _mesa_DrawPixels( GLsizei width, GLsizei height,
err = _mesa_error_check_format_and_type(ctx, format, type);
if (err != GL_NO_ERROR) {
_mesa_error(ctx, err, "glDrawPixels(invalid format %s and/or type %s)",
- _mesa_lookup_enum_by_nr(format),
- _mesa_lookup_enum_by_nr(type));
+ _mesa_enum_to_string(format),
+ _mesa_enum_to_string(type));
goto end;
}
@@ -198,9 +198,9 @@ _mesa_CopyPixels( GLint srcx, GLint srcy, GLsizei width, GLsizei height,
_mesa_debug(ctx,
"glCopyPixels(%d, %d, %d, %d, %s) // from %s to %s at %d, %d\n",
srcx, srcy, width, height,
- _mesa_lookup_enum_by_nr(type),
- _mesa_lookup_enum_by_nr(ctx->ReadBuffer->ColorReadBuffer),
- _mesa_lookup_enum_by_nr(ctx->DrawBuffer->ColorDrawBuffer[0]),
+ _mesa_enum_to_string(type),
+ _mesa_enum_to_string(ctx->ReadBuffer->ColorReadBuffer),
+ _mesa_enum_to_string(ctx->DrawBuffer->ColorDrawBuffer[0]),
IROUND(ctx->Current.RasterPos[0]),
IROUND(ctx->Current.RasterPos[1]));
@@ -218,7 +218,7 @@ _mesa_CopyPixels( GLint srcx, GLint srcy, GLsizei width, GLsizei height,
type != GL_STENCIL &&
type != GL_DEPTH_STENCIL) {
_mesa_error(ctx, GL_INVALID_ENUM, "glCopyPixels(type=%s)",
- _mesa_lookup_enum_by_nr(type));
+ _mesa_enum_to_string(type));
return;
}
diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
index 9008a386343..42f67990784 100644
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -146,7 +146,7 @@ client_state(struct gl_context *ctx, GLenum cap, GLboolean state)
invalid_enum_error:
_mesa_error(ctx, GL_INVALID_ENUM, "gl%sClientState(%s)",
- state ? "Enable" : "Disable", _mesa_lookup_enum_by_nr(cap));
+ state ? "Enable" : "Disable", _mesa_enum_to_string(cap));
}
@@ -283,7 +283,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "%s %s (newstate is %x)\n",
state ? "glEnable" : "glDisable",
- _mesa_lookup_enum_by_nr(cap),
+ _mesa_enum_to_string(cap),
ctx->NewState);
switch (cap) {
@@ -1001,7 +1001,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
/* ARB_texture_multisample */
case GL_SAMPLE_MASK:
- if (!_mesa_is_desktop_gl(ctx))
+ if (!_mesa_is_desktop_gl(ctx) && !_mesa_is_gles31(ctx))
goto invalid_enum_error;
CHECK_EXTENSION(ARB_texture_multisample, cap);
if (ctx->Multisample.SampleMask == state)
@@ -1022,7 +1022,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
invalid_enum_error:
_mesa_error(ctx, GL_INVALID_ENUM, "gl%s(%s)",
- state ? "Enable" : "Disable", _mesa_lookup_enum_by_nr(cap));
+ state ? "Enable" : "Disable", _mesa_enum_to_string(cap));
}
@@ -1101,7 +1101,7 @@ _mesa_set_enablei(struct gl_context *ctx, GLenum cap,
invalid_enum_error:
_mesa_error(ctx, GL_INVALID_ENUM, "%s(cap=%s)",
state ? "glEnablei" : "glDisablei",
- _mesa_lookup_enum_by_nr(cap));
+ _mesa_enum_to_string(cap));
}
@@ -1143,7 +1143,7 @@ _mesa_IsEnabledi( GLenum cap, GLuint index )
return (ctx->Scissor.EnableFlags >> index) & 1;
default:
_mesa_error(ctx, GL_INVALID_ENUM, "glIsEnabledIndexed(cap=%s)",
- _mesa_lookup_enum_by_nr(cap));
+ _mesa_enum_to_string(cap));
return GL_FALSE;
}
}
@@ -1603,7 +1603,7 @@ _mesa_IsEnabled( GLenum cap )
/* ARB_texture_multisample */
case GL_SAMPLE_MASK:
- if (!_mesa_is_desktop_gl(ctx))
+ if (!_mesa_is_desktop_gl(ctx) && !_mesa_is_gles31(ctx))
goto invalid_enum_error;
CHECK_EXTENSION(ARB_texture_multisample);
return ctx->Multisample.SampleMask;
@@ -1623,6 +1623,6 @@ _mesa_IsEnabled( GLenum cap )
invalid_enum_error:
_mesa_error(ctx, GL_INVALID_ENUM, "glIsEnabled(%s)",
- _mesa_lookup_enum_by_nr(cap));
+ _mesa_enum_to_string(cap));
return GL_FALSE;
}
diff --git a/src/mesa/main/enums.h b/src/mesa/main/enums.h
index 66bdd53bbab..0e18cd407e9 100644
--- a/src/mesa/main/enums.h
+++ b/src/mesa/main/enums.h
@@ -42,7 +42,7 @@ extern "C" {
#endif
-extern const char *_mesa_lookup_enum_by_nr( int nr );
+extern const char *_mesa_enum_to_string( int nr );
/* Get the name of an enum given that it is a primitive type. Avoids
* GL_FALSE/GL_POINTS ambiguity and others.
diff --git a/src/mesa/main/errors.c b/src/mesa/main/errors.c
index b3406665d94..f720de316e4 100644
--- a/src/mesa/main/errors.c
+++ b/src/mesa/main/errors.c
@@ -1314,7 +1314,7 @@ flush_delayed_errors( struct gl_context *ctx )
if (ctx->ErrorDebugCount) {
_mesa_snprintf(s, MAX_DEBUG_MESSAGE_LENGTH, "%d similar %s errors",
ctx->ErrorDebugCount,
- _mesa_lookup_enum_by_nr(ctx->ErrorValue));
+ _mesa_enum_to_string(ctx->ErrorValue));
output_if_debug("Mesa", s, GL_TRUE);
@@ -1503,7 +1503,7 @@ _mesa_error( struct gl_context *ctx, GLenum error, const char *fmtString, ... )
}
len = _mesa_snprintf(s2, MAX_DEBUG_MESSAGE_LENGTH, "%s in %s",
- _mesa_lookup_enum_by_nr(error), s);
+ _mesa_enum_to_string(error), s);
if (len >= MAX_DEBUG_MESSAGE_LENGTH) {
/* Same as above. */
assert(0);
diff --git a/src/mesa/main/errors.h b/src/mesa/main/errors.h
index 24f234f7f10..81e47a8b8c1 100644
--- a/src/mesa/main/errors.h
+++ b/src/mesa/main/errors.h
@@ -37,6 +37,7 @@
#include <stdio.h>
+#include <stdarg.h>
#include "compiler.h"
#include "glheader.h"
#include "mtypes.h"
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 4176a69ed7c..d934d19c3e7 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -121,6 +121,7 @@ static const struct extension extension_table[] = {
{ "GL_ARB_framebuffer_object", o(ARB_framebuffer_object), GL, 2005 },
{ "GL_ARB_framebuffer_sRGB", o(EXT_framebuffer_sRGB), GL, 1998 },
{ "GL_ARB_get_program_binary", o(dummy_true), GL, 2010 },
+ { "GL_ARB_get_texture_sub_image", o(dummy_true), GL, 2014 },
{ "GL_ARB_gpu_shader5", o(ARB_gpu_shader5), GLC, 2010 },
{ "GL_ARB_gpu_shader_fp64", o(ARB_gpu_shader_fp64), GLC, 2010 },
{ "GL_ARB_half_float_pixel", o(dummy_true), GL, 2003 },
@@ -154,6 +155,8 @@ static const struct extension extension_table[] = {
{ "GL_ARB_shader_objects", o(dummy_true), GL, 2002 },
{ "GL_ARB_shader_precision", o(ARB_shader_precision), GL, 2010 },
{ "GL_ARB_shader_stencil_export", o(ARB_shader_stencil_export), GL, 2009 },
+ { "GL_ARB_shader_storage_buffer_object", o(ARB_shader_storage_buffer_object), GL, 2012 },
+ { "GL_ARB_shader_subroutine", o(ARB_shader_subroutine), GLC, 2010 },
{ "GL_ARB_shader_texture_lod", o(ARB_shader_texture_lod), GL, 2009 },
{ "GL_ARB_shading_language_100", o(dummy_true), GLL, 2003 },
{ "GL_ARB_shading_language_packing", o(ARB_shading_language_packing), GL, 2011 },
@@ -382,6 +385,9 @@ static const struct extension extension_table[] = {
{ "GL_NV_point_sprite", o(NV_point_sprite), GL, 2001 },
{ "GL_NV_primitive_restart", o(NV_primitive_restart), GLL, 2002 },
{ "GL_NV_read_buffer", o(dummy_true), ES2, 2011 },
+ { "GL_NV_read_depth", o(dummy_true), ES2, 2011 },
+ { "GL_NV_read_depth_stencil", o(dummy_true), ES2, 2011 },
+ { "GL_NV_read_stencil", o(dummy_true), ES2, 2011 },
{ "GL_NV_texgen_reflection", o(dummy_true), GLL, 1999 },
{ "GL_NV_texture_barrier", o(NV_texture_barrier), GL, 2009 },
{ "GL_NV_texture_env_combine4", o(NV_texture_env_combine4), GLL, 1999 },
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index f8dcf122d99..841834030df 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -2007,7 +2007,7 @@ renderbuffer_storage(struct gl_context *ctx, struct gl_renderbuffer *rb,
baseFormat = _mesa_base_fbo_format(ctx, internalFormat);
if (baseFormat == 0) {
_mesa_error(ctx, GL_INVALID_ENUM, "%s(internalFormat=%s)",
- func, _mesa_lookup_enum_by_nr(internalFormat));
+ func, _mesa_enum_to_string(internalFormat));
return;
}
@@ -2095,12 +2095,12 @@ renderbuffer_storage_named(GLuint renderbuffer, GLenum internalFormat,
if (samples == NO_SAMPLES)
_mesa_debug(ctx, "%s(%u, %s, %d, %d)\n",
func, renderbuffer,
- _mesa_lookup_enum_by_nr(internalFormat),
+ _mesa_enum_to_string(internalFormat),
width, height);
else
_mesa_debug(ctx, "%s(%u, %s, %d, %d, %d)\n",
func, renderbuffer,
- _mesa_lookup_enum_by_nr(internalFormat),
+ _mesa_enum_to_string(internalFormat),
width, height, samples);
}
@@ -2131,14 +2131,14 @@ renderbuffer_storage_target(GLenum target, GLenum internalFormat,
if (samples == NO_SAMPLES)
_mesa_debug(ctx, "%s(%s, %s, %d, %d)\n",
func,
- _mesa_lookup_enum_by_nr(target),
- _mesa_lookup_enum_by_nr(internalFormat),
+ _mesa_enum_to_string(target),
+ _mesa_enum_to_string(internalFormat),
width, height);
else
_mesa_debug(ctx, "%s(%s, %s, %d, %d, %d)\n",
func,
- _mesa_lookup_enum_by_nr(target),
- _mesa_lookup_enum_by_nr(internalFormat),
+ _mesa_enum_to_string(target),
+ _mesa_enum_to_string(internalFormat),
width, height, samples);
}
@@ -2311,7 +2311,7 @@ get_render_buffer_parameteriv(struct gl_context *ctx,
/* fallthrough */
default:
_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid pname=%s)", func,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
return;
}
}
@@ -2694,13 +2694,13 @@ _mesa_CheckFramebufferStatus(GLenum target)
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glCheckFramebufferStatus(%s)\n",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
fb = get_framebuffer_target(ctx, target);
if (!fb) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glCheckFramebufferStatus(invalid target %s)",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return 0;
}
@@ -2732,7 +2732,7 @@ _mesa_CheckNamedFramebufferStatus(GLuint framebuffer, GLenum target)
default:
_mesa_error(ctx, GL_INVALID_ENUM,
"glCheckNamedFramebufferStatus(invalid target %s)",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return 0;
}
@@ -2851,7 +2851,7 @@ check_layered_texture_target(struct gl_context *ctx, GLenum target,
_mesa_error(ctx, GL_INVALID_OPERATION,
"%s(invalid texture target %s)", caller,
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return false;
}
@@ -2893,7 +2893,7 @@ check_texture_target(struct gl_context *ctx, GLenum target,
_mesa_error(ctx, GL_INVALID_OPERATION,
"%s(invalid texture target %s)", caller,
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return false;
}
@@ -2944,8 +2944,9 @@ check_textarget(struct gl_context *ctx, int dims, GLenum target,
break;
case GL_TEXTURE_2D_MULTISAMPLE:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
- err = _mesa_is_gles(ctx)
- || !ctx->Extensions.ARB_texture_multisample;
+ err = (_mesa_is_gles(ctx) ||
+ !ctx->Extensions.ARB_texture_multisample) &&
+ !_mesa_is_gles31(ctx);
break;
default:
err = true;
@@ -2962,7 +2963,7 @@ check_textarget(struct gl_context *ctx, int dims, GLenum target,
if (err) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"%s(invalid textarget %s)",
- caller, _mesa_lookup_enum_by_nr(textarget));
+ caller, _mesa_enum_to_string(textarget));
return false;
}
@@ -3074,7 +3075,7 @@ _mesa_framebuffer_texture(struct gl_context *ctx, struct gl_framebuffer *fb,
att = get_attachment(ctx, fb, attachment);
if (att == NULL) {
_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", caller,
- _mesa_lookup_enum_by_nr(attachment));
+ _mesa_enum_to_string(attachment));
return;
}
@@ -3157,7 +3158,7 @@ framebuffer_texture_with_dims(int dims, GLenum target,
fb = get_framebuffer_target(ctx, target);
if (!fb) {
_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", caller,
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return;
}
@@ -3225,7 +3226,7 @@ _mesa_FramebufferTextureLayer(GLenum target, GLenum attachment,
if (!fb) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glFramebufferTextureLayer(invalid target %s)",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return;
}
@@ -3304,7 +3305,7 @@ _mesa_FramebufferTexture(GLenum target, GLenum attachment,
GET_CURRENT_CONTEXT(ctx);
struct gl_framebuffer *fb;
struct gl_texture_object *texObj;
- GLboolean layered;
+ GLboolean layered = GL_FALSE;
const char *func = "FramebufferTexture";
@@ -3319,7 +3320,7 @@ _mesa_FramebufferTexture(GLenum target, GLenum attachment,
if (!fb) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glFramebufferTexture(invalid target %s)",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return;
}
@@ -3347,7 +3348,7 @@ _mesa_NamedFramebufferTexture(GLuint framebuffer, GLenum attachment,
GET_CURRENT_CONTEXT(ctx);
struct gl_framebuffer *fb;
struct gl_texture_object *texObj;
- GLboolean layered;
+ GLboolean layered = GL_FALSE;
const char *func = "glNamedFramebufferTexture";
@@ -3400,7 +3401,7 @@ _mesa_framebuffer_renderbuffer(struct gl_context *ctx,
if (att == NULL) {
_mesa_error(ctx, GL_INVALID_ENUM,
"%s(invalid attachment %s)", func,
- _mesa_lookup_enum_by_nr(attachment));
+ _mesa_enum_to_string(attachment));
return;
}
@@ -3440,7 +3441,7 @@ _mesa_FramebufferRenderbuffer(GLenum target, GLenum attachment,
if (!fb) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glFramebufferRenderbuffer(invalid target %s)",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return;
}
@@ -3539,7 +3540,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx,
attachment != GL_DEPTH && attachment != GL_STENCIL) {
_mesa_error(ctx, GL_INVALID_ENUM,
"%s(invalid attachment %s)", caller,
- _mesa_lookup_enum_by_nr(attachment));
+ _mesa_enum_to_string(attachment));
return;
}
/* the default / window-system FBO */
@@ -3552,7 +3553,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx,
if (att == NULL) {
_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", caller,
- _mesa_lookup_enum_by_nr(attachment));
+ _mesa_enum_to_string(attachment));
return;
}
@@ -3609,7 +3610,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx,
}
else if (att->Type == GL_NONE) {
_mesa_error(ctx, err, "%s(invalid pname %s)", caller,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
}
else {
goto invalid_pname_enum;
@@ -3626,7 +3627,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx,
}
else if (att->Type == GL_NONE) {
_mesa_error(ctx, err, "%s(invalid pname %s)", caller,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
}
else {
goto invalid_pname_enum;
@@ -3637,7 +3638,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx,
goto invalid_pname_enum;
} else if (att->Type == GL_NONE) {
_mesa_error(ctx, err, "%s(invalid pname %s)", caller,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
} else if (att->Type == GL_TEXTURE) {
if (att->Texture && (att->Texture->Target == GL_TEXTURE_3D ||
att->Texture->Target == GL_TEXTURE_2D_ARRAY)) {
@@ -3659,7 +3660,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx,
}
else if (att->Type == GL_NONE) {
_mesa_error(ctx, err, "%s(invalid pname %s)", caller,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
}
else {
if (ctx->Extensions.EXT_framebuffer_sRGB) {
@@ -3682,7 +3683,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx,
}
else if (att->Type == GL_NONE) {
_mesa_error(ctx, err, "%s(invalid pname %s)", caller,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
}
else {
mesa_format format = att->Renderbuffer->Format;
@@ -3734,7 +3735,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx,
}
else if (att->Type == GL_NONE) {
_mesa_error(ctx, err, "%s(invalid pname %s)", caller,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
}
else if (att->Texture) {
const struct gl_texture_image *texImage =
@@ -3763,7 +3764,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx,
*params = att->Layered;
} else if (att->Type == GL_NONE) {
_mesa_error(ctx, err, "%s(invalid pname %s)", caller,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
} else {
goto invalid_pname_enum;
}
@@ -3776,7 +3777,7 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx,
invalid_pname_enum:
_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid pname %s)", caller,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
return;
}
@@ -3792,7 +3793,7 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment,
if (!buffer) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glGetFramebufferAttachmentParameteriv(invalid target %s)",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return;
}
@@ -4009,7 +4010,7 @@ invalidate_framebuffer_storage(struct gl_context *ctx,
invalid_enum:
_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", name,
- _mesa_lookup_enum_by_nr(attachments[i]));
+ _mesa_enum_to_string(attachments[i]));
return;
}
@@ -4026,7 +4027,7 @@ _mesa_InvalidateSubFramebuffer(GLenum target, GLsizei numAttachments,
if (!fb) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glInvalidateSubFramebuffer(invalid target %s)",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return;
}
@@ -4076,7 +4077,7 @@ _mesa_InvalidateFramebuffer(GLenum target, GLsizei numAttachments,
if (!fb) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glInvalidateFramebuffer(invalid target %s)",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return;
}
@@ -4152,7 +4153,7 @@ _mesa_DiscardFramebufferEXT(GLenum target, GLsizei numAttachments,
if (!fb) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glDiscardFramebufferEXT(target %s)",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return;
}
@@ -4189,5 +4190,5 @@ _mesa_DiscardFramebufferEXT(GLenum target, GLsizei numAttachments,
invalid_enum:
_mesa_error(ctx, GL_INVALID_ENUM,
"glDiscardFramebufferEXT(attachment %s)",
- _mesa_lookup_enum_by_nr(attachments[i]));
+ _mesa_enum_to_string(attachments[i]));
}
diff --git a/src/mesa/main/feedback.c b/src/mesa/main/feedback.c
index 6bc4294f9c7..699e2a855a3 100644
--- a/src/mesa/main/feedback.c
+++ b/src/mesa/main/feedback.c
@@ -415,7 +415,7 @@ _mesa_RenderMode( GLenum mode )
ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0);
if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glRenderMode %s\n", _mesa_lookup_enum_by_nr(mode));
+ _mesa_debug(ctx, "glRenderMode %s\n", _mesa_enum_to_string(mode));
FLUSH_VERTICES(ctx, _NEW_RENDERMODE);
diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c
index 70adaf88551..95b428dca3e 100644
--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -189,15 +189,15 @@ static void make_state_key( struct gl_context *ctx, struct state_key *key )
if (light->Enabled) {
key->unit[i].light_enabled = 1;
- if (light->EyePosition[3] == 0.0)
+ if (light->EyePosition[3] == 0.0F)
key->unit[i].light_eyepos3_is_zero = 1;
- if (light->SpotCutoff == 180.0)
+ if (light->SpotCutoff == 180.0F)
key->unit[i].light_spotcutoff_is_180 = 1;
- if (light->ConstantAttenuation != 1.0 ||
- light->LinearAttenuation != 0.0 ||
- light->QuadraticAttenuation != 0.0)
+ if (light->ConstantAttenuation != 1.0F ||
+ light->LinearAttenuation != 0.0F ||
+ light->QuadraticAttenuation != 0.0F)
key->unit[i].light_attenuated = 1;
}
}
diff --git a/src/mesa/main/fog.c b/src/mesa/main/fog.c
index 3bce289e785..45f343d61c8 100644
--- a/src/mesa/main/fog.c
+++ b/src/mesa/main/fog.c
@@ -115,7 +115,7 @@ _mesa_Fogfv( GLenum pname, const GLfloat *params )
ctx->Fog.Mode = m;
break;
case GL_FOG_DENSITY:
- if (*params<0.0) {
+ if (*params<0.0F) {
_mesa_error( ctx, GL_INVALID_VALUE, "glFog" );
return;
}
diff --git a/src/mesa/main/format_parser.py b/src/mesa/main/format_parser.py
index 11184f78e2c..799b14f0b1c 100755
--- a/src/mesa/main/format_parser.py
+++ b/src/mesa/main/format_parser.py
@@ -40,9 +40,6 @@ SRGB = 'srgb'
YUV = 'yuv'
ZS = 'zs'
-def is_power_of_two(x):
- return not bool(x & (x - 1))
-
VERY_LARGE = 99999999999999999999999
class Channel:
@@ -100,10 +97,6 @@ class Channel:
else:
return 1
- def is_power_of_two(self):
- """Returns true if the size of this channel is a power of two."""
- return is_power_of_two(self.size)
-
def datatype(self):
"""Returns the datatype corresponding to a channel type and size"""
return _get_datatype(self.type, self.size)
diff --git a/src/mesa/main/format_utils.h b/src/mesa/main/format_utils.h
index 7f500ec78da..618f43d0aaa 100644
--- a/src/mesa/main/format_utils.h
+++ b/src/mesa/main/format_utils.h
@@ -33,6 +33,7 @@
#include "imports.h"
#include "macros.h"
+#include "util/rounding.h"
extern const mesa_array_format RGBA32_FLOAT;
extern const mesa_array_format RGBA8_UBYTE;
@@ -84,7 +85,7 @@ _mesa_float_to_unorm(float x, unsigned dst_bits)
else if (x > 1.0f)
return MAX_UINT(dst_bits);
else
- return F_TO_I(x * MAX_UINT(dst_bits));
+ return _mesa_lroundevenf(x * MAX_UINT(dst_bits));
}
static inline unsigned
@@ -98,7 +99,7 @@ _mesa_unorm_to_unorm(unsigned x, unsigned src_bits, unsigned dst_bits)
{
if (src_bits < dst_bits) {
return EXTEND_NORMALIZED_INT(x, src_bits, dst_bits);
- } else {
+ } else if (src_bits > dst_bits) {
unsigned src_half = (1 << (src_bits - 1)) - 1;
if (src_bits + dst_bits > sizeof(x) * 8) {
@@ -108,6 +109,8 @@ _mesa_unorm_to_unorm(unsigned x, unsigned src_bits, unsigned dst_bits)
} else {
return (x * MAX_UINT(dst_bits) + src_half) / MAX_UINT(src_bits);
}
+ } else {
+ return x;
}
}
@@ -128,7 +131,7 @@ _mesa_float_to_snorm(float x, unsigned dst_bits)
else if (x > 1.0f)
return MAX_INT(dst_bits);
else
- return F_TO_I(x * MAX_INT(dst_bits));
+ return _mesa_lroundevenf(x * MAX_INT(dst_bits));
}
static inline int
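
The _mesa_unorm_to_unorm change above adds an explicit src_bits == dst_bits passthrough, so equal-width conversions no longer go through the rescaling arithmetic. A standalone sketch of the narrowing/widening math (simplified: assumes at most 16-bit channels and rescales on widening, where the real code replicates bits instead):

#include <stdio.h>

#define MAX_UINT(bits) ((1u << (bits)) - 1)

/* Simplified unorm re-quantization; the intermediate product fits in
 * 32 bits for channels of up to 16 bits. */
static unsigned unorm_to_unorm(unsigned x, unsigned src_bits, unsigned dst_bits)
{
   if (src_bits < dst_bits) {
      /* Widen by rescaling with rounding. */
      return (x * MAX_UINT(dst_bits) + MAX_UINT(src_bits) / 2) / MAX_UINT(src_bits);
   } else if (src_bits > dst_bits) {
      /* Narrow with rounding, as in the code above. */
      unsigned src_half = (1u << (src_bits - 1)) - 1;
      return (x * MAX_UINT(dst_bits) + src_half) / MAX_UINT(src_bits);
   } else {
      return x;   /* equal widths: pass through unchanged */
   }
}

int main(void)
{
   printf("%u\n", unorm_to_unorm(0xff, 8, 4));  /* 255 in 8 bits -> 15 in 4 bits */
   printf("%u\n", unorm_to_unorm(0xf, 4, 8));   /* 15 in 4 bits  -> 255 in 8 bits */
   printf("%u\n", unorm_to_unorm(200, 8, 8));   /* unchanged */
   return 0;
}
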
diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
index 7741cabada1..85f7b6b5664 100644
--- a/src/mesa/main/formatquery.c
+++ b/src/mesa/main/formatquery.c
@@ -74,13 +74,15 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname,
case GL_TEXTURE_2D_MULTISAMPLE:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
/* These enums are only valid if ARB_texture_multisample is supported */
- if (_mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample)
+ if ((_mesa_is_desktop_gl(ctx) &&
+ ctx->Extensions.ARB_texture_multisample) ||
+ _mesa_is_gles31(ctx))
break;
default:
_mesa_error(ctx, GL_INVALID_ENUM,
"glGetInternalformativ(target=%s)",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return;
}
@@ -107,7 +109,7 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname,
_mesa_base_fbo_format(ctx, internalformat) == 0) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glGetInternalformativ(internalformat=%s)",
- _mesa_lookup_enum_by_nr(internalformat));
+ _mesa_enum_to_string(internalformat));
return;
}
@@ -119,7 +121,7 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname,
if (bufSize < 0) {
_mesa_error(ctx, GL_INVALID_VALUE,
"glGetInternalformativ(target=%s)",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return;
}
@@ -168,7 +170,7 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname,
default:
_mesa_error(ctx, GL_INVALID_ENUM,
"glGetInternalformativ(pname=%s)",
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
return;
}
diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c
index baeb1bfe5de..d7b2bae59e7 100644
--- a/src/mesa/main/formats.c
+++ b/src/mesa/main/formats.c
@@ -354,14 +354,22 @@ _mesa_array_format_flip_channels(mesa_array_format format)
return format;
if (num_channels == 2) {
- _mesa_array_format_set_swizzle(&format, swizzle[1], swizzle[0],
- swizzle[2], swizzle[3]);
+ /* Assert that the swizzle makes sense for 2 channels */
+ for (unsigned i = 0; i < 4; i++)
+ assert(swizzle[i] != 2 && swizzle[i] != 3);
+
+ static const uint8_t flip_xy[6] = { 1, 0, 2, 3, 4, 5 };
+ _mesa_array_format_set_swizzle(&format,
+ flip_xy[swizzle[0]], flip_xy[swizzle[1]],
+ flip_xy[swizzle[2]], flip_xy[swizzle[3]]);
return format;
}
if (num_channels == 4) {
- _mesa_array_format_set_swizzle(&format, swizzle[3], swizzle[2],
- swizzle[1], swizzle[0]);
+ static const uint8_t flip[6] = { 3, 2, 1, 0, 4, 5 };
+ _mesa_array_format_set_swizzle(&format,
+ flip[swizzle[0]], flip[swizzle[1]],
+ flip[swizzle[2]], flip[swizzle[3]]);
return format;
}
@@ -372,10 +380,11 @@ uint32_t
_mesa_format_to_array_format(mesa_format format)
{
const struct gl_format_info *info = _mesa_get_format_info(format);
- if (_mesa_little_endian())
- return info->ArrayFormat;
- else
+ if (info->ArrayFormat && !_mesa_little_endian() &&
+ info->Layout == MESA_FORMAT_LAYOUT_PACKED)
return _mesa_array_format_flip_channels(info->ArrayFormat);
+ else
+ return info->ArrayFormat;
}
static struct hash_table *format_array_format_table;
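
_mesa_array_format_flip_channels now remaps every swizzle entry through a lookup table instead of permuting the call arguments, which also keeps the constant selectors (values 4 and 5 in the tables above) in place. A standalone sketch of the four-channel case:

#include <stdint.h>
#include <stdio.h>

/* Reverse the byte order of a 4-channel swizzle: channel index i becomes
 * 3 - i, while the constant selectors 4 and 5 are left untouched. */
static void flip_swizzle4(uint8_t swz[4])
{
   static const uint8_t flip[6] = { 3, 2, 1, 0, 4, 5 };
   for (int i = 0; i < 4; i++)
      swz[i] = flip[swz[i]];
}

int main(void)
{
   uint8_t rgba[4] = { 0, 1, 2, 3 };   /* identity swizzle */
   uint8_t bgr1[4] = { 2, 1, 0, 5 };   /* B, G, R, constant one */

   flip_swizzle4(rgba);   /* -> 3 2 1 0 (reversed byte order) */
   flip_swizzle4(bgr1);   /* -> 1 2 3 5 (constant stays put) */

   printf("%u %u %u %u\n", rgba[0], rgba[1], rgba[2], rgba[3]);
   printf("%u %u %u %u\n", bgr1[0], bgr1[1], bgr1[2], bgr1[3]);
   return 0;
}
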
diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h
index 7e451caf0ff..d938e6ad513 100644
--- a/src/mesa/main/formats.h
+++ b/src/mesa/main/formats.h
@@ -191,6 +191,11 @@ static inline void
_mesa_array_format_set_swizzle(mesa_array_format *f,
int32_t x, int32_t y, int32_t z, int32_t w)
{
+ *f &= ~(MESA_ARRAY_FORMAT_SWIZZLE_X_MASK |
+ MESA_ARRAY_FORMAT_SWIZZLE_Y_MASK |
+ MESA_ARRAY_FORMAT_SWIZZLE_Z_MASK |
+ MESA_ARRAY_FORMAT_SWIZZLE_W_MASK);
+
*f |= ((x << 8 ) & MESA_ARRAY_FORMAT_SWIZZLE_X_MASK) |
((y << 11) & MESA_ARRAY_FORMAT_SWIZZLE_Y_MASK) |
((z << 14) & MESA_ARRAY_FORMAT_SWIZZLE_Z_MASK) |
diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c
index 77c04b8dab8..37e2c29c89c 100644
--- a/src/mesa/main/framebuffer.c
+++ b/src/mesa/main/framebuffer.c
@@ -938,7 +938,7 @@ _mesa_print_framebuffer(const struct gl_framebuffer *fb)
fprintf(stderr, "Mesa Framebuffer %u at %p\n", fb->Name, (void *) fb);
fprintf(stderr, " Size: %u x %u Status: %s\n", fb->Width, fb->Height,
- _mesa_lookup_enum_by_nr(fb->_Status));
+ _mesa_enum_to_string(fb->_Status));
fprintf(stderr, " Attachments:\n");
for (i = 0; i < BUFFER_COUNT; i++) {
diff --git a/src/mesa/main/genmipmap.c b/src/mesa/main/genmipmap.c
index 9aef090194e..c18f9d5223f 100644
--- a/src/mesa/main/genmipmap.c
+++ b/src/mesa/main/genmipmap.c
@@ -83,7 +83,7 @@ _mesa_generate_texture_mipmap(struct gl_context *ctx,
if (error) {
_mesa_error(ctx, GL_INVALID_ENUM, "glGenerate%sMipmap(target=%s)",
- suffix, _mesa_lookup_enum_by_nr(target));
+ suffix, _mesa_enum_to_string(target));
return;
}
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 3d6d63916b3..307a5ffbd1c 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -149,6 +149,8 @@ enum value_extra {
EXTRA_EXT_UBO_GS4,
EXTRA_EXT_ATOMICS_GS4,
EXTRA_EXT_SHADER_IMAGE_GS4,
+ EXTRA_EXT_ATOMICS_TESS,
+ EXTRA_EXT_SHADER_IMAGE_TESS,
};
#define NO_EXTRA NULL
@@ -349,12 +351,58 @@ static const int extra_ARB_shader_image_load_store_and_geometry_shader[] = {
EXTRA_END
};
+static const int extra_ARB_shader_atomic_counters_and_tessellation[] = {
+ EXTRA_EXT_ATOMICS_TESS,
+ EXTRA_END
+};
+
+static const int extra_ARB_shader_image_load_store_and_tessellation[] = {
+ EXTRA_EXT_SHADER_IMAGE_TESS,
+ EXTRA_END
+};
+
static const int extra_ARB_draw_indirect_es31[] = {
EXT(ARB_draw_indirect),
EXTRA_API_ES31,
EXTRA_END
};
+static const int extra_ARB_shader_image_load_store_es31[] = {
+ EXT(ARB_shader_image_load_store),
+ EXTRA_API_ES31,
+ EXTRA_END
+};
+
+static const int extra_ARB_shader_atomic_counters_es31[] = {
+ EXT(ARB_shader_atomic_counters),
+ EXTRA_API_ES31,
+ EXTRA_END
+};
+
+static const int extra_ARB_texture_multisample_es31[] = {
+ EXT(ARB_texture_multisample),
+ EXTRA_API_ES31,
+ EXTRA_END
+};
+
+static const int extra_ARB_texture_gather_es31[] = {
+ EXT(ARB_texture_gather),
+ EXTRA_API_ES31,
+ EXTRA_END
+};
+
+static const int extra_ARB_compute_shader_es31[] = {
+ EXT(ARB_compute_shader),
+ EXTRA_API_ES31,
+ EXTRA_END
+};
+
+static const int extra_ARB_explicit_uniform_location_es31[] = {
+ EXT(ARB_explicit_uniform_location),
+ EXTRA_API_ES31,
+ EXTRA_END
+};
+
EXTRA_EXT(ARB_texture_cube_map);
EXTRA_EXT(EXT_texture_array);
EXTRA_EXT(NV_fog_distance);
@@ -401,6 +449,8 @@ EXTRA_EXT(ARB_explicit_uniform_location);
EXTRA_EXT(ARB_clip_control);
EXTRA_EXT(EXT_polygon_offset_clamp);
EXTRA_EXT(ARB_framebuffer_no_attachments);
+EXTRA_EXT(ARB_tessellation_shader);
+EXTRA_EXT(ARB_shader_subroutine);
static const int
extra_ARB_color_buffer_float_or_glcore[] = {
@@ -626,7 +676,7 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
break;
case GL_EDGE_FLAG:
- v->value_bool = ctx->Current.Attrib[VERT_ATTRIB_EDGEFLAG][0] == 1.0;
+ v->value_bool = ctx->Current.Attrib[VERT_ATTRIB_EDGEFLAG][0] == 1.0F;
break;
case GL_READ_BUFFER:
@@ -1149,6 +1199,16 @@ check_extra(struct gl_context *ctx, const char *func, const struct value_desc *d
api_found = (ctx->Extensions.ARB_shader_image_load_store &&
_mesa_has_geometry_shaders(ctx));
break;
+ case EXTRA_EXT_ATOMICS_TESS:
+ api_check = GL_TRUE;
+ api_found = ctx->Extensions.ARB_shader_atomic_counters &&
+ _mesa_has_tessellation(ctx);
+ break;
+ case EXTRA_EXT_SHADER_IMAGE_TESS:
+ api_check = GL_TRUE;
+ api_found = ctx->Extensions.ARB_shader_image_load_store &&
+ _mesa_has_tessellation(ctx);
+ break;
case EXTRA_END:
break;
default: /* *e is a offset into the extension struct */
@@ -1161,7 +1221,7 @@ check_extra(struct gl_context *ctx, const char *func, const struct value_desc *d
if (api_check && !api_found) {
_mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", func,
- _mesa_lookup_enum_by_nr(d->pname));
+ _mesa_enum_to_string(d->pname));
return GL_FALSE;
}
@@ -1208,10 +1268,13 @@ find_value(const char *func, GLenum pname, void **p, union value *v)
* value since it's compatible with GLES2 its entry in table_set[] is at the
* end.
*/
- STATIC_ASSERT(ARRAY_SIZE(table_set) == API_OPENGL_LAST + 2);
+ STATIC_ASSERT(ARRAY_SIZE(table_set) == API_OPENGL_LAST + 3);
if (_mesa_is_gles3(ctx)) {
api = API_OPENGL_LAST + 1;
}
+ if (_mesa_is_gles31(ctx)) {
+ api = API_OPENGL_LAST + 2;
+ }
mask = ARRAY_SIZE(table(api)) - 1;
hash = (pname * prime_factor);
while (1) {
@@ -1222,7 +1285,7 @@ find_value(const char *func, GLenum pname, void **p, union value *v)
* any valid enum. */
if (unlikely(idx == 0)) {
_mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", func,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
return &error_value;
}
@@ -2004,11 +2067,11 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
invalid_enum:
_mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", func,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
return TYPE_INVALID;
invalid_value:
_mesa_error(ctx, GL_INVALID_VALUE, "%s(pname=%s)", func,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
return TYPE_INVALID;
}
diff --git a/src/mesa/main/get_hash_generator.py b/src/mesa/main/get_hash_generator.py
index b200d197341..c777b782442 100644
--- a/src/mesa/main/get_hash_generator.py
+++ b/src/mesa/main/get_hash_generator.py
@@ -44,7 +44,7 @@ prime_factor = 89
prime_step = 281
hash_table_size = 1024
-gl_apis=set(["GL", "GL_CORE", "GLES", "GLES2", "GLES3"])
+gl_apis=set(["GL", "GL_CORE", "GLES", "GLES2", "GLES3", "GLES31"])
def print_header():
print "typedef const unsigned short table_t[%d];\n" % (hash_table_size)
@@ -68,6 +68,7 @@ api_enum = [
'GLES2',
'GL_CORE',
'GLES3', # Not in gl_api enum in mtypes.h
+ 'GLES31', # Not in gl_api enum in mtypes.h
]
def api_index(api):
@@ -167,10 +168,13 @@ def generate_hash_tables(enum_list, enabled_apis, param_descriptors):
for api in valid_apis:
add_to_hash_table(tables[api], hash_val, len(params))
- # Also add GLES2 items to the GLES3 hash table
+ # Also add GLES2 items to the GLES3 and GLES31 hash table
if api == "GLES2":
add_to_hash_table(tables["GLES3"], hash_val, len(params))
-
+ add_to_hash_table(tables["GLES31"], hash_val, len(params))
+ # Also add GLES3 items to the GLES31 hash table
+ if api == "GLES3":
+ add_to_hash_table(tables["GLES31"], hash_val, len(params))
params.append(["GL_" + enum_name, param[1]])
sorted_tables={}
@@ -206,7 +210,7 @@ if __name__ == '__main__':
die("missing descriptor file (-f)\n")
# generate the code for all APIs
- enabled_apis = set(["GLES", "GLES2", "GLES3", "GL", "GL_CORE"])
+ enabled_apis = set(["GLES", "GLES2", "GLES3", "GLES31", "GL", "GL_CORE"])
try:
api_desc = gl_XML.parse_GL_API(api_desc_file)
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index 74ff3ba6619..7dc92f10100 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -351,6 +351,9 @@ descriptor=[
# GL_ARB_framebuffer_object
[ "MAX_SAMPLES", "CONTEXT_INT(Const.MaxSamples), extra_ARB_framebuffer_object_EXT_framebuffer_multisample" ],
+# GL_ARB_sampler_objects / GL 3.3 / GLES 3.0
+ [ "SAMPLER_BINDING", "LOC_CUSTOM, TYPE_INT, GL_SAMPLER_BINDING, NO_EXTRA" ],
+
# GL_ARB_sync
[ "MAX_SERVER_WAIT_TIMEOUT", "CONTEXT_INT64(Const.MaxServerWaitTimeout), extra_ARB_sync" ],
@@ -404,9 +407,49 @@ descriptor=[
[ "TEXTURE_EXTERNAL_OES", "LOC_CUSTOM, TYPE_BOOLEAN, 0, extra_OES_EGL_image_external" ],
]},
-{ "apis": ["GL", "GL_CORE", "GLES3"], "params": [
-# GL_ARB_sampler_objects / GL 3.3 / GLES 3.0
- [ "SAMPLER_BINDING", "LOC_CUSTOM, TYPE_INT, GL_SAMPLER_BINDING, NO_EXTRA" ],
+# Enums in OpenGL and ES 3.1
+{ "apis": ["GL", "GL_CORE", "GLES31"], "params": [
+# GL_ARB_shader_image_load_store / GLES 3.1
+ [ "MAX_IMAGE_UNITS", "CONTEXT_INT(Const.MaxImageUnits), extra_ARB_shader_image_load_store_es31" ],
+ [ "MAX_VERTEX_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms), extra_ARB_shader_image_load_store_es31" ],
+ [ "MAX_FRAGMENT_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms), extra_ARB_shader_image_load_store_es31" ],
+ [ "MAX_COMBINED_IMAGE_UNIFORMS", "CONTEXT_INT(Const.MaxCombinedImageUniforms), extra_ARB_shader_image_load_store_es31" ],
+
+# GL_ARB_shader_atomic_counters / GLES 3.1
+ [ "ATOMIC_COUNTER_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_shader_atomic_counters_es31" ],
+ [ "MAX_ATOMIC_COUNTER_BUFFER_BINDINGS", "CONTEXT_INT(Const.MaxAtomicBufferBindings), extra_ARB_shader_atomic_counters_es31" ],
+ [ "MAX_ATOMIC_COUNTER_BUFFER_SIZE", "CONTEXT_INT(Const.MaxAtomicBufferSize), extra_ARB_shader_atomic_counters_es31" ],
+ [ "MAX_VERTEX_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_es31" ],
+ [ "MAX_VERTEX_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters), extra_ARB_shader_atomic_counters_es31" ],
+ [ "MAX_FRAGMENT_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_es31" ],
+ [ "MAX_FRAGMENT_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters), extra_ARB_shader_atomic_counters_es31" ],
+ [ "MAX_COMBINED_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.MaxCombinedAtomicBuffers), extra_ARB_shader_atomic_counters_es31" ],
+ [ "MAX_COMBINED_ATOMIC_COUNTERS", "CONTEXT_INT(Const.MaxCombinedAtomicCounters), extra_ARB_shader_atomic_counters_es31" ],
+
+# GL_ARB_texture_multisample / GLES 3.1
+ [ "TEXTURE_BINDING_2D_MULTISAMPLE", "LOC_CUSTOM, TYPE_INT, TEXTURE_2D_MULTISAMPLE_INDEX, extra_ARB_texture_multisample_es31" ],
+ [ "MAX_COLOR_TEXTURE_SAMPLES", "CONTEXT_INT(Const.MaxColorTextureSamples), extra_ARB_texture_multisample_es31" ],
+ [ "MAX_DEPTH_TEXTURE_SAMPLES", "CONTEXT_INT(Const.MaxDepthTextureSamples), extra_ARB_texture_multisample_es31" ],
+ [ "MAX_INTEGER_SAMPLES", "CONTEXT_INT(Const.MaxIntegerSamples), extra_ARB_texture_multisample_es31" ],
+ [ "SAMPLE_MASK", "CONTEXT_BOOL(Multisample.SampleMask), extra_ARB_texture_multisample_es31" ],
+ [ "MAX_SAMPLE_MASK_WORDS", "CONST(1), extra_ARB_texture_multisample_es31" ],
+
+# GL_ARB_texture_gather / GLES 3.1
+ [ "MIN_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MinProgramTextureGatherOffset), extra_ARB_texture_gather_es31"],
+ [ "MAX_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MaxProgramTextureGatherOffset), extra_ARB_texture_gather_es31"],
+
+# GL_ARB_compute_shader / GLES 3.1
+ [ "MAX_COMPUTE_WORK_GROUP_INVOCATIONS", "CONTEXT_INT(Const.MaxComputeWorkGroupInvocations), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_UNIFORM_BLOCKS", "CONST(MAX_COMPUTE_UNIFORM_BLOCKS), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_TEXTURE_IMAGE_UNITS", "CONST(MAX_COMPUTE_TEXTURE_IMAGE_UNITS), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_ATOMIC_COUNTERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTERS), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_SHARED_MEMORY_SIZE", "CONST(MAX_COMPUTE_SHARED_MEMORY_SIZE), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONST(MAX_COMPUTE_UNIFORM_COMPONENTS), extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader_es31" ],
+
+# GL_ARB_explicit_uniform_location / GLES 3.1
+ [ "MAX_UNIFORM_LOCATIONS", "CONTEXT_INT(Const.MaxUserAssignableUniformLocations), extra_ARB_explicit_uniform_location_es31" ],
]},
# Enums in OpenGL Core profile and ES 3.1
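For reference, each entry above only becomes visible to applications once its extra_* predicate passes; the generated per-API hash tables are what back glGetIntegerv() and friends at run time. A minimal client-side sketch of the new ES 3.1 queries, assuming an already-current OpenGL ES 3.1 context (context creation omitted):

    #include <GLES3/gl31.h>
    #include <stdio.h>

    static void print_es31_limits(void)
    {
       GLint invocations = 0, image_units = 0;

       /* Both enums are served from the GLES31 hash table generated above. */
       glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &invocations);
       glGetIntegerv(GL_MAX_IMAGE_UNITS, &image_units);

       printf("compute invocations: %d, image units: %d\n",
              invocations, image_units);
    }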
@@ -498,7 +541,6 @@ descriptor=[
[ "MAX_LIST_NESTING", "CONST(MAX_LIST_NESTING), NO_EXTRA" ],
[ "MAX_NAME_STACK_DEPTH", "CONST(MAX_NAME_STACK_DEPTH), NO_EXTRA" ],
[ "MAX_PIXEL_MAP_TABLE", "CONST(MAX_PIXEL_MAP_TABLE), NO_EXTRA" ],
- [ "MAX_UNIFORM_LOCATIONS", "CONTEXT_INT(Const.MaxUserAssignableUniformLocations), extra_ARB_explicit_uniform_location" ],
[ "NAME_STACK_DEPTH", "CONTEXT_INT(Select.NameStackDepth), NO_EXTRA" ],
[ "PACK_LSB_FIRST", "CONTEXT_BOOL(Pack.LsbFirst), NO_EXTRA" ],
[ "PACK_SWAP_BYTES", "CONTEXT_BOOL(Pack.SwapBytes), NO_EXTRA" ],
@@ -699,13 +741,7 @@ descriptor=[
[ "TEXTURE_BUFFER_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_texture_buffer_object" ],
# GL_ARB_texture_multisample / GL 3.2
- [ "TEXTURE_BINDING_2D_MULTISAMPLE", "LOC_CUSTOM, TYPE_INT, TEXTURE_2D_MULTISAMPLE_INDEX, extra_ARB_texture_multisample" ],
[ "TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY", "LOC_CUSTOM, TYPE_INT, TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX, extra_ARB_texture_multisample" ],
- [ "MAX_COLOR_TEXTURE_SAMPLES", "CONTEXT_INT(Const.MaxColorTextureSamples), extra_ARB_texture_multisample" ],
- [ "MAX_DEPTH_TEXTURE_SAMPLES", "CONTEXT_INT(Const.MaxDepthTextureSamples), extra_ARB_texture_multisample" ],
- [ "MAX_INTEGER_SAMPLES", "CONTEXT_INT(Const.MaxIntegerSamples), extra_ARB_texture_multisample" ],
- [ "SAMPLE_MASK", "CONTEXT_BOOL(Multisample.SampleMask), extra_ARB_texture_multisample" ],
- [ "MAX_SAMPLE_MASK_WORDS", "CONST(1), extra_ARB_texture_multisample" ],
# GL 3.0
[ "CONTEXT_FLAGS", "CONTEXT_INT(Const.ContextFlags), extra_version_30" ],
@@ -756,48 +792,23 @@ descriptor=[
[ "TEXTURE_BINDING_CUBE_MAP_ARRAY_ARB", "LOC_CUSTOM, TYPE_INT, TEXTURE_CUBE_ARRAY_INDEX, extra_ARB_texture_cube_map_array" ],
# GL_ARB_texture_gather
- [ "MIN_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MinProgramTextureGatherOffset), extra_ARB_texture_gather"],
- [ "MAX_PROGRAM_TEXTURE_GATHER_OFFSET", "CONTEXT_INT(Const.MaxProgramTextureGatherOffset), extra_ARB_texture_gather"],
[ "MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB", "CONTEXT_INT(Const.MaxProgramTextureGatherComponents), extra_ARB_texture_gather"],
# GL_ARB_separate_shader_objects
[ "PROGRAM_PIPELINE_BINDING", "LOC_CUSTOM, TYPE_INT, GL_PROGRAM_PIPELINE_BINDING, NO_EXTRA" ],
# GL_ARB_shader_atomic_counters
- [ "ATOMIC_COUNTER_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_shader_atomic_counters" ],
- [ "MAX_ATOMIC_COUNTER_BUFFER_BINDINGS", "CONTEXT_INT(Const.MaxAtomicBufferBindings), extra_ARB_shader_atomic_counters" ],
- [ "MAX_ATOMIC_COUNTER_BUFFER_SIZE", "CONTEXT_INT(Const.MaxAtomicBufferSize), extra_ARB_shader_atomic_counters" ],
- [ "MAX_VERTEX_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers), extra_ARB_shader_atomic_counters" ],
- [ "MAX_VERTEX_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters), extra_ARB_shader_atomic_counters" ],
- [ "MAX_FRAGMENT_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers), extra_ARB_shader_atomic_counters" ],
- [ "MAX_FRAGMENT_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters), extra_ARB_shader_atomic_counters" ],
[ "MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
[ "MAX_GEOMETRY_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
- [ "MAX_COMBINED_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.MaxCombinedAtomicBuffers), extra_ARB_shader_atomic_counters" ],
- [ "MAX_COMBINED_ATOMIC_COUNTERS", "CONTEXT_INT(Const.MaxCombinedAtomicCounters), extra_ARB_shader_atomic_counters" ],
# GL_ARB_vertex_attrib_binding
[ "MAX_VERTEX_ATTRIB_RELATIVE_OFFSET", "CONTEXT_ENUM(Const.MaxVertexAttribRelativeOffset), NO_EXTRA" ],
[ "MAX_VERTEX_ATTRIB_BINDINGS", "CONTEXT_ENUM(Const.MaxVertexAttribBindings), NO_EXTRA" ],
# GL_ARB_shader_image_load_store
- [ "MAX_IMAGE_UNITS", "CONTEXT_INT(Const.MaxImageUnits), extra_ARB_shader_image_load_store"],
- [ "MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS", "CONTEXT_INT(Const.MaxCombinedImageUnitsAndFragmentOutputs), extra_ARB_shader_image_load_store"],
- [ "MAX_IMAGE_SAMPLES", "CONTEXT_INT(Const.MaxImageSamples), extra_ARB_shader_image_load_store"],
- [ "MAX_VERTEX_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms), extra_ARB_shader_image_load_store"],
+ [ "MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS", "CONTEXT_INT(Const.MaxCombinedImageUnitsAndFragmentOutputs), extra_ARB_shader_image_load_store" ],
+ [ "MAX_IMAGE_SAMPLES", "CONTEXT_INT(Const.MaxImageSamples), extra_ARB_shader_image_load_store" ],
[ "MAX_GEOMETRY_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms), extra_ARB_shader_image_load_store_and_geometry_shader"],
- [ "MAX_FRAGMENT_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms), extra_ARB_shader_image_load_store"],
- [ "MAX_COMBINED_IMAGE_UNIFORMS", "CONTEXT_INT(Const.MaxCombinedImageUniforms), extra_ARB_shader_image_load_store"],
-
-# GL_ARB_compute_shader
- [ "MAX_COMPUTE_WORK_GROUP_INVOCATIONS", "CONTEXT_INT(Const.MaxComputeWorkGroupInvocations), extra_ARB_compute_shader" ],
- [ "MAX_COMPUTE_UNIFORM_BLOCKS", "CONST(MAX_COMPUTE_UNIFORM_BLOCKS), extra_ARB_compute_shader" ],
- [ "MAX_COMPUTE_TEXTURE_IMAGE_UNITS", "CONST(MAX_COMPUTE_TEXTURE_IMAGE_UNITS), extra_ARB_compute_shader" ],
- [ "MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS), extra_ARB_compute_shader" ],
- [ "MAX_COMPUTE_ATOMIC_COUNTERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTERS), extra_ARB_compute_shader" ],
- [ "MAX_COMPUTE_SHARED_MEMORY_SIZE", "CONST(MAX_COMPUTE_SHARED_MEMORY_SIZE), extra_ARB_compute_shader" ],
- [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONST(MAX_COMPUTE_UNIFORM_COMPONENTS), extra_ARB_compute_shader" ],
- [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader" ],
# GL_ARB_framebuffer_no_attachments
["MAX_FRAMEBUFFER_WIDTH", "CONTEXT_INT(Const.MaxFramebufferWidth), extra_ARB_framebuffer_no_attachments"],
@@ -826,6 +837,38 @@ descriptor=[
[ "MIN_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MinFragmentInterpolationOffset), extra_ARB_gpu_shader5" ],
[ "MAX_FRAGMENT_INTERPOLATION_OFFSET", "CONTEXT_FLOAT(Const.MaxFragmentInterpolationOffset), extra_ARB_gpu_shader5" ],
[ "FRAGMENT_INTERPOLATION_OFFSET_BITS", "CONST(FRAGMENT_INTERPOLATION_OFFSET_BITS), extra_ARB_gpu_shader5" ],
+
+# GL_ARB_tessellation_shader
+ [ "PATCH_VERTICES", "CONTEXT_INT(TessCtrlProgram.patch_vertices), extra_ARB_tessellation_shader" ],
+ [ "PATCH_DEFAULT_OUTER_LEVEL", "CONTEXT_FLOAT4(TessCtrlProgram.patch_default_outer_level), extra_ARB_tessellation_shader" ],
+ [ "PATCH_DEFAULT_INNER_LEVEL", "CONTEXT_FLOAT2(TessCtrlProgram.patch_default_inner_level), extra_ARB_tessellation_shader" ],
+ [ "MAX_TESS_GEN_LEVEL", "CONTEXT_INT(Const.MaxTessGenLevel), extra_ARB_tessellation_shader" ],
+ [ "MAX_PATCH_VERTICES", "CONTEXT_INT(Const.MaxPatchVertices), extra_ARB_tessellation_shader" ],
+ [ "MAX_TESS_CONTROL_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxUniformComponents), extra_ARB_tessellation_shader" ],
+ [ "MAX_TESS_EVALUATION_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxUniformComponents), extra_ARB_tessellation_shader" ],
+ [ "MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits), extra_ARB_tessellation_shader" ],
+ [ "MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits), extra_ARB_tessellation_shader" ],
+ [ "MAX_TESS_CONTROL_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents), extra_ARB_tessellation_shader" ],
+ [ "MAX_TESS_PATCH_COMPONENTS", "CONTEXT_INT(Const.MaxTessPatchComponents), extra_ARB_tessellation_shader" ],
+ [ "MAX_TESS_CONTROL_TOTAL_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.MaxTessControlTotalOutputComponents), extra_ARB_tessellation_shader" ],
+ [ "MAX_TESS_EVALUATION_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents), extra_ARB_tessellation_shader" ],
+ [ "MAX_TESS_CONTROL_INPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents), extra_ARB_tessellation_shader" ],
+ [ "MAX_TESS_EVALUATION_INPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents), extra_ARB_tessellation_shader" ],
+ [ "MAX_TESS_CONTROL_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxUniformBlocks), extra_ARB_tessellation_shader" ],
+ [ "MAX_TESS_EVALUATION_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxUniformBlocks), extra_ARB_tessellation_shader" ],
+ [ "MAX_COMBINED_TESS_CONTROL_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxCombinedUniformComponents), extra_ARB_tessellation_shader" ],
+ [ "MAX_COMBINED_TESS_EVALUATION_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxCombinedUniformComponents), extra_ARB_tessellation_shader" ],
+# Dependencies on GL_ARB_tessellation_shader
+ [ "MAX_TESS_CONTROL_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_tessellation" ],
+ [ "MAX_TESS_CONTROL_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_tessellation" ],
+ [ "MAX_TESS_EVALUATION_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_tessellation" ],
+ [ "MAX_TESS_EVALUATION_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_tessellation" ],
+ [ "MAX_TESS_CONTROL_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxImageUniforms), extra_ARB_shader_image_load_store_and_tessellation"],
+ [ "MAX_TESS_EVALUATION_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxImageUniforms), extra_ARB_shader_image_load_store_and_tessellation"],
+
+# GL_ARB_shader_subroutine
+ [ "MAX_SUBROUTINES", "CONST(MAX_SUBROUTINES), extra_ARB_shader_subroutine" ],
+ [ "MAX_SUBROUTINE_UNIFORM_LOCATIONS", "CONST(MAX_SUBROUTINE_UNIFORM_LOCATIONS), extra_ARB_shader_subroutine" ],
]}
]
diff --git a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c
index 72d99ca4e22..9873fdbf1a4 100644
--- a/src/mesa/main/getstring.c
+++ b/src/mesa/main/getstring.c
@@ -208,7 +208,7 @@ _mesa_GetPointerv( GLenum pname, GLvoid **params )
return;
if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glGetPointerv %s\n", _mesa_lookup_enum_by_nr(pname));
+ _mesa_debug(ctx, "glGetPointerv %s\n", _mesa_enum_to_string(pname));
switch (pname) {
case GL_VERTEX_ARRAY_POINTER:
@@ -299,7 +299,7 @@ _mesa_GetError( void )
ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0);
if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glGetError <-- %s\n", _mesa_lookup_enum_by_nr(e));
+ _mesa_debug(ctx, "glGetError <-- %s\n", _mesa_enum_to_string(e));
ctx->ErrorValue = (GLenum) GL_NO_ERROR;
ctx->ErrorDebugCount = 0;
diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
index ac69fabccaa..3eb66dab7f8 100644
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -186,7 +186,7 @@ get_map_idx(GLenum value)
return IDX_RG;
default:
_mesa_problem(NULL, "Unexpected inFormat %s",
- _mesa_lookup_enum_by_nr(value));
+ _mesa_enum_to_string(value));
return 0;
}
}
@@ -216,8 +216,8 @@ _mesa_compute_component_mapping(GLenum inFormat, GLenum outFormat, GLubyte *map)
#if 0
printf("from %x/%s to %x/%s map %d %d %d %d %d %d\n",
- inFormat, _mesa_lookup_enum_by_nr(inFormat),
- outFormat, _mesa_lookup_enum_by_nr(outFormat),
+ inFormat, _mesa_enum_to_string(inFormat),
+ outFormat, _mesa_enum_to_string(outFormat),
map[0],
map[1],
map[2],
@@ -1278,9 +1278,53 @@ _mesa_is_compressed_format(const struct gl_context *ctx, GLenum format)
}
}
+/**
+ * Convert various unpack formats to the corresponding base format.
+ */
+GLenum
+_mesa_unpack_format_to_base_format(GLenum format)
+{
+ switch(format) {
+ case GL_RED_INTEGER:
+ return GL_RED;
+ case GL_GREEN_INTEGER:
+ return GL_GREEN;
+ case GL_BLUE_INTEGER:
+ return GL_BLUE;
+ case GL_ALPHA_INTEGER:
+ return GL_ALPHA;
+ case GL_RG_INTEGER:
+ return GL_RG;
+ case GL_RGB_INTEGER:
+ return GL_RGB;
+ case GL_RGBA_INTEGER:
+ return GL_RGBA;
+ case GL_BGR_INTEGER:
+ return GL_BGR;
+ case GL_BGRA_INTEGER:
+ return GL_BGRA;
+ case GL_LUMINANCE_INTEGER_EXT:
+ return GL_LUMINANCE;
+ case GL_LUMINANCE_ALPHA_INTEGER_EXT:
+ return GL_LUMINANCE_ALPHA;
+ case GL_RED:
+ case GL_GREEN:
+ case GL_BLUE:
+ case GL_RG:
+ case GL_RGB:
+ case GL_RGBA:
+ case GL_BGR:
+ case GL_BGRA:
+ case GL_ALPHA:
+ case GL_LUMINANCE:
+ case GL_LUMINANCE_ALPHA:
+ default:
+ return format;
+ }
+}
/**
- * Convert various base formats to the cooresponding integer format.
+ * Convert various base formats to the corresponding integer format.
*/
GLenum
_mesa_base_format_to_integer_format(GLenum format)
@@ -2605,8 +2649,6 @@ get_swizzle_from_gl_format(GLenum format, uint8_t *swizzle)
uint32_t
_mesa_format_from_format_and_type(GLenum format, GLenum type)
{
- mesa_array_format array_format;
-
bool is_array_format = true;
uint8_t swizzle[4];
bool normalized = false, is_float = false, is_signed = false;
@@ -2662,15 +2704,9 @@ _mesa_format_from_format_and_type(GLenum format, GLenum type)
normalized = !_mesa_is_enum_format_integer(format);
num_channels = _mesa_components_in_format(format);
- array_format =
- MESA_ARRAY_FORMAT(type_size, is_signed, is_float,
- normalized, num_channels,
- swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
-
- if (!_mesa_little_endian())
- array_format = _mesa_array_format_flip_channels(array_format);
-
- return array_format;
+ return MESA_ARRAY_FORMAT(type_size, is_signed, is_float,
+ normalized, num_channels,
+ swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
}
/* Otherwise this is not an array format, so return the mesa_format
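The new helper collapses the *_INTEGER unpack formats onto their base formats and returns every other value unchanged. A small, hypothetical self-check of that contract (the declaration is added to glformats.h just below; GL/glext.h supplies the *_INTEGER enums):

    #include <assert.h>
    #include <GL/gl.h>
    #include <GL/glext.h>

    extern GLenum _mesa_unpack_format_to_base_format(GLenum format);

    static void check_unpack_to_base(void)
    {
       /* Integer unpack formats map to their base format... */
       assert(_mesa_unpack_format_to_base_format(GL_RGBA_INTEGER) == GL_RGBA);
       assert(_mesa_unpack_format_to_base_format(GL_RED_INTEGER) == GL_RED);

       /* ...while anything the switch does not mention, including
        * depth/stencil formats, falls through and is returned untouched. */
       assert(_mesa_unpack_format_to_base_format(GL_RGBA) == GL_RGBA);
       assert(_mesa_unpack_format_to_base_format(GL_DEPTH_COMPONENT) == GL_DEPTH_COMPONENT);
    }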
diff --git a/src/mesa/main/glformats.h b/src/mesa/main/glformats.h
index 8881cb7d86b..419955a6033 100644
--- a/src/mesa/main/glformats.h
+++ b/src/mesa/main/glformats.h
@@ -101,6 +101,9 @@ _mesa_is_compressed_format(const struct gl_context *ctx, GLenum format);
extern GLenum
_mesa_base_format_to_integer_format(GLenum format);
+extern GLenum
+_mesa_unpack_format_to_base_format(GLenum format);
+
extern GLboolean
_mesa_base_format_has_channel(GLenum base_format, GLenum pname);
diff --git a/src/mesa/main/hint.c b/src/mesa/main/hint.c
index 3e056ebaf13..984239a7276 100644
--- a/src/mesa/main/hint.c
+++ b/src/mesa/main/hint.c
@@ -40,8 +40,8 @@ _mesa_Hint( GLenum target, GLenum mode )
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glHint %s %s\n",
- _mesa_lookup_enum_by_nr(target),
- _mesa_lookup_enum_by_nr(mode));
+ _mesa_enum_to_string(target),
+ _mesa_enum_to_string(mode));
if (mode != GL_NICEST && mode != GL_FASTEST && mode != GL_DONT_CARE) {
_mesa_error(ctx, GL_INVALID_ENUM, "glHint(mode)");
diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c
index 68c7316575c..350e6752c8b 100644
--- a/src/mesa/main/imports.c
+++ b/src/mesa/main/imports.c
@@ -369,7 +369,7 @@ _mesa_float_to_half(float val)
* or normal.
*/
e = 0;
- m = (int) _mesa_roundevenf((1 << 24) * fabsf(fi.f));
+ m = _mesa_lroundevenf((1 << 24) * fabsf(fi.f));
}
else if (new_exp > 15) {
/* map this value to infinity */
@@ -383,7 +383,7 @@ _mesa_float_to_half(float val)
* either normal or infinite.
*/
e = new_exp + 15;
- m = (int) _mesa_roundevenf(flt_m / (float) (1 << 13));
+ m = _mesa_lroundevenf(flt_m / (float) (1 << 13));
}
}
diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
index 9ffe3decd0f..d61279ac4e5 100644
--- a/src/mesa/main/imports.h
+++ b/src/mesa/main/imports.h
@@ -170,34 +170,6 @@ static inline int IROUND_POS(float f)
return (int) (f + 0.5F);
}
-#ifdef __x86_64__
-# include <xmmintrin.h>
-#endif
-
-/**
- * Convert float to int using a fast method. The rounding mode may vary.
- */
-static inline int F_TO_I(float f)
-{
-#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__)
- int r;
- __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st");
- return r;
-#elif defined(USE_X86_ASM) && defined(_MSC_VER)
- int r;
- _asm {
- fld f
- fistp r
- }
- return r;
-#elif defined(__x86_64__)
- return _mm_cvt_ss2si(_mm_load_ss(&f));
-#else
- return IROUND(f);
-#endif
-}
-
-
/** Return (as an integer) floor of float */
static inline int IFLOOR(float f)
{
diff --git a/src/mesa/main/light.c b/src/mesa/main/light.c
index 4021dbef922..14b4b04162b 100644
--- a/src/mesa/main/light.c
+++ b/src/mesa/main/light.c
@@ -42,16 +42,16 @@ _mesa_ShadeModel( GLenum mode )
GET_CURRENT_CONTEXT(ctx);
if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glShadeModel %s\n", _mesa_lookup_enum_by_nr(mode));
+ _mesa_debug(ctx, "glShadeModel %s\n", _mesa_enum_to_string(mode));
+
+ if (ctx->Light.ShadeModel == mode)
+ return;
if (mode != GL_FLAT && mode != GL_SMOOTH) {
_mesa_error(ctx, GL_INVALID_ENUM, "glShadeModel");
return;
}
- if (ctx->Light.ShadeModel == mode)
- return;
-
FLUSH_VERTICES(ctx, _NEW_LIGHT);
ctx->Light.ShadeModel = mode;
@@ -143,7 +143,7 @@ _mesa_light(struct gl_context *ctx, GLuint lnum, GLenum pname, const GLfloat *pa
COPY_3V(light->SpotDirection, params);
break;
case GL_SPOT_EXPONENT:
- assert(params[0] >= 0.0);
+ assert(params[0] >= 0.0F);
assert(params[0] <= ctx->Const.MaxSpotExponent);
if (light->SpotExponent == params[0])
return;
@@ -151,12 +151,12 @@ _mesa_light(struct gl_context *ctx, GLuint lnum, GLenum pname, const GLfloat *pa
light->SpotExponent = params[0];
break;
case GL_SPOT_CUTOFF:
- assert(params[0] == 180.0 || (params[0] >= 0.0 && params[0] <= 90.0));
+ assert(params[0] == 180.0F || (params[0] >= 0.0F && params[0] <= 90.0F));
if (light->SpotCutoff == params[0])
return;
FLUSH_VERTICES(ctx, _NEW_LIGHT);
light->SpotCutoff = params[0];
- light->_CosCutoff = (GLfloat) (cos(light->SpotCutoff * M_PI / 180.0));
+ light->_CosCutoff = cosf(light->SpotCutoff * M_PI / 180.0);
if (light->_CosCutoff < 0)
light->_CosCutoff = 0;
if (light->SpotCutoff != 180.0F)
@@ -165,21 +165,21 @@ _mesa_light(struct gl_context *ctx, GLuint lnum, GLenum pname, const GLfloat *pa
light->_Flags &= ~LIGHT_SPOT;
break;
case GL_CONSTANT_ATTENUATION:
- assert(params[0] >= 0.0);
+ assert(params[0] >= 0.0F);
if (light->ConstantAttenuation == params[0])
return;
FLUSH_VERTICES(ctx, _NEW_LIGHT);
light->ConstantAttenuation = params[0];
break;
case GL_LINEAR_ATTENUATION:
- assert(params[0] >= 0.0);
+ assert(params[0] >= 0.0F);
if (light->LinearAttenuation == params[0])
return;
FLUSH_VERTICES(ctx, _NEW_LIGHT);
light->LinearAttenuation = params[0];
break;
case GL_QUADRATIC_ATTENUATION:
- assert(params[0] >= 0.0);
+ assert(params[0] >= 0.0F);
if (light->QuadraticAttenuation == params[0])
return;
FLUSH_VERTICES(ctx, _NEW_LIGHT);
@@ -238,31 +238,31 @@ _mesa_Lightfv( GLenum light, GLenum pname, const GLfloat *params )
params = temp;
break;
case GL_SPOT_EXPONENT:
- if (params[0] < 0.0 || params[0] > ctx->Const.MaxSpotExponent) {
+ if (params[0] < 0.0F || params[0] > ctx->Const.MaxSpotExponent) {
_mesa_error(ctx, GL_INVALID_VALUE, "glLight");
return;
}
break;
case GL_SPOT_CUTOFF:
- if ((params[0] < 0.0 || params[0] > 90.0) && params[0] != 180.0) {
+ if ((params[0] < 0.0F || params[0] > 90.0F) && params[0] != 180.0F) {
_mesa_error(ctx, GL_INVALID_VALUE, "glLight");
return;
}
break;
case GL_CONSTANT_ATTENUATION:
- if (params[0] < 0.0) {
+ if (params[0] < 0.0F) {
_mesa_error(ctx, GL_INVALID_VALUE, "glLight");
return;
}
break;
case GL_LINEAR_ATTENUATION:
- if (params[0] < 0.0) {
+ if (params[0] < 0.0F) {
_mesa_error(ctx, GL_INVALID_VALUE, "glLight");
return;
}
break;
case GL_QUADRATIC_ATTENUATION:
- if (params[0] < 0.0) {
+ if (params[0] < 0.0F) {
_mesa_error(ctx, GL_INVALID_VALUE, "glLight");
return;
}
@@ -463,14 +463,14 @@ _mesa_LightModelfv( GLenum pname, const GLfloat *params )
case GL_LIGHT_MODEL_LOCAL_VIEWER:
if (ctx->API != API_OPENGL_COMPAT)
goto invalid_pname;
- newbool = (params[0]!=0.0);
+ newbool = (params[0] != 0.0F);
if (ctx->Light.Model.LocalViewer == newbool)
return;
FLUSH_VERTICES(ctx, _NEW_LIGHT);
ctx->Light.Model.LocalViewer = newbool;
break;
case GL_LIGHT_MODEL_TWO_SIDE:
- newbool = (params[0]!=0.0);
+ newbool = (params[0] != 0.0F);
if (ctx->Light.Model.TwoSide == newbool)
return;
FLUSH_VERTICES(ctx, _NEW_LIGHT);
@@ -723,8 +723,8 @@ _mesa_ColorMaterial( GLenum face, GLenum mode )
if (MESA_VERBOSE&VERBOSE_API)
_mesa_debug(ctx, "glColorMaterial %s %s\n",
- _mesa_lookup_enum_by_nr(face),
- _mesa_lookup_enum_by_nr(mode));
+ _mesa_enum_to_string(face),
+ _mesa_enum_to_string(mode));
bitmask = _mesa_material_bitmask(ctx, face, mode, legal, "glColorMaterial");
if (bitmask == 0)
@@ -975,7 +975,7 @@ compute_light_positions( struct gl_context *ctx )
}
else {
/* positional light w/ homogeneous coordinate, divide by W */
- GLfloat wInv = (GLfloat)1.0 / light->_Position[3];
+ GLfloat wInv = 1.0F / light->_Position[3];
light->_Position[0] *= wInv;
light->_Position[1] *= wInv;
light->_Position[2] *= wInv;
@@ -1024,7 +1024,7 @@ update_modelview_scale( struct gl_context *ctx )
if (!_math_matrix_is_length_preserving(ctx->ModelviewMatrixStack.Top)) {
const GLfloat *m = ctx->ModelviewMatrixStack.Top->inv;
GLfloat f = m[2] * m[2] + m[6] * m[6] + m[10] * m[10];
- if (f < 1e-12) f = 1.0;
+ if (f < 1e-12f) f = 1.0f;
if (ctx->_NeedEyeCoords)
ctx->_ModelViewInvScale = 1.0f / sqrtf(f);
else
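The float-literal changes here (and the matching ones in lines.c, pack.c, points.c, pixel.c, pixeltransfer.c and multisample.c further down) keep comparisons of GLfloat values in single precision instead of promoting them to double. A tiny standalone illustration, not Mesa code:

    #include <stdio.h>

    int main(void)
    {
       float cutoff = 90.0f;

       /* 'cutoff > 90.0' promotes cutoff to double before comparing;
        * 'cutoff > 90.0F' stays in float.  The result is identical here;
        * the suffix only removes the needless double-precision detour and
        * the -Wdouble-promotion style warnings it can trigger. */
       printf("%d %d\n", cutoff > 90.0, cutoff > 90.0F);
       return 0;
    }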
diff --git a/src/mesa/main/lines.c b/src/mesa/main/lines.c
index 3c08ed2e713..c020fb3eb9e 100644
--- a/src/mesa/main/lines.c
+++ b/src/mesa/main/lines.c
@@ -45,7 +45,7 @@ _mesa_LineWidth( GLfloat width )
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glLineWidth %f\n", width);
- if (width<=0.0) {
+ if (width <= 0.0F) {
_mesa_error( ctx, GL_INVALID_VALUE, "glLineWidth" );
return;
}
@@ -63,7 +63,7 @@ _mesa_LineWidth( GLfloat width )
if (ctx->API == API_OPENGL_CORE
&& ((ctx->Const.ContextFlags & GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT)
!= 0)
- && width > 1.0) {
+ && width > 1.0F) {
_mesa_error( ctx, GL_INVALID_VALUE, "glLineWidth" );
return;
}
diff --git a/src/mesa/main/macros.h b/src/mesa/main/macros.h
index 0608650aeb4..54df50c9cfe 100644
--- a/src/mesa/main/macros.h
+++ b/src/mesa/main/macros.h
@@ -33,6 +33,7 @@
#include "util/macros.h"
#include "util/u_math.h"
+#include "util/rounding.h"
#include "imports.h"
@@ -131,12 +132,12 @@ extern GLfloat _mesa_ubyte_to_float_color_tab[256];
#define INT_TO_USHORT(i) ((i) < 0 ? 0 : ((GLushort) ((i) >> 15)))
#define UINT_TO_USHORT(i) ((i) < 0 ? 0 : ((GLushort) ((i) >> 16)))
#define UNCLAMPED_FLOAT_TO_USHORT(us, f) \
- us = ( (GLushort) F_TO_I( CLAMP((f), 0.0F, 1.0F) * 65535.0F) )
+ us = ( (GLushort) _mesa_lroundevenf( CLAMP((f), 0.0F, 1.0F) * 65535.0F) )
#define CLAMPED_FLOAT_TO_USHORT(us, f) \
- us = ( (GLushort) F_TO_I( (f) * 65535.0F) )
+ us = ( (GLushort) _mesa_lroundevenf( (f) * 65535.0F) )
#define UNCLAMPED_FLOAT_TO_SHORT(s, f) \
- s = ( (GLshort) F_TO_I( CLAMP((f), -1.0F, 1.0F) * 32767.0F) )
+ s = ( (GLshort) _mesa_lroundevenf( CLAMP((f), -1.0F, 1.0F) * 32767.0F) )
/***
*** UNCLAMPED_FLOAT_TO_UBYTE: clamp float to [0,1] and map to ubyte in [0,255]
@@ -167,9 +168,9 @@ extern GLfloat _mesa_ubyte_to_float_color_tab[256];
} while (0)
#else
#define UNCLAMPED_FLOAT_TO_UBYTE(ub, f) \
- ub = ((GLubyte) F_TO_I(CLAMP((f), 0.0F, 1.0F) * 255.0F))
+ ub = ((GLubyte) _mesa_lroundevenf(CLAMP((f), 0.0F, 1.0F) * 255.0F))
#define CLAMPED_FLOAT_TO_UBYTE(ub, f) \
- ub = ((GLubyte) F_TO_I((f) * 255.0F))
+ ub = ((GLubyte) _mesa_lroundevenf((f) * 255.0F))
#endif
static fi_type UINT_AS_UNION(GLuint u)
@@ -679,17 +680,6 @@ minify(unsigned value, unsigned levels)
}
/**
- * Return true if the given value is a power of two.
- *
- * Note that this considers 0 a power of two.
- */
-static inline bool
-is_power_of_two(unsigned value)
-{
- return (value & (value - 1)) == 0;
-}
-
-/**
* Align a value up to an alignment value
*
* If \c value is not already aligned to the requested alignment value, it
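Replacing F_TO_I() with _mesa_lroundevenf() trades a conversion whose rounding mode depended on the x87/SSE state for well-defined round-half-to-even behaviour. A standalone sketch of that behaviour in plain C (not Mesa's util/rounding.h implementation), assuming the default FE_TONEAREST rounding mode:

    #include <math.h>
    #include <stdio.h>

    /* lrintf() uses the current rounding mode, which defaults to
     * round-to-nearest-even, so ties go to the even neighbour. */
    static long round_even(float f)
    {
       return lrintf(f);
    }

    int main(void)
    {
       /* Prints "0 2 2": 0.5 and 2.5 round to the even values 0 and 2,
        * while 1.5 rounds to 2. */
       printf("%ld %ld %ld\n",
              round_even(0.5f), round_even(1.5f), round_even(2.5f));
       return 0;
    }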
diff --git a/src/mesa/main/matrix.c b/src/mesa/main/matrix.c
index 80c8a248ce4..2b8016a4a72 100644
--- a/src/mesa/main/matrix.c
+++ b/src/mesa/main/matrix.c
@@ -229,7 +229,7 @@ _mesa_PushMatrix( void )
if (MESA_VERBOSE&VERBOSE_API)
_mesa_debug(ctx, "glPushMatrix %s\n",
- _mesa_lookup_enum_by_nr(ctx->Transform.MatrixMode));
+ _mesa_enum_to_string(ctx->Transform.MatrixMode));
if (stack->Depth + 1 >= stack->MaxDepth) {
if (ctx->Transform.MatrixMode == GL_TEXTURE) {
@@ -239,7 +239,7 @@ _mesa_PushMatrix( void )
}
else {
_mesa_error(ctx, GL_STACK_OVERFLOW, "glPushMatrix(mode=%s)",
- _mesa_lookup_enum_by_nr(ctx->Transform.MatrixMode));
+ _mesa_enum_to_string(ctx->Transform.MatrixMode));
}
return;
}
@@ -270,7 +270,7 @@ _mesa_PopMatrix( void )
if (MESA_VERBOSE&VERBOSE_API)
_mesa_debug(ctx, "glPopMatrix %s\n",
- _mesa_lookup_enum_by_nr(ctx->Transform.MatrixMode));
+ _mesa_enum_to_string(ctx->Transform.MatrixMode));
if (stack->Depth == 0) {
if (ctx->Transform.MatrixMode == GL_TEXTURE) {
@@ -280,7 +280,7 @@ _mesa_PopMatrix( void )
}
else {
_mesa_error(ctx, GL_STACK_UNDERFLOW, "glPopMatrix(mode=%s)",
- _mesa_lookup_enum_by_nr(ctx->Transform.MatrixMode));
+ _mesa_enum_to_string(ctx->Transform.MatrixMode));
}
return;
}
diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c
index 7732d09b2ec..1e22f930092 100644
--- a/src/mesa/main/mipmap.c
+++ b/src/mesa/main/mipmap.c
@@ -2077,9 +2077,12 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target,
/* Get the uncompressed image */
assert(srcImage->Level == texObj->BaseLevel);
- ctx->Driver.GetTexImage(ctx,
- temp_base_format, temp_datatype,
- temp_src, srcImage);
+ ctx->Driver.GetTexSubImage(ctx,
+ 0, 0, 0,
+ srcImage->Width, srcImage->Height,
+ srcImage->Depth,
+ temp_base_format, temp_datatype,
+ temp_src, srcImage);
/* restore packing mode */
ctx->Pack = save;
}
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 2d285b87a78..83f3717754d 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -90,7 +90,7 @@ struct vbo_context;
/** Extra draw modes beyond GL_POINTS, GL_TRIANGLE_FAN, etc */
-#define PRIM_MAX GL_TRIANGLE_STRIP_ADJACENCY
+#define PRIM_MAX GL_PATCHES
#define PRIM_OUTSIDE_BEGIN_END (PRIM_MAX + 1)
#define PRIM_UNKNOWN (PRIM_MAX + 2)
@@ -109,6 +109,8 @@ _mesa_varying_slot_in_fs(gl_varying_slot slot)
case VARYING_SLOT_EDGE:
case VARYING_SLOT_CLIP_VERTEX:
case VARYING_SLOT_LAYER:
+ case VARYING_SLOT_TESS_LEVEL_OUTER:
+ case VARYING_SLOT_TESS_LEVEL_INNER:
return GL_FALSE;
default:
return GL_TRUE;
@@ -1254,6 +1256,7 @@ typedef enum {
USAGE_UNIFORM_BUFFER = 0x1,
USAGE_TEXTURE_BUFFER = 0x2,
USAGE_ATOMIC_COUNTER_BUFFER = 0x4,
+ USAGE_SHADER_STORAGE_BUFFER = 0x8,
} gl_buffer_usage;
@@ -1654,6 +1657,11 @@ struct gl_transform_feedback_info
* multiple transform feedback outputs in the same buffer.
*/
unsigned BufferStride[MAX_FEEDBACK_BUFFERS];
+
+ /**
+ * Which transform feedback stream this buffer binding is associated with.
+ */
+ unsigned BufferStream[MAX_FEEDBACK_BUFFERS];
};
@@ -1891,6 +1899,8 @@ struct gl_program
GLbitfield64 InputsRead; /**< Bitmask of which input regs are read */
GLbitfield64 DoubleInputsRead; /**< Bitmask of which input regs are read and are doubles */
GLbitfield64 OutputsWritten; /**< Bitmask of which output regs are written */
+ GLbitfield PatchInputsRead; /**< VAR[0..31] usage for patch inputs (user-defined only) */
+ GLbitfield PatchOutputsWritten; /**< VAR[0..31] usage for patch outputs (user-defined only) */
GLbitfield SystemValuesRead; /**< Bitmask of SYSTEM_VALUE_x inputs used */
GLbitfield TexturesUsed[MAX_COMBINED_TEXTURE_IMAGE_UNITS]; /**< TEXTURE_x_BIT bitmask */
GLbitfield SamplersUsed; /**< Bitfield of which samplers are used */
@@ -1958,6 +1968,29 @@ struct gl_vertex_program
};
+/** Tessellation control program object */
+struct gl_tess_ctrl_program
+{
+ struct gl_program Base; /**< base class */
+
+ /* output layout */
+ GLint VerticesOut;
+};
+
+
+/** Tessellation evaluation program object */
+struct gl_tess_eval_program
+{
+ struct gl_program Base; /**< base class */
+
+ /* input layout */
+ GLenum PrimitiveMode; /* GL_TRIANGLES, GL_QUADS or GL_ISOLINES */
+ GLenum Spacing; /* GL_EQUAL, GL_FRACTIONAL_EVEN, GL_FRACTIONAL_ODD */
+ GLenum VertexOrder; /* GL_CW or GL_CCW */
+ bool PointMode;
+};
+
+
/** Geometry program object */
struct gl_geometry_program
{
@@ -2060,6 +2093,27 @@ struct gl_vertex_program_state
GLboolean _Overriden;
};
+/**
+ * Context state for tessellation control programs.
+ */
+struct gl_tess_ctrl_program_state
+{
+ /** Currently bound and valid shader. */
+ struct gl_tess_ctrl_program *_Current;
+
+ GLint patch_vertices;
+ GLfloat patch_default_outer_level[4];
+ GLfloat patch_default_inner_level[2];
+};
+
+/**
+ * Context state for tessellation evaluation programs.
+ */
+struct gl_tess_eval_program_state
+{
+ /** Currently bound and valid shader. */
+ struct gl_tess_eval_program *_Current;
+};
/**
* Context state for geometry programs.
@@ -2154,13 +2208,23 @@ struct gl_ati_fragment_shader_state
struct ati_fragment_shader *Current;
};
+/**
+ * Shader subroutine function definition
+ */
+struct gl_subroutine_function
+{
+ char *name;
+ int num_compat_types;
+ const struct glsl_type **types;
+};
/**
* A GLSL vertex or fragment shader object.
*/
struct gl_shader
{
- /** GL_FRAGMENT_SHADER || GL_VERTEX_SHADER || GL_GEOMETRY_SHADER_ARB.
+ /** GL_FRAGMENT_SHADER || GL_VERTEX_SHADER || GL_GEOMETRY_SHADER_ARB ||
+ * GL_TESS_CONTROL_SHADER || GL_TESS_EVALUATION_SHADER.
* Must be the first field.
*/
GLenum Type;
@@ -2240,6 +2304,41 @@ struct gl_shader
bool pixel_center_integer;
/**
+ * Tessellation Control shader state from layout qualifiers.
+ */
+ struct {
+ /**
+ * 0 - vertices not declared in shader, or
+ * 1 .. GL_MAX_PATCH_VERTICES
+ */
+ GLint VerticesOut;
+ } TessCtrl;
+
+ /**
+ * Tessellation Evaluation shader state from layout qualifiers.
+ */
+ struct {
+ /**
+ * GL_TRIANGLES, GL_QUADS, GL_ISOLINES or PRIM_UNKNOWN if it's not set
+ * in this shader.
+ */
+ GLenum PrimitiveMode;
+ /**
+ * GL_EQUAL, GL_FRACTIONAL_ODD, GL_FRACTIONAL_EVEN, or 0 if it's not set
+ * in this shader.
+ */
+ GLenum Spacing;
+ /**
+ * GL_CW, GL_CCW, or 0 if it's not set in this shader.
+ */
+ GLenum VertexOrder;
+ /**
+ * 1, 0, or -1 if it's not set in this shader.
+ */
+ int PointMode;
+ } TessEval;
+
+ /**
* Geometry shader state from GLSL 1.50 layout qualifiers.
*/
struct {
@@ -2304,6 +2403,25 @@ struct gl_shader
*/
unsigned LocalSize[3];
} Comp;
+
+ /**
+ * Number of types for subroutine uniforms.
+ */
+ GLuint NumSubroutineUniformTypes;
+
+ /**
+ * Subroutine uniform remap table
+ * based on the program level uniform remap table.
+ */
+ GLuint NumSubroutineUniformRemapTable;
+ struct gl_uniform_storage **SubroutineUniformRemapTable;
+
+ /**
+ * Number of subroutine functions for this stage
+ * and storage for them.
+ */
+ GLuint NumSubroutineFunctions;
+ struct gl_subroutine_function *SubroutineFunctions;
};
@@ -2365,6 +2483,11 @@ struct gl_uniform_block
GLuint UniformBufferSize;
/**
+ * Is this actually an interface block for a shader storage buffer?
+ */
+ bool IsShaderStorage;
+
+ /**
* Layout specified in the shader
*
* This isn't accessible through the API, but it is used while
@@ -2468,6 +2591,37 @@ struct gl_shader_program
enum gl_frag_depth_layout FragDepthLayout;
/**
+ * Tessellation Control shader state from layout qualifiers.
+ */
+ struct {
+ /**
+ * 0 - vertices not declared in shader, or
+ * 1 .. GL_MAX_PATCH_VERTICES
+ */
+ GLint VerticesOut;
+ } TessCtrl;
+
+ /**
+ * Tessellation Evaluation shader state from layout qualifiers.
+ */
+ struct {
+ /** GL_TRIANGLES, GL_QUADS or GL_ISOLINES */
+ GLenum PrimitiveMode;
+ /** GL_EQUAL, GL_FRACTIONAL_ODD or GL_FRACTIONAL_EVEN */
+ GLenum Spacing;
+ /** GL_CW or GL_CCW */
+ GLenum VertexOrder;
+ bool PointMode;
+ /**
+ * True if gl_ClipDistance is written to. Copied into
+ * gl_tess_eval_program by _mesa_copy_linked_program_data().
+ */
+ GLboolean UsesClipDistance;
+ GLuint ClipDistanceArraySize; /**< Size of the gl_ClipDistance array, or
+ 0 if not present. */
+ } TessEval;
+
+ /**
* Geometry shader state - copied into gl_geometry_program by
* _mesa_copy_linked_program_data().
*/
@@ -2681,6 +2835,7 @@ struct gl_shader_compiler_options
GLboolean EmitNoIndirectOutput; /**< No indirect addressing of outputs */
GLboolean EmitNoIndirectTemp; /**< No indirect addressing of temps */
GLboolean EmitNoIndirectUniform; /**< No indirect addressing of constants */
+ GLboolean EmitNoIndirectSampler; /**< No indirect addressing of samplers */
/*@}*/
GLuint MaxIfDepth; /**< Maximum nested IF blocks */
@@ -3100,6 +3255,9 @@ struct gl_program_constants
/* GL_ARB_shader_image_load_store */
GLuint MaxImageUniforms;
+
+ /* GL_ARB_shader_storage_buffer_object */
+ GLuint MaxShaderStorageBlocks;
};
@@ -3197,6 +3355,15 @@ struct gl_constants
GLuint UniformBufferOffsetAlignment;
/** @} */
+ /** @{
+ * GL_ARB_shader_storage_buffer_object
+ */
+ GLuint MaxCombinedShaderStorageBlocks;
+ GLuint MaxShaderStorageBufferBindings;
+ GLuint MaxShaderStorageBlockSize;
+ GLuint ShaderStorageBufferOffsetAlignment;
+ /** @} */
+
/**
* GL_ARB_explicit_uniform_location
*/
@@ -3423,6 +3590,13 @@ struct gl_constants
GLenum ContextReleaseBehavior;
struct gl_shader_compiler_options ShaderCompilerOptions[MESA_SHADER_STAGES];
+
+ /** GL_ARB_tessellation_shader */
+ GLuint MaxPatchVertices;
+ GLuint MaxTessGenLevel;
+ GLuint MaxTessPatchComponents;
+ GLuint MaxTessControlTotalOutputComponents;
+ bool LowerTessLevel; /**< Lower gl_TessLevel* from float[n] to vecn? */
};
@@ -3484,6 +3658,8 @@ struct gl_extensions
GLboolean ARB_shader_image_load_store;
GLboolean ARB_shader_precision;
GLboolean ARB_shader_stencil_export;
+ GLboolean ARB_shader_storage_buffer_object;
+ GLboolean ARB_shader_subroutine;
GLboolean ARB_shader_texture_lod;
GLboolean ARB_shading_language_packing;
GLboolean ARB_shading_language_420pack;
@@ -3815,6 +3991,12 @@ struct gl_driver_flags
*/
uint64_t NewUniformBuffer;
+ /**
+ * gl_context::ShaderStorageBufferBindings
+ * gl_shader_program::ShaderStorageBlocks
+ */
+ uint64_t NewShaderStorageBuffer;
+
uint64_t NewTextureBuffer;
/**
@@ -3826,6 +4008,11 @@ struct gl_driver_flags
* gl_context::ImageUnits
*/
uint64_t NewImageUnits;
+
+ /**
+ * gl_context::TessCtrlProgram::patch_default_*
+ */
+ uint64_t NewDefaultTessLevels;
};
struct gl_uniform_buffer_binding
@@ -3842,6 +4029,20 @@ struct gl_uniform_buffer_binding
GLboolean AutomaticSize;
};
+struct gl_shader_storage_buffer_binding
+{
+ struct gl_buffer_object *BufferObject;
+ /** Start of shader storage block data in the buffer */
+ GLintptr Offset;
+ /** Size of data allowed to be referenced from the buffer (in bytes) */
+ GLsizeiptr Size;
+ /**
+ * glBindBufferBase() indicates that the Size should be ignored and only
+ * limited by the current size of the BufferObject.
+ */
+ GLboolean AutomaticSize;
+};
+
/**
* ARB_shader_image_load_store image unit.
*/
@@ -4047,6 +4248,8 @@ struct gl_context
struct gl_fragment_program_state FragmentProgram;
struct gl_geometry_program_state GeometryProgram;
struct gl_compute_program_state ComputeProgram;
+ struct gl_tess_ctrl_program_state TessCtrlProgram;
+ struct gl_tess_eval_program_state TessEvalProgram;
struct gl_ati_fragment_shader_state ATIFragmentShader;
struct gl_pipeline_shader_state Pipeline; /**< GLSL pipeline shader object state */
@@ -4089,6 +4292,12 @@ struct gl_context
struct gl_buffer_object *UniformBuffer;
/**
+ * Current GL_ARB_shader_storage_buffer_object binding referenced by
+ * GL_SHADER_STORAGE_BUFFER target for glBufferData, glMapBuffer, etc.
+ */
+ struct gl_buffer_object *ShaderStorageBuffer;
+
+ /**
* Array of uniform buffers for GL_ARB_uniform_buffer_object and GL 3.1.
* This is set up using glBindBufferRange() or glBindBufferBase(). They are
* associated with uniform blocks by glUniformBlockBinding()'s state in the
@@ -4098,6 +4307,15 @@ struct gl_context
UniformBufferBindings[MAX_COMBINED_UNIFORM_BUFFERS];
/**
+ * Array of shader storage buffers for ARB_shader_storage_buffer_object
+ * and GL 4.3. This is set up using glBindBufferRange() or
+ * glBindBufferBase(). They are associated with shader storage blocks by
+ * glShaderStorageBlockBinding()'s state in the shader program.
+ */
+ struct gl_shader_storage_buffer_binding
+ ShaderStorageBufferBindings[MAX_COMBINED_SHADER_STORAGE_BUFFERS];
+
+ /**
* Object currently associated with the GL_ATOMIC_COUNTER_BUFFER
* target.
*/
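The new gl_tess_ctrl_program_state fields track the client-visible patch parameters. A hedged client-side sketch of the calls that feed them, assuming desktop GL with ARB_tessellation_shader, prototypes via GL_GLEXT_PROTOTYPES or a loader, and a bound program that includes a tessellation evaluation shader:

    #include <GL/glcorearb.h>

    static void configure_patches(void)
    {
       static const GLfloat outer[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
       static const GLfloat inner[2] = { 3.0f, 3.0f };

       /* Stored in ctx->TessCtrlProgram.patch_vertices. */
       glPatchParameteri(GL_PATCH_VERTICES, 3);

       /* Stored in patch_default_outer_level / patch_default_inner_level;
        * only consulted when the bound program has no tessellation
        * control shader. */
       glPatchParameterfv(GL_PATCH_DEFAULT_OUTER_LEVEL, outer);
       glPatchParameterfv(GL_PATCH_DEFAULT_INNER_LEVEL, inner);

       /* PRIM_MAX is now GL_PATCHES, so this draw mode is representable. */
       glDrawArrays(GL_PATCHES, 0, 3);
    }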
diff --git a/src/mesa/main/multisample.c b/src/mesa/main/multisample.c
index 816837b95bd..09e6154f7ec 100644
--- a/src/mesa/main/multisample.c
+++ b/src/mesa/main/multisample.c
@@ -43,7 +43,7 @@ _mesa_SampleCoverage(GLclampf value, GLboolean invert)
FLUSH_VERTICES(ctx, 0);
- ctx->Multisample.SampleCoverageValue = (GLfloat) CLAMP(value, 0.0, 1.0);
+ ctx->Multisample.SampleCoverageValue = CLAMP(value, 0.0f, 1.0f);
ctx->Multisample.SampleCoverageInvert = invert;
ctx->NewState |= _NEW_MULTISAMPLE;
}
@@ -134,7 +134,7 @@ _mesa_MinSampleShading(GLclampf value)
FLUSH_VERTICES(ctx, 0);
- ctx->Multisample.MinSampleShadingValue = CLAMP(value, 0.0, 1.0);
+ ctx->Multisample.MinSampleShadingValue = CLAMP(value, 0.0f, 1.0f);
ctx->NewState |= _NEW_MULTISAMPLE;
}
@@ -164,8 +164,11 @@ _mesa_check_sample_count(struct gl_context *ctx, GLenum target,
*
* "If internalformat is a signed or unsigned integer format and samples
* is greater than zero, then the error INVALID_OPERATION is generated."
+ *
+ * This restriction is relaxed for OpenGL ES 3.1.
*/
- if (_mesa_is_gles3(ctx) && _mesa_is_enum_format_integer(internalFormat)
+ if ((ctx->API == API_OPENGLES2 && ctx->Version == 30) &&
+ _mesa_is_enum_format_integer(internalFormat)
&& samples > 0) {
return GL_INVALID_OPERATION;
}
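With the relaxed check, ES 3.1 contexts may combine integer internal formats with multisampling, which ES 3.0 explicitly forbade. A hedged sketch of what becomes legal, assuming a current ES 3.1 context:

    #include <GLES3/gl31.h>

    static void create_integer_msaa_renderbuffer(void)
    {
       GLint max_int_samples = 1;
       GLuint rb;

       /* MAX_INTEGER_SAMPLES is one of the enums added to the GLES31 table. */
       glGetIntegerv(GL_MAX_INTEGER_SAMPLES, &max_int_samples);

       glGenRenderbuffers(1, &rb);
       glBindRenderbuffer(GL_RENDERBUFFER, rb);

       /* Under the ES 3.0 rule quoted above this had to fail with
        * GL_INVALID_OPERATION for any samples > 0; ES 3.1 allows it up to
        * MAX_INTEGER_SAMPLES. */
       glRenderbufferStorageMultisample(GL_RENDERBUFFER, max_int_samples,
                                        GL_RGBA8UI, 256, 256);
    }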
diff --git a/src/mesa/main/objectlabel.c b/src/mesa/main/objectlabel.c
index 5626054687b..1019f893ba8 100644
--- a/src/mesa/main/objectlabel.c
+++ b/src/mesa/main/objectlabel.c
@@ -234,7 +234,7 @@ get_label_pointer(struct gl_context *ctx, GLenum identifier, GLuint name,
invalid_enum:
_mesa_error(ctx, GL_INVALID_ENUM, "%s(identifier = %s)",
- caller, _mesa_lookup_enum_by_nr(identifier));
+ caller, _mesa_enum_to_string(identifier));
return NULL;
}
diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
index f72360817e9..7147fd6e4fe 100644
--- a/src/mesa/main/pack.c
+++ b/src/mesa/main/pack.c
@@ -470,7 +470,7 @@ extract_uint_indexes(GLuint n, GLuint indexes[],
static inline GLuint
clamp_float_to_uint(GLfloat f)
{
- return f < 0.0F ? 0 : F_TO_I(f);
+ return f < 0.0F ? 0 : _mesa_lroundevenf(f);
}
@@ -478,7 +478,7 @@ static inline GLuint
clamp_half_to_uint(GLhalfARB h)
{
GLfloat f = _mesa_half_to_float(h);
- return f < 0.0F ? 0 : F_TO_I(f);
+ return f < 0.0F ? 0 : _mesa_lroundevenf(f);
}
@@ -796,7 +796,7 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n,
* back to an int type can introduce errors that will show up as
* artifacts in things like depth peeling which uses glCopyTexImage.
*/
- if (ctx->Pixel.DepthScale == 1.0 && ctx->Pixel.DepthBias == 0.0) {
+ if (ctx->Pixel.DepthScale == 1.0F && ctx->Pixel.DepthBias == 0.0F) {
if (srcType == GL_UNSIGNED_INT && dstType == GL_UNSIGNED_SHORT) {
const GLuint *src = (const GLuint *) source;
GLushort *dst = (GLushort *) dest;
@@ -874,8 +874,8 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n,
case GL_UNSIGNED_INT_24_8_EXT: /* GL_EXT_packed_depth_stencil */
if (dstType == GL_UNSIGNED_INT_24_8_EXT &&
depthMax == 0xffffff &&
- ctx->Pixel.DepthScale == 1.0 &&
- ctx->Pixel.DepthBias == 0.0) {
+ ctx->Pixel.DepthScale == 1.0F &&
+ ctx->Pixel.DepthBias == 0.0F) {
const GLuint *src = (const GLuint *) source;
GLuint *zValues = (GLuint *) dest;
GLuint i;
@@ -945,7 +945,7 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n,
{
const GLfloat scale = ctx->Pixel.DepthScale;
const GLfloat bias = ctx->Pixel.DepthBias;
- if (scale != 1.0 || bias != 0.0) {
+ if (scale != 1.0F || bias != 0.0F) {
GLuint i;
for (i = 0; i < n; i++) {
depthValues[i] = depthValues[i] * scale + bias;
@@ -958,7 +958,7 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n,
if (needClamp) {
GLuint i;
for (i = 0; i < n; i++) {
- depthValues[i] = (GLfloat)CLAMP(depthValues[i], 0.0, 1.0);
+ depthValues[i] = CLAMP(depthValues[i], 0.0F, 1.0F);
}
}
@@ -1025,7 +1025,7 @@ _mesa_pack_depth_span( struct gl_context *ctx, GLuint n, GLvoid *dest,
return;
}
- if (ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0) {
+ if (ctx->Pixel.DepthScale != 1.0F || ctx->Pixel.DepthBias != 0.0F) {
memcpy(depthCopy, depthSpan, n * sizeof(GLfloat));
_mesa_scale_and_bias_depth(ctx, n, depthCopy);
depthSpan = depthCopy;
@@ -1153,7 +1153,7 @@ _mesa_pack_depth_stencil_span(struct gl_context *ctx,GLuint n,
return;
}
- if (ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0) {
+ if (ctx->Pixel.DepthScale != 1.0F || ctx->Pixel.DepthBias != 0.0F) {
memcpy(depthCopy, depthVals, n * sizeof(GLfloat));
_mesa_scale_and_bias_depth(ctx, n, depthCopy);
depthVals = depthCopy;
diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c
index 279ae2078fe..07acbf10c1d 100644
--- a/src/mesa/main/pipelineobj.c
+++ b/src/mesa/main/pipelineobj.c
@@ -244,14 +244,13 @@ _mesa_UseProgramStages(GLuint pipeline, GLbitfield stages, GLuint program)
*
* "If stages is not the special value ALL_SHADER_BITS, and has a bit
* set that is not recognized, the error INVALID_VALUE is generated."
- *
- * NOT YET SUPPORTED:
- * GL_TESS_CONTROL_SHADER_BIT
- * GL_TESS_EVALUATION_SHADER_BIT
*/
any_valid_stages = GL_VERTEX_SHADER_BIT | GL_FRAGMENT_SHADER_BIT;
if (_mesa_has_geometry_shaders(ctx))
any_valid_stages |= GL_GEOMETRY_SHADER_BIT;
+ if (_mesa_has_tessellation(ctx))
+ any_valid_stages |= GL_TESS_CONTROL_SHADER_BIT |
+ GL_TESS_EVALUATION_SHADER_BIT;
if (stages != GL_ALL_SHADER_BITS && (stages & ~any_valid_stages) != 0) {
_mesa_error(ctx, GL_INVALID_VALUE, "glUseProgramStages(Stages)");
@@ -327,6 +326,12 @@ _mesa_UseProgramStages(GLuint pipeline, GLbitfield stages, GLuint program)
if ((stages & GL_GEOMETRY_SHADER_BIT) != 0)
_mesa_use_shader_program(ctx, GL_GEOMETRY_SHADER, shProg, pipe);
+
+ if ((stages & GL_TESS_CONTROL_SHADER_BIT) != 0)
+ _mesa_use_shader_program(ctx, GL_TESS_CONTROL_SHADER, shProg, pipe);
+
+ if ((stages & GL_TESS_EVALUATION_SHADER_BIT) != 0)
+ _mesa_use_shader_program(ctx, GL_TESS_EVALUATION_SHADER, shProg, pipe);
}
/**
@@ -588,6 +593,7 @@ _mesa_GetProgramPipelineiv(GLuint pipeline, GLenum pname, GLint *params)
/* Are geometry shaders available in this context?
*/
const bool has_gs = _mesa_has_geometry_shaders(ctx);
+ const bool has_tess = _mesa_has_tessellation(ctx);
if (!pipe) {
_mesa_error(ctx, GL_INVALID_OPERATION,
@@ -615,11 +621,17 @@ _mesa_GetProgramPipelineiv(GLuint pipeline, GLenum pname, GLint *params)
? pipe->CurrentProgram[MESA_SHADER_VERTEX]->Name : 0;
return;
case GL_TESS_EVALUATION_SHADER:
- /* NOT YET SUPPORTED */
- break;
+ if (!has_tess)
+ break;
+ *params = pipe->CurrentProgram[MESA_SHADER_TESS_EVAL]
+ ? pipe->CurrentProgram[MESA_SHADER_TESS_EVAL]->Name : 0;
+ return;
case GL_TESS_CONTROL_SHADER:
- /* NOT YET SUPPORTED */
- break;
+ if (!has_tess)
+ break;
+ *params = pipe->CurrentProgram[MESA_SHADER_TESS_CTRL]
+ ? pipe->CurrentProgram[MESA_SHADER_TESS_CTRL]->Name : 0;
+ return;
case GL_GEOMETRY_SHADER:
if (!has_gs)
break;
@@ -635,7 +647,7 @@ _mesa_GetProgramPipelineiv(GLuint pipeline, GLenum pname, GLint *params)
}
_mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramPipelineiv(pname=%s)",
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
}
/**
@@ -777,7 +789,9 @@ _mesa_validate_program_pipeline(struct gl_context* ctx,
* executable vertex shader."
*/
if (!pipe->CurrentProgram[MESA_SHADER_VERTEX]
- && pipe->CurrentProgram[MESA_SHADER_GEOMETRY]) {
+ && (pipe->CurrentProgram[MESA_SHADER_GEOMETRY] ||
+ pipe->CurrentProgram[MESA_SHADER_TESS_CTRL] ||
+ pipe->CurrentProgram[MESA_SHADER_TESS_EVAL])) {
pipe->InfoLog = ralloc_strdup(pipe, "Program lacks a vertex shader");
goto err;
}
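With the tessellation bits accepted, separable tessellation programs can be attached to a pipeline and queried back. A hedged client-side sketch (tcs_src and tes_src are hypothetical GLSL source strings; prototypes via GL_GLEXT_PROTOTYPES or a loader; error handling omitted):

    #include <GL/glcorearb.h>

    static GLuint build_tess_pipeline(const char *tcs_src, const char *tes_src)
    {
       GLuint pipe;
       GLint tcs_name = 0;

       GLuint tcs = glCreateShaderProgramv(GL_TESS_CONTROL_SHADER, 1, &tcs_src);
       GLuint tes = glCreateShaderProgramv(GL_TESS_EVALUATION_SHADER, 1, &tes_src);

       glGenProgramPipelines(1, &pipe);
       glUseProgramStages(pipe, GL_TESS_CONTROL_SHADER_BIT, tcs);
       glUseProgramStages(pipe, GL_TESS_EVALUATION_SHADER_BIT, tes);

       /* Previously hit the "NOT YET SUPPORTED" break; now returns tcs. */
       glGetProgramPipelineiv(pipe, GL_TESS_CONTROL_SHADER, &tcs_name);

       return pipe;
    }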
diff --git a/src/mesa/main/pixel.c b/src/mesa/main/pixel.c
index ecda2694fc8..608a5454702 100644
--- a/src/mesa/main/pixel.c
+++ b/src/mesa/main/pixel.c
@@ -455,12 +455,12 @@ _mesa_GetnPixelMapusvARB( GLenum map, GLsizei bufSize, GLushort *values )
/* special cases */
case GL_PIXEL_MAP_I_TO_I:
for (i = 0; i < mapsize; i++) {
- values[i] = (GLushort) CLAMP(ctx->PixelMaps.ItoI.Map[i], 0.0, 65535.);
+ values[i] = (GLushort) CLAMP(ctx->PixelMaps.ItoI.Map[i], 0.0F, 65535.0F);
}
break;
case GL_PIXEL_MAP_S_TO_S:
for (i = 0; i < mapsize; i++) {
- values[i] = (GLushort) CLAMP(ctx->PixelMaps.StoS.Map[i], 0.0, 65535.);
+ values[i] = (GLushort) CLAMP(ctx->PixelMaps.StoS.Map[i], 0.0F, 65535.0F);
}
break;
default:
diff --git a/src/mesa/main/pixeltransfer.c b/src/mesa/main/pixeltransfer.c
index 94464ea6709..22eac00a7df 100644
--- a/src/mesa/main/pixeltransfer.c
+++ b/src/mesa/main/pixeltransfer.c
@@ -35,6 +35,7 @@
#include "pixeltransfer.h"
#include "imports.h"
#include "mtypes.h"
+#include "util/rounding.h"
/*
@@ -47,25 +48,25 @@ _mesa_scale_and_bias_rgba(GLuint n, GLfloat rgba[][4],
GLfloat rBias, GLfloat gBias,
GLfloat bBias, GLfloat aBias)
{
- if (rScale != 1.0 || rBias != 0.0) {
+ if (rScale != 1.0F || rBias != 0.0F) {
GLuint i;
for (i = 0; i < n; i++) {
rgba[i][RCOMP] = rgba[i][RCOMP] * rScale + rBias;
}
}
- if (gScale != 1.0 || gBias != 0.0) {
+ if (gScale != 1.0F || gBias != 0.0F) {
GLuint i;
for (i = 0; i < n; i++) {
rgba[i][GCOMP] = rgba[i][GCOMP] * gScale + gBias;
}
}
- if (bScale != 1.0 || bBias != 0.0) {
+ if (bScale != 1.0F || bBias != 0.0F) {
GLuint i;
for (i = 0; i < n; i++) {
rgba[i][BCOMP] = rgba[i][BCOMP] * bScale + bBias;
}
}
- if (aScale != 1.0 || aBias != 0.0) {
+ if (aScale != 1.0F || aBias != 0.0F) {
GLuint i;
for (i = 0; i < n; i++) {
rgba[i][ACOMP] = rgba[i][ACOMP] * aScale + aBias;
@@ -94,10 +95,10 @@ _mesa_map_rgba( const struct gl_context *ctx, GLuint n, GLfloat rgba[][4] )
GLfloat g = CLAMP(rgba[i][GCOMP], 0.0F, 1.0F);
GLfloat b = CLAMP(rgba[i][BCOMP], 0.0F, 1.0F);
GLfloat a = CLAMP(rgba[i][ACOMP], 0.0F, 1.0F);
- rgba[i][RCOMP] = rMap[F_TO_I(r * rscale)];
- rgba[i][GCOMP] = gMap[F_TO_I(g * gscale)];
- rgba[i][BCOMP] = bMap[F_TO_I(b * bscale)];
- rgba[i][ACOMP] = aMap[F_TO_I(a * ascale)];
+ rgba[i][RCOMP] = rMap[(int)_mesa_lroundevenf(r * rscale)];
+ rgba[i][GCOMP] = gMap[(int)_mesa_lroundevenf(g * gscale)];
+ rgba[i][BCOMP] = bMap[(int)_mesa_lroundevenf(b * bscale)];
+ rgba[i][ACOMP] = aMap[(int)_mesa_lroundevenf(a * ascale)];
}
}
@@ -236,7 +237,7 @@ _mesa_apply_ci_transfer_ops(const struct gl_context *ctx,
GLuint i;
for (i = 0; i < n; i++) {
const GLuint j = indexes[i] & mask;
- indexes[i] = F_TO_I(ctx->PixelMaps.ItoI.Map[j]);
+ indexes[i] = _mesa_lroundevenf(ctx->PixelMaps.ItoI.Map[j]);
}
}
}
diff --git a/src/mesa/main/points.c b/src/mesa/main/points.c
index 5ad1f38f366..863e3c1af32 100644
--- a/src/mesa/main/points.c
+++ b/src/mesa/main/points.c
@@ -45,7 +45,7 @@ _mesa_PointSize( GLfloat size )
{
GET_CURRENT_CONTEXT(ctx);
- if (size <= 0.0) {
+ if (size <= 0.0F) {
_mesa_error( ctx, GL_INVALID_VALUE, "glPointSize" );
return;
}
@@ -119,9 +119,9 @@ _mesa_PointParameterfv( GLenum pname, const GLfloat *params)
return;
FLUSH_VERTICES(ctx, _NEW_POINT);
COPY_3V(ctx->Point.Params, params);
- ctx->Point._Attenuated = (ctx->Point.Params[0] != 1.0 ||
- ctx->Point.Params[1] != 0.0 ||
- ctx->Point.Params[2] != 0.0);
+ ctx->Point._Attenuated = (ctx->Point.Params[0] != 1.0F ||
+ ctx->Point.Params[1] != 0.0F ||
+ ctx->Point.Params[2] != 0.0F);
break;
case GL_POINT_SIZE_MIN_EXT:
if (params[0] < 0.0F) {
diff --git a/src/mesa/main/polygon.c b/src/mesa/main/polygon.c
index a1f0aa02da1..60af88f9857 100644
--- a/src/mesa/main/polygon.c
+++ b/src/mesa/main/polygon.c
@@ -56,7 +56,7 @@ _mesa_CullFace( GLenum mode )
GET_CURRENT_CONTEXT(ctx);
if (MESA_VERBOSE&VERBOSE_API)
- _mesa_debug(ctx, "glCullFace %s\n", _mesa_lookup_enum_by_nr(mode));
+ _mesa_debug(ctx, "glCullFace %s\n", _mesa_enum_to_string(mode));
if (mode!=GL_FRONT && mode!=GL_BACK && mode!=GL_FRONT_AND_BACK) {
_mesa_error( ctx, GL_INVALID_ENUM, "glCullFace" );
@@ -91,16 +91,16 @@ _mesa_FrontFace( GLenum mode )
GET_CURRENT_CONTEXT(ctx);
if (MESA_VERBOSE&VERBOSE_API)
- _mesa_debug(ctx, "glFrontFace %s\n", _mesa_lookup_enum_by_nr(mode));
+ _mesa_debug(ctx, "glFrontFace %s\n", _mesa_enum_to_string(mode));
+
+ if (ctx->Polygon.FrontFace == mode)
+ return;
if (mode!=GL_CW && mode!=GL_CCW) {
_mesa_error( ctx, GL_INVALID_ENUM, "glFrontFace" );
return;
}
- if (ctx->Polygon.FrontFace == mode)
- return;
-
FLUSH_VERTICES(ctx, _NEW_POLYGON);
ctx->Polygon.FrontFace = mode;
@@ -128,8 +128,8 @@ _mesa_PolygonMode( GLenum face, GLenum mode )
if (MESA_VERBOSE&VERBOSE_API)
_mesa_debug(ctx, "glPolygonMode %s %s\n",
- _mesa_lookup_enum_by_nr(face),
- _mesa_lookup_enum_by_nr(mode));
+ _mesa_enum_to_string(face),
+ _mesa_enum_to_string(mode));
if (mode!=GL_POINT && mode!=GL_LINE && mode!=GL_FILL) {
_mesa_error( ctx, GL_INVALID_ENUM, "glPolygonMode(mode)" );
diff --git a/src/mesa/main/program_resource.c b/src/mesa/main/program_resource.c
index d857b84e60d..23d2b4d2da0 100644
--- a/src/mesa/main/program_resource.c
+++ b/src/mesa/main/program_resource.c
@@ -28,10 +28,11 @@
#include "main/mtypes.h"
#include "main/shaderapi.h"
#include "main/shaderobj.h"
+#include "main/context.h"
#include "program_resource.h"
-
+#include "ir_uniform.h"
static bool
-supported_interface_enum(GLenum iface)
+supported_interface_enum(struct gl_context *ctx, GLenum iface)
{
switch (iface) {
case GL_UNIFORM:
@@ -42,17 +43,21 @@ supported_interface_enum(GLenum iface)
case GL_ATOMIC_COUNTER_BUFFER:
return true;
case GL_VERTEX_SUBROUTINE:
- case GL_TESS_CONTROL_SUBROUTINE:
- case GL_TESS_EVALUATION_SUBROUTINE:
- case GL_GEOMETRY_SUBROUTINE:
case GL_FRAGMENT_SUBROUTINE:
- case GL_COMPUTE_SUBROUTINE:
case GL_VERTEX_SUBROUTINE_UNIFORM:
- case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
- case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
- case GL_GEOMETRY_SUBROUTINE_UNIFORM:
case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+ return _mesa_has_shader_subroutine(ctx);
+ case GL_GEOMETRY_SUBROUTINE:
+ case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+ return _mesa_has_geometry_shaders(ctx) && _mesa_has_shader_subroutine(ctx);
+ case GL_COMPUTE_SUBROUTINE:
case GL_COMPUTE_SUBROUTINE_UNIFORM:
+ return _mesa_has_compute_shaders(ctx) && _mesa_has_shader_subroutine(ctx);
+ case GL_TESS_CONTROL_SUBROUTINE:
+ case GL_TESS_EVALUATION_SUBROUTINE:
+ case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+ case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+ return _mesa_has_tessellation(ctx) && _mesa_has_shader_subroutine(ctx);
case GL_BUFFER_VARIABLE:
case GL_SHADER_STORAGE_BLOCK:
default:
@@ -79,9 +84,9 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface,
}
/* Validate interface. */
- if (!supported_interface_enum(programInterface)) {
+ if (!supported_interface_enum(ctx, programInterface)) {
_mesa_error(ctx, GL_INVALID_OPERATION, "glGetProgramInterfaceiv(%s)",
- _mesa_lookup_enum_by_nr(programInterface));
+ _mesa_enum_to_string(programInterface));
return;
}
@@ -96,8 +101,8 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface,
if (programInterface == GL_ATOMIC_COUNTER_BUFFER) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glGetProgramInterfaceiv(%s pname %s)",
- _mesa_lookup_enum_by_nr(programInterface),
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(programInterface),
+ _mesa_enum_to_string(pname));
return;
}
/* Name length consists of base name, 3 additional chars '[0]' if
@@ -138,15 +143,40 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface,
default:
_mesa_error(ctx, GL_INVALID_OPERATION,
"glGetProgramInterfaceiv(%s pname %s)",
- _mesa_lookup_enum_by_nr(programInterface),
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(programInterface),
+ _mesa_enum_to_string(pname));
};
break;
case GL_MAX_NUM_COMPATIBLE_SUBROUTINES:
+ switch (programInterface) {
+ case GL_VERTEX_SUBROUTINE_UNIFORM:
+ case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+ case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+ case GL_COMPUTE_SUBROUTINE_UNIFORM:
+ case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+ case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: {
+ for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) {
+ if (shProg->ProgramResourceList[i].Type == programInterface) {
+ struct gl_uniform_storage *uni =
+ (struct gl_uniform_storage *)
+ shProg->ProgramResourceList[i].Data;
+ *params = MAX2(*params, uni->num_compatible_subroutines);
+ }
+ }
+ break;
+ }
+
+ default:
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "glGetProgramInterfaceiv(%s pname %s)",
+ _mesa_enum_to_string(programInterface),
+ _mesa_enum_to_string(pname));
+ }
+ break;
default:
_mesa_error(ctx, GL_INVALID_OPERATION,
"glGetProgramInterfaceiv(pname %s)",
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
}
}
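On the application side, the newly supported interfaces are reached through the standard program-interface queries. A hedged sketch, assuming GL 4.x headers with prototypes, a linked program prog whose vertex shader declares subroutine uniforms, and a hypothetical subroutine function named "shade":

    #include <GL/glcorearb.h>

    static void query_subroutines(GLuint prog)
    {
       GLint active = 0, max_compat = 0;
       GLuint idx;

       glGetProgramInterfaceiv(prog, GL_VERTEX_SUBROUTINE_UNIFORM,
                               GL_ACTIVE_RESOURCES, &active);

       /* Exercises the new MAX_NUM_COMPATIBLE_SUBROUTINES path above. */
       glGetProgramInterfaceiv(prog, GL_VERTEX_SUBROUTINE_UNIFORM,
                               GL_MAX_NUM_COMPATIBLE_SUBROUTINES, &max_compat);

       idx = glGetProgramResourceIndex(prog, GL_VERTEX_SUBROUTINE, "shade");
       (void) active; (void) max_compat; (void) idx;
    }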
@@ -173,32 +203,12 @@ is_xfb_marker(const char *str)
return false;
}
-/**
- * Checks if given name index is legal for GetProgramResourceIndex,
- * check is written to be compatible with GL_ARB_array_of_arrays.
- */
-static bool
-valid_program_resource_index_name(const GLchar *name)
-{
- const char *array = strstr(name, "[");
- const char *close = strrchr(name, ']');
-
- /* Not array, no need for the check. */
- if (!array)
- return true;
-
- /* Last array index has to be zero. */
- if (!close || *--close != '0')
- return false;
-
- return true;
-}
-
GLuint GLAPIENTRY
_mesa_GetProgramResourceIndex(GLuint program, GLenum programInterface,
const GLchar *name)
{
GET_CURRENT_CONTEXT(ctx);
+ unsigned array_index = 0;
struct gl_program_resource *res;
struct gl_shader_program *shProg =
_mesa_lookup_shader_program_err(ctx, program,
@@ -206,6 +216,11 @@ _mesa_GetProgramResourceIndex(GLuint program, GLenum programInterface,
if (!shProg || !name)
return GL_INVALID_INDEX;
+ if (!supported_interface_enum(ctx, programInterface)) {
+ _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramResourceIndex(%s)",
+ _mesa_enum_to_string(programInterface));
+ return GL_INVALID_INDEX;
+ }
/*
* For the interface TRANSFORM_FEEDBACK_VARYING, the value INVALID_INDEX
* should be returned when querying the index assigned to the special names
@@ -217,24 +232,33 @@ _mesa_GetProgramResourceIndex(GLuint program, GLenum programInterface,
return GL_INVALID_INDEX;
switch (programInterface) {
+ case GL_TESS_CONTROL_SUBROUTINE:
+ case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+ case GL_TESS_EVALUATION_SUBROUTINE:
+ case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+ case GL_COMPUTE_SUBROUTINE:
+ case GL_COMPUTE_SUBROUTINE_UNIFORM:
+ case GL_GEOMETRY_SUBROUTINE:
+ case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+ case GL_VERTEX_SUBROUTINE:
+ case GL_FRAGMENT_SUBROUTINE:
+ case GL_VERTEX_SUBROUTINE_UNIFORM:
+ case GL_FRAGMENT_SUBROUTINE_UNIFORM:
case GL_PROGRAM_INPUT:
case GL_PROGRAM_OUTPUT:
case GL_UNIFORM:
case GL_TRANSFORM_FEEDBACK_VARYING:
- /* Validate name syntax for array variables */
- if (!valid_program_resource_index_name(name))
- return GL_INVALID_INDEX;
- /* fall-through */
case GL_UNIFORM_BLOCK:
- res = _mesa_program_resource_find_name(shProg, programInterface, name);
- if (!res)
+ res = _mesa_program_resource_find_name(shProg, programInterface, name,
+ &array_index);
+ if (!res || array_index > 0)
return GL_INVALID_INDEX;
return _mesa_program_resource_index(shProg, res);
case GL_ATOMIC_COUNTER_BUFFER:
default:
_mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramResourceIndex(%s)",
- _mesa_lookup_enum_by_nr(programInterface));
+ _mesa_enum_to_string(programInterface));
}
return GL_INVALID_INDEX;
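
Illustrative aside (not part of the patch): with the shared name lookup, an array resource can be indexed by its base name or its "[0]" element only; any other element subscript now yields GL_INVALID_INDEX because of the `array_index > 0` check above. A hedged sketch, assuming a linked program `prog` containing a uniform `float weights[4]`:

   GLuint a = glGetProgramResourceIndex(prog, GL_UNIFORM, "weights");     /* index of the array */
   GLuint b = glGetProgramResourceIndex(prog, GL_UNIFORM, "weights[0]");  /* same index         */
   GLuint c = glGetProgramResourceIndex(prog, GL_UNIFORM, "weights[1]");  /* GL_INVALID_INDEX   */
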
@@ -250,19 +274,13 @@ _mesa_GetProgramResourceName(GLuint program, GLenum programInterface,
_mesa_lookup_shader_program_err(ctx, program,
"glGetProgramResourceName");
- /* Set user friendly return values in case of errors. */
- if (name)
- *name = '\0';
- if (length)
- *length = 0;
-
if (!shProg || !name)
return;
if (programInterface == GL_ATOMIC_COUNTER_BUFFER ||
- !supported_interface_enum(programInterface)) {
+ !supported_interface_enum(ctx, programInterface)) {
_mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramResourceName(%s)",
- _mesa_lookup_enum_by_nr(programInterface));
+ _mesa_enum_to_string(programInterface));
return;
}
@@ -300,36 +318,6 @@ _mesa_GetProgramResourceiv(GLuint program, GLenum programInterface,
propCount, props, bufSize, length, params);
}
-/**
- * Function verifies syntax of given name for GetProgramResourceLocation
- * and GetProgramResourceLocationIndex for the following cases:
- *
- * "array element portion of a string passed to GetProgramResourceLocation
- * or GetProgramResourceLocationIndex must not have, a "+" sign, extra
- * leading zeroes, or whitespace".
- *
- * Check is written to be compatible with GL_ARB_array_of_arrays.
- */
-static bool
-invalid_array_element_syntax(const GLchar *name)
-{
- char *first = strchr(name, '[');
- char *last = strrchr(name, '[');
-
- if (!first)
- return false;
-
- /* No '+' or ' ' allowed anywhere. */
- if (strchr(first, '+') || strchr(first, ' '))
- return true;
-
- /* Check that last array index is 0. */
- if (last[1] == '0' && last[2] != ']')
- return true;
-
- return false;
-}
-
static struct gl_shader_program *
lookup_linked_program(GLuint program, const char *caller)
{
@@ -356,7 +344,7 @@ _mesa_GetProgramResourceLocation(GLuint program, GLenum programInterface,
struct gl_shader_program *shProg =
lookup_linked_program(program, "glGetProgramResourceLocation");
- if (!shProg || !name || invalid_array_element_syntax(name))
+ if (!shProg || !name)
return -1;
/* Validate programInterface. */
@@ -366,24 +354,33 @@ _mesa_GetProgramResourceLocation(GLuint program, GLenum programInterface,
case GL_PROGRAM_OUTPUT:
break;
- /* For reference valid cases requiring additional extension support:
- * GL_ARB_shader_subroutine
- * GL_ARB_tessellation_shader
- * GL_ARB_compute_shader
- */
case GL_VERTEX_SUBROUTINE_UNIFORM:
- case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
- case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
- case GL_GEOMETRY_SUBROUTINE_UNIFORM:
case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+ if (!_mesa_has_shader_subroutine(ctx))
+ goto fail;
+ break;
+ case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+ if (!_mesa_has_geometry_shaders(ctx) || !_mesa_has_shader_subroutine(ctx))
+ goto fail;
+ break;
case GL_COMPUTE_SUBROUTINE_UNIFORM:
-
+ if (!_mesa_has_compute_shaders(ctx) || !_mesa_has_shader_subroutine(ctx))
+ goto fail;
+ break;
+ case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+ case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+ if (!_mesa_has_tessellation(ctx) || !_mesa_has_shader_subroutine(ctx))
+ goto fail;
+ break;
default:
- _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramResourceLocation(%s %s)",
- _mesa_lookup_enum_by_nr(programInterface), name);
+ goto fail;
}
return _mesa_program_resource_location(shProg, programInterface, name);
+fail:
+ _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramResourceLocation(%s %s)",
+ _mesa_enum_to_string(programInterface), name);
+ return -1;
}
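
Illustrative aside (not part of the patch): the reworked validation means subroutine-uniform locations are only queryable when the matching stage and ARB_shader_subroutine support are exposed. A hedged usage sketch, assuming a linked program `prog` whose fragment stage declares a subroutine uniform `shade`:

   GLint loc = glGetProgramResourceLocation(prog,
                                            GL_FRAGMENT_SUBROUTINE_UNIFORM,
                                            "shade");
   /* Returns -1 and sets GL_INVALID_ENUM when the required extension
    * support checked above is missing. */
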
/**
@@ -397,7 +394,7 @@ _mesa_GetProgramResourceLocationIndex(GLuint program, GLenum programInterface,
struct gl_shader_program *shProg =
lookup_linked_program(program, "glGetProgramResourceLocationIndex");
- if (!shProg || !name || invalid_array_element_syntax(name))
+ if (!shProg || !name)
return -1;
/* From the GL_ARB_program_interface_query spec:
@@ -408,7 +405,7 @@ _mesa_GetProgramResourceLocationIndex(GLuint program, GLenum programInterface,
if (programInterface != GL_PROGRAM_OUTPUT) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glGetProgramResourceLocationIndex(%s)",
- _mesa_lookup_enum_by_nr(programInterface));
+ _mesa_enum_to_string(programInterface));
return -1;
}
diff --git a/src/mesa/main/queryobj.c b/src/mesa/main/queryobj.c
index 5ff1b953231..98366857f62 100644
--- a/src/mesa/main/queryobj.c
+++ b/src/mesa/main/queryobj.c
@@ -217,7 +217,7 @@ get_query_binding_point(struct gl_context *ctx, GLenum target, GLuint index)
case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
- if (ctx->Extensions.ARB_tessellation_shader)
+ if (_mesa_has_tessellation(ctx))
return get_pipe_stats_binding_point(ctx, target);
else
return NULL;
@@ -295,7 +295,7 @@ _mesa_CreateQueries(GLenum target, GLsizei n, GLuint *ids)
break;
default:
_mesa_error(ctx, GL_INVALID_ENUM, "glCreateQueries(invalid target = %s)",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return;
}
@@ -390,7 +390,7 @@ _mesa_BeginQueryIndexed(GLenum target, GLuint index, GLuint id)
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glBeginQueryIndexed(%s, %u, %u)\n",
- _mesa_lookup_enum_by_nr(target), index, id);
+ _mesa_enum_to_string(target), index, id);
if (!query_error_check_index(ctx, target, index))
return;
@@ -412,7 +412,7 @@ _mesa_BeginQueryIndexed(GLenum target, GLuint index, GLuint id)
if (*bindpt) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glBeginQuery{Indexed}(target=%s is active)",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return;
}
@@ -496,7 +496,7 @@ _mesa_EndQueryIndexed(GLenum target, GLuint index)
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glEndQueryIndexed(%s, %u)\n",
- _mesa_lookup_enum_by_nr(target), index);
+ _mesa_enum_to_string(target), index);
if (!query_error_check_index(ctx, target, index))
return;
@@ -516,8 +516,8 @@ _mesa_EndQueryIndexed(GLenum target, GLuint index)
if (q && q->Target != target) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glEndQuery(target=%s with active query of target %s)",
- _mesa_lookup_enum_by_nr(target),
- _mesa_lookup_enum_by_nr(q->Target));
+ _mesa_enum_to_string(target),
+ _mesa_enum_to_string(q->Target));
return;
}
@@ -553,7 +553,7 @@ _mesa_QueryCounter(GLuint id, GLenum target)
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glQueryCounter(%u, %s)\n", id,
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
/* error checking */
if (target != GL_TIMESTAMP) {
@@ -628,9 +628,9 @@ _mesa_GetQueryIndexediv(GLenum target, GLuint index, GLenum pname,
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glGetQueryIndexediv(%s, %u, %s)\n",
- _mesa_lookup_enum_by_nr(target),
+ _mesa_enum_to_string(target),
index,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
if (!query_error_check_index(ctx, target, index))
return;
@@ -712,7 +712,7 @@ _mesa_GetQueryIndexediv(GLenum target, GLuint index, GLenum pname,
default:
_mesa_problem(ctx,
"Unknown target in glGetQueryIndexediv(target = %s)",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
*params = 0;
break;
}
@@ -740,7 +740,7 @@ _mesa_GetQueryObjectiv(GLuint id, GLenum pname, GLint *params)
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glGetQueryObjectiv(%u, %s)\n", id,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
if (id)
q = _mesa_lookup_query_object(ctx, id);
@@ -794,7 +794,7 @@ _mesa_GetQueryObjectuiv(GLuint id, GLenum pname, GLuint *params)
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glGetQueryObjectuiv(%u, %s)\n", id,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
if (id)
q = _mesa_lookup_query_object(ctx, id);
@@ -851,7 +851,7 @@ _mesa_GetQueryObjecti64v(GLuint id, GLenum pname, GLint64EXT *params)
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glGetQueryObjecti64v(%u, %s)\n", id,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
if (id)
q = _mesa_lookup_query_object(ctx, id);
@@ -894,7 +894,7 @@ _mesa_GetQueryObjectui64v(GLuint id, GLenum pname, GLuint64EXT *params)
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glGetQueryObjectui64v(%u, %s)\n", id,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
if (id)
q = _mesa_lookup_query_object(ctx, id);
diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c
index a3357cd6419..d826ecfc3d5 100644
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -47,28 +47,47 @@
* Return true if the conversion L=R+G+B is needed.
*/
GLboolean
-_mesa_need_rgb_to_luminance_conversion(mesa_format texFormat, GLenum format)
+_mesa_need_rgb_to_luminance_conversion(GLenum srcBaseFormat,
+ GLenum dstBaseFormat)
{
- GLenum baseTexFormat = _mesa_get_format_base_format(texFormat);
-
- return (baseTexFormat == GL_RG ||
- baseTexFormat == GL_RGB ||
- baseTexFormat == GL_RGBA) &&
- (format == GL_LUMINANCE ||
- format == GL_LUMINANCE_ALPHA ||
- format == GL_LUMINANCE_INTEGER_EXT ||
- format == GL_LUMINANCE_ALPHA_INTEGER_EXT);
+ return (srcBaseFormat == GL_RG ||
+ srcBaseFormat == GL_RGB ||
+ srcBaseFormat == GL_RGBA) &&
+ (dstBaseFormat == GL_LUMINANCE ||
+ dstBaseFormat == GL_LUMINANCE_ALPHA);
}
+/**
+ * Return true if the conversion L,I to RGB conversion is needed.
+ */
+GLboolean
+_mesa_need_luminance_to_rgb_conversion(GLenum srcBaseFormat,
+ GLenum dstBaseFormat)
+{
+ return (srcBaseFormat == GL_LUMINANCE ||
+ srcBaseFormat == GL_LUMINANCE_ALPHA ||
+ srcBaseFormat == GL_INTENSITY) &&
+ (dstBaseFormat == GL_GREEN ||
+ dstBaseFormat == GL_BLUE ||
+ dstBaseFormat == GL_RG ||
+ dstBaseFormat == GL_RGB ||
+ dstBaseFormat == GL_BGR ||
+ dstBaseFormat == GL_RGBA ||
+ dstBaseFormat == GL_BGRA);
+}
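
Illustrative aside (not part of the patch): both predicates are now pure functions of base formats, so their behaviour is easy to show in isolation. A hedged standalone mirror of the first one (the helper name rgb_to_lum_needed is invented here; this is not Mesa code and assumes a GL header that still defines the legacy luminance tokens):

   #include <GL/gl.h>
   #include <GL/glext.h>
   #include <stdbool.h>
   #include <stdio.h>

   /* Mirrors the base-format test in _mesa_need_rgb_to_luminance_conversion(). */
   static bool rgb_to_lum_needed(GLenum src, GLenum dst)
   {
      return (src == GL_RG || src == GL_RGB || src == GL_RGBA) &&
             (dst == GL_LUMINANCE || dst == GL_LUMINANCE_ALPHA);
   }

   int main(void)
   {
      /* Reading an RGBA renderbuffer as GL_LUMINANCE needs L = R+G+B... */
      printf("%d\n", rgb_to_lum_needed(GL_RGBA, GL_LUMINANCE));  /* 1 */
      /* ...while an RGBA -> RGB read is a plain convert/copy. */
      printf("%d\n", rgb_to_lum_needed(GL_RGBA, GL_RGB));        /* 0 */
      return 0;
   }
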
/**
* Return transfer op flags for this ReadPixels operation.
*/
-static GLbitfield
-get_readpixels_transfer_ops(const struct gl_context *ctx, mesa_format texFormat,
- GLenum format, GLenum type, GLboolean uses_blit)
+GLbitfield
+_mesa_get_readpixels_transfer_ops(const struct gl_context *ctx,
+ mesa_format texFormat,
+ GLenum format, GLenum type,
+ GLboolean uses_blit)
{
GLbitfield transferOps = ctx->_ImageTransferState;
+ GLenum srcBaseFormat = _mesa_get_format_base_format(texFormat);
+ GLenum dstBaseFormat = _mesa_unpack_format_to_base_format(format);
if (format == GL_DEPTH_COMPONENT ||
format == GL_DEPTH_STENCIL ||
@@ -105,7 +124,7 @@ get_readpixels_transfer_ops(const struct gl_context *ctx, mesa_format texFormat,
* have any effect anyway.
*/
if (_mesa_get_format_datatype(texFormat) == GL_UNSIGNED_NORMALIZED &&
- !_mesa_need_rgb_to_luminance_conversion(texFormat, format)) {
+ !_mesa_need_rgb_to_luminance_conversion(srcBaseFormat, dstBaseFormat)) {
transferOps &= ~IMAGE_CLAMP_BIT;
}
@@ -128,7 +147,7 @@ _mesa_readpixels_needs_slow_path(const struct gl_context *ctx, GLenum format,
{
struct gl_renderbuffer *rb =
_mesa_get_read_renderbuffer_for_format(ctx, format);
- GLenum srcType;
+ GLenum dstBaseFormat = _mesa_unpack_format_to_base_format(format);
assert(rb);
@@ -149,28 +168,14 @@ _mesa_readpixels_needs_slow_path(const struct gl_context *ctx, GLenum format,
default:
/* Color formats. */
- if (_mesa_need_rgb_to_luminance_conversion(rb->Format, format)) {
- return GL_TRUE;
- }
-
- /* Conversion between signed and unsigned integers needs masking
- * (it isn't just memcpy). */
- srcType = _mesa_get_format_datatype(rb->Format);
-
- if ((srcType == GL_INT &&
- (type == GL_UNSIGNED_INT ||
- type == GL_UNSIGNED_SHORT ||
- type == GL_UNSIGNED_BYTE)) ||
- (srcType == GL_UNSIGNED_INT &&
- (type == GL_INT ||
- type == GL_SHORT ||
- type == GL_BYTE))) {
+ if (_mesa_need_rgb_to_luminance_conversion(rb->_BaseFormat,
+ dstBaseFormat)) {
return GL_TRUE;
}
/* And finally, see if there are any transfer ops. */
- return get_readpixels_transfer_ops(ctx, rb->Format, format, type,
- uses_blit) != 0;
+ return _mesa_get_readpixels_transfer_ops(ctx, rb->Format, format, type,
+ uses_blit) != 0;
}
return GL_FALSE;
}
@@ -263,7 +268,7 @@ read_uint_depth_pixels( struct gl_context *ctx,
GLubyte *map, *dst;
int stride, dstStride, j;
- if (ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0)
+ if (ctx->Pixel.DepthScale != 1.0F || ctx->Pixel.DepthBias != 0.0F)
return GL_FALSE;
if (packing->SwapBytes)
@@ -432,18 +437,19 @@ read_rgba_pixels( struct gl_context *ctx,
uint8_t rebase_swizzle[4];
struct gl_framebuffer *fb = ctx->ReadBuffer;
struct gl_renderbuffer *rb = fb->_ColorReadBuffer;
+ GLenum dstBaseFormat = _mesa_unpack_format_to_base_format(format);
if (!rb)
return;
- transferOps = get_readpixels_transfer_ops(ctx, rb->Format, format, type,
- GL_FALSE);
+ transferOps = _mesa_get_readpixels_transfer_ops(ctx, rb->Format, format,
+ type, GL_FALSE);
/* Describe the dst format */
dst_is_integer = _mesa_is_enum_format_integer(format);
dst_stride = _mesa_image_row_stride(packing, width, format, type);
dst_format = _mesa_format_from_format_and_type(format, type);
convert_rgb_to_lum =
- _mesa_need_rgb_to_luminance_conversion(rb->Format, format);
+ _mesa_need_rgb_to_luminance_conversion(rb->_BaseFormat, dstBaseFormat);
dst = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height,
format, type, 0, 0);
@@ -815,7 +821,7 @@ read_depth_stencil_pixels(struct gl_context *ctx,
const struct gl_pixelstore_attrib *packing )
{
const GLboolean scaleOrBias
- = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
+ = ctx->Pixel.DepthScale != 1.0F || ctx->Pixel.DepthBias != 0.0F;
const GLboolean stencilTransfer = ctx->Pixel.IndexShift
|| ctx->Pixel.IndexOffset || ctx->Pixel.MapStencilFlag;
GLubyte *dst;
@@ -910,10 +916,8 @@ read_pixels_es3_error_check(GLenum format, GLenum type,
const GLenum data_type = _mesa_get_format_datatype(rb->Format);
GLboolean is_unsigned_int = GL_FALSE;
GLboolean is_signed_int = GL_FALSE;
-
- if (!_mesa_is_color_format(internalFormat)) {
- return GL_INVALID_OPERATION;
- }
+ GLboolean is_float_depth = (internalFormat == GL_DEPTH_COMPONENT32F) ||
+ (internalFormat == GL_DEPTH32F_STENCIL8);
is_unsigned_int = _mesa_is_enum_format_unsigned_int(internalFormat);
if (!is_unsigned_int) {
@@ -944,6 +948,43 @@ read_pixels_es3_error_check(GLenum format, GLenum type,
(is_unsigned_int && type == GL_UNSIGNED_INT))
return GL_NO_ERROR;
break;
+ case GL_DEPTH_STENCIL:
+ switch (type) {
+ case GL_FLOAT_32_UNSIGNED_INT_24_8_REV:
+ if (is_float_depth)
+ return GL_NO_ERROR;
+ break;
+ case GL_UNSIGNED_INT_24_8:
+ if (!is_float_depth)
+ return GL_NO_ERROR;
+ break;
+ default:
+ return GL_INVALID_ENUM;
+ }
+ break;
+ case GL_DEPTH_COMPONENT:
+ switch (type) {
+ case GL_FLOAT:
+ if (is_float_depth)
+ return GL_NO_ERROR;
+ break;
+ case GL_UNSIGNED_SHORT:
+ case GL_UNSIGNED_INT_24_8:
+ if (!is_float_depth)
+ return GL_NO_ERROR;
+ break;
+ default:
+ return GL_INVALID_ENUM;
+ }
+ break;
+ case GL_STENCIL_INDEX:
+ switch (type) {
+ case GL_UNSIGNED_BYTE:
+ return GL_NO_ERROR;
+ default:
+ return GL_INVALID_ENUM;
+ }
+ break;
}
return GL_INVALID_OPERATION;
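
Illustrative aside (not part of the patch): under the new cases, the accepted type for a depth or depth/stencil read depends on whether the attachment uses a float depth encoding. A hedged client-side sketch, assuming a current GLES 3.x context, a readable GL_DEPTH_COMPONENT32F attachment, and a driver that permits depth readback:

   /* Float depth pairs with GL_FLOAT; a fixed-point attachment would use
    * GL_UNSIGNED_INT_24_8 through the GL_DEPTH_STENCIL path instead. */
   GLfloat depth[64 * 64];
   glReadPixels(0, 0, 64, 64, GL_DEPTH_COMPONENT, GL_FLOAT, depth);
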
@@ -966,8 +1007,8 @@ _mesa_ReadnPixelsARB( GLint x, GLint y, GLsizei width, GLsizei height,
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glReadPixels(%d, %d, %s, %s, %p)\n",
width, height,
- _mesa_lookup_enum_by_nr(format),
- _mesa_lookup_enum_by_nr(type),
+ _mesa_enum_to_string(format),
+ _mesa_enum_to_string(type),
pixels);
if (width < 0 || height < 0) {
@@ -1017,15 +1058,10 @@ _mesa_ReadnPixelsARB( GLint x, GLint y, GLsizei width, GLsizei height,
err = read_pixels_es3_error_check(format, type, rb);
}
- if (err == GL_NO_ERROR && (format == GL_DEPTH_COMPONENT
- || format == GL_DEPTH_STENCIL)) {
- err = GL_INVALID_ENUM;
- }
-
if (err != GL_NO_ERROR) {
_mesa_error(ctx, err, "glReadPixels(invalid format %s and/or type %s)",
- _mesa_lookup_enum_by_nr(format),
- _mesa_lookup_enum_by_nr(type));
+ _mesa_enum_to_string(format),
+ _mesa_enum_to_string(type));
return;
}
}
@@ -1033,8 +1069,8 @@ _mesa_ReadnPixelsARB( GLint x, GLint y, GLsizei width, GLsizei height,
err = _mesa_error_check_format_and_type(ctx, format, type);
if (err != GL_NO_ERROR) {
_mesa_error(ctx, err, "glReadPixels(invalid format %s and/or type %s)",
- _mesa_lookup_enum_by_nr(format),
- _mesa_lookup_enum_by_nr(type));
+ _mesa_enum_to_string(format),
+ _mesa_enum_to_string(type));
return;
}
diff --git a/src/mesa/main/readpix.h b/src/mesa/main/readpix.h
index 1636dd9ce3e..481ad9d9c37 100644
--- a/src/mesa/main/readpix.h
+++ b/src/mesa/main/readpix.h
@@ -38,7 +38,18 @@ _mesa_readpixels_needs_slow_path(const struct gl_context *ctx, GLenum format,
GLenum type, GLboolean uses_blit);
extern GLboolean
-_mesa_need_rgb_to_luminance_conversion(mesa_format texFormat, GLenum format);
+_mesa_need_rgb_to_luminance_conversion(GLenum srcBaseFormat,
+ GLenum dstBaseFormat);
+
+extern GLboolean
+_mesa_need_luminance_to_rgb_conversion(GLenum srcBaseFormat,
+ GLenum dstBaseFormat);
+
+extern GLbitfield
+_mesa_get_readpixels_transfer_ops(const struct gl_context *ctx,
+ mesa_format texFormat,
+ GLenum format, GLenum type,
+ GLboolean uses_blit);
extern void
_mesa_readpixels(struct gl_context *ctx,
diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c
index a3aacc66aa3..32180fb1ba2 100644
--- a/src/mesa/main/samplerobj.c
+++ b/src/mesa/main/samplerobj.c
@@ -689,7 +689,7 @@ set_sampler_max_anisotropy(struct gl_context *ctx,
if (samp->MaxAnisotropy == param)
return GL_FALSE;
- if (param < 1.0)
+ if (param < 1.0F)
return INVALID_VALUE;
flush(ctx);
@@ -813,7 +813,7 @@ _mesa_SamplerParameteri(GLuint sampler, GLenum pname, GLint param)
break;
case INVALID_PNAME:
_mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameteri(pname=%s)\n",
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
break;
case INVALID_PARAM:
_mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameteri(param=%d)\n",
@@ -906,7 +906,7 @@ _mesa_SamplerParameterf(GLuint sampler, GLenum pname, GLfloat param)
break;
case INVALID_PNAME:
_mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterf(pname=%s)\n",
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
break;
case INVALID_PARAM:
_mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterf(param=%f)\n",
@@ -1006,7 +1006,7 @@ _mesa_SamplerParameteriv(GLuint sampler, GLenum pname, const GLint *params)
break;
case INVALID_PNAME:
_mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameteriv(pname=%s)\n",
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
break;
case INVALID_PARAM:
_mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameteriv(param=%d)\n",
@@ -1099,7 +1099,7 @@ _mesa_SamplerParameterfv(GLuint sampler, GLenum pname, const GLfloat *params)
break;
case INVALID_PNAME:
_mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterfv(pname=%s)\n",
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
break;
case INVALID_PARAM:
_mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterfv(param=%f)\n",
@@ -1184,7 +1184,7 @@ _mesa_SamplerParameterIiv(GLuint sampler, GLenum pname, const GLint *params)
break;
case INVALID_PNAME:
_mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterIiv(pname=%s)\n",
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
break;
case INVALID_PARAM:
_mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterIiv(param=%d)\n",
@@ -1270,7 +1270,7 @@ _mesa_SamplerParameterIuiv(GLuint sampler, GLenum pname, const GLuint *params)
break;
case INVALID_PNAME:
_mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterIuiv(pname=%s)\n",
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
break;
case INVALID_PARAM:
_mesa_error(ctx, GL_INVALID_ENUM, "glSamplerParameterIuiv(param=%u)\n",
@@ -1380,7 +1380,7 @@ _mesa_GetSamplerParameteriv(GLuint sampler, GLenum pname, GLint *params)
invalid_pname:
_mesa_error(ctx, GL_INVALID_ENUM, "glGetSamplerParameteriv(pname=%s)",
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
}
@@ -1466,7 +1466,7 @@ _mesa_GetSamplerParameterfv(GLuint sampler, GLenum pname, GLfloat *params)
invalid_pname:
_mesa_error(ctx, GL_INVALID_ENUM, "glGetSamplerParameterfv(pname=%s)",
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
}
@@ -1545,7 +1545,7 @@ _mesa_GetSamplerParameterIiv(GLuint sampler, GLenum pname, GLint *params)
invalid_pname:
_mesa_error(ctx, GL_INVALID_ENUM, "glGetSamplerParameterIiv(pname=%s)",
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
}
@@ -1624,7 +1624,7 @@ _mesa_GetSamplerParameterIuiv(GLuint sampler, GLenum pname, GLuint *params)
invalid_pname:
_mesa_error(ctx, GL_INVALID_ENUM, "glGetSamplerParameterIuiv(pname=%s)",
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
}
diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index a6246a39aad..ee7320221e2 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -44,7 +44,8 @@ extern "C" {
static GLint
program_resource_location(struct gl_shader_program *shProg,
- struct gl_program_resource *res, const char *name);
+ struct gl_program_resource *res, const char *name,
+ unsigned array_index);
/**
* Declare convenience functions to return resource data in a given type.
@@ -61,6 +62,7 @@ DECL_RESOURCE_FUNC(UBO, gl_uniform_block);
DECL_RESOURCE_FUNC(UNI, gl_uniform_storage);
DECL_RESOURCE_FUNC(ATC, gl_active_atomic_buffer);
DECL_RESOURCE_FUNC(XFB, gl_transform_feedback_varying_info);
+DECL_RESOURCE_FUNC(SUB, gl_subroutine_function);
void GLAPIENTRY
_mesa_BindAttribLocation(GLhandleARB program, GLuint index,
@@ -189,63 +191,6 @@ _mesa_GetActiveAttrib(GLhandleARB program, GLuint desired_index,
(GLint *) type, "glGetActiveAttrib");
}
-/* Locations associated with shader variables (array or non-array) can be
- * queried using its base name or using the base name appended with the
- * valid array index. For example, in case of below vertex shader, valid
- * queries can be made to know the location of "xyz", "array", "array[0]",
- * "array[1]", "array[2]" and "array[3]". In this example index reurned
- * will be 0, 0, 0, 1, 2, 3 respectively.
- *
- * [Vertex Shader]
- * layout(location=0) in vec4 xyz;
- * layout(location=1) in vec4[4] array;
- * void main()
- * { }
- *
- * This requirement came up with the addition of ARB_program_interface_query
- * to OpenGL 4.3 specification. See page 101 (page 122 of the PDF) for details.
- *
- * This utility function is used by:
- * _mesa_GetAttribLocation
- * _mesa_GetFragDataLocation
- * _mesa_GetFragDataIndex
- *
- * Returns 0:
- * if the 'name' string matches var->name.
- * Returns 'matched index':
- * if the 'name' string matches var->name appended with valid array index.
- */
-int static inline
-get_matching_index(const ir_variable *const var, const char *name) {
- unsigned idx = 0;
- const char *const paren = strchr(name, '[');
- const unsigned len = (paren != NULL) ? paren - name : strlen(name);
-
- if (paren != NULL) {
- if (!var->type->is_array())
- return -1;
-
- char *endptr;
- idx = (unsigned) strtol(paren + 1, &endptr, 10);
- const unsigned idx_len = endptr != (paren + 1) ? endptr - paren - 1 : 0;
-
- /* Validate the sub string representing index in 'name' string */
- if ((idx > 0 && paren[1] == '0') /* leading zeroes */
- || (idx == 0 && idx_len > 1) /* all zeroes */
- || paren[1] == ' ' /* whitespace */
- || endptr[0] != ']' /* closing brace */
- || endptr[1] != '\0' /* null char */
- || idx_len == 0 /* missing index */
- || idx >= var->type->length) /* exceeding array bound */
- return -1;
- }
-
- if (strncmp(var->name, name, len) == 0 && var->name[len] == '\0')
- return idx;
-
- return -1;
-}
-
GLint GLAPIENTRY
_mesa_GetAttribLocation(GLhandleARB program, const GLcharARB * name)
{
@@ -271,13 +216,15 @@ _mesa_GetAttribLocation(GLhandleARB program, const GLcharARB * name)
if (shProg->_LinkedShaders[MESA_SHADER_VERTEX] == NULL)
return -1;
+ unsigned array_index = 0;
struct gl_program_resource *res =
- _mesa_program_resource_find_name(shProg, GL_PROGRAM_INPUT, name);
+ _mesa_program_resource_find_name(shProg, GL_PROGRAM_INPUT, name,
+ &array_index);
if (!res)
return -1;
- GLint loc = program_resource_location(shProg, res, name);
+ GLint loc = program_resource_location(shProg, res, name, array_index);
/* The extra check against against 0 is made because of builtin-attribute
* locations that have offset applied. Function program_resource_location
@@ -455,13 +402,15 @@ _mesa_GetFragDataLocation(GLuint program, const GLchar *name)
if (shProg->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL)
return -1;
+ unsigned array_index = 0;
struct gl_program_resource *res =
- _mesa_program_resource_find_name(shProg, GL_PROGRAM_OUTPUT, name);
+ _mesa_program_resource_find_name(shProg, GL_PROGRAM_OUTPUT, name,
+ &array_index);
if (!res)
return -1;
- GLint loc = program_resource_location(shProg, res, name);
+ GLint loc = program_resource_location(shProg, res, name, array_index);
/* The extra check against against 0 is made because of builtin-attribute
* locations that have offset applied. Function program_resource_location
@@ -497,6 +446,20 @@ _mesa_program_resource_name(struct gl_program_resource *res)
return RESOURCE_VAR(res)->name;
case GL_UNIFORM:
return RESOURCE_UNI(res)->name;
+ case GL_VERTEX_SUBROUTINE_UNIFORM:
+ case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+ case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+ case GL_COMPUTE_SUBROUTINE_UNIFORM:
+ case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+ case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+ return RESOURCE_UNI(res)->name + MESA_SUBROUTINE_PREFIX_LEN;
+ case GL_VERTEX_SUBROUTINE:
+ case GL_GEOMETRY_SUBROUTINE:
+ case GL_FRAGMENT_SUBROUTINE:
+ case GL_COMPUTE_SUBROUTINE:
+ case GL_TESS_CONTROL_SUBROUTINE:
+ case GL_TESS_EVALUATION_SUBROUTINE:
+ return RESOURCE_SUB(res)->name;
default:
assert(!"support for resource type not implemented");
}
@@ -515,7 +478,19 @@ _mesa_program_resource_array_size(struct gl_program_resource *res)
case GL_PROGRAM_OUTPUT:
return RESOURCE_VAR(res)->data.max_array_access;
case GL_UNIFORM:
+ case GL_VERTEX_SUBROUTINE_UNIFORM:
+ case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+ case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+ case GL_COMPUTE_SUBROUTINE_UNIFORM:
+ case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+ case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
return RESOURCE_UNI(res)->array_elements;
+ case GL_VERTEX_SUBROUTINE:
+ case GL_GEOMETRY_SUBROUTINE:
+ case GL_FRAGMENT_SUBROUTINE:
+ case GL_COMPUTE_SUBROUTINE:
+ case GL_TESS_CONTROL_SUBROUTINE:
+ case GL_TESS_EVALUATION_SUBROUTINE:
case GL_ATOMIC_COUNTER_BUFFER:
case GL_UNIFORM_BLOCK:
return 0;
@@ -525,39 +500,32 @@ _mesa_program_resource_array_size(struct gl_program_resource *res)
return 0;
}
-static int
-array_index_of_resource(struct gl_program_resource *res,
- const char *name)
+/**
+ * Checks if array subscript is valid and if so sets array_index.
+ */
+static bool
+valid_array_index(const GLchar *name, unsigned *array_index)
{
- assert(res->Data);
+ long idx = 0;
+ const GLchar *out_base_name_end;
- switch (res->Type) {
- case GL_PROGRAM_INPUT:
- case GL_PROGRAM_OUTPUT:
- return get_matching_index(RESOURCE_VAR(res), name);
- default:
- assert(!"support for resource type not implemented");
- return -1;
- }
+ idx = parse_program_resource_name(name, &out_base_name_end);
+ if (idx < 0)
+ return false;
+
+ if (array_index)
+ *array_index = idx;
+
+ return true;
}
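
Illustrative aside (not part of the patch): valid_array_index() leaves the parsing to parse_program_resource_name(), which, as used above, yields the decoded element index or a negative value for a malformed subscript. A hedged standalone sketch of that parsing shape (the helper name check_subscript is invented here; it is not Mesa code):

   #include <stdio.h>
   #include <stdlib.h>
   #include <string.h>

   static long check_subscript(const char *name)
   {
      const char *bracket = strchr(name, '[');
      char *end;
      long idx;

      if (!bracket)
         return 0;                    /* no subscript: element 0 */

      idx = strtol(bracket + 1, &end, 10);
      if (end == bracket + 1 || *end != ']' || idx < 0)
         return -1;                   /* malformed subscript */
      return idx;
   }

   int main(void)
   {
      printf("%ld\n", check_subscript("colors[2]"));   /*  2 */
      printf("%ld\n", check_subscript("colors"));      /*  0 */
      printf("%ld\n", check_subscript("colors[x]"));   /* -1 */
      return 0;
   }
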
/* Find a program resource with specific name in given interface.
*/
struct gl_program_resource *
_mesa_program_resource_find_name(struct gl_shader_program *shProg,
- GLenum programInterface, const char *name)
+ GLenum programInterface, const char *name,
+ unsigned *array_index)
{
- GET_CURRENT_CONTEXT(ctx);
- const char *full_name = name;
-
- /* When context has 'VertexID_is_zero_based' set, gl_VertexID has been
- * lowered to gl_VertexIDMESA.
- */
- if (name && ctx->Const.VertexID_is_zero_based) {
- if (strcmp(name, "gl_VertexID") == 0)
- full_name = "gl_VertexIDMESA";
- }
-
struct gl_program_resource *res = shProg->ProgramResourceList;
for (unsigned i = 0; i < shProg->NumProgramResourceList; i++, res++) {
if (res->Type != programInterface)
@@ -567,26 +535,46 @@ _mesa_program_resource_find_name(struct gl_shader_program *shProg,
const char *rname = _mesa_program_resource_name(res);
unsigned baselen = strlen(rname);
- switch (programInterface) {
- case GL_TRANSFORM_FEEDBACK_VARYING:
- case GL_UNIFORM_BLOCK:
- case GL_UNIFORM:
- if (strncmp(rname, name, baselen) == 0) {
+ if (strncmp(rname, name, baselen) == 0) {
+ switch (programInterface) {
+ case GL_UNIFORM_BLOCK:
/* Basename match, check if array or struct. */
if (name[baselen] == '\0' ||
name[baselen] == '[' ||
name[baselen] == '.') {
return res;
}
+ break;
+ case GL_TRANSFORM_FEEDBACK_VARYING:
+ case GL_UNIFORM:
+ case GL_VERTEX_SUBROUTINE_UNIFORM:
+ case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+ case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+ case GL_COMPUTE_SUBROUTINE_UNIFORM:
+ case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+ case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+ case GL_VERTEX_SUBROUTINE:
+ case GL_GEOMETRY_SUBROUTINE:
+ case GL_FRAGMENT_SUBROUTINE:
+ case GL_COMPUTE_SUBROUTINE:
+ case GL_TESS_CONTROL_SUBROUTINE:
+ case GL_TESS_EVALUATION_SUBROUTINE:
+ if (name[baselen] == '.') {
+ return res;
+ }
+ /* fall-through */
+ case GL_PROGRAM_INPUT:
+ case GL_PROGRAM_OUTPUT:
+ if (name[baselen] == '\0') {
+ return res;
+ } else if (name[baselen] == '[' &&
+ valid_array_index(name, array_index)) {
+ return res;
+ }
+ break;
+ default:
+ assert(!"not implemented for given interface");
}
- break;
- case GL_PROGRAM_INPUT:
- case GL_PROGRAM_OUTPUT:
- if (array_index_of_resource(res, full_name) >= 0)
- return res;
- break;
- default:
- assert(!"not implemented for given interface");
}
}
return NULL;
@@ -651,6 +639,18 @@ _mesa_program_resource_find_index(struct gl_shader_program *shProg,
case GL_PROGRAM_INPUT:
case GL_PROGRAM_OUTPUT:
case GL_UNIFORM:
+ case GL_VERTEX_SUBROUTINE_UNIFORM:
+ case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+ case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+ case GL_COMPUTE_SUBROUTINE_UNIFORM:
+ case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+ case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+ case GL_VERTEX_SUBROUTINE:
+ case GL_GEOMETRY_SUBROUTINE:
+ case GL_FRAGMENT_SUBROUTINE:
+ case GL_COMPUTE_SUBROUTINE:
+ case GL_TESS_CONTROL_SUBROUTINE:
+ case GL_TESS_EVALUATION_SUBROUTINE:
if (++idx == (int) index)
return res;
break;
@@ -719,6 +719,12 @@ _mesa_get_program_resource_name(struct gl_shader_program *shProg,
bool add_index = !(((programInterface == GL_PROGRAM_INPUT) &&
res->StageReferences & (1 << MESA_SHADER_GEOMETRY)));
+ /* Transform feedback varyings have array index already appended
+ * in their names.
+ */
+ if (programInterface == GL_TRANSFORM_FEEDBACK_VARYING)
+ add_index = false;
+
if (add_index && _mesa_program_resource_array_size(res)) {
int i;
@@ -736,17 +742,9 @@ _mesa_get_program_resource_name(struct gl_shader_program *shProg,
static GLint
program_resource_location(struct gl_shader_program *shProg,
- struct gl_program_resource *res, const char *name)
+ struct gl_program_resource *res, const char *name,
+ unsigned array_index)
{
- unsigned index, offset;
- int array_index = -1;
-
- if (res->Type == GL_PROGRAM_INPUT || res->Type == GL_PROGRAM_OUTPUT) {
- array_index = array_index_of_resource(res, name);
- if (array_index < 0)
- return -1;
- }
-
/* Built-in locations should report GL_INVALID_INDEX. */
if (is_gl_identifier(name))
return GL_INVALID_INDEX;
@@ -757,13 +755,22 @@ program_resource_location(struct gl_shader_program *shProg,
*/
switch (res->Type) {
case GL_PROGRAM_INPUT:
+ /* If the input is an array, fail if the index is out of bounds. */
+ if (array_index > 0
+ && array_index >= RESOURCE_VAR(res)->type->length) {
+ return -1;
+ }
return RESOURCE_VAR(res)->data.location + array_index - VERT_ATTRIB_GENERIC0;
case GL_PROGRAM_OUTPUT:
+ /* If the output is an array, fail if the index is out of bounds. */
+ if (array_index > 0
+ && array_index >= RESOURCE_VAR(res)->type->length) {
+ return -1;
+ }
return RESOURCE_VAR(res)->data.location + array_index - FRAG_RESULT_DATA0;
case GL_UNIFORM:
- index = _mesa_get_uniform_location(shProg, name, &offset);
-
- if (index == GL_INVALID_INDEX)
+ /* If the uniform is built-in, fail. */
+ if (RESOURCE_UNI(res)->builtin)
return -1;
/* From the GL_ARB_uniform_buffer_object spec:
@@ -777,9 +784,21 @@ program_resource_location(struct gl_shader_program *shProg,
RESOURCE_UNI(res)->atomic_buffer_index != -1)
return -1;
- /* location in remap table + array element offset */
- return RESOURCE_UNI(res)->remap_location + offset;
+ /* fallthrough */
+ case GL_VERTEX_SUBROUTINE_UNIFORM:
+ case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+ case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+ case GL_COMPUTE_SUBROUTINE_UNIFORM:
+ case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+ case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+ /* If the uniform is an array, fail if the index is out of bounds. */
+ if (array_index > 0
+ && array_index >= RESOURCE_UNI(res)->array_elements) {
+ return -1;
+ }
+ /* location in remap table + array element offset */
+ return RESOURCE_UNI(res)->remap_location + array_index;
default:
return -1;
}
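
Illustrative worked example (not part of the patch): for a subroutine uniform declared as an array of four elements whose remap_location is 7, querying element 2 parses array_index = 2, passes the bounds check against array_elements = 4, and reports location 7 + 2 = 9; querying element 5 fails the check above and the query returns -1.
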
@@ -787,22 +806,22 @@ program_resource_location(struct gl_shader_program *shProg,
/**
* Function implements following location queries:
- * glGetAttribLocation
- * glGetFragDataLocation
* glGetUniformLocation
*/
GLint
_mesa_program_resource_location(struct gl_shader_program *shProg,
GLenum programInterface, const char *name)
{
+ unsigned array_index = 0;
struct gl_program_resource *res =
- _mesa_program_resource_find_name(shProg, programInterface, name);
+ _mesa_program_resource_find_name(shProg, programInterface, name,
+ &array_index);
/* Resource not found. */
if (!res)
return -1;
- return program_resource_location(shProg, res, name);
+ return program_resource_location(shProg, res, name, array_index);
}
/**
@@ -814,7 +833,7 @@ _mesa_program_resource_location_index(struct gl_shader_program *shProg,
GLenum programInterface, const char *name)
{
struct gl_program_resource *res =
- _mesa_program_resource_find_name(shProg, programInterface, name);
+ _mesa_program_resource_find_name(shProg, programInterface, name, NULL);
/* Non-existent variable or resource is not referenced by fragment stage. */
if (!res || !(res->StageReferences & (1 << MESA_SHADER_FRAGMENT)))
@@ -829,6 +848,10 @@ stage_from_enum(GLenum ref)
switch (ref) {
case GL_REFERENCED_BY_VERTEX_SHADER:
return MESA_SHADER_VERTEX;
+ case GL_REFERENCED_BY_TESS_CONTROL_SHADER:
+ return MESA_SHADER_TESS_CTRL;
+ case GL_REFERENCED_BY_TESS_EVALUATION_SHADER:
+ return MESA_SHADER_TESS_EVAL;
case GL_REFERENCED_BY_GEOMETRY_SHADER:
return MESA_SHADER_GEOMETRY;
case GL_REFERENCED_BY_FRAGMENT_SHADER:
@@ -886,7 +909,8 @@ get_buffer_property(struct gl_shader_program *shProg,
for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) {
const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName;
struct gl_program_resource *uni =
- _mesa_program_resource_find_name(shProg, GL_UNIFORM, iname);
+ _mesa_program_resource_find_name(shProg, GL_UNIFORM, iname,
+ NULL);
if (!uni)
continue;
(*val)++;
@@ -896,7 +920,8 @@ get_buffer_property(struct gl_shader_program *shProg,
for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) {
const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName;
struct gl_program_resource *uni =
- _mesa_program_resource_find_name(shProg, GL_UNIFORM, iname);
+ _mesa_program_resource_find_name(shProg, GL_UNIFORM, iname,
+ NULL);
if (!uni)
continue;
*val++ =
@@ -925,8 +950,8 @@ get_buffer_property(struct gl_shader_program *shProg,
invalid_operation:
_mesa_error(ctx, GL_INVALID_OPERATION, "%s(%s prop %s)", caller,
- _mesa_lookup_enum_by_nr(res->Type),
- _mesa_lookup_enum_by_nr(prop));
+ _mesa_enum_to_string(res->Type),
+ _mesa_enum_to_string(prop));
return 0;
}
@@ -944,11 +969,17 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
switch(prop) {
case GL_NAME_LENGTH:
- if (res->Type == GL_ATOMIC_COUNTER_BUFFER)
+ switch (res->Type) {
+ case GL_ATOMIC_COUNTER_BUFFER:
goto invalid_operation;
- /* Base name +3 if array '[0]' + terminator. */
- *val = strlen(_mesa_program_resource_name(res)) +
- (_mesa_program_resource_array_size(res) > 0 ? 3 : 0) + 1;
+ case GL_TRANSFORM_FEEDBACK_VARYING:
+ *val = strlen(_mesa_program_resource_name(res)) + 1;
+ break;
+ default:
+ /* Base name +3 if array '[0]' + terminator. */
+ *val = strlen(_mesa_program_resource_name(res)) +
+ (_mesa_program_resource_array_size(res) > 0 ? 3 : 0) + 1;
+ }
return 1;
case GL_TYPE:
switch (res->Type) {
@@ -1014,6 +1045,8 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
goto invalid_enum;
/* fallthrough */
case GL_REFERENCED_BY_VERTEX_SHADER:
+ case GL_REFERENCED_BY_TESS_CONTROL_SHADER:
+ case GL_REFERENCED_BY_TESS_EVALUATION_SHADER:
case GL_REFERENCED_BY_GEOMETRY_SHADER:
case GL_REFERENCED_BY_FRAGMENT_SHADER:
switch (res->Type) {
@@ -1034,7 +1067,8 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
case GL_PROGRAM_INPUT:
case GL_PROGRAM_OUTPUT:
*val = program_resource_location(shProg, res,
- _mesa_program_resource_name(res));
+ _mesa_program_resource_name(res),
+ 0);
return 1;
default:
goto invalid_operation;
@@ -1045,10 +1079,54 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
*val = RESOURCE_VAR(res)->data.index;
return 1;
+ case GL_NUM_COMPATIBLE_SUBROUTINES:
+ if (res->Type != GL_VERTEX_SUBROUTINE_UNIFORM &&
+ res->Type != GL_FRAGMENT_SUBROUTINE_UNIFORM &&
+ res->Type != GL_GEOMETRY_SUBROUTINE_UNIFORM &&
+ res->Type != GL_COMPUTE_SUBROUTINE_UNIFORM &&
+ res->Type != GL_TESS_CONTROL_SUBROUTINE_UNIFORM &&
+ res->Type != GL_TESS_EVALUATION_SUBROUTINE_UNIFORM)
+ goto invalid_operation;
+ *val = RESOURCE_UNI(res)->num_compatible_subroutines;
+ return 1;
+ case GL_COMPATIBLE_SUBROUTINES: {
+ const struct gl_uniform_storage *uni;
+ struct gl_shader *sh;
+ unsigned count, i;
+ int j;
+
+ if (res->Type != GL_VERTEX_SUBROUTINE_UNIFORM &&
+ res->Type != GL_FRAGMENT_SUBROUTINE_UNIFORM &&
+ res->Type != GL_GEOMETRY_SUBROUTINE_UNIFORM &&
+ res->Type != GL_COMPUTE_SUBROUTINE_UNIFORM &&
+ res->Type != GL_TESS_CONTROL_SUBROUTINE_UNIFORM &&
+ res->Type != GL_TESS_EVALUATION_SUBROUTINE_UNIFORM)
+ goto invalid_operation;
+ uni = RESOURCE_UNI(res);
+
+ sh = shProg->_LinkedShaders[_mesa_shader_stage_from_subroutine_uniform(res->Type)];
+ count = 0;
+ for (i = 0; i < sh->NumSubroutineFunctions; i++) {
+ struct gl_subroutine_function *fn = &sh->SubroutineFunctions[i];
+ for (j = 0; j < fn->num_compat_types; j++) {
+ if (fn->types[j] == uni->type) {
+ val[count++] = i;
+ break;
+ }
+ }
+ }
+ return count;
+ }
/* GL_ARB_tessellation_shader */
case GL_IS_PER_PATCH:
- case GL_REFERENCED_BY_TESS_CONTROL_SHADER:
- case GL_REFERENCED_BY_TESS_EVALUATION_SHADER:
+ switch (res->Type) {
+ case GL_PROGRAM_INPUT:
+ case GL_PROGRAM_OUTPUT:
+ *val = RESOURCE_VAR(res)->data.patch;
+ return 1;
+ default:
+ goto invalid_operation;
+ }
default:
goto invalid_enum;
}
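
Illustrative aside (not part of the patch): the new GL_NUM_COMPATIBLE_SUBROUTINES and GL_COMPATIBLE_SUBROUTINES properties are reached through glGetProgramResourceiv. A hedged sketch, assuming a linked program `prog` whose fragment stage declares a subroutine uniform `shade`:

   const GLenum props[2] = { GL_NUM_COMPATIBLE_SUBROUTINES,
                             GL_COMPATIBLE_SUBROUTINES };
   GLint num = 0, compat[16];
   GLuint idx = glGetProgramResourceIndex(prog, GL_FRAGMENT_SUBROUTINE_UNIFORM,
                                          "shade");

   glGetProgramResourceiv(prog, GL_FRAGMENT_SUBROUTINE_UNIFORM, idx,
                          1, &props[0], 1, NULL, &num);
   glGetProgramResourceiv(prog, GL_FRAGMENT_SUBROUTINE_UNIFORM, idx,
                          1, &props[1], 16, NULL, compat);
   /* compat[0..num-1] now holds the indices of the compatible subroutines. */
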
@@ -1057,14 +1135,14 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
invalid_enum:
_mesa_error(ctx, GL_INVALID_ENUM, "%s(%s prop %s)", caller,
- _mesa_lookup_enum_by_nr(res->Type),
- _mesa_lookup_enum_by_nr(prop));
+ _mesa_enum_to_string(res->Type),
+ _mesa_enum_to_string(prop));
return 0;
invalid_operation:
_mesa_error(ctx, GL_INVALID_OPERATION, "%s(%s prop %s)", caller,
- _mesa_lookup_enum_by_nr(res->Type),
- _mesa_lookup_enum_by_nr(prop));
+ _mesa_enum_to_string(res->Type),
+ _mesa_enum_to_string(prop));
return 0;
}
@@ -1086,7 +1164,7 @@ _mesa_get_program_resourceiv(struct gl_shader_program *shProg,
if (!res || bufSize < 0) {
_mesa_error(ctx, GL_INVALID_VALUE,
"glGetProgramResourceiv(%s index %d bufSize %d)",
- _mesa_lookup_enum_by_nr(programInterface), index, bufSize);
+ _mesa_enum_to_string(programInterface), index, bufSize);
return;
}
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index a4296adf799..f9a7d130f9c 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -110,6 +110,7 @@ _mesa_init_shader_state(struct gl_context *ctx)
*/
struct gl_shader_compiler_options options;
gl_shader_stage sh;
+ int i;
memset(&options, 0, sizeof(options));
options.MaxUnrollIterations = 32;
@@ -126,6 +127,12 @@ _mesa_init_shader_state(struct gl_context *ctx)
/* Extended for ARB_separate_shader_objects */
ctx->Shader.RefCount = 1;
mtx_init(&ctx->Shader.Mutex, mtx_plain);
+
+ ctx->TessCtrlProgram.patch_vertices = 3;
+ for (i = 0; i < 4; ++i)
+ ctx->TessCtrlProgram.patch_default_outer_level[i] = 1.0;
+ for (i = 0; i < 2; ++i)
+ ctx->TessCtrlProgram.patch_default_inner_level[i] = 1.0;
}
@@ -199,6 +206,9 @@ _mesa_validate_shader_target(const struct gl_context *ctx, GLenum type)
return ctx == NULL || ctx->Extensions.ARB_vertex_shader;
case GL_GEOMETRY_SHADER_ARB:
return ctx == NULL || _mesa_has_geometry_shaders(ctx);
+ case GL_TESS_CONTROL_SHADER:
+ case GL_TESS_EVALUATION_SHADER:
+ return ctx == NULL || _mesa_has_tessellation(ctx);
case GL_COMPUTE_SHADER:
return ctx == NULL || ctx->Extensions.ARB_compute_shader;
default:
@@ -415,6 +425,8 @@ detach_shader(struct gl_context *ctx, GLuint program, GLuint shader)
/* sanity check - make sure the new list's entries are sensible */
for (j = 0; j < shProg->NumShaders; j++) {
assert(shProg->Shaders[j]->Type == GL_VERTEX_SHADER ||
+ shProg->Shaders[j]->Type == GL_TESS_CONTROL_SHADER ||
+ shProg->Shaders[j]->Type == GL_TESS_EVALUATION_SHADER ||
shProg->Shaders[j]->Type == GL_GEOMETRY_SHADER ||
shProg->Shaders[j]->Type == GL_FRAGMENT_SHADER);
assert(shProg->Shaders[j]->RefCount > 0);
@@ -511,6 +523,57 @@ check_gs_query(struct gl_context *ctx, const struct gl_shader_program *shProg)
/**
+ * Check if a tessellation control shader query is valid at this time.
+ * If not, report an error and return false.
+ *
+ * From GL 4.0 section 6.1.12 (Shader and Program Queries):
+ *
+ * "If TESS_CONTROL_OUTPUT_VERTICES is queried for a program which has
+ * not been linked successfully, or which does not contain objects to
+ * form a tessellation control shader, then an INVALID_OPERATION error is
+ * generated."
+ */
+static bool
+check_tcs_query(struct gl_context *ctx, const struct gl_shader_program *shProg)
+{
+ if (shProg->LinkStatus &&
+ shProg->_LinkedShaders[MESA_SHADER_TESS_CTRL] != NULL) {
+ return true;
+ }
+
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "glGetProgramv(linked tessellation control shader required)");
+ return false;
+}
+
+
+/**
+ * Check if a tessellation evaluation shader query is valid at this time.
+ * If not, report an error and return false.
+ *
+ * From GL 4.0 section 6.1.12 (Shader and Program Queries):
+ *
+ * "If any of the pname values in this paragraph are queried for a program
+ * which has not been linked successfully, or which does not contain
+ * objects to form a tessellation evaluation shader, then an
+ * INVALID_OPERATION error is generated."
+ *
+ */
+static bool
+check_tes_query(struct gl_context *ctx, const struct gl_shader_program *shProg)
+{
+ if (shProg->LinkStatus &&
+ shProg->_LinkedShaders[MESA_SHADER_TESS_EVAL] != NULL) {
+ return true;
+ }
+
+ _mesa_error(ctx, GL_INVALID_OPERATION, "glGetProgramv(linked tessellation "
+ "evaluation shader required)");
+ return false;
+}
+
+
+/**
* glGetProgramiv() - get shader program state.
* Note that this is for GLSL shader programs, not ARB vertex/fragment
* programs (see glGetProgramivARB).
@@ -533,6 +596,7 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname,
* and GL 3.2) are available in this context
*/
const bool has_core_gs = _mesa_has_geometry_shaders(ctx);
+ const bool has_tess = _mesa_has_tessellation(ctx);
/* Are uniform buffer objects available in this context?
*/
@@ -711,12 +775,44 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname,
case GL_PROGRAM_SEPARABLE:
*params = shProg->SeparateShader;
return;
+
+ /* ARB_tessellation_shader */
+ case GL_TESS_CONTROL_OUTPUT_VERTICES:
+ if (!has_tess)
+ break;
+ if (check_tcs_query(ctx, shProg))
+ *params = shProg->TessCtrl.VerticesOut;
+ return;
+ case GL_TESS_GEN_MODE:
+ if (!has_tess)
+ break;
+ if (check_tes_query(ctx, shProg))
+ *params = shProg->TessEval.PrimitiveMode;
+ return;
+ case GL_TESS_GEN_SPACING:
+ if (!has_tess)
+ break;
+ if (check_tes_query(ctx, shProg))
+ *params = shProg->TessEval.Spacing;
+ return;
+ case GL_TESS_GEN_VERTEX_ORDER:
+ if (!has_tess)
+ break;
+ if (check_tes_query(ctx, shProg))
+ *params = shProg->TessEval.VertexOrder;
+ return;
+ case GL_TESS_GEN_POINT_MODE:
+ if (!has_tess)
+ break;
+ if (check_tes_query(ctx, shProg))
+ *params = shProg->TessEval.PointMode;
+ return;
default:
break;
}
_mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramiv(pname=%s)",
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
}
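
Illustrative aside (not part of the patch): with the cases above, tessellation program state becomes available through the regular glGetProgramiv path once a program links with tessellation stages. A hedged sketch assuming such a program `prog`:

   GLint out_verts = 0, gen_mode = 0;
   glGetProgramiv(prog, GL_TESS_CONTROL_OUTPUT_VERTICES, &out_verts);
   glGetProgramiv(prog, GL_TESS_GEN_MODE, &gen_mode);  /* e.g. GL_TRIANGLES */
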
@@ -992,6 +1088,12 @@ print_shader_info(const struct gl_shader_program *shProg)
if (shProg->_LinkedShaders[MESA_SHADER_GEOMETRY])
printf(" geom prog %u\n",
shProg->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program->Id);
+ if (shProg->_LinkedShaders[MESA_SHADER_TESS_CTRL])
+ printf(" tesc prog %u\n",
+ shProg->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program->Id);
+ if (shProg->_LinkedShaders[MESA_SHADER_TESS_EVAL])
+ printf(" tese prog %u\n",
+ shProg->_LinkedShaders[MESA_SHADER_TESS_EVAL]->Program->Id);
}
@@ -1037,11 +1139,9 @@ use_shader_program(struct gl_context *ctx, gl_shader_stage stage,
*/
switch (stage) {
case MESA_SHADER_VERTEX:
- /* Empty for now. */
- break;
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
case MESA_SHADER_GEOMETRY:
- /* Empty for now. */
- break;
case MESA_SHADER_COMPUTE:
/* Empty for now. */
break;
@@ -1071,6 +1171,7 @@ _mesa_use_program(struct gl_context *ctx, struct gl_shader_program *shProg)
use_shader_program(ctx, i, shProg, &ctx->Shader);
_mesa_active_program(ctx, shProg, "glUseProgram");
+ _mesa_shader_program_init_subroutine_defaults(shProg);
if (ctx->Driver.UseProgram)
ctx->Driver.UseProgram(ctx, shProg);
}
@@ -1172,7 +1273,7 @@ _mesa_CreateShader(GLenum type)
{
GET_CURRENT_CONTEXT(ctx);
if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glCreateShader %s\n", _mesa_lookup_enum_by_nr(type));
+ _mesa_debug(ctx, "glCreateShader %s\n", _mesa_enum_to_string(type));
return create_shader(ctx, type);
}
@@ -1331,7 +1432,7 @@ void GLAPIENTRY
_mesa_GetObjectParameterfvARB(GLhandleARB object, GLenum pname,
GLfloat *params)
{
- GLint iparams[1]; /* XXX is one element enough? */
+ GLint iparams[1] = {0}; /* XXX is one element enough? */
_mesa_GetObjectParameterivARB(object, pname, iparams);
params[0] = (GLfloat) iparams[0];
}
@@ -1460,7 +1561,7 @@ read_shader(const char *fname)
*/
void GLAPIENTRY
_mesa_ShaderSource(GLhandleARB shaderObj, GLsizei count,
- const GLcharARB * const * string, const GLint * length)
+ const GLcharARB * const * string, const GLint * length)
{
GET_CURRENT_CONTEXT(ctx);
GLint *offsets;
@@ -1692,12 +1793,23 @@ _mesa_ShaderBinary(GLint n, const GLuint* shaders, GLenum binaryformat,
const void* binary, GLint length)
{
GET_CURRENT_CONTEXT(ctx);
- (void) n;
(void) shaders;
(void) binaryformat;
(void) binary;
- (void) length;
- _mesa_error(ctx, GL_INVALID_OPERATION, "glShaderBinary");
+
+ /* Page 68, section 7.2 "Shader Binaries" of the OpenGL ES 3.1 spec, and
+ * page 88 of the OpenGL 4.5 spec, state:
+ *
+ * "An INVALID_VALUE error is generated if count or length is negative.
+ * An INVALID_ENUM error is generated if binaryformat is not a supported
+ * format returned in SHADER_BINARY_FORMATS."
+ */
+ if (n < 0 || length < 0) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "glShaderBinary(count or length < 0)");
+ return;
+ }
+
+ _mesa_error(ctx, GL_INVALID_ENUM, "glShaderBinary(format)");
}
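
Illustrative aside (not part of the patch): since the stub above rejects every binaryformat, a portable client should consult GL_NUM_SHADER_BINARY_FORMATS before attempting glShaderBinary at all. A hedged sketch:

   GLint n_formats = 0;
   glGetIntegerv(GL_NUM_SHADER_BINARY_FORMATS, &n_formats);
   if (n_formats == 0) {
      /* No binary formats: fall back to glShaderSource + glCompileShader. */
   }
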
@@ -1857,7 +1969,7 @@ _mesa_ProgramParameteri(GLuint program, GLenum pname, GLint value)
default:
_mesa_error(ctx, GL_INVALID_ENUM, "glProgramParameteri(pname=%s)",
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
return;
}
@@ -1865,7 +1977,7 @@ invalid_value:
_mesa_error(ctx, GL_INVALID_VALUE,
"glProgramParameteri(pname=%s, value=%d): "
"value must be 0 or 1.",
- _mesa_lookup_enum_by_nr(pname),
+ _mesa_enum_to_string(pname),
value);
}
@@ -1885,7 +1997,8 @@ _mesa_use_shader_program(struct gl_context *ctx, GLenum type,
static GLuint
_mesa_create_shader_program(struct gl_context* ctx, GLboolean separate,
- GLenum type, GLsizei count, const GLchar* const *strings)
+ GLenum type, GLsizei count,
+ const GLchar* const *strings)
{
const GLuint shader = create_shader(ctx, type);
GLuint program = 0;
@@ -1920,8 +2033,8 @@ _mesa_create_shader_program(struct gl_context* ctx, GLboolean separate,
}
#endif
}
-
- ralloc_strcat(&shProg->InfoLog, sh->InfoLog);
+ if (sh->InfoLog)
+ ralloc_strcat(&shProg->InfoLog, sh->InfoLog);
}
delete_shader(ctx, shader);
@@ -1944,6 +2057,22 @@ _mesa_copy_linked_program_data(gl_shader_stage type,
case MESA_SHADER_VERTEX:
dst->UsesClipDistanceOut = src->Vert.UsesClipDistance;
break;
+ case MESA_SHADER_TESS_CTRL: {
+ struct gl_tess_ctrl_program *dst_tcp =
+ (struct gl_tess_ctrl_program *) dst;
+ dst_tcp->VerticesOut = src->TessCtrl.VerticesOut;
+ break;
+ }
+ case MESA_SHADER_TESS_EVAL: {
+ struct gl_tess_eval_program *dst_tep =
+ (struct gl_tess_eval_program *) dst;
+ dst_tep->PrimitiveMode = src->TessEval.PrimitiveMode;
+ dst_tep->Spacing = src->TessEval.Spacing;
+ dst_tep->VertexOrder = src->TessEval.VertexOrder;
+ dst_tep->PointMode = src->TessEval.PointMode;
+ dst->UsesClipDistanceOut = src->TessEval.UsesClipDistance;
+ break;
+ }
case MESA_SHADER_GEOMETRY: {
struct gl_geometry_program *dst_gp = (struct gl_geometry_program *) dst;
dst_gp->VerticesIn = src->Geom.VerticesIn;
@@ -1954,20 +2083,20 @@ _mesa_copy_linked_program_data(gl_shader_stage type,
dst->UsesClipDistanceOut = src->Geom.UsesClipDistance;
dst_gp->UsesEndPrimitive = src->Geom.UsesEndPrimitive;
dst_gp->UsesStreams = src->Geom.UsesStreams;
- }
break;
+ }
case MESA_SHADER_FRAGMENT: {
struct gl_fragment_program *dst_fp = (struct gl_fragment_program *) dst;
dst_fp->FragDepthLayout = src->FragDepthLayout;
- }
break;
+ }
case MESA_SHADER_COMPUTE: {
struct gl_compute_program *dst_cp = (struct gl_compute_program *) dst;
int i;
for (i = 0; i < 3; i++)
dst_cp->LocalSize[i] = src->Comp.LocalSize[i];
- }
break;
+ }
default:
break;
}
@@ -1984,3 +2113,568 @@ _mesa_CreateShaderProgramv(GLenum type, GLsizei count,
return _mesa_create_shader_program(ctx, GL_TRUE, type, count, strings);
}
+
+
+/**
+ * For GL_ARB_tessellation_shader
+ */
+extern void GLAPIENTRY
+_mesa_PatchParameteri(GLenum pname, GLint value)
+{
+ GET_CURRENT_CONTEXT(ctx);
+
+ if (!_mesa_has_tessellation(ctx)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "glPatchParameteri");
+ return;
+ }
+
+ if (pname != GL_PATCH_VERTICES) {
+ _mesa_error(ctx, GL_INVALID_ENUM, "glPatchParameteri");
+ return;
+ }
+
+ if (value <= 0 || value > ctx->Const.MaxPatchVertices) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "glPatchParameteri");
+ return;
+ }
+
+ ctx->TessCtrlProgram.patch_vertices = value;
+}
+
+
+extern void GLAPIENTRY
+_mesa_PatchParameterfv(GLenum pname, const GLfloat *values)
+{
+ GET_CURRENT_CONTEXT(ctx);
+
+ if (!_mesa_has_tessellation(ctx)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "glPatchParameterfv");
+ return;
+ }
+
+ switch(pname) {
+ case GL_PATCH_DEFAULT_OUTER_LEVEL:
+ FLUSH_VERTICES(ctx, 0);
+ memcpy(ctx->TessCtrlProgram.patch_default_outer_level, values,
+ 4 * sizeof(GLfloat));
+ ctx->NewDriverState |= ctx->DriverFlags.NewDefaultTessLevels;
+ return;
+ case GL_PATCH_DEFAULT_INNER_LEVEL:
+ FLUSH_VERTICES(ctx, 0);
+ memcpy(ctx->TessCtrlProgram.patch_default_inner_level, values,
+ 2 * sizeof(GLfloat));
+ ctx->NewDriverState |= ctx->DriverFlags.NewDefaultTessLevels;
+ return;
+ default:
+ _mesa_error(ctx, GL_INVALID_ENUM, "glPatchParameterfv");
+ return;
+ }
+}
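
Illustrative aside (not part of the patch): together these two entry points cover the ARB_tessellation_shader patch state, namely the number of vertices per patch and the default levels used when no tessellation control shader is bound. A hedged usage sketch with arbitrary values:

   glPatchParameteri(GL_PATCH_VERTICES, 4);

   const GLfloat outer[4] = { 2.0f, 2.0f, 2.0f, 2.0f };
   const GLfloat inner[2] = { 2.0f, 2.0f };
   glPatchParameterfv(GL_PATCH_DEFAULT_OUTER_LEVEL, outer);
   glPatchParameterfv(GL_PATCH_DEFAULT_INNER_LEVEL, inner);
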
+
+/**
+ * ARB_shader_subroutine
+ */
+GLint GLAPIENTRY
+_mesa_GetSubroutineUniformLocation(GLuint program, GLenum shadertype,
+ const GLchar *name)
+{
+ GET_CURRENT_CONTEXT(ctx);
+ const char *api_name = "glGetSubroutineUniformLocation";
+ struct gl_shader_program *shProg;
+ GLenum resource_type;
+ gl_shader_stage stage;
+
+ if (!_mesa_has_shader_subroutine(ctx)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return -1;
+ }
+
+ if (!_mesa_validate_shader_target(ctx, shadertype)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return -1;
+ }
+
+ shProg = _mesa_lookup_shader_program_err(ctx, program, api_name);
+ if (!shProg)
+ return -1;
+
+ stage = _mesa_shader_enum_to_shader_stage(shadertype);
+ if (!shProg->_LinkedShaders[stage]) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return -1;
+ }
+
+ resource_type = _mesa_shader_stage_to_subroutine_uniform(stage);
+ return _mesa_program_resource_location(shProg, resource_type, name);
+}
+
+GLuint GLAPIENTRY
+_mesa_GetSubroutineIndex(GLuint program, GLenum shadertype,
+ const GLchar *name)
+{
+ GET_CURRENT_CONTEXT(ctx);
+ const char *api_name = "glGetSubroutineIndex";
+ struct gl_shader_program *shProg;
+ struct gl_program_resource *res;
+ GLenum resource_type;
+ gl_shader_stage stage;
+
+ if (!_mesa_has_shader_subroutine(ctx)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return -1;
+ }
+
+ if (!_mesa_validate_shader_target(ctx, shadertype)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return -1;
+ }
+
+ shProg = _mesa_lookup_shader_program_err(ctx, program, api_name);
+ if (!shProg)
+ return -1;
+
+ stage = _mesa_shader_enum_to_shader_stage(shadertype);
+ if (!shProg->_LinkedShaders[stage]) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return -1;
+ }
+
+ resource_type = _mesa_shader_stage_to_subroutine(stage);
+ res = _mesa_program_resource_find_name(shProg, resource_type, name, NULL);
+ if (!res) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return -1;
+ }
+
+ return _mesa_program_resource_index(shProg, res);
+}
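
Illustrative aside (not part of the patch): the typical ARB_shader_subroutine client flow built on these queries resolves the uniform's location and the chosen function's index, then selects it with glUniformSubroutinesuiv (implemented further below). A hedged sketch, assuming a bound, linked program `prog` with a fragment-stage subroutine uniform `shade`, a compatible subroutine `shade_phong`, and <stdlib.h> for calloc/free:

   GLint loc = glGetSubroutineUniformLocation(prog, GL_FRAGMENT_SHADER, "shade");
   GLuint fn = glGetSubroutineIndex(prog, GL_FRAGMENT_SHADER, "shade_phong");

   GLint num_locs = 0;
   glGetProgramStageiv(prog, GL_FRAGMENT_SHADER,
                       GL_ACTIVE_SUBROUTINE_UNIFORM_LOCATIONS, &num_locs);

   GLuint *indices = calloc(num_locs, sizeof(GLuint));
   indices[loc] = fn;
   glUniformSubroutinesuiv(GL_FRAGMENT_SHADER, num_locs, indices);
   free(indices);
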
+
+
+GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineUniformiv(GLuint program, GLenum shadertype,
+ GLuint index, GLenum pname, GLint *values)
+{
+ GET_CURRENT_CONTEXT(ctx);
+ const char *api_name = "glGetActiveSubroutineUniformiv";
+ struct gl_shader_program *shProg;
+ struct gl_shader *sh;
+ gl_shader_stage stage;
+ struct gl_program_resource *res;
+ const struct gl_uniform_storage *uni;
+ GLenum resource_type;
+ int count, i, j;
+
+ if (!_mesa_has_shader_subroutine(ctx)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+
+ if (!_mesa_validate_shader_target(ctx, shadertype)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+
+ shProg = _mesa_lookup_shader_program_err(ctx, program, api_name);
+ if (!shProg)
+ return;
+
+ stage = _mesa_shader_enum_to_shader_stage(shadertype);
+ resource_type = _mesa_shader_stage_to_subroutine_uniform(stage);
+
+ sh = shProg->_LinkedShaders[stage];
+ if (!sh) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+
+ switch (pname) {
+ case GL_NUM_COMPATIBLE_SUBROUTINES: {
+ res = _mesa_program_resource_find_index(shProg, resource_type, index);
+ if (res) {
+ uni = res->Data;
+ values[0] = uni->num_compatible_subroutines;
+ }
+ break;
+ }
+ case GL_COMPATIBLE_SUBROUTINES: {
+ res = _mesa_program_resource_find_index(shProg, resource_type, index);
+ if (res) {
+ uni = res->Data;
+ count = 0;
+ for (i = 0; i < sh->NumSubroutineFunctions; i++) {
+ struct gl_subroutine_function *fn = &sh->SubroutineFunctions[i];
+ for (j = 0; j < fn->num_compat_types; j++) {
+ if (fn->types[j] == uni->type) {
+ values[count++] = i;
+ break;
+ }
+ }
+ }
+ }
+ break;
+ }
+ case GL_UNIFORM_SIZE:
+ res = _mesa_program_resource_find_index(shProg, resource_type, index);
+ if (res) {
+ uni = res->Data;
+ values[0] = uni->array_elements ? uni->array_elements : 1;
+ }
+ break;
+ case GL_UNIFORM_NAME_LENGTH:
+ res = _mesa_program_resource_find_index(shProg, resource_type, index);
+ if (res) {
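+ /* Length counts the terminating NUL plus 3 for the "[0]" suffix
+ * reported on array uniform names.
+ */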
+ values[0] = strlen(_mesa_program_resource_name(res)) + 1
+ + ((_mesa_program_resource_array_size(res) != 0) ? 3 : 0);
+ }
+ break;
+ default:
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineUniformName(GLuint program, GLenum shadertype,
+ GLuint index, GLsizei bufsize,
+ GLsizei *length, GLchar *name)
+{
+ GET_CURRENT_CONTEXT(ctx);
+ const char *api_name = "glGetActiveSubroutineUniformName";
+ struct gl_shader_program *shProg;
+ GLenum resource_type;
+ gl_shader_stage stage;
+
+ if (!_mesa_has_shader_subroutine(ctx)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+
+ if (!_mesa_validate_shader_target(ctx, shadertype)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+
+ shProg = _mesa_lookup_shader_program_err(ctx, program, api_name);
+ if (!shProg)
+ return;
+
+ stage = _mesa_shader_enum_to_shader_stage(shadertype);
+ if (!shProg->_LinkedShaders[stage]) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+
+ resource_type = _mesa_shader_stage_to_subroutine_uniform(stage);
+ /* get program resource name */
+ _mesa_get_program_resource_name(shProg, resource_type,
+ index, bufsize,
+ length, name, api_name);
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineName(GLuint program, GLenum shadertype,
+ GLuint index, GLsizei bufsize,
+ GLsizei *length, GLchar *name)
+{
+ GET_CURRENT_CONTEXT(ctx);
+ const char *api_name = "glGetActiveSubroutineName";
+ struct gl_shader_program *shProg;
+ GLenum resource_type;
+ gl_shader_stage stage;
+
+ if (!_mesa_has_shader_subroutine(ctx)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+
+ if (!_mesa_validate_shader_target(ctx, shadertype)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+
+ shProg = _mesa_lookup_shader_program_err(ctx, program, api_name);
+ if (!shProg)
+ return;
+
+ stage = _mesa_shader_enum_to_shader_stage(shadertype);
+ if (!shProg->_LinkedShaders[stage]) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+ resource_type = _mesa_shader_stage_to_subroutine(stage);
+ _mesa_get_program_resource_name(shProg, resource_type,
+ index, bufsize,
+ length, name, api_name);
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_UniformSubroutinesuiv(GLenum shadertype, GLsizei count,
+ const GLuint *indices)
+{
+ GET_CURRENT_CONTEXT(ctx);
+ const char *api_name = "glUniformSubroutinesuiv";
+ struct gl_shader_program *shProg;
+ struct gl_shader *sh;
+ gl_shader_stage stage;
+ int i;
+
+ if (!_mesa_has_shader_subroutine(ctx)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+
+ if (!_mesa_validate_shader_target(ctx, shadertype)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+
+ stage = _mesa_shader_enum_to_shader_stage(shadertype);
+ shProg = ctx->_Shader->CurrentProgram[stage];
+ if (!shProg) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+
+ sh = shProg->_LinkedShaders[stage];
+ if (!sh) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+
+ if (count != sh->NumSubroutineUniformRemapTable) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s", api_name);
+ return;
+ }
+
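+ /* First pass: check that every index is in range and names a subroutine
+ * compatible with its uniform's type, so no state is modified on error.
+ */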
+ i = 0;
+ do {
+ struct gl_uniform_storage *uni = sh->SubroutineUniformRemapTable[i];
+ int uni_count = uni->array_elements ? uni->array_elements : 1;
+ int j, k;
+
+ for (j = i; j < i + uni_count; j++) {
+ struct gl_subroutine_function *subfn;
+ if (indices[j] >= sh->NumSubroutineFunctions) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s", api_name);
+ return;
+ }
+
+ subfn = &sh->SubroutineFunctions[indices[j]];
+ for (k = 0; k < subfn->num_compat_types; k++) {
+ if (subfn->types[k] == uni->type)
+ break;
+ }
+ if (k == subfn->num_compat_types) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+ }
+ i += uni_count;
+ } while (i < count);
+
+ FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
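+ /* Second pass: store the validated indices into the uniform storage. */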
+ i = 0;
+ do {
+ struct gl_uniform_storage *uni = sh->SubroutineUniformRemapTable[i];
+ int uni_count = uni->array_elements ? uni->array_elements : 1;
+
+ memcpy(&uni->storage[0], &indices[i],
+ sizeof(GLuint) * uni_count);
+
+ uni->initialized = true;
+ _mesa_propagate_uniforms_to_driver_storage(uni, 0, uni_count);
+ i += uni_count;
+ } while (i < count);
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_GetUniformSubroutineuiv(GLenum shadertype, GLint location,
+ GLuint *params)
+{
+ GET_CURRENT_CONTEXT(ctx);
+ const char *api_name = "glGetUniformSubroutineuiv";
+ struct gl_shader_program *shProg;
+ struct gl_shader *sh;
+ gl_shader_stage stage;
+
+ if (!_mesa_has_shader_subroutine(ctx)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+
+ if (!_mesa_validate_shader_target(ctx, shadertype)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+
+ stage = _mesa_shader_enum_to_shader_stage(shadertype);
+ shProg = ctx->_Shader->CurrentProgram[stage];
+ if (!shProg) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+
+ sh = shProg->_LinkedShaders[stage];
+ if (!sh) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+
+ if (location >= sh->NumSubroutineUniformRemapTable) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s", api_name);
+ return;
+ }
+
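+ /* Return the subroutine function index currently bound at this location. */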
+ {
+ struct gl_uniform_storage *uni = sh->SubroutineUniformRemapTable[location];
+ int offset = location - uni->subroutine[stage].index;
+ memcpy(params, &uni->storage[offset],
+ sizeof(GLuint));
+ }
+}
+
+
+GLvoid GLAPIENTRY
+_mesa_GetProgramStageiv(GLuint program, GLenum shadertype,
+ GLenum pname, GLint *values)
+{
+ GET_CURRENT_CONTEXT(ctx);
+ const char *api_name = "glGetProgramStageiv";
+ struct gl_shader_program *shProg;
+ struct gl_shader *sh;
+ gl_shader_stage stage;
+
+ if (!_mesa_has_shader_subroutine(ctx)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+
+ if (!_mesa_validate_shader_target(ctx, shadertype)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+
+ shProg = _mesa_lookup_shader_program_err(ctx, program, api_name);
+ if (!shProg)
+ return;
+
+ stage = _mesa_shader_enum_to_shader_stage(shadertype);
+ sh = shProg->_LinkedShaders[stage];
+ if (!sh) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s", api_name);
+ return;
+ }
+
+ switch (pname) {
+ case GL_ACTIVE_SUBROUTINES:
+ values[0] = sh->NumSubroutineFunctions;
+ break;
+ case GL_ACTIVE_SUBROUTINE_UNIFORM_LOCATIONS:
+ values[0] = sh->NumSubroutineUniformRemapTable;
+ break;
+ case GL_ACTIVE_SUBROUTINE_UNIFORMS:
+ values[0] = sh->NumSubroutineUniformTypes;
+ break;
+ case GL_ACTIVE_SUBROUTINE_MAX_LENGTH:
+ {
+ unsigned i;
+ GLint max_len = 0;
+ GLenum resource_type;
+ struct gl_program_resource *res;
+
+ resource_type = _mesa_shader_stage_to_subroutine(stage);
+ for (i = 0; i < sh->NumSubroutineFunctions; i++) {
+ res = _mesa_program_resource_find_index(shProg, resource_type, i);
+ if (res) {
+ const GLint len = strlen(_mesa_program_resource_name(res)) + 1;
+ if (len > max_len)
+ max_len = len;
+ }
+ }
+ values[0] = max_len;
+ break;
+ }
+ case GL_ACTIVE_SUBROUTINE_UNIFORM_MAX_LENGTH:
+ {
+ unsigned i;
+ GLint max_len = 0;
+ GLenum resource_type;
+ struct gl_program_resource *res;
+
+ resource_type = _mesa_shader_stage_to_subroutine_uniform(stage);
+ for (i = 0; i < sh->NumSubroutineUniformRemapTable; i++) {
+ res = _mesa_program_resource_find_index(shProg, resource_type, i);
+ if (res) {
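+ /* Count the terminating NUL plus the "[0]" suffix on array names. */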
+ const GLint len = strlen(_mesa_program_resource_name(res)) + 1
+ + ((_mesa_program_resource_array_size(res) != 0) ? 3 : 0);
+
+ if (len > max_len)
+ max_len = len;
+ }
+ }
+ values[0] = max_len;
+ break;
+ }
+ default:
+ _mesa_error(ctx, GL_INVALID_ENUM, "%s", api_name);
+ values[0] = -1;
+ break;
+ }
+}
+
+static int
+find_compat_subroutine(struct gl_shader *sh, const struct glsl_type *type)
+{
+ int i, j;
+
+ for (i = 0; i < sh->NumSubroutineFunctions; i++) {
+ struct gl_subroutine_function *fn = &sh->SubroutineFunctions[i];
+ for (j = 0; j < fn->num_compat_types; j++) {
+ if (fn->types[j] == type)
+ return i;
+ }
+ }
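+ /* No compatible subroutine was found; fall back to index 0. */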
+ return 0;
+}
+
+static void
+_mesa_shader_init_subroutine_defaults(struct gl_shader *sh)
+{
+ int i, j;
+
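+ /* Point each subroutine uniform at the first compatible subroutine
+ * function as its default binding.
+ */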
+ for (i = 0; i < sh->NumSubroutineUniformRemapTable; i++) {
+ struct gl_uniform_storage *uni = sh->SubroutineUniformRemapTable[i];
+ int uni_count;
+ int val;
+
+ if (!uni)
+ continue;
+ uni_count = uni->array_elements ? uni->array_elements : 1;
+ val = find_compat_subroutine(sh, uni->type);
+
+ for (j = 0; j < uni_count; j++)
+ memcpy(&uni->storage[j], &val, sizeof(int));
+ uni->initialized = true;
+ _mesa_propagate_uniforms_to_driver_storage(uni, 0, uni_count);
+ }
+}
+
+void
+_mesa_shader_program_init_subroutine_defaults(struct gl_shader_program *shProg)
+{
+ int i;
+
+ if (!shProg)
+ return;
+
+ for (i = 0; i < MESA_SHADER_STAGES; i++) {
+ if (!shProg->_LinkedShaders[i])
+ continue;
+
+ _mesa_shader_init_subroutine_defaults(shProg->_LinkedShaders[i]);
+ }
+}
diff --git a/src/mesa/main/shaderapi.h b/src/mesa/main/shaderapi.h
index aba6d5d8306..0a10191684f 100644
--- a/src/mesa/main/shaderapi.h
+++ b/src/mesa/main/shaderapi.h
@@ -232,7 +232,8 @@ _mesa_program_resource_index(struct gl_shader_program *shProg,
extern struct gl_program_resource *
_mesa_program_resource_find_name(struct gl_shader_program *shProg,
- GLenum programInterface, const char *name);
+ GLenum programInterface, const char *name,
+ unsigned *array_index);
extern struct gl_program_resource *
_mesa_program_resource_find_index(struct gl_shader_program *shProg,
@@ -264,6 +265,51 @@ _mesa_get_program_resourceiv(struct gl_shader_program *shProg,
GLsizei bufSize, GLsizei *length,
GLint *params);
+/* GL_ARB_tessellation_shader */
+extern void GLAPIENTRY
+_mesa_PatchParameteri(GLenum pname, GLint value);
+
+extern void GLAPIENTRY
+_mesa_PatchParameterfv(GLenum pname, const GLfloat *values);
+
+/* GL_ARB_shader_subroutine */
+void
+_mesa_shader_program_init_subroutine_defaults(struct gl_shader_program *shProg);
+
+extern GLint GLAPIENTRY
+_mesa_GetSubroutineUniformLocation(GLuint program, GLenum shadertype,
+ const GLchar *name);
+
+extern GLuint GLAPIENTRY
+_mesa_GetSubroutineIndex(GLuint program, GLenum shadertype,
+ const GLchar *name);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineUniformiv(GLuint program, GLenum shadertype,
+ GLuint index, GLenum pname, GLint *values);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineUniformName(GLuint program, GLenum shadertype,
+ GLuint index, GLsizei bufsize,
+ GLsizei *length, GLchar *name);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetActiveSubroutineName(GLuint program, GLenum shadertype,
+ GLuint index, GLsizei bufsize,
+ GLsizei *length, GLchar *name);
+
+extern GLvoid GLAPIENTRY
+_mesa_UniformSubroutinesuiv(GLenum shadertype, GLsizei count,
+ const GLuint *indices);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetUniformSubroutineuiv(GLenum shadertype, GLint location,
+ GLuint *params);
+
+extern GLvoid GLAPIENTRY
+_mesa_GetProgramStageiv(GLuint program, GLenum shadertype,
+ GLenum pname, GLint *values);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c
index 80b77275f93..a348cdb0405 100644
--- a/src/mesa/main/shaderimage.c
+++ b/src/mesa/main/shaderimage.c
@@ -610,7 +610,7 @@ _mesa_BindImageTextures(GLuint first, GLsizei count, const GLuint *textures)
"glBindImageTextures(the internal format %s of "
"the level zero texture image of textures[%d]=%u "
"is not supported)",
- _mesa_lookup_enum_by_nr(tex_format),
+ _mesa_enum_to_string(tex_format),
i, texture);
continue;
}
diff --git a/src/mesa/main/shaderobj.h b/src/mesa/main/shaderobj.h
index 3d696a1887e..943044e37cd 100644
--- a/src/mesa/main/shaderobj.h
+++ b/src/mesa/main/shaderobj.h
@@ -111,6 +111,10 @@ _mesa_shader_enum_to_shader_stage(GLenum v)
return MESA_SHADER_FRAGMENT;
case GL_GEOMETRY_SHADER:
return MESA_SHADER_GEOMETRY;
+ case GL_TESS_CONTROL_SHADER:
+ return MESA_SHADER_TESS_CTRL;
+ case GL_TESS_EVALUATION_SHADER:
+ return MESA_SHADER_TESS_EVAL;
case GL_COMPUTE_SHADER:
return MESA_SHADER_COMPUTE;
default:
@@ -119,6 +123,107 @@ _mesa_shader_enum_to_shader_stage(GLenum v)
}
}
+/* 8 characters for the stage prefix (e.g. "__subu_v") plus another underscore */
+#define MESA_SUBROUTINE_PREFIX_LEN 9
+static inline const char *
+_mesa_shader_stage_to_subroutine_prefix(gl_shader_stage stage)
+{
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ return "__subu_v";
+ case MESA_SHADER_GEOMETRY:
+ return "__subu_g";
+ case MESA_SHADER_FRAGMENT:
+ return "__subu_f";
+ case MESA_SHADER_COMPUTE:
+ return "__subu_c";
+ case MESA_SHADER_TESS_CTRL:
+ return "__subu_t";
+ case MESA_SHADER_TESS_EVAL:
+ return "__subu_e";
+ default:
+ return NULL;
+ }
+}
+
+static inline gl_shader_stage
+_mesa_shader_stage_from_subroutine_uniform(GLenum subuniform)
+{
+ switch (subuniform) {
+ default:
+ case GL_VERTEX_SUBROUTINE_UNIFORM:
+ return MESA_SHADER_VERTEX;
+ case GL_GEOMETRY_SUBROUTINE_UNIFORM:
+ return MESA_SHADER_GEOMETRY;
+ case GL_FRAGMENT_SUBROUTINE_UNIFORM:
+ return MESA_SHADER_FRAGMENT;
+ case GL_COMPUTE_SUBROUTINE_UNIFORM:
+ return MESA_SHADER_COMPUTE;
+ case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
+ return MESA_SHADER_TESS_CTRL;
+ case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
+ return MESA_SHADER_TESS_EVAL;
+ }
+}
+
+static inline gl_shader_stage
+_mesa_shader_stage_from_subroutine(GLenum subroutine)
+{
+ switch (subroutine) {
+ default:
+ case GL_VERTEX_SUBROUTINE:
+ return MESA_SHADER_VERTEX;
+ case GL_GEOMETRY_SUBROUTINE:
+ return MESA_SHADER_GEOMETRY;
+ case GL_FRAGMENT_SUBROUTINE:
+ return MESA_SHADER_FRAGMENT;
+ case GL_COMPUTE_SUBROUTINE:
+ return MESA_SHADER_COMPUTE;
+ case GL_TESS_CONTROL_SUBROUTINE:
+ return MESA_SHADER_TESS_CTRL;
+ case GL_TESS_EVALUATION_SUBROUTINE:
+ return MESA_SHADER_TESS_EVAL;
+ }
+}
+
+static inline GLenum
+_mesa_shader_stage_to_subroutine(gl_shader_stage stage)
+{
+ switch (stage) {
+ default:
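+ /* Unexpected stages fall through to the vertex subroutine enum. */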
+ case MESA_SHADER_VERTEX:
+ return GL_VERTEX_SUBROUTINE;
+ case MESA_SHADER_GEOMETRY:
+ return GL_GEOMETRY_SUBROUTINE;
+ case MESA_SHADER_FRAGMENT:
+ return GL_FRAGMENT_SUBROUTINE;
+ case MESA_SHADER_COMPUTE:
+ return GL_COMPUTE_SUBROUTINE;
+ case MESA_SHADER_TESS_CTRL:
+ return GL_TESS_CONTROL_SUBROUTINE;
+ case MESA_SHADER_TESS_EVAL:
+ return GL_TESS_EVALUATION_SUBROUTINE;
+ }
+}
+
+static inline GLenum
+_mesa_shader_stage_to_subroutine_uniform(gl_shader_stage stage)
+{
+ switch (stage) {
+ default:
+ case MESA_SHADER_VERTEX:
+ return GL_VERTEX_SUBROUTINE_UNIFORM;
+ case MESA_SHADER_GEOMETRY:
+ return GL_GEOMETRY_SUBROUTINE_UNIFORM;
+ case MESA_SHADER_FRAGMENT:
+ return GL_FRAGMENT_SUBROUTINE_UNIFORM;
+ case MESA_SHADER_COMPUTE:
+ return GL_COMPUTE_SUBROUTINE_UNIFORM;
+ case MESA_SHADER_TESS_CTRL:
+ return GL_TESS_CONTROL_SUBROUTINE_UNIFORM;
+ case MESA_SHADER_TESS_EVAL:
+ return GL_TESS_EVALUATION_SUBROUTINE_UNIFORM;
+ }
+}
#ifdef __cplusplus
}
diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c
index bede7fe1d0e..d3b1c72b08d 100644
--- a/src/mesa/main/state.c
+++ b/src/mesa/main/state.c
@@ -79,8 +79,8 @@ update_program_enables(struct gl_context *ctx)
/**
- * Update the ctx->Vertex/Geometry/FragmentProgram._Current pointers to point
- * to the current/active programs. Then call ctx->Driver.BindProgram() to
+ * Update the ctx->*Program._Current pointers to point to the
+ * current/active programs. Then call ctx->Driver.BindProgram() to
* tell the driver which programs to use.
*
* Programs may come from 3 sources: GLSL shaders, ARB/NV_vertex/fragment
@@ -97,6 +97,10 @@ update_program(struct gl_context *ctx)
{
const struct gl_shader_program *vsProg =
ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
+ const struct gl_shader_program *tcsProg =
+ ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL];
+ const struct gl_shader_program *tesProg =
+ ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
const struct gl_shader_program *gsProg =
ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
struct gl_shader_program *fsProg =
@@ -106,6 +110,8 @@ update_program(struct gl_context *ctx)
const struct gl_vertex_program *prevVP = ctx->VertexProgram._Current;
const struct gl_fragment_program *prevFP = ctx->FragmentProgram._Current;
const struct gl_geometry_program *prevGP = ctx->GeometryProgram._Current;
+ const struct gl_tess_ctrl_program *prevTCP = ctx->TessCtrlProgram._Current;
+ const struct gl_tess_eval_program *prevTEP = ctx->TessEvalProgram._Current;
const struct gl_compute_program *prevCP = ctx->ComputeProgram._Current;
GLbitfield new_state = 0x0;
@@ -175,6 +181,30 @@ update_program(struct gl_context *ctx)
_mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, NULL);
}
+ if (tesProg && tesProg->LinkStatus
+ && tesProg->_LinkedShaders[MESA_SHADER_TESS_EVAL]) {
+ /* Use GLSL tessellation evaluation shader */
+ _mesa_reference_tesseprog(ctx, &ctx->TessEvalProgram._Current,
+ gl_tess_eval_program(
+ tesProg->_LinkedShaders[MESA_SHADER_TESS_EVAL]->Program));
+ }
+ else {
+ /* No tessellation evaluation program */
+ _mesa_reference_tesseprog(ctx, &ctx->TessEvalProgram._Current, NULL);
+ }
+
+ if (tcsProg && tcsProg->LinkStatus
+ && tcsProg->_LinkedShaders[MESA_SHADER_TESS_CTRL]) {
+ /* Use GLSL tessellation control shader */
+ _mesa_reference_tesscprog(ctx, &ctx->TessCtrlProgram._Current,
+ gl_tess_ctrl_program(
+ tcsProg->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program));
+ }
+ else {
+ /* No tessellation control program */
+ _mesa_reference_tesscprog(ctx, &ctx->TessCtrlProgram._Current, NULL);
+ }
+
/* Examine vertex program after fragment program as
* _mesa_get_fixed_func_vertex_program() needs to know active
* fragprog inputs.
@@ -230,6 +260,22 @@ update_program(struct gl_context *ctx)
}
}
+ if (ctx->TessEvalProgram._Current != prevTEP) {
+ new_state |= _NEW_PROGRAM;
+ if (ctx->Driver.BindProgram) {
+ ctx->Driver.BindProgram(ctx, GL_TESS_EVALUATION_PROGRAM_NV,
+ (struct gl_program *) ctx->TessEvalProgram._Current);
+ }
+ }
+
+ if (ctx->TessCtrlProgram._Current != prevTCP) {
+ new_state |= _NEW_PROGRAM;
+ if (ctx->Driver.BindProgram) {
+ ctx->Driver.BindProgram(ctx, GL_TESS_CONTROL_PROGRAM_NV,
+ (struct gl_program *) ctx->TessCtrlProgram._Current);
+ }
+ }
+
if (ctx->VertexProgram._Current != prevVP) {
new_state |= _NEW_PROGRAM;
if (ctx->Driver.BindProgram) {
@@ -266,8 +312,8 @@ update_program_constants(struct gl_context *ctx)
}
}
- /* Don't handle geometry shaders here. They don't use any state
- * constants.
+ /* Don't handle tessellation and geometry shaders here. They don't use
+ * any state constants.
*/
if (ctx->VertexProgram._Current) {
diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp
index 800720b798e..af89d2c1cfb 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -563,6 +563,8 @@ const struct function common_desktop_functions_possible[] = {
/* GL 4.0 */
{ "glMinSampleShading", 40, -1 },
+ { "glPatchParameteri", 40, -1 },
+ { "glPatchParameterfv", 40, -1 },
{ "glBlendEquationi", 40, -1 },
{ "glBlendEquationSeparatei", 40, -1 },
{ "glBlendFunci", 40, -1 },
@@ -930,6 +932,11 @@ const struct function common_desktop_functions_possible[] = {
/* GL_EXT_polygon_offset_clamp */
{ "glPolygonOffsetClampEXT", 11, -1 },
+
+ /* GL_ARB_get_texture_sub_image */
+ { "glGetTextureSubImage", 20, -1 },
+ { "glGetCompressedTextureSubImage", 20, -1 },
+
{ NULL, 0, -1 }
};
@@ -1424,6 +1431,16 @@ const struct function gl_core_functions_possible[] = {
/* GL 3.2 */
{ "glFramebufferTexture", 32, -1 },
+ /* GL 4.0 */
+ { "glGetSubroutineUniformLocation", 40, -1 },
+ { "glGetSubroutineIndex", 40, -1 },
+ { "glGetActiveSubroutineUniformiv", 40, -1 },
+ { "glGetActiveSubroutineUniformName", 40, -1 },
+ { "glGetActiveSubroutineName", 40, -1 },
+ { "glUniformSubroutinesuiv", 40, -1 },
+ { "glGetUniformSubroutineuiv", 40, -1 },
+ { "glGetProgramStageiv", 40, -1 },
+
/* GL 4.3 */
{ "glIsRenderbuffer", 43, -1 },
{ "glBindRenderbuffer", 43, -1 },
@@ -1562,16 +1579,6 @@ const struct function gl_core_functions_possible[] = {
{ "glUniformMatrix4x2dv", 40, -1 },
{ "glUniformMatrix4x3dv", 40, -1 },
{ "glGetUniformdv", 43, -1 },
-// { "glGetSubroutineUniformLocation", 43, -1 }, // XXX: Add to xml
-// { "glGetSubroutineIndex", 43, -1 }, // XXX: Add to xml
-// { "glGetActiveSubroutineUniformiv", 43, -1 }, // XXX: Add to xml
-// { "glGetActiveSubroutineUniformName", 43, -1 }, // XXX: Add to xml
-// { "glGetActiveSubroutineName", 43, -1 }, // XXX: Add to xml
-// { "glUniformSubroutinesuiv", 43, -1 }, // XXX: Add to xml
-// { "glGetUniformSubroutineuiv", 43, -1 }, // XXX: Add to xml
-// { "glGetProgramStageiv", 43, -1 }, // XXX: Add to xml
-// { "glPatchParameteri", 43, -1 }, // XXX: Add to xml
-// { "glPatchParameterfv", 43, -1 }, // XXX: Add to xml
{ "glBindTransformFeedback", 43, -1 },
{ "glDeleteTransformFeedbacks", 43, -1 },
diff --git a/src/mesa/main/tests/enum_strings.cpp b/src/mesa/main/tests/enum_strings.cpp
index dc5fe751a86..8218cc9a685 100644
--- a/src/mesa/main/tests/enum_strings.cpp
+++ b/src/mesa/main/tests/enum_strings.cpp
@@ -39,13 +39,13 @@ TEST(EnumStrings, LookUpByNumber)
{
for (unsigned i = 0; everything[i].name != NULL; i++) {
EXPECT_STREQ(everything[i].name,
- _mesa_lookup_enum_by_nr(everything[i].value));
+ _mesa_enum_to_string(everything[i].value));
}
}
TEST(EnumStrings, LookUpUnknownNumber)
{
- EXPECT_STRCASEEQ("0xEEEE", _mesa_lookup_enum_by_nr(0xEEEE));
+ EXPECT_STRCASEEQ("0xEEEE", _mesa_enum_to_string(0xEEEE));
}
/* Please type the name and the value. This makes it easier to detect
@@ -1731,6 +1731,10 @@ const struct enum_info everything[] = {
{ 0x8DDF, "GL_MAX_GEOMETRY_UNIFORM_COMPONENTS" },
{ 0x8DE0, "GL_MAX_GEOMETRY_OUTPUT_VERTICES" },
{ 0x8DE1, "GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS" },
+ { 0x8DE5, "GL_ACTIVE_SUBROUTINES" },
+ { 0x8DE6, "GL_ACTIVE_SUBROUTINE_UNIFORMS" },
+ { 0x8DE7, "GL_MAX_SUBROUTINES" },
+ { 0x8DE8, "GL_MAX_SUBROUTINE_UNIFORM_LOCATIONS" },
{ 0x8DF0, "GL_LOW_FLOAT" },
{ 0x8DF1, "GL_MEDIUM_FLOAT" },
{ 0x8DF2, "GL_HIGH_FLOAT" },
@@ -1759,6 +1763,11 @@ const struct enum_info everything[] = {
{ 0x8E44, "GL_TEXTURE_SWIZZLE_B" },
{ 0x8E45, "GL_TEXTURE_SWIZZLE_A" },
{ 0x8E46, "GL_TEXTURE_SWIZZLE_RGBA" },
+ { 0x8E47, "GL_ACTIVE_SUBROUTINE_UNIFORM_LOCATIONS" },
+ { 0x8E48, "GL_ACTIVE_SUBROUTINE_MAX_LENGTH" },
+ { 0x8E49, "GL_ACTIVE_SUBROUTINE_UNIFORM_MAX_LENGTH" },
+ { 0x8E4A, "GL_NUM_COMPATIBLE_SUBROUTINES" },
+ { 0x8E4B, "GL_COMPATIBLE_SUBROUTINES" },
{ 0x8E4C, "GL_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION" },
{ 0x8E4D, "GL_FIRST_VERTEX_CONVENTION" },
{ 0x8E4E, "GL_LAST_VERTEX_CONVENTION" },
diff --git a/src/mesa/main/texenv.c b/src/mesa/main/texenv.c
index 3edafc0f776..091922161c5 100644
--- a/src/mesa/main/texenv.c
+++ b/src/mesa/main/texenv.c
@@ -42,7 +42,7 @@
#define TE_ERROR(errCode, msg, value) \
- _mesa_error(ctx, errCode, msg, _mesa_lookup_enum_by_nr(value));
+ _mesa_error(ctx, errCode, msg, _mesa_enum_to_string(value));
/** Set texture env mode */
@@ -482,16 +482,16 @@ _mesa_TexEnvfv( GLenum target, GLenum pname, const GLfloat *param )
}
else {
_mesa_error(ctx, GL_INVALID_ENUM, "glTexEnv(target=%s)",
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return;
}
if (MESA_VERBOSE&(VERBOSE_API|VERBOSE_TEXTURE))
_mesa_debug(ctx, "glTexEnv %s %s %.1f(%s) ...\n",
- _mesa_lookup_enum_by_nr(target),
- _mesa_lookup_enum_by_nr(pname),
+ _mesa_enum_to_string(target),
+ _mesa_enum_to_string(pname),
*param,
- _mesa_lookup_enum_by_nr((GLenum) iparam0));
+ _mesa_enum_to_string((GLenum) iparam0));
/* Tell device driver about the new texture environment */
if (ctx->Driver.TexEnv) {
diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c
index 3c4baca7026..f4d17e1bdb5 100644
--- a/src/mesa/main/texformat.c
+++ b/src/mesa/main/texformat.c
@@ -847,7 +847,7 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum target,
}
_mesa_problem(ctx, "unexpected format %s in _mesa_choose_tex_format()",
- _mesa_lookup_enum_by_nr(internalFormat));
+ _mesa_enum_to_string(internalFormat));
return MESA_FORMAT_NONE;
}
diff --git a/src/mesa/main/texgen.c b/src/mesa/main/texgen.c
index 41e428b69e7..24ba295746a 100644
--- a/src/mesa/main/texgen.c
+++ b/src/mesa/main/texgen.c
@@ -76,10 +76,10 @@ _mesa_TexGenfv( GLenum coord, GLenum pname, const GLfloat *params )
if (MESA_VERBOSE&(VERBOSE_API|VERBOSE_TEXTURE))
_mesa_debug(ctx, "glTexGen %s %s %.1f(%s)...\n",
- _mesa_lookup_enum_by_nr(coord),
- _mesa_lookup_enum_by_nr(pname),
+ _mesa_enum_to_string(coord),
+ _mesa_enum_to_string(pname),
*params,
- _mesa_lookup_enum_by_nr((GLenum) (GLint) *params));
+ _mesa_enum_to_string((GLenum) (GLint) *params));
if (ctx->Texture.CurrentUnit >= ctx->Const.MaxTextureCoordUnits) {
_mesa_error(ctx, GL_INVALID_OPERATION, "glTexGen(current unit)");
diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index 92b4d6795c6..c0ccce3d50e 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -75,12 +75,11 @@ type_needs_clamping(GLenum type)
*/
static void
get_tex_depth(struct gl_context *ctx, GLuint dimensions,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLint depth,
GLenum format, GLenum type, GLvoid *pixels,
struct gl_texture_image *texImage)
{
- const GLint width = texImage->Width;
- GLint height = texImage->Height;
- GLint depth = texImage->Depth;
GLint img, row;
GLfloat *depthRow = malloc(width * sizeof(GLfloat));
@@ -94,14 +93,15 @@ get_tex_depth(struct gl_context *ctx, GLuint dimensions,
height = 1;
}
+ assert(zoffset + depth <= texImage->Depth);
for (img = 0; img < depth; img++) {
GLubyte *srcMap;
GLint srcRowStride;
/* map src texture buffer */
- ctx->Driver.MapTextureImage(ctx, texImage, img,
- 0, 0, width, height, GL_MAP_READ_BIT,
- &srcMap, &srcRowStride);
+ ctx->Driver.MapTextureImage(ctx, texImage, zoffset + img,
+ xoffset, yoffset, width, height,
+ GL_MAP_READ_BIT, &srcMap, &srcRowStride);
if (srcMap) {
for (row = 0; row < height; row++) {
@@ -113,7 +113,7 @@ get_tex_depth(struct gl_context *ctx, GLuint dimensions,
_mesa_pack_depth_span(ctx, width, dest, type, depthRow, &ctx->Pack);
}
- ctx->Driver.UnmapTextureImage(ctx, texImage, img);
+ ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + img);
}
else {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage");
@@ -130,26 +130,26 @@ get_tex_depth(struct gl_context *ctx, GLuint dimensions,
*/
static void
get_tex_depth_stencil(struct gl_context *ctx, GLuint dimensions,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLint depth,
GLenum format, GLenum type, GLvoid *pixels,
struct gl_texture_image *texImage)
{
- const GLint width = texImage->Width;
- const GLint height = texImage->Height;
- const GLint depth = texImage->Depth;
GLint img, row;
assert(format == GL_DEPTH_STENCIL);
assert(type == GL_UNSIGNED_INT_24_8 ||
type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV);
+ assert(zoffset + depth <= texImage->Depth);
for (img = 0; img < depth; img++) {
GLubyte *srcMap;
GLint rowstride;
/* map src texture buffer */
- ctx->Driver.MapTextureImage(ctx, texImage, img,
- 0, 0, width, height, GL_MAP_READ_BIT,
- &srcMap, &rowstride);
+ ctx->Driver.MapTextureImage(ctx, texImage, zoffset + img,
+ xoffset, yoffset, width, height,
+ GL_MAP_READ_BIT, &srcMap, &rowstride);
if (srcMap) {
for (row = 0; row < height; row++) {
@@ -166,7 +166,7 @@ get_tex_depth_stencil(struct gl_context *ctx, GLuint dimensions,
}
}
- ctx->Driver.UnmapTextureImage(ctx, texImage, img);
+ ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + img);
}
else {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage");
@@ -180,12 +180,11 @@ get_tex_depth_stencil(struct gl_context *ctx, GLuint dimensions,
*/
static void
get_tex_stencil(struct gl_context *ctx, GLuint dimensions,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLint depth,
GLenum format, GLenum type, GLvoid *pixels,
struct gl_texture_image *texImage)
{
- const GLint width = texImage->Width;
- const GLint height = texImage->Height;
- const GLint depth = texImage->Depth;
GLint img, row;
assert(format == GL_STENCIL_INDEX);
@@ -195,8 +194,9 @@ get_tex_stencil(struct gl_context *ctx, GLuint dimensions,
GLint rowstride;
/* map src texture buffer */
- ctx->Driver.MapTextureImage(ctx, texImage, img,
- 0, 0, width, height, GL_MAP_READ_BIT,
+ ctx->Driver.MapTextureImage(ctx, texImage, zoffset + img,
+ xoffset, yoffset, width, height,
+ GL_MAP_READ_BIT,
&srcMap, &rowstride);
if (srcMap) {
@@ -211,7 +211,7 @@ get_tex_stencil(struct gl_context *ctx, GLuint dimensions,
dest);
}
- ctx->Driver.UnmapTextureImage(ctx, texImage, img);
+ ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + img);
}
else {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage");
@@ -226,22 +226,22 @@ get_tex_stencil(struct gl_context *ctx, GLuint dimensions,
*/
static void
get_tex_ycbcr(struct gl_context *ctx, GLuint dimensions,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLint depth,
GLenum format, GLenum type, GLvoid *pixels,
struct gl_texture_image *texImage)
{
- const GLint width = texImage->Width;
- const GLint height = texImage->Height;
- const GLint depth = texImage->Depth;
GLint img, row;
+ assert(zoffset + depth <= texImage->Depth);
for (img = 0; img < depth; img++) {
GLubyte *srcMap;
GLint rowstride;
/* map src texture buffer */
- ctx->Driver.MapTextureImage(ctx, texImage, img,
- 0, 0, width, height, GL_MAP_READ_BIT,
- &srcMap, &rowstride);
+ ctx->Driver.MapTextureImage(ctx, texImage, zoffset + img,
+ xoffset, yoffset, width, height,
+ GL_MAP_READ_BIT, &srcMap, &rowstride);
if (srcMap) {
for (row = 0; row < height; row++) {
@@ -264,7 +264,7 @@ get_tex_ycbcr(struct gl_context *ctx, GLuint dimensions,
}
}
- ctx->Driver.UnmapTextureImage(ctx, texImage, img);
+ ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + img);
}
else {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage");
@@ -279,6 +279,8 @@ get_tex_ycbcr(struct gl_context *ctx, GLuint dimensions,
*/
static void
get_tex_rgba_compressed(struct gl_context *ctx, GLuint dimensions,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLint depth,
GLenum format, GLenum type, GLvoid *pixels,
struct gl_texture_image *texImage,
GLbitfield transferOps)
@@ -287,9 +289,6 @@ get_tex_rgba_compressed(struct gl_context *ctx, GLuint dimensions,
const mesa_format texFormat =
_mesa_get_srgb_format_linear(texImage->TexFormat);
const GLenum baseFormat = _mesa_get_format_base_format(texFormat);
- const GLuint width = texImage->Width;
- const GLuint height = texImage->Height;
- const GLuint depth = texImage->Depth;
GLfloat *tempImage, *tempSlice;
GLuint slice;
int srcStride, dstStride;
@@ -312,15 +311,15 @@ get_tex_rgba_compressed(struct gl_context *ctx, GLuint dimensions,
tempSlice = tempImage + slice * 4 * width * height;
- ctx->Driver.MapTextureImage(ctx, texImage, slice,
- 0, 0, width, height,
+ ctx->Driver.MapTextureImage(ctx, texImage, zoffset + slice,
+ xoffset, yoffset, width, height,
GL_MAP_READ_BIT,
&srcMap, &srcRowStride);
if (srcMap) {
_mesa_decompress_image(texFormat, width, height,
srcMap, srcRowStride, tempSlice);
- ctx->Driver.UnmapTextureImage(ctx, texImage, slice);
+ ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + slice);
}
else {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage");
@@ -409,6 +408,8 @@ _mesa_base_pack_format(GLenum format)
*/
static void
get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLint depth,
GLenum format, GLenum type, GLvoid *pixels,
struct gl_texture_image *texImage,
GLbitfield transferOps)
@@ -416,9 +417,6 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions,
/* don't want to apply sRGB -> RGB conversion here so override the format */
const mesa_format texFormat =
_mesa_get_srgb_format_linear(texImage->TexFormat);
- const GLuint width = texImage->Width;
- GLuint height = texImage->Height;
- GLuint depth = texImage->Depth;
GLuint img;
GLboolean dst_is_integer;
uint32_t dst_format;
@@ -430,6 +428,8 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions,
if (texImage->TexObject->Target == GL_TEXTURE_1D_ARRAY) {
depth = height;
height = 1;
+ zoffset = yoffset;
+ yoffset = 0;
}
/* Depending on the base format involved we may need to apply a rebase
@@ -449,7 +449,8 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions,
rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_W;
- } else if (texImage->_BaseFormat != _mesa_get_format_base_format(texFormat)) {
+ } else if (texImage->_BaseFormat !=
+ _mesa_get_format_base_format(texFormat)) {
needsRebase =
_mesa_compute_rgba2base2rgba_component_mapping(texImage->_BaseFormat,
rebaseSwizzle);
@@ -480,8 +481,9 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions,
uint32_t src_format;
/* map src texture buffer */
- ctx->Driver.MapTextureImage(ctx, texImage, img,
- 0, 0, width, height, GL_MAP_READ_BIT,
+ ctx->Driver.MapTextureImage(ctx, texImage, zoffset + img,
+ xoffset, yoffset, width, height,
+ GL_MAP_READ_BIT,
&srcMap, &rowstride);
if (!srcMap) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage");
@@ -530,8 +532,8 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions,
/* If we had to rebase, we have already handled that */
needsRebase = false;
- /* If we were lucky and our RGBA conversion matches the dst format, then
- * we are done.
+ /* If we were lucky and our RGBA conversion matches the dst format,
+ * then we are done.
*/
if (!need_convert)
goto do_swap;
@@ -568,7 +570,7 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions,
}
/* Unmap the src texture buffer */
- ctx->Driver.UnmapTextureImage(ctx, texImage, img);
+ ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + img);
}
done:
@@ -583,6 +585,8 @@ done:
*/
static void
get_tex_rgba(struct gl_context *ctx, GLuint dimensions,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLint depth,
GLenum format, GLenum type, GLvoid *pixels,
struct gl_texture_image *texImage)
{
@@ -604,11 +608,17 @@ get_tex_rgba(struct gl_context *ctx, GLuint dimensions,
}
if (_mesa_is_format_compressed(texImage->TexFormat)) {
- get_tex_rgba_compressed(ctx, dimensions, format, type,
+ get_tex_rgba_compressed(ctx, dimensions,
+ xoffset, yoffset, zoffset,
+ width, height, depth,
+ format, type,
pixels, texImage, transferOps);
}
else {
- get_tex_rgba_uncompressed(ctx, dimensions, format, type,
+ get_tex_rgba_uncompressed(ctx, dimensions,
+ xoffset, yoffset, zoffset,
+ width, height, depth,
+ format, type,
pixels, texImage, transferOps);
}
}
@@ -619,8 +629,10 @@ get_tex_rgba(struct gl_context *ctx, GLuint dimensions,
* \return GL_TRUE if done, GL_FALSE otherwise
*/
static GLboolean
-get_tex_memcpy(struct gl_context *ctx, GLenum format, GLenum type,
- GLvoid *pixels,
+get_tex_memcpy(struct gl_context *ctx,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLint depth,
+ GLenum format, GLenum type, GLvoid *pixels,
struct gl_texture_image *texImage)
{
const GLenum target = texImage->TexObject->Target;
@@ -642,20 +654,25 @@ get_tex_memcpy(struct gl_context *ctx, GLenum format, GLenum type,
ctx->Pack.SwapBytes);
}
+ if (depth > 1) {
+ /* only a single slice is supported at this time */
+ memCopy = GL_FALSE;
+ }
+
if (memCopy) {
const GLuint bpp = _mesa_get_format_bytes(texImage->TexFormat);
- const GLint bytesPerRow = texImage->Width * bpp;
+ const GLint bytesPerRow = width * bpp;
GLubyte *dst =
- _mesa_image_address2d(&ctx->Pack, pixels, texImage->Width,
- texImage->Height, format, type, 0, 0);
+ _mesa_image_address2d(&ctx->Pack, pixels, width, height,
+ format, type, 0, 0);
const GLint dstRowStride =
- _mesa_image_row_stride(&ctx->Pack, texImage->Width, format, type);
+ _mesa_image_row_stride(&ctx->Pack, width, format, type);
GLubyte *src;
GLint srcRowStride;
/* map src texture buffer */
- ctx->Driver.MapTextureImage(ctx, texImage, 0,
- 0, 0, texImage->Width, texImage->Height,
+ ctx->Driver.MapTextureImage(ctx, texImage, zoffset,
+ xoffset, yoffset, width, height,
GL_MAP_READ_BIT, &src, &srcRowStride);
if (src) {
@@ -664,7 +681,7 @@ get_tex_memcpy(struct gl_context *ctx, GLenum format, GLenum type,
}
else {
GLuint row;
- for (row = 0; row < texImage->Height; row++) {
+ for (row = 0; row < height; row++) {
memcpy(dst, src, bytesPerRow);
dst += dstRowStride;
src += srcRowStride;
@@ -672,7 +689,7 @@ get_tex_memcpy(struct gl_context *ctx, GLenum format, GLenum type,
}
/* unmap src texture buffer */
- ctx->Driver.UnmapTextureImage(ctx, texImage, 0);
+ ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset);
}
else {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage");
@@ -684,15 +701,17 @@ get_tex_memcpy(struct gl_context *ctx, GLenum format, GLenum type,
/**
- * This is the software fallback for Driver.GetTexImage().
+ * This is the software fallback for Driver.GetTexSubImage().
* All error checking will have been done before this routine is called.
* We'll call ctx->Driver.MapTextureImage() to access the data, then
* unmap with ctx->Driver.UnmapTextureImage().
*/
void
-_mesa_GetTexImage_sw(struct gl_context *ctx,
- GLenum format, GLenum type, GLvoid *pixels,
- struct gl_texture_image *texImage)
+_mesa_GetTexSubImage_sw(struct gl_context *ctx,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLint depth,
+ GLenum format, GLenum type, GLvoid *pixels,
+ struct gl_texture_image *texImage)
{
const GLuint dimensions =
_mesa_get_texture_dimensions(texImage->TexObject->Target);
@@ -720,23 +739,30 @@ _mesa_GetTexImage_sw(struct gl_context *ctx,
pixels = ADD_POINTERS(buf, pixels);
}
- if (get_tex_memcpy(ctx, format, type, pixels, texImage)) {
+ if (get_tex_memcpy(ctx, xoffset, yoffset, zoffset, width, height, depth,
+ format, type, pixels, texImage)) {
/* all done */
}
else if (format == GL_DEPTH_COMPONENT) {
- get_tex_depth(ctx, dimensions, format, type, pixels, texImage);
+ get_tex_depth(ctx, dimensions, xoffset, yoffset, zoffset,
+ width, height, depth, format, type, pixels, texImage);
}
else if (format == GL_DEPTH_STENCIL_EXT) {
- get_tex_depth_stencil(ctx, dimensions, format, type, pixels, texImage);
+ get_tex_depth_stencil(ctx, dimensions, xoffset, yoffset, zoffset,
+ width, height, depth, format, type, pixels,
+ texImage);
}
else if (format == GL_STENCIL_INDEX) {
- get_tex_stencil(ctx, dimensions, format, type, pixels, texImage);
+ get_tex_stencil(ctx, dimensions, xoffset, yoffset, zoffset,
+ width, height, depth, format, type, pixels, texImage);
}
else if (format == GL_YCBCR_MESA) {
- get_tex_ycbcr(ctx, dimensions, format, type, pixels, texImage);
+ get_tex_ycbcr(ctx, dimensions, xoffset, yoffset, zoffset,
+ width, height, depth, format, type, pixels, texImage);
}
else {
- get_tex_rgba(ctx, dimensions, format, type, pixels, texImage);
+ get_tex_rgba(ctx, dimensions, xoffset, yoffset, zoffset,
+ width, height, depth, format, type, pixels, texImage);
}
if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
@@ -747,13 +773,16 @@ _mesa_GetTexImage_sw(struct gl_context *ctx,
/**
- * This is the software fallback for Driver.GetCompressedTexImage().
+ * This is the software fallback for Driver.GetCompressedTexSubImage().
* All error checking will have been done before this routine is called.
*/
void
-_mesa_GetCompressedTexImage_sw(struct gl_context *ctx,
- struct gl_texture_image *texImage,
- GLvoid *img)
+_mesa_GetCompressedTexSubImage_sw(struct gl_context *ctx,
+ struct gl_texture_image *texImage,
+ GLint xoffset, GLint yoffset,
+ GLint zoffset, GLsizei width,
+ GLint height, GLint depth,
+ GLvoid *img)
{
const GLuint dimensions =
_mesa_get_texture_dimensions(texImage->TexObject->Target);
@@ -762,10 +791,8 @@ _mesa_GetCompressedTexImage_sw(struct gl_context *ctx,
GLubyte *dest;
_mesa_compute_compressed_pixelstore(dimensions, texImage->TexFormat,
- texImage->Width, texImage->Height,
- texImage->Depth,
- &ctx->Pack,
- &store);
+ width, height, depth,
+ &ctx->Pack, &store);
if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
/* pack texture image into a PBO */
@@ -791,8 +818,8 @@ _mesa_GetCompressedTexImage_sw(struct gl_context *ctx,
GLubyte *src;
/* map src texture buffer */
- ctx->Driver.MapTextureImage(ctx, texImage, slice,
- 0, 0, texImage->Width, texImage->Height,
+ ctx->Driver.MapTextureImage(ctx, texImage, zoffset + slice,
+ xoffset, yoffset, width, height,
GL_MAP_READ_BIT, &src, &srcRowStride);
if (src) {
@@ -803,10 +830,11 @@ _mesa_GetCompressedTexImage_sw(struct gl_context *ctx,
src += srcRowStride;
}
- ctx->Driver.UnmapTextureImage(ctx, texImage, slice);
+ ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + slice);
/* Advance to next slice */
- dest += store.TotalBytesPerRow * (store.TotalRowsPerSlice - store.CopyRowsPerSlice);
+ dest += store.TotalBytesPerRow * (store.TotalRowsPerSlice -
+ store.CopyRowsPerSlice);
} else {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetCompresssedTexImage");
@@ -863,29 +891,299 @@ legal_getteximage_target(struct gl_context *ctx, GLenum target, bool dsa)
/**
- * Do error checking for a glGetTex(ture)Image() call.
- * \return GL_TRUE if any error, GL_FALSE if no errors.
+ * Wrapper for _mesa_select_tex_image() which can handle target being
+ * GL_TEXTURE_CUBE_MAP_ARB in which case we use zoffset to select a cube face.
+ * This can happen for glGetTextureImage and glGetTextureSubImage (DSA
+ * functions).
*/
-static GLboolean
+static struct gl_texture_image *
+select_tex_image(const struct gl_texture_object *texObj, GLenum target,
+ GLint level, GLint zoffset)
+{
+ assert(level >= 0);
+ assert(level < MAX_TEXTURE_LEVELS);
+ if (target == GL_TEXTURE_CUBE_MAP) {
+ assert(zoffset >= 0);
+ assert(zoffset < 6);
+ target = GL_TEXTURE_CUBE_MAP_POSITIVE_X + zoffset;
+ }
+ return _mesa_select_tex_image(texObj, target, level);
+}
+
+
+/**
+ * Error-check the offset and size arguments to
+ * glGet[Compressed]TextureSubImage(). Also checks if the specified
+ * texture image is missing.
+ * \return true if error, false if no error.
+ */
+static bool
+dimensions_error_check(struct gl_context *ctx,
+ struct gl_texture_object *texObj,
+ GLenum target, GLint level,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLsizei depth,
+ const char *caller)
+{
+ const struct gl_texture_image *texImage;
+ int i;
+
+ if (xoffset < 0) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s(xoffset = %d)", caller, xoffset);
+ return true;
+ }
+
+ if (yoffset < 0) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s(yoffset = %d)", caller, yoffset);
+ return true;
+ }
+
+ if (zoffset < 0) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s(zoffset = %d)", caller, zoffset);
+ return true;
+ }
+
+ if (width < 0) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s(width = %d)", caller, width);
+ return true;
+ }
+
+ if (height < 0) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s(height = %d)", caller, height);
+ return true;
+ }
+
+ if (depth < 0) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s(depth = %d)", caller, depth);
+ return true;
+ }
+
+ /* do special per-target checks */
+ switch (target) {
+ case GL_TEXTURE_1D:
+ if (yoffset != 0) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "%s(1D, yoffset = %d)", caller, yoffset);
+ return true;
+ }
+ if (height > 1) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "%s(1D, height = %d)", caller, height);
+ return true;
+ }
+ /* fall-through */
+ case GL_TEXTURE_1D_ARRAY:
+ case GL_TEXTURE_2D:
+ case GL_TEXTURE_RECTANGLE:
+ if (zoffset != 0) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "%s(zoffset = %d)", caller, zoffset);
+ return true;
+ }
+ if (depth > 1) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "%s(depth = %d)", caller, depth);
+ return true;
+ }
+ break;
+ case GL_TEXTURE_CUBE_MAP:
+ /* Non-array cube maps are special because we have a gl_texture_image
+ * per face.
+ */
+ if (zoffset + depth > 6) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "%s(zoffset + depth = %d)", caller, zoffset + depth);
+ return true;
+ }
+ /* check that the range of faces exist */
+ for (i = 0; i < depth; i++) {
+ GLenum face = GL_TEXTURE_CUBE_MAP_POSITIVE_X + zoffset + i;
+ if (!_mesa_select_tex_image(texObj, face, level)) {
+ /* non-existant face */
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "%s(missing cube face)", caller);
+ return true;
+ }
+ }
+ break;
+ default:
+ ; /* nothing */
+ }
+
+ texImage = select_tex_image(texObj, target, level, zoffset);
+ if (!texImage) {
+ /* missing texture image */
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s(missing image)", caller);
+ return true;
+ }
+
+ if (xoffset + width > texImage->Width) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "%s(xoffset %d + width %d > %u)",
+ caller, xoffset, width, texImage->Width);
+ return true;
+ }
+
+ if (yoffset + height > texImage->Height) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "%s(yoffset %d + height %d > %u)",
+ caller, yoffset, height, texImage->Height);
+ return true;
+ }
+
+ if (target != GL_TEXTURE_CUBE_MAP) {
+ /* Cube map error checking was done above */
+ if (zoffset + depth > texImage->Depth) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "%s(zoffset %d + depth %d > %u)",
+ caller, zoffset, depth, texImage->Depth);
+ return true;
+ }
+ }
+
+ /* Extra checks for compressed textures */
+ {
+ GLuint bw, bh;
+ _mesa_get_format_block_size(texImage->TexFormat, &bw, &bh);
+ if (bw > 1 || bh > 1) {
+ /* offset must be multiple of block size */
+ if (xoffset % bw != 0) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "%s(xoffset = %d)", caller, xoffset);
+ return true;
+ }
+ if (target != GL_TEXTURE_1D && target != GL_TEXTURE_1D_ARRAY) {
+ if (yoffset % bh != 0) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "%s(yoffset = %d)", caller, yoffset);
+ return true;
+ }
+ }
+
+ /* The size must be a multiple of bw x bh, or we must be using a
+ * offset+size that exactly hits the edge of the image.
+ */
+ if ((width % bw != 0) &&
+ (xoffset + width != (GLint) texImage->Width)) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "%s(width = %d)", caller, width);
+ return true;
+ }
+
+ if ((height % bh != 0) &&
+ (yoffset + height != (GLint) texImage->Height)) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "%s(height = %d)", caller, height);
+ return true;
+ }
+ }
+ }
+
+ if (width == 0 || height == 0 || depth == 0) {
+ /* Not an error, but nothing to do. Return 'true' so that the
+ * caller simply returns.
+ */
+ return true;
+ }
+
+ return false;
+}
+
+
+/**
+ * Do PBO-related error checking for getting uncompressed images.
+ * \return true if there was an error (or the GetTexImage is to be a no-op)
+ */
+static bool
+pbo_error_check(struct gl_context *ctx, GLenum target,
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLenum format, GLenum type, GLsizei clientMemSize,
+ GLvoid *pixels,
+ const char *caller)
+{
+ const GLuint dimensions = (target == GL_TEXTURE_3D) ? 3 : 2;
+
+ if (!_mesa_validate_pbo_access(dimensions, &ctx->Pack, width, height, depth,
+ format, type, clientMemSize, pixels)) {
+ if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "%s(out of bounds PBO access)", caller);
+ } else {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "%s(out of bounds access: bufSize (%d) is too small)",
+ caller, clientMemSize);
+ }
+ return true;
+ }
+
+ if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
+ /* PBO should not be mapped */
+ if (_mesa_check_disallowed_mapping(ctx->Pack.BufferObj)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "%s(PBO is mapped)", caller);
+ return true;
+ }
+ }
+
+ if (!_mesa_is_bufferobj(ctx->Pack.BufferObj) && !pixels) {
+ /* not an error, do nothing */
+ return true;
+ }
+
+ return false;
+}
+
+
+/**
+ * Do error checking for all (non-compressed) get-texture-image functions.
+ * \return true if any error, false if no errors.
+ */
+static bool
getteximage_error_check(struct gl_context *ctx,
- struct gl_texture_image *texImage,
+ struct gl_texture_object *texObj,
GLenum target, GLint level,
- GLenum format, GLenum type, GLsizei clientMemSize,
- GLvoid *pixels, bool dsa)
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLenum format, GLenum type, GLsizei bufSize,
+ GLvoid *pixels, const char *caller)
{
- const GLint maxLevels = _mesa_max_texture_levels(ctx, target);
- const GLuint dimensions = (target == GL_TEXTURE_3D) ? 3 : 2;
- GLenum baseFormat;
- const char *suffix = dsa ? "ture" : "";
+ struct gl_texture_image *texImage;
+ GLenum baseFormat, err;
+ GLint maxLevels;
- assert(texImage);
- assert(maxLevels != 0);
+ assert(texObj);
+
+ if (texObj->Target == 0) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture)", caller);
+ return true;
+ }
+
+ maxLevels = _mesa_max_texture_levels(ctx, target);
if (level < 0 || level >= maxLevels) {
- _mesa_error(ctx, GL_INVALID_VALUE,
- "glGetTex%sImage(level out of range)", suffix);
- return GL_TRUE;
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s(level = %d)", caller, level);
+ return true;
}
+ err = _mesa_error_check_format_and_type(ctx, format, type);
+ if (err != GL_NO_ERROR) {
+ _mesa_error(ctx, err, "%s(format/type)", caller);
+ return true;
+ }
+
+ if (dimensions_error_check(ctx, texObj, target, level,
+ xoffset, yoffset, zoffset,
+ width, height, depth, caller)) {
+ return true;
+ }
+
+ if (pbo_error_check(ctx, target, width, height, depth,
+ format, type, bufSize, pixels, caller)) {
+ return true;
+ }
+
+ texImage = select_tex_image(texObj, target, level, zoffset);
+ assert(texImage);
+
/*
* Format and type checking has been moved up to GetnTexImage and
* GetTextureImage so that it happens before getting the texImage object.
@@ -899,494 +1197,579 @@ getteximage_error_check(struct gl_context *ctx,
if (_mesa_is_color_format(format)
&& !_mesa_is_color_format(baseFormat)) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetTex%sImage(format mismatch)", suffix);
- return GL_TRUE;
+ "%s(format mismatch)", caller);
+ return true;
}
else if (_mesa_is_depth_format(format)
&& !_mesa_is_depth_format(baseFormat)
&& !_mesa_is_depthstencil_format(baseFormat)) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetTex%sImage(format mismatch)", suffix);
- return GL_TRUE;
+ "%s(format mismatch)", caller);
+ return true;
}
else if (_mesa_is_stencil_format(format)
&& !ctx->Extensions.ARB_texture_stencil8) {
_mesa_error(ctx, GL_INVALID_ENUM,
- "glGetTex%sImage(format=GL_STENCIL_INDEX)", suffix);
- return GL_TRUE;
+ "%s(format=GL_STENCIL_INDEX)", caller);
+ return true;
}
else if (_mesa_is_ycbcr_format(format)
&& !_mesa_is_ycbcr_format(baseFormat)) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetTex%sImage(format mismatch)", suffix);
- return GL_TRUE;
+ "%s(format mismatch)", caller);
+ return true;
}
else if (_mesa_is_depthstencil_format(format)
&& !_mesa_is_depthstencil_format(baseFormat)) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetTex%sImage(format mismatch)", suffix);
- return GL_TRUE;
+ "%s(format mismatch)", caller);
+ return true;
}
- else if (!_mesa_is_stencil_format(format) && _mesa_is_enum_format_integer(format) !=
+ else if (!_mesa_is_stencil_format(format) &&
+ _mesa_is_enum_format_integer(format) !=
_mesa_is_format_integer(texImage->TexFormat)) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetTex%sImage(format mismatch)", suffix);
- return GL_TRUE;
+ "%s(format mismatch)", caller);
+ return true;
}
- if (!_mesa_validate_pbo_access(dimensions, &ctx->Pack, texImage->Width,
- texImage->Height, texImage->Depth,
- format, type, clientMemSize, pixels)) {
- if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetTex%sImage(out of bounds PBO access)", suffix);
- } else {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "%s(out of bounds access:"
- " bufSize (%d) is too small)",
- dsa ? "glGetTextureImage" : "glGetnTexImageARB",
- clientMemSize);
- }
- return GL_TRUE;
+ return false;
+}
+
+
+/**
+ * Return the width, height and depth of a texture image.
+ * This function must be resilient to bad parameter values since
+ * this is called before full error checking.
+ */
+static void
+get_texture_image_dims(const struct gl_texture_object *texObj,
+ GLenum target, GLint level,
+ GLsizei *width, GLsizei *height, GLsizei *depth)
+{
+ const struct gl_texture_image *texImage = NULL;
+
+ if (level >= 0 && level < MAX_TEXTURE_LEVELS) {
+ texImage = _mesa_select_tex_image(texObj, target, level);
}
- if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
- /* PBO should not be mapped */
- if (_mesa_check_disallowed_mapping(ctx->Pack.BufferObj)) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetTex%sImage(PBO is mapped)", suffix);
- return GL_TRUE;
+ if (texImage) {
+ *width = texImage->Width;
+ *height = texImage->Height;
+ if (target == GL_TEXTURE_CUBE_MAP) {
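+ /* A cube map is reported as six slices, one per face. */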
+ *depth = 6;
+ }
+ else {
+ *depth = texImage->Depth;
}
}
-
- return GL_FALSE;
+ else {
+ *width = *height = *depth = 0;
+ }
}
/**
- * This is the implementation for glGetnTexImageARB, glGetTextureImage,
- * and glGetTexImage.
- *
- * Requires caller to pass in texImage object because _mesa_GetTextureImage
- * must handle the GL_TEXTURE_CUBE_MAP target.
- *
- * \param target texture target.
+ * Common code for all (uncompressed) get-texture-image functions.
+ * \param texObj the texture object (should not be null)
+ * \param target user-provided target, or 0 for DSA
* \param level image level.
* \param format pixel data format for returned image.
* \param type pixel data type for returned image.
* \param bufSize size of the pixels data buffer.
* \param pixels returned pixel data.
- * \param dsa True when the caller is an ARB_direct_state_access function,
- * false otherwise
+ * \param caller name of calling function
*/
-void
-_mesa_get_texture_image(struct gl_context *ctx,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage, GLenum target,
- GLint level, GLenum format, GLenum type,
- GLsizei bufSize, GLvoid *pixels, bool dsa)
+static void
+get_texture_image(struct gl_context *ctx,
+ struct gl_texture_object *texObj,
+ GLenum target, GLint level,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLint depth,
+ GLenum format, GLenum type,
+ GLvoid *pixels, const char *caller)
{
- assert(texObj);
- assert(texImage);
+ struct gl_texture_image *texImage;
+ unsigned firstFace, numFaces, i;
+ GLint imageStride;
FLUSH_VERTICES(ctx, 0);
- /*
- * Legal target checking has been moved up to GetnTexImage and
- * GetTextureImage so that it can be caught before receiving a NULL
- * texImage object and exiting.
- */
-
- if (getteximage_error_check(ctx, texImage, target, level, format,
- type, bufSize, pixels, dsa)) {
- return;
- }
+ texImage = select_tex_image(texObj, target, level, zoffset);
+ assert(texImage); /* should have been error checked already */
- if (!_mesa_is_bufferobj(ctx->Pack.BufferObj) && !pixels) {
- /* not an error, do nothing */
+ if (_mesa_is_zero_size_texture(texImage)) {
+ /* no image data to return */
return;
}
- if (_mesa_is_zero_size_texture(texImage))
- return;
-
if (MESA_VERBOSE & (VERBOSE_API | VERBOSE_TEXTURE)) {
- _mesa_debug(ctx, "glGetTex%sImage(tex %u) format = %s, w=%d, h=%d,"
+ _mesa_debug(ctx, "%s(tex %u) format = %s, w=%d, h=%d,"
" dstFmt=0x%x, dstType=0x%x\n",
- dsa ? "ture": "",
- texObj->Name,
+ caller, texObj->Name,
_mesa_get_format_name(texImage->TexFormat),
texImage->Width, texImage->Height,
format, type);
}
+ if (target == GL_TEXTURE_CUBE_MAP) {
+ /* Compute stride between cube faces */
+ imageStride = _mesa_image_image_stride(&ctx->Pack, width, height,
+ format, type);
+ firstFace = zoffset;
+ numFaces = depth;
+ zoffset = 0;
+ depth = 1;
+ }
+ else {
+ imageStride = 0;
+ firstFace = _mesa_tex_target_to_face(target);
+ numFaces = 1;
+ }
+
_mesa_lock_texture(ctx, texObj);
- {
- ctx->Driver.GetTexImage(ctx, format, type, pixels, texImage);
+
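+ /* For cube maps this loops over the requested faces; otherwise it
+ * runs once for the selected image.
+ */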
+ for (i = 0; i < numFaces; i++) {
+ texImage = texObj->Image[firstFace + i][level];
+ assert(texImage);
+
+ ctx->Driver.GetTexSubImage(ctx, xoffset, yoffset, zoffset,
+ width, height, depth,
+ format, type, pixels, texImage);
+
+ /* next cube face */
+ pixels = (GLubyte *) pixels + imageStride;
}
+
_mesa_unlock_texture(ctx, texObj);
}
-/**
- * Get texture image. Called by glGetTexImage.
- *
- * \param target texture target.
- * \param level image level.
- * \param format pixel data format for returned image.
- * \param type pixel data type for returned image.
- * \param bufSize size of the pixels data buffer.
- * \param pixels returned pixel data.
- */
+
void GLAPIENTRY
-_mesa_GetnTexImageARB(GLenum target, GLint level, GLenum format,
- GLenum type, GLsizei bufSize, GLvoid *pixels)
+_mesa_GetnTexImageARB(GLenum target, GLint level, GLenum format, GLenum type,
+ GLsizei bufSize, GLvoid *pixels)
{
- struct gl_texture_object *texObj;
- struct gl_texture_image *texImage;
- GLenum err;
GET_CURRENT_CONTEXT(ctx);
+ static const char *caller = "glGetnTexImageARB";
+ GLsizei width, height, depth;
+ struct gl_texture_object *texObj;
- /*
- * This has been moved here because a format/type mismatch can cause a NULL
- * texImage object, which in turn causes the mismatch error to be
- * ignored.
- */
- err = _mesa_error_check_format_and_type(ctx, format, type);
- if (err != GL_NO_ERROR) {
- _mesa_error(ctx, err, "glGetnTexImage(format/type)");
- return;
- }
-
- /*
- * Legal target checking has been moved here to prevent exiting with a NULL
- * texImage object.
- */
if (!legal_getteximage_target(ctx, target, false)) {
- _mesa_error(ctx, GL_INVALID_ENUM, "glGetnTexImage(target=0x%x)",
- target);
+ _mesa_error(ctx, GL_INVALID_ENUM, "%s", caller);
return;
}
texObj = _mesa_get_current_tex_object(ctx, target);
- if (!texObj)
- return;
+ assert(texObj);
+
+ get_texture_image_dims(texObj, target, level, &width, &height, &depth);
- texImage = _mesa_select_tex_image(texObj, target, level);
- if (!texImage)
+ if (getteximage_error_check(ctx, texObj, target, level,
+ 0, 0, 0, width, height, depth,
+ format, type, bufSize, pixels, caller)) {
return;
+ }
- _mesa_get_texture_image(ctx, texObj, texImage, target, level, format, type,
- bufSize, pixels, false);
+ get_texture_image(ctx, texObj, target, level,
+ 0, 0, 0, width, height, depth,
+ format, type, pixels, caller);
}
void GLAPIENTRY
-_mesa_GetTexImage( GLenum target, GLint level, GLenum format,
- GLenum type, GLvoid *pixels )
+_mesa_GetTexImage(GLenum target, GLint level, GLenum format, GLenum type,
+ GLvoid *pixels )
{
- _mesa_GetnTexImageARB(target, level, format, type, INT_MAX, pixels);
+ GET_CURRENT_CONTEXT(ctx);
+ static const char *caller = "glGetTexImage";
+ GLsizei width, height, depth;
+ struct gl_texture_object *texObj;
+
+ if (!legal_getteximage_target(ctx, target, false)) {
+ _mesa_error(ctx, GL_INVALID_ENUM, "%s", caller);
+ return;
+ }
+
+ texObj = _mesa_get_current_tex_object(ctx, target);
+ assert(texObj);
+
+ get_texture_image_dims(texObj, target, level, &width, &height, &depth);
+
+ if (getteximage_error_check(ctx, texObj, target, level,
+ 0, 0, 0, width, height, depth,
+ format, type, INT_MAX, pixels, caller)) {
+ return;
+ }
+
+ get_texture_image(ctx, texObj, target, level,
+ 0, 0, 0, width, height, depth,
+ format, type, pixels, caller);
}
-/**
- * Get texture image.
- *
- * \param texture texture name.
- * \param level image level.
- * \param format pixel data format for returned image.
- * \param type pixel data type for returned image.
- * \param bufSize size of the pixels data buffer.
- * \param pixels returned pixel data.
- */
+
void GLAPIENTRY
-_mesa_GetTextureImage(GLuint texture, GLint level, GLenum format,
- GLenum type, GLsizei bufSize, GLvoid *pixels)
+_mesa_GetTextureImage(GLuint texture, GLint level, GLenum format, GLenum type,
+ GLsizei bufSize, GLvoid *pixels)
{
- struct gl_texture_object *texObj;
- struct gl_texture_image *texImage;
- int i;
- GLint image_stride;
- GLenum err;
GET_CURRENT_CONTEXT(ctx);
+ GLsizei width, height, depth;
+ static const char *caller = "glGetTextureImage";
+ struct gl_texture_object *texObj =
+ _mesa_lookup_texture_err(ctx, texture, caller);
- /*
- * This has been moved here because a format/type mismatch can cause a NULL
- * texImage object, which in turn causes the mismatch error to be
- * ignored.
- */
- err = _mesa_error_check_format_and_type(ctx, format, type);
- if (err != GL_NO_ERROR) {
- _mesa_error(ctx, err, "glGetTextureImage(format/type)");
+ if (!texObj) {
return;
}
- texObj = _mesa_lookup_texture_err(ctx, texture, "glGetTextureImage");
- if (!texObj)
- return;
+ get_texture_image_dims(texObj, texObj->Target, level,
+ &width, &height, &depth);
- /*
- * Legal target checking has been moved here to prevent exiting with a NULL
- * texImage object.
- */
- if (!legal_getteximage_target(ctx, texObj->Target, true)) {
- _mesa_error(ctx, GL_INVALID_ENUM, "glGetTextureImage(target=%s)",
- _mesa_lookup_enum_by_nr(texObj->Target));
+ if (getteximage_error_check(ctx, texObj, texObj->Target, level,
+ 0, 0, 0, width, height, depth,
+ format, type, bufSize, pixels, caller)) {
return;
}
- /* Must handle special case GL_TEXTURE_CUBE_MAP. */
- if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
-
- /* Make sure the texture object is a proper cube.
- * (See texturesubimage in teximage.c for details on why this check is
- * performed.)
- */
- if (!_mesa_cube_level_complete(texObj, level)) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetTextureImage(cube map incomplete)");
- return;
- }
+ get_texture_image(ctx, texObj, texObj->Target, level,
+ 0, 0, 0, width, height, depth,
+ format, type, pixels, caller);
+}
- /* Copy each face. */
- for (i = 0; i < 6; ++i) {
- texImage = texObj->Image[i][level];
- assert(texImage);
- _mesa_get_texture_image(ctx, texObj, texImage, texObj->Target, level,
- format, type, bufSize, pixels, true);
+void GLAPIENTRY
+_mesa_GetTextureSubImage(GLuint texture, GLint level,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLenum format, GLenum type, GLsizei bufSize,
+ void *pixels)
+{
+ GET_CURRENT_CONTEXT(ctx);
+ static const char *caller = "glGetTextureSubImage";
+ struct gl_texture_object *texObj =
+ _mesa_lookup_texture_err(ctx, texture, caller);
- image_stride = _mesa_image_image_stride(&ctx->Pack, texImage->Width,
- texImage->Height, format,
- type);
- pixels = (GLubyte *) pixels + image_stride;
- bufSize -= image_stride;
- }
+ if (!texObj) {
+ return;
}
- else {
- texImage = _mesa_select_tex_image(texObj, texObj->Target, level);
- if (!texImage)
- return;
- _mesa_get_texture_image(ctx, texObj, texImage, texObj->Target, level,
- format, type, bufSize, pixels, true);
+ if (getteximage_error_check(ctx, texObj, texObj->Target, level,
+ xoffset, yoffset, zoffset, width, height, depth,
+ format, type, bufSize, pixels, caller)) {
+ return;
}
+
+ get_texture_image(ctx, texObj, texObj->Target, level,
+ xoffset, yoffset, zoffset, width, height, depth,
+ format, type, pixels, caller);
}
+
+
/**
- * Do error checking for a glGetCompressedTexImage() call.
- * \return GL_TRUE if any error, GL_FALSE if no errors.
+ * Compute the number of bytes which will be written when retrieving
+ * a sub-region of a compressed texture.
*/
-static GLboolean
+static GLsizei
+packed_compressed_size(GLuint dimensions, mesa_format format,
+ GLsizei width, GLsizei height, GLsizei depth,
+ const struct gl_pixelstore_attrib *packing)
+{
+ struct compressed_pixelstore st;
+ GLsizei totalBytes;
+
+ _mesa_compute_compressed_pixelstore(dimensions, format,
+ width, height, depth,
+ packing, &st);
+ totalBytes =
+ (st.CopySlices - 1) * st.TotalRowsPerSlice * st.TotalBytesPerRow +
+ st.SkipBytes +
+ (st.CopyRowsPerSlice - 1) * st.TotalBytesPerRow +
+ st.CopyBytesPerRow;
+
+ return totalBytes;
+}
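
As a quick sanity check on the arithmetic above -- assuming default pack state (GL_PACK_ROW_LENGTH = 0, no GL_PACK_SKIP_*) and a full 16x16 DXT1 image, whose 4x4 blocks are 8 bytes each -- the helper should report four block rows of 32 bytes (illustrative only, not part of the patch):

   /* illustrative only: (1-1)*4*32 + 0 + (4-1)*32 + 32 = 128 bytes */
   GLsizei n = packed_compressed_size(2, MESA_FORMAT_RGB_DXT1,
                                      16, 16, 1, &ctx->Pack);
   assert(n == 128);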
+
+
+/**
+ * Do error checking for getting compressed texture images.
+ * \return true if any error, false if no errors.
+ */
+static bool
getcompressedteximage_error_check(struct gl_context *ctx,
- struct gl_texture_image *texImage,
- GLenum target,
- GLint level, GLsizei clientMemSize,
- GLvoid *img, bool dsa)
+ struct gl_texture_object *texObj,
+ GLenum target, GLint level,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLsizei bufSize, GLvoid *pixels,
+ const char *caller)
{
- const GLint maxLevels = _mesa_max_texture_levels(ctx, target);
- GLuint compressedSize, dimensions;
- const char *suffix = dsa ? "ture" : "";
+ struct gl_texture_image *texImage;
+ GLint maxLevels;
+ GLsizei totalBytes;
+ GLuint dimensions;
- assert(texImage);
+ assert(texObj);
- if (!legal_getteximage_target(ctx, target, dsa)) {
- _mesa_error(ctx, GL_INVALID_ENUM,
- "glGetCompressedTex%sImage(target=%s)", suffix,
- _mesa_lookup_enum_by_nr(target));
- return GL_TRUE;
+ if (texObj->Target == 0) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture)", caller);
+ return true;
}
- assert(maxLevels != 0);
+ maxLevels = _mesa_max_texture_levels(ctx, target);
if (level < 0 || level >= maxLevels) {
_mesa_error(ctx, GL_INVALID_VALUE,
- "glGetCompressedTex%sImage(bad level = %d)", suffix, level);
- return GL_TRUE;
+ "%s(bad level = %d)", caller, level);
+ return true;
+ }
+
+ if (dimensions_error_check(ctx, texObj, target, level,
+ xoffset, yoffset, zoffset,
+ width, height, depth, caller)) {
+ return true;
}
+ texImage = select_tex_image(texObj, target, level, zoffset);
+ assert(texImage);
+
if (!_mesa_is_format_compressed(texImage->TexFormat)) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetCompressedTex%sImage(texture is not compressed)",
- suffix);
- return GL_TRUE;
+ "%s(texture is not compressed)", caller);
+ return true;
}
- compressedSize = _mesa_format_image_size(texImage->TexFormat,
- texImage->Width,
- texImage->Height,
- texImage->Depth);
-
/* Check for invalid pixel storage modes */
- dimensions = _mesa_get_texture_dimensions(texImage->TexObject->Target);
+ dimensions = _mesa_get_texture_dimensions(texObj->Target);
if (!_mesa_compressed_pixel_storage_error_check(ctx, dimensions,
- &ctx->Pack, dsa ?
- "glGetCompressedTextureImage":
- "glGetCompressedTexImage")) {
- return GL_TRUE;
+ &ctx->Pack,
+ caller)) {
+ return true;
}
- if (!_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
- /* do bounds checking on writing to client memory */
- if (clientMemSize < (GLsizei) compressedSize) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "%s(out of bounds access: bufSize (%d) is too small)",
- dsa ? "glGetCompressedTextureImage" :
- "glGetnCompressedTexImageARB", clientMemSize);
- return GL_TRUE;
- }
- } else {
+ /* Compute number of bytes that may be touched in the dest buffer */
+ totalBytes = packed_compressed_size(dimensions, texImage->TexFormat,
+ width, height, depth,
+ &ctx->Pack);
+
+ /* Do dest buffer bounds checking */
+ if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
/* do bounds checking on PBO write */
- if ((const GLubyte *) img + compressedSize >
- (const GLubyte *) ctx->Pack.BufferObj->Size) {
+ if ((GLubyte *) pixels + totalBytes >
+ (GLubyte *) ctx->Pack.BufferObj->Size) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetCompressedTex%sImage(out of bounds PBO access)",
- suffix);
- return GL_TRUE;
+ "%s(out of bounds PBO access)", caller);
+ return true;
}
/* make sure PBO is not mapped */
if (_mesa_check_disallowed_mapping(ctx->Pack.BufferObj)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s(PBO is mapped)", caller);
+ return true;
+ }
+ }
+ else {
+ /* do bounds checking on writing to client memory */
+ if (totalBytes > bufSize) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetCompressedTex%sImage(PBO is mapped)", suffix);
- return GL_TRUE;
+ "%s(out of bounds access: bufSize (%d) is too small)",
+ caller, bufSize);
+ return true;
}
}
- return GL_FALSE;
+ if (!_mesa_is_bufferobj(ctx->Pack.BufferObj) && !pixels) {
+ /* not an error, but do nothing */
+ return true;
+ }
+
+ return false;
}
-/** Implements glGetnCompressedTexImageARB, glGetCompressedTexImage, and
- * glGetCompressedTextureImage.
- *
- * texImage must be passed in because glGetCompressedTexImage must handle the
- * target GL_TEXTURE_CUBE_MAP.
+
+/**
+ * Common helper for all the glGetCompressedTex*Image() functions.
*/
-void
-_mesa_get_compressed_texture_image(struct gl_context *ctx,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage,
- GLenum target, GLint level,
- GLsizei bufSize, GLvoid *pixels,
- bool dsa)
+static void
+get_compressed_texture_image(struct gl_context *ctx,
+ struct gl_texture_object *texObj,
+ GLenum target, GLint level,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLint depth,
+ GLvoid *pixels,
+ const char *caller)
{
- assert(texObj);
- assert(texImage);
+ struct gl_texture_image *texImage;
+ unsigned firstFace, numFaces, i, imageStride;
FLUSH_VERTICES(ctx, 0);
- if (getcompressedteximage_error_check(ctx, texImage, target, level,
- bufSize, pixels, dsa)) {
- return;
- }
-
- if (!_mesa_is_bufferobj(ctx->Pack.BufferObj) && !pixels) {
- /* not an error, do nothing */
- return;
- }
+ texImage = select_tex_image(texObj, target, level, zoffset);
+ assert(texImage); /* should have been error checked already */
if (_mesa_is_zero_size_texture(texImage))
return;
if (MESA_VERBOSE & (VERBOSE_API | VERBOSE_TEXTURE)) {
_mesa_debug(ctx,
- "glGetCompressedTex%sImage(tex %u) format = %s, w=%d, h=%d\n",
- dsa ? "ture" : "", texObj->Name,
+ "%s(tex %u) format = %s, w=%d, h=%d\n",
+ caller, texObj->Name,
_mesa_get_format_name(texImage->TexFormat),
texImage->Width, texImage->Height);
}
+ if (target == GL_TEXTURE_CUBE_MAP) {
+ struct compressed_pixelstore store;
+
+ /* Compute image stride between cube faces */
+ _mesa_compute_compressed_pixelstore(2, texImage->TexFormat,
+ width, height, depth,
+ &ctx->Pack, &store);
+ imageStride = store.TotalBytesPerRow * store.TotalRowsPerSlice;
+
+ firstFace = zoffset;
+ numFaces = depth;
+ zoffset = 0;
+ depth = 1;
+ }
+ else {
+ imageStride = 0;
+ firstFace = _mesa_tex_target_to_face(target);
+ numFaces = 1;
+ }
+
_mesa_lock_texture(ctx, texObj);
- {
- ctx->Driver.GetCompressedTexImage(ctx, texImage, pixels);
+
+ for (i = 0; i < numFaces; i++) {
+ texImage = texObj->Image[firstFace + i][level];
+ assert(texImage);
+
+ ctx->Driver.GetCompressedTexSubImage(ctx, texImage,
+ xoffset, yoffset, zoffset,
+ width, height, depth, pixels);
+
+ /* next cube face */
+ pixels = (GLubyte *) pixels + imageStride;
}
+
_mesa_unlock_texture(ctx, texObj);
}
+
void GLAPIENTRY
_mesa_GetnCompressedTexImageARB(GLenum target, GLint level, GLsizei bufSize,
- GLvoid *img)
+ GLvoid *pixels)
{
- struct gl_texture_object *texObj;
- struct gl_texture_image *texImage;
GET_CURRENT_CONTEXT(ctx);
+ static const char *caller = "glGetnCompressedTexImageARB";
+ GLsizei width, height, depth;
+ struct gl_texture_object *texObj;
- texObj = _mesa_get_current_tex_object(ctx, target);
- if (!texObj)
+ if (!legal_getteximage_target(ctx, target, false)) {
+ _mesa_error(ctx, GL_INVALID_ENUM, "%s", caller);
return;
+ }
- texImage = _mesa_select_tex_image(texObj, target, level);
- if (!texImage)
+ texObj = _mesa_get_current_tex_object(ctx, target);
+ assert(texObj);
+
+ get_texture_image_dims(texObj, target, level, &width, &height, &depth);
+
+ if (getcompressedteximage_error_check(ctx, texObj, target, level,
+ 0, 0, 0, width, height, depth,
+ INT_MAX, pixels, caller)) {
return;
+ }
- _mesa_get_compressed_texture_image(ctx, texObj, texImage, target, level,
- bufSize, img, false);
+ get_compressed_texture_image(ctx, texObj, target, level,
+ 0, 0, 0, width, height, depth,
+ pixels, caller);
}
+
void GLAPIENTRY
-_mesa_GetCompressedTexImage(GLenum target, GLint level, GLvoid *img)
+_mesa_GetCompressedTexImage(GLenum target, GLint level, GLvoid *pixels)
{
- _mesa_GetnCompressedTexImageARB(target, level, INT_MAX, img);
+ GET_CURRENT_CONTEXT(ctx);
+ static const char *caller = "glGetCompressedTexImage";
+ GLsizei width, height, depth;
+ struct gl_texture_object *texObj;
+
+ if (!legal_getteximage_target(ctx, target, false)) {
+ _mesa_error(ctx, GL_INVALID_ENUM, "%s", caller);
+ return;
+ }
+
+ texObj = _mesa_get_current_tex_object(ctx, target);
+ assert(texObj);
+
+ get_texture_image_dims(texObj, target, level,
+ &width, &height, &depth);
+
+ if (getcompressedteximage_error_check(ctx, texObj, target, level,
+ 0, 0, 0, width, height, depth,
+ INT_MAX, pixels, caller)) {
+ return;
+ }
+
+ get_compressed_texture_image(ctx, texObj, target, level,
+ 0, 0, 0, width, height, depth,
+ pixels, caller);
}
-/**
- * Get compressed texture image.
- *
- * \param texture texture name.
- * \param level image level.
- * \param bufSize size of the pixels data buffer.
- * \param pixels returned pixel data.
- */
+
void GLAPIENTRY
_mesa_GetCompressedTextureImage(GLuint texture, GLint level,
GLsizei bufSize, GLvoid *pixels)
{
- struct gl_texture_object *texObj;
- struct gl_texture_image *texImage;
- int i;
- GLint image_stride;
GET_CURRENT_CONTEXT(ctx);
+ static const char *caller = "glGetCompressedTextureImage";
+ GLsizei width, height, depth;
+ struct gl_texture_object *texObj =
+ _mesa_lookup_texture_err(ctx, texture, caller);
- texObj = _mesa_lookup_texture_err(ctx, texture,
- "glGetCompressedTextureImage");
- if (!texObj)
+ if (!texObj) {
return;
+ }
- /* Must handle special case GL_TEXTURE_CUBE_MAP. */
- if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
+ get_texture_image_dims(texObj, texObj->Target, level,
+ &width, &height, &depth);
- /* Make sure the texture object is a proper cube.
- * (See texturesubimage in teximage.c for details on why this check is
- * performed.)
- */
- if (!_mesa_cube_level_complete(texObj, level)) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetCompressedTextureImage(cube map incomplete)");
- return;
- }
+ if (getcompressedteximage_error_check(ctx, texObj, texObj->Target, level,
+ 0, 0, 0, width, height, depth,
+ bufSize, pixels, caller)) {
+ return;
+ }
- /* Copy each face. */
- for (i = 0; i < 6; ++i) {
- texImage = texObj->Image[i][level];
- assert(texImage);
+ get_compressed_texture_image(ctx, texObj, texObj->Target, level,
+ 0, 0, 0, width, height, depth,
+ pixels, caller);
+}
- _mesa_get_compressed_texture_image(ctx, texObj, texImage,
- texObj->Target, level,
- bufSize, pixels, true);
- /* Compressed images don't have a client format */
- image_stride = _mesa_format_image_size(texImage->TexFormat,
- texImage->Width,
- texImage->Height, 1);
+void APIENTRY
+_mesa_GetCompressedTextureSubImage(GLuint texture, GLint level,
+ GLint xoffset, GLint yoffset,
+ GLint zoffset, GLsizei width,
+ GLsizei height, GLsizei depth,
+ GLsizei bufSize, void *pixels)
+{
+ GET_CURRENT_CONTEXT(ctx);
+ static const char *caller = "glGetCompressedTextureImage";
+ struct gl_texture_object *texObj;
- pixels = (GLubyte *) pixels + image_stride;
- bufSize -= image_stride;
- }
+ texObj = _mesa_lookup_texture_err(ctx, texture, caller);
+ if (!texObj) {
+ return;
}
- else {
- texImage = _mesa_select_tex_image(texObj, texObj->Target, level);
- if (!texImage)
- return;
- _mesa_get_compressed_texture_image(ctx, texObj, texImage,
- texObj->Target, level, bufSize,
- pixels, true);
+ if (getcompressedteximage_error_check(ctx, texObj, texObj->Target, level,
+ xoffset, yoffset, zoffset,
+ width, height, depth,
+ bufSize, pixels, caller)) {
+ return;
}
+
+ get_compressed_texture_image(ctx, texObj, texObj->Target, level,
+ xoffset, yoffset, zoffset,
+ width, height, depth,
+ pixels, caller);
}
diff --git a/src/mesa/main/texgetimage.h b/src/mesa/main/texgetimage.h
index 1fa2f59dcdc..63c75eb931d 100644
--- a/src/mesa/main/texgetimage.h
+++ b/src/mesa/main/texgetimage.h
@@ -37,22 +37,19 @@ extern GLenum
_mesa_base_pack_format(GLenum format);
extern void
-_mesa_GetTexImage_sw(struct gl_context *ctx,
- GLenum format, GLenum type, GLvoid *pixels,
- struct gl_texture_image *texImage);
-
-
-extern void
-_mesa_GetCompressedTexImage_sw(struct gl_context *ctx,
- struct gl_texture_image *texImage,
- GLvoid *data);
+_mesa_GetTexSubImage_sw(struct gl_context *ctx,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLint depth,
+ GLenum format, GLenum type, GLvoid *pixels,
+ struct gl_texture_image *texImage);
extern void
-_mesa_get_texture_image(struct gl_context *ctx,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage, GLenum target,
- GLint level, GLenum format, GLenum type,
- GLsizei bufSize, GLvoid *pixels, bool dsa);
+_mesa_GetCompressedTexSubImage_sw(struct gl_context *ctx,
+ struct gl_texture_image *texImage,
+ GLint xoffset, GLint yoffset,
+ GLint zoffset, GLsizei width,
+ GLint height, GLint depth,
+ GLvoid *data);
extern void
_mesa_get_compressed_texture_image( struct gl_context *ctx,
@@ -74,6 +71,14 @@ _mesa_GetTextureImage(GLuint texture, GLint level, GLenum format,
GLenum type, GLsizei bufSize, GLvoid *pixels);
extern void GLAPIENTRY
+_mesa_GetTextureSubImage(GLuint texture, GLint level,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLenum format, GLenum type, GLsizei bufSize,
+ void *pixels);
+
+
+extern void GLAPIENTRY
_mesa_GetCompressedTexImage(GLenum target, GLint lod, GLvoid *img);
extern void GLAPIENTRY
@@ -84,4 +89,11 @@ extern void GLAPIENTRY
_mesa_GetCompressedTextureImage(GLuint texture, GLint level, GLsizei bufSize,
GLvoid *pixels);
+extern void APIENTRY
+_mesa_GetCompressedTextureSubImage(GLuint texture, GLint level,
+ GLint xoffset, GLint yoffset,
+ GLint zoffset, GLsizei width,
+ GLsizei height, GLsizei depth,
+ GLsizei bufSize, void *pixels);
+
#endif /* TEXGETIMAGE_H */
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 3d85615fa45..3a556a6ad6e 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -1008,7 +1008,7 @@ _mesa_max_texture_levels(struct gl_context *ctx, GLenum target)
case GL_PROXY_TEXTURE_2D_MULTISAMPLE:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
case GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY:
- return _mesa_is_desktop_gl(ctx)
+ return (_mesa_is_desktop_gl(ctx) || _mesa_is_gles31(ctx))
&& ctx->Extensions.ARB_texture_multisample
? 1 : 0;
case GL_TEXTURE_EXTERNAL_OES:
@@ -1793,8 +1793,6 @@ GLboolean
_mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target,
GLenum intFormat)
{
- (void) intFormat; /* not used yet */
-
switch (target) {
case GL_TEXTURE_2D:
case GL_PROXY_TEXTURE_2D:
@@ -1814,6 +1812,16 @@ _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target,
case GL_PROXY_TEXTURE_CUBE_MAP_ARRAY:
case GL_TEXTURE_CUBE_MAP_ARRAY:
return ctx->Extensions.ARB_texture_cube_map_array;
+ case GL_TEXTURE_3D:
+ switch (intFormat) {
+ case GL_COMPRESSED_RGBA_BPTC_UNORM:
+ case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
+ case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
+ case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
+ return ctx->Extensions.ARB_texture_compression_bptc;
+ default:
+ return GL_FALSE;
+ }
default:
return GL_FALSE;
}
@@ -2081,6 +2089,53 @@ texture_formats_agree(GLenum internalFormat,
}
/**
+ * Test the combination of format, type and internal format arguments of
+ * different texture operations on GLES.
+ *
+ * \param ctx GL context.
+ * \param format pixel data format given by the user.
+ * \param type pixel data type given by the user.
+ * \param internalFormat internal format given by the user.
+ * \param dimensions texture image dimensions (must be 1, 2 or 3).
+ * \param callerName name of the caller function to print in the error message
+ *
+ * \return true if an error is found, false otherwise
+ *
+ * Currently, it is used by texture_error_check() and texsubimage_error_check().
+ */
+static bool
+texture_format_error_check_gles(struct gl_context *ctx, GLenum format,
+ GLenum type, GLenum internalFormat,
+ GLuint dimensions, const char *callerName)
+{
+ GLenum err;
+
+ if (_mesa_is_gles3(ctx)) {
+ err = _mesa_es3_error_check_format_and_type(ctx, format, type,
+ internalFormat);
+ if (err != GL_NO_ERROR) {
+ _mesa_error(ctx, err,
+ "%s(format = %s, type = %s, internalformat = %s)",
+ callerName, _mesa_enum_to_string(format),
+ _mesa_enum_to_string(type),
+ _mesa_enum_to_string(internalFormat));
+ return true;
+ }
+ }
+ else {
+ err = _mesa_es_error_check_format_and_type(format, type, dimensions);
+ if (err != GL_NO_ERROR) {
+ _mesa_error(ctx, err, "%s(format = %s, type = %s)",
+ callerName, _mesa_enum_to_string(format),
+ _mesa_enum_to_string(type));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/**
* Test the glTexImage[123]D() parameters for errors.
*
* \param ctx GL context.
@@ -2151,39 +2206,17 @@ texture_error_check( struct gl_context *ctx,
* Formats and types that require additional extensions (e.g., GL_FLOAT
* requires GL_OES_texture_float) are filtered elsewhere.
*/
-
- if (_mesa_is_gles(ctx)) {
- if (_mesa_is_gles3(ctx)) {
- err = _mesa_es3_error_check_format_and_type(ctx, format, type,
- internalFormat);
- } else {
- if (format != internalFormat) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glTexImage%dD(format = %s, internalFormat = %s)",
- dimensions,
- _mesa_lookup_enum_by_nr(format),
- _mesa_lookup_enum_by_nr(internalFormat));
- return GL_TRUE;
- }
-
- err = _mesa_es_error_check_format_and_type(format, type, dimensions);
- }
- if (err != GL_NO_ERROR) {
- _mesa_error(ctx, err,
- "glTexImage%dD(format = %s, type = %s, internalFormat = %s)",
- dimensions,
- _mesa_lookup_enum_by_nr(format),
- _mesa_lookup_enum_by_nr(type),
- _mesa_lookup_enum_by_nr(internalFormat));
- return GL_TRUE;
- }
+ if (_mesa_is_gles(ctx) &&
+ texture_format_error_check_gles(ctx, format, type, internalFormat,
+ dimensions, "glTexImage%dD")) {
+ return GL_TRUE;
}
/* Check internalFormat */
if (_mesa_base_tex_format(ctx, internalFormat) < 0) {
_mesa_error(ctx, GL_INVALID_VALUE,
"glTexImage%dD(internalFormat=%s)",
- dimensions, _mesa_lookup_enum_by_nr(internalFormat));
+ dimensions, _mesa_enum_to_string(internalFormat));
return GL_TRUE;
}
@@ -2192,8 +2225,8 @@ texture_error_check( struct gl_context *ctx,
if (err != GL_NO_ERROR) {
_mesa_error(ctx, err,
"glTexImage%dD(incompatible format = %s, type = %s)",
- dimensions, _mesa_lookup_enum_by_nr(format),
- _mesa_lookup_enum_by_nr(type));
+ dimensions, _mesa_enum_to_string(format),
+ _mesa_enum_to_string(type));
return GL_TRUE;
}
@@ -2208,8 +2241,8 @@ texture_error_check( struct gl_context *ctx,
if (!texture_formats_agree(internalFormat, format)) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glTexImage%dD(incompatible internalFormat = %s, format = %s)",
- dimensions, _mesa_lookup_enum_by_nr(internalFormat),
- _mesa_lookup_enum_by_nr(format));
+ dimensions, _mesa_enum_to_string(internalFormat),
+ _mesa_enum_to_string(format));
return GL_TRUE;
}
@@ -2324,7 +2357,7 @@ compressed_texture_error_check(struct gl_context *ctx, GLint dimensions,
if (!_mesa_is_compressed_format(ctx, internalFormat)) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glCompressedTexImage%dD(internalFormat=%s)",
- dimensions, _mesa_lookup_enum_by_nr(internalFormat));
+ dimensions, _mesa_enum_to_string(internalFormat));
return GL_TRUE;
}
@@ -2479,40 +2512,38 @@ texsubimage_error_check(struct gl_context *ctx, GLuint dimensions,
return GL_TRUE;
}
- /* check target (proxies not allowed) */
- if (!legal_texsubimage_target(ctx, dimensions, target, dsa)) {
- _mesa_error(ctx, GL_INVALID_ENUM, "%s(target=%s)",
- callerName, _mesa_lookup_enum_by_nr(target));
- return GL_TRUE;
- }
-
/* level check */
if (level < 0 || level >= _mesa_max_texture_levels(ctx, target)) {
_mesa_error(ctx, GL_INVALID_VALUE, "%s(level=%d)", callerName, level);
return GL_TRUE;
}
- /* OpenGL ES 1.x and OpenGL ES 2.0 impose additional restrictions on the
- * combinations of format and type that can be used. Formats and types
- * that require additional extensions (e.g., GL_FLOAT requires
- * GL_OES_texture_float) are filtered elsewhere.
- */
- if (_mesa_is_gles(ctx) && !_mesa_is_gles3(ctx)) {
- err = _mesa_es_error_check_format_and_type(format, type, dimensions);
- if (err != GL_NO_ERROR) {
- _mesa_error(ctx, err, "%s(format = %s, type = %s)",
- callerName, _mesa_lookup_enum_by_nr(format),
- _mesa_lookup_enum_by_nr(type));
- return GL_TRUE;
- }
+ texImage = _mesa_select_tex_image(texObj, target, level);
+ if (!texImage) {
+ /* non-existent texture level */
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture image)",
+ callerName);
+ return GL_TRUE;
}
err = _mesa_error_check_format_and_type(ctx, format, type);
if (err != GL_NO_ERROR) {
_mesa_error(ctx, err,
"%s(incompatible format = %s, type = %s)",
- callerName, _mesa_lookup_enum_by_nr(format),
- _mesa_lookup_enum_by_nr(type));
+ callerName, _mesa_enum_to_string(format),
+ _mesa_enum_to_string(type));
+ return GL_TRUE;
+ }
+
+ /* OpenGL ES 1.x and OpenGL ES 2.0 impose additional restrictions on the
+ * combinations of format, internalFormat, and type that can be used.
+ * Formats and types that require additional extensions (e.g., GL_FLOAT
+ * requires GL_OES_texture_float) are filtered elsewhere.
+ */
+ if (_mesa_is_gles(ctx) &&
+ texture_format_error_check_gles(ctx, format, type,
+ texImage->InternalFormat,
+ dimensions, callerName)) {
return GL_TRUE;
}
@@ -2523,14 +2554,6 @@ texsubimage_error_check(struct gl_context *ctx, GLuint dimensions,
return GL_TRUE;
}
- texImage = _mesa_select_tex_image(texObj, target, level);
- if (!texImage) {
- /* non-existant texture level */
- _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture image)",
- callerName);
- return GL_TRUE;
- }
-
if (error_check_subtexture_dimensions(ctx, dimensions,
texImage, xoffset, yoffset, zoffset,
width, height, depth, callerName)) {
@@ -2590,7 +2613,7 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions,
/* check target */
if (!legal_texsubimage_target(ctx, dimensions, target, false)) {
_mesa_error(ctx, GL_INVALID_ENUM, "glCopyTexImage%uD(target=%s)",
- dimensions, _mesa_lookup_enum_by_nr(target));
+ dimensions, _mesa_enum_to_string(target));
return GL_TRUE;
}
@@ -2629,13 +2652,6 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions,
return GL_TRUE;
}
- rb = _mesa_get_read_renderbuffer_for_format(ctx, internalFormat);
- if (rb == NULL) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glCopyTexImage%dD(read buffer)", dimensions);
- return GL_TRUE;
- }
-
/* OpenGL ES 1.x and OpenGL ES 2.0 impose additional restrictions on the
* internalFormat.
*/
@@ -2648,18 +2664,25 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions,
case GL_LUMINANCE_ALPHA:
break;
default:
- _mesa_error(ctx, GL_INVALID_VALUE,
+ _mesa_error(ctx, GL_INVALID_ENUM,
"glCopyTexImage%dD(internalFormat=%s)", dimensions,
- _mesa_lookup_enum_by_nr(internalFormat));
+ _mesa_enum_to_string(internalFormat));
return GL_TRUE;
}
}
baseFormat = _mesa_base_tex_format(ctx, internalFormat);
if (baseFormat < 0) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
+ _mesa_error(ctx, GL_INVALID_ENUM,
"glCopyTexImage%dD(internalFormat=%s)", dimensions,
- _mesa_lookup_enum_by_nr(internalFormat));
+ _mesa_enum_to_string(internalFormat));
+ return GL_TRUE;
+ }
+
+ rb = _mesa_get_read_renderbuffer_for_format(ctx, internalFormat);
+ if (rb == NULL) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "glCopyTexImage%dD(read buffer)", dimensions);
return GL_TRUE;
}
@@ -2669,7 +2692,7 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions,
if (rb_base_format < 0) {
_mesa_error(ctx, GL_INVALID_VALUE,
"glCopyTexImage%dD(internalFormat=%s)", dimensions,
- _mesa_lookup_enum_by_nr(internalFormat));
+ _mesa_enum_to_string(internalFormat));
return GL_TRUE;
}
}
@@ -2696,7 +2719,7 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions,
if (!valid) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glCopyTexImage%dD(internalFormat=%s)", dimensions,
- _mesa_lookup_enum_by_nr(internalFormat));
+ _mesa_enum_to_string(internalFormat));
return GL_TRUE;
}
}
@@ -2735,10 +2758,10 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions,
* types for SNORM formats. Also, conversion to SNORM formats is not
* allowed by Table 3.2 on Page 110.
*/
- if(_mesa_is_enum_format_snorm(internalFormat)) {
+ if (_mesa_is_enum_format_snorm(internalFormat)) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glCopyTexImage%dD(internalFormat=%s)", dimensions,
- _mesa_lookup_enum_by_nr(internalFormat));
+ _mesa_enum_to_string(internalFormat));
return GL_TRUE;
}
}
@@ -3103,8 +3126,8 @@ _mesa_choose_texture_format(struct gl_context *ctx,
"DXT compression requested (%s), "
"but libtxc_dxtn library not installed. Using %s "
"instead.",
- _mesa_lookup_enum_by_nr(before),
- _mesa_lookup_enum_by_nr(internalFormat));
+ _mesa_enum_to_string(before),
+ _mesa_enum_to_string(internalFormat));
}
}
@@ -3191,18 +3214,18 @@ teximage(struct gl_context *ctx, GLboolean compressed, GLuint dims,
_mesa_debug(ctx,
"glCompressedTexImage%uD %s %d %s %d %d %d %d %p\n",
dims,
- _mesa_lookup_enum_by_nr(target), level,
- _mesa_lookup_enum_by_nr(internalFormat),
+ _mesa_enum_to_string(target), level,
+ _mesa_enum_to_string(internalFormat),
width, height, depth, border, pixels);
else
_mesa_debug(ctx,
"glTexImage%uD %s %d %s %d %d %d %d %s %s %p\n",
dims,
- _mesa_lookup_enum_by_nr(target), level,
- _mesa_lookup_enum_by_nr(internalFormat),
+ _mesa_enum_to_string(target), level,
+ _mesa_enum_to_string(internalFormat),
width, height, depth, border,
- _mesa_lookup_enum_by_nr(format),
- _mesa_lookup_enum_by_nr(type), pixels);
+ _mesa_enum_to_string(format),
+ _mesa_enum_to_string(type), pixels);
}
internalFormat = override_internal_format(internalFormat, width, height);
@@ -3210,7 +3233,7 @@ teximage(struct gl_context *ctx, GLboolean compressed, GLuint dims,
/* target error checking */
if (!legal_teximage_target(ctx, dims, target)) {
_mesa_error(ctx, GL_INVALID_ENUM, "%s%uD(target=%s)",
- func, dims, _mesa_lookup_enum_by_nr(target));
+ func, dims, _mesa_enum_to_string(target));
return;
}
@@ -3313,16 +3336,16 @@ teximage(struct gl_context *ctx, GLboolean compressed, GLuint dims,
if (!dimensionsOK) {
_mesa_error(ctx, GL_INVALID_VALUE,
- "glTexImage%uD(invalid width or height or depth)",
- dims);
+ "%s%uD(invalid width or height or depth)",
+ func, dims);
return;
}
if (!sizeOK) {
_mesa_error(ctx, GL_OUT_OF_MEMORY,
- "glTexImage%uD(image too large: %d x %d x %d, %s format)",
- dims, width, height, depth,
- _mesa_lookup_enum_by_nr(internalFormat));
+ "%s%uD(image too large: %d x %d x %d, %s format)",
+ func, dims, width, height, depth,
+ _mesa_enum_to_string(internalFormat));
return;
}
@@ -3495,7 +3518,6 @@ _mesa_EGLImageTargetTexture2DOES (GLenum target, GLeglImageOES image)
_mesa_dirty_texobj(ctx, texObj);
}
_mesa_unlock_texture(ctx, texObj);
-
}
@@ -3515,14 +3537,6 @@ _mesa_texture_sub_image(struct gl_context *ctx, GLuint dims,
{
FLUSH_VERTICES(ctx, 0);
- /* check target (proxies not allowed) */
- if (!legal_texsubimage_target(ctx, dims, target, dsa)) {
- _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sSubImage%uD(target=%s)",
- dsa ? "ture" : "",
- dims, _mesa_lookup_enum_by_nr(target));
- return;
- }
-
if (ctx->NewState & _NEW_PIXEL)
_mesa_update_state(ctx);
@@ -3572,6 +3586,13 @@ texsubimage(struct gl_context *ctx, GLuint dims, GLenum target, GLint level,
struct gl_texture_object *texObj;
struct gl_texture_image *texImage;
+ /* check target (proxies not allowed) */
+ if (!legal_texsubimage_target(ctx, dims, target, false)) {
+ _mesa_error(ctx, GL_INVALID_ENUM, "glTexSubImage%uD(target=%s)",
+ dims, _mesa_enum_to_string(target));
+ return;
+ }
+
texObj = _mesa_get_current_tex_object(ctx, target);
if (!texObj)
return;
@@ -3589,10 +3610,10 @@ texsubimage(struct gl_context *ctx, GLuint dims, GLenum target, GLint level,
if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
_mesa_debug(ctx, "glTexSubImage%uD %s %d %d %d %d %d %d %d %s %s %p\n",
dims,
- _mesa_lookup_enum_by_nr(target), level,
+ _mesa_enum_to_string(target), level,
xoffset, yoffset, zoffset, width, height, depth,
- _mesa_lookup_enum_by_nr(format),
- _mesa_lookup_enum_by_nr(type), pixels);
+ _mesa_enum_to_string(format),
+ _mesa_enum_to_string(type), pixels);
_mesa_texture_sub_image(ctx, dims, texObj, texImage, target, level,
xoffset, yoffset, zoffset, width, height, depth,
@@ -3621,8 +3642,8 @@ texturesubimage(struct gl_context *ctx, GLuint dims,
"glTextureSubImage%uD %d %d %d %d %d %d %d %d %s %s %p\n",
dims, texture, level,
xoffset, yoffset, zoffset, width, height, depth,
- _mesa_lookup_enum_by_nr(format),
- _mesa_lookup_enum_by_nr(type), pixels);
+ _mesa_enum_to_string(format),
+ _mesa_enum_to_string(type), pixels);
/* Get the texture object by Name. */
texObj = _mesa_lookup_texture(ctx, texture);
@@ -3632,6 +3653,13 @@ texturesubimage(struct gl_context *ctx, GLuint dims,
return;
}
+ /* check target (proxies not allowed) */
+ if (!legal_texsubimage_target(ctx, dims, texObj->Target, true)) {
+ _mesa_error(ctx, GL_INVALID_ENUM, "%s(target=%s)",
+ callerName, _mesa_enum_to_string(texObj->Target));
+ return;
+ }
+
if (texsubimage_error_check(ctx, dims, texObj, texObj->Target, level,
xoffset, yoffset, zoffset,
width, height, depth, format, type,
@@ -3842,8 +3870,7 @@ copytexsubimage_by_slice(struct gl_context *ctx,
}
static GLboolean
-formats_differ_in_component_sizes (mesa_format f1,
- mesa_format f2)
+formats_differ_in_component_sizes(mesa_format f1, mesa_format f2)
{
GLint f1_r_bits = _mesa_get_format_bits(f1, GL_RED_BITS);
GLint f1_g_bits = _mesa_get_format_bits(f1, GL_GREEN_BITS);
@@ -3883,8 +3910,8 @@ copyteximage(struct gl_context *ctx, GLuint dims,
if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
_mesa_debug(ctx, "glCopyTexImage%uD %s %d %s %d %d %d %d %d\n",
dims,
- _mesa_lookup_enum_by_nr(target), level,
- _mesa_lookup_enum_by_nr(internalFormat),
+ _mesa_enum_to_string(target), level,
+ _mesa_enum_to_string(internalFormat),
x, y, width, height, border);
if (ctx->NewState & NEW_COPY_TEX_STATE)
@@ -3916,8 +3943,8 @@ copyteximage(struct gl_context *ctx, GLuint dims,
*/
if (rb->InternalFormat == GL_RGB10_A2) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glCopyTexImage%uD(Reading from GL_RGB10_A2 buffer and"
- " writing to unsized internal format)", dims);
+ "glCopyTexImage%uD(Reading from GL_RGB10_A2 buffer"
+ " and writing to unsized internal format)", dims);
return;
}
}
@@ -4043,7 +4070,7 @@ _mesa_copy_texture_sub_image(struct gl_context *ctx, GLuint dims,
if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
_mesa_debug(ctx, "%s %s %d %d %d %d %d %d %d %d\n", caller,
- _mesa_lookup_enum_by_nr(target),
+ _mesa_enum_to_string(target),
level, xoffset, yoffset, zoffset, x, y, width, height);
if (ctx->NewState & NEW_COPY_TEX_STATE)
@@ -4105,7 +4132,7 @@ _mesa_CopyTexSubImage1D( GLenum target, GLint level,
*/
if (!legal_texsubimage_target(ctx, 1, target, false)) {
_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", self,
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return;
}
@@ -4133,7 +4160,7 @@ _mesa_CopyTexSubImage2D( GLenum target, GLint level,
*/
if (!legal_texsubimage_target(ctx, 2, target, false)) {
_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", self,
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return;
}
@@ -4162,7 +4189,7 @@ _mesa_CopyTexSubImage3D( GLenum target, GLint level,
*/
if (!legal_texsubimage_target(ctx, 3, target, false)) {
_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", self,
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return;
}
@@ -4190,7 +4217,7 @@ _mesa_CopyTextureSubImage1D(GLuint texture, GLint level,
/* Check target (proxies not allowed). */
if (!legal_texsubimage_target(ctx, 1, texObj->Target, true)) {
_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", self,
- _mesa_lookup_enum_by_nr(texObj->Target));
+ _mesa_enum_to_string(texObj->Target));
return;
}
@@ -4214,7 +4241,7 @@ _mesa_CopyTextureSubImage2D(GLuint texture, GLint level,
/* Check target (proxies not allowed). */
if (!legal_texsubimage_target(ctx, 2, texObj->Target, true)) {
_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", self,
- _mesa_lookup_enum_by_nr(texObj->Target));
+ _mesa_enum_to_string(texObj->Target));
return;
}
@@ -4241,7 +4268,7 @@ _mesa_CopyTextureSubImage3D(GLuint texture, GLint level,
/* Check target (proxies not allowed). */
if (!legal_texsubimage_target(ctx, 3, texObj->Target, true)) {
_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", self,
- _mesa_lookup_enum_by_nr(texObj->Target));
+ _mesa_enum_to_string(texObj->Target));
return;
}
@@ -4288,8 +4315,8 @@ check_clear_tex_image(struct gl_context *ctx,
_mesa_error(ctx, err,
"%s(incompatible format = %s, type = %s)",
function,
- _mesa_lookup_enum_by_nr(format),
- _mesa_lookup_enum_by_nr(type));
+ _mesa_enum_to_string(format),
+ _mesa_enum_to_string(type));
return false;
}
@@ -4298,8 +4325,8 @@ check_clear_tex_image(struct gl_context *ctx,
_mesa_error(ctx, GL_INVALID_OPERATION,
"%s(incompatible internalFormat = %s, format = %s)",
function,
- _mesa_lookup_enum_by_nr(internalFormat),
- _mesa_lookup_enum_by_nr(format));
+ _mesa_enum_to_string(internalFormat),
+ _mesa_enum_to_string(format));
return false;
}
@@ -4541,7 +4568,7 @@ compressed_subtexture_target_check(struct gl_context *ctx, GLenum target,
if (dsa && target == GL_TEXTURE_RECTANGLE) {
_mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid target %s)", caller,
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return GL_TRUE;
}
@@ -4549,13 +4576,15 @@ compressed_subtexture_target_check(struct gl_context *ctx, GLenum target,
case 2:
switch (target) {
case GL_TEXTURE_2D:
+ targetOK = GL_TRUE;
+ break;
case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
- targetOK = GL_TRUE;
+ targetOK = ctx->Extensions.ARB_texture_cube_map;
break;
default:
targetOK = GL_FALSE;
@@ -4563,52 +4592,59 @@ compressed_subtexture_target_check(struct gl_context *ctx, GLenum target,
}
break;
case 3:
- targetOK = (target == GL_TEXTURE_3D) ||
- (target == GL_TEXTURE_2D_ARRAY) ||
- (target == GL_TEXTURE_CUBE_MAP_ARRAY) ||
- (target == GL_TEXTURE_CUBE_MAP && dsa);
-
- /* OpenGL 4.5 spec (30.10.2014) says in Section 8.7 Compressed Texture
- * Images:
- * "An INVALID_OPERATION error is generated by
- * CompressedTex*SubImage3D if the internal format of the texture is
- * one of the EAC, ETC2, or RGTC formats and either border is
- * non-zero, or the effective target for the texture is not
- * TEXTURE_2D_ARRAY."
- */
- if (target != GL_TEXTURE_2D_ARRAY) {
- bool invalidformat;
+ switch (target) {
+ case GL_TEXTURE_CUBE_MAP:
+ targetOK = dsa && ctx->Extensions.ARB_texture_cube_map;
+ break;
+ case GL_TEXTURE_2D_ARRAY:
+ targetOK = _mesa_is_gles3(ctx) ||
+ (_mesa_is_desktop_gl(ctx) && ctx->Extensions.EXT_texture_array);
+ break;
+ case GL_TEXTURE_CUBE_MAP_ARRAY:
+ targetOK = ctx->Extensions.ARB_texture_cube_map_array;
+ break;
+ case GL_TEXTURE_3D:
+ targetOK = GL_TRUE;
+ /*
+ * OpenGL 4.5 spec (30.10.2014) says in Section 8.7 Compressed Texture
+ * Images:
+ * "An INVALID_OPERATION error is generated by
+ * CompressedTex*SubImage3D if the internal format of the texture
+ * is one of the EAC, ETC2, or RGTC formats and either border is
+ * non-zero, or the effective target for the texture is not
+ * TEXTURE_2D_ARRAY."
+ *
+ * NOTE: that's probably a spec error. It should probably say
+ * "... or the effective target for the texture is not
+ * TEXTURE_2D_ARRAY, TEXTURE_CUBE_MAP, nor
+ * GL_TEXTURE_CUBE_MAP_ARRAY."
+ * since those targets are 2D images and they support all compression
+ * formats.
+ *
+ * Instead of listing all of those, just list the formats which are
+ * allowed, which is (at this time) only bptc. Otherwise we'd be
+ * claiming s3tc (and more) are valid here, which they are not, even
+ * though the core spec never mentions them.
+ */
switch (format) {
- /* These came from _mesa_is_compressed_format in glformats.c. */
- /* EAC formats */
- case GL_COMPRESSED_RGBA8_ETC2_EAC:
- case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
- case GL_COMPRESSED_R11_EAC:
- case GL_COMPRESSED_RG11_EAC:
- case GL_COMPRESSED_SIGNED_R11_EAC:
- case GL_COMPRESSED_SIGNED_RG11_EAC:
- /* ETC2 formats */
- case GL_COMPRESSED_RGB8_ETC2:
- case GL_COMPRESSED_SRGB8_ETC2:
- case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
- case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
- /* RGTC formats */
- case GL_COMPRESSED_RED_RGTC1:
- case GL_COMPRESSED_SIGNED_RED_RGTC1:
- case GL_COMPRESSED_RG_RGTC2:
- case GL_COMPRESSED_SIGNED_RG_RGTC2:
- invalidformat = true;
- break;
- default:
- invalidformat = false;
- }
- if (invalidformat) {
+ /* These are the only 3D compression formats supported at this time */
+ case GL_COMPRESSED_RGBA_BPTC_UNORM:
+ case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
+ case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
+ case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
+ /* valid format */
+ break;
+ default:
+ /* invalid format */
_mesa_error(ctx, GL_INVALID_OPERATION,
"%s(invalid target %s for format %s)", caller,
- _mesa_lookup_enum_by_nr(target),
- _mesa_lookup_enum_by_nr(format));
+ _mesa_enum_to_string(target),
+ _mesa_enum_to_string(format));
return GL_TRUE;
}
+ break;
+ default:
+ targetOK = GL_FALSE;
}
break;
@@ -4621,7 +4657,7 @@ compressed_subtexture_target_check(struct gl_context *ctx, GLenum target,
if (!targetOK) {
_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", caller,
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return GL_TRUE;
}
@@ -4834,8 +4870,7 @@ _mesa_CompressedTextureSubImage1D(GLuint texture, GLint level, GLint xoffset,
if (!texObj)
return;
- if (compressed_subtexture_target_check(ctx, texObj->Target, 1, format,
- true,
+ if (compressed_subtexture_target_check(ctx, texObj->Target, 1, format, true,
"glCompressedTextureSubImage1D")) {
return;
}
@@ -4912,8 +4947,7 @@ _mesa_CompressedTextureSubImage2D(GLuint texture, GLint level, GLint xoffset,
if (!texObj)
return;
- if (compressed_subtexture_target_check(ctx, texObj->Target, 2, format,
- true,
+ if (compressed_subtexture_target_check(ctx, texObj->Target, 2, format, true,
"glCompressedTextureSubImage2D")) {
return;
}
@@ -4990,8 +5024,7 @@ _mesa_CompressedTextureSubImage3D(GLuint texture, GLint level, GLint xoffset,
if (!texObj)
return;
- if (compressed_subtexture_target_check(ctx, texObj->Target, 3, format,
- true,
+ if (compressed_subtexture_target_check(ctx, texObj->Target, 3, format, true,
"glCompressedTextureSubImage3D")) {
return;
}
@@ -5440,7 +5473,6 @@ _mesa_TexBufferRange(GLenum target, GLenum internalFormat, GLuint buffer,
return;
} else {
-
/* OpenGL 4.5 core spec (02.02.2015) says in Section 8.9 Buffer
* Textures (PDF page 254):
* "If buffer is zero, then any buffer object attached to the buffer
@@ -5508,7 +5540,6 @@ _mesa_TextureBufferRange(GLuint texture, GLenum internalFormat, GLuint buffer,
return;
} else {
-
/* OpenGL 4.5 core spec (02.02.2015) says in Section 8.9 Buffer
* Textures (PDF page 254):
* "If buffer is zero, then any buffer object attached to the buffer
@@ -5554,19 +5585,17 @@ check_multisample_target(GLuint dims, GLenum target, bool dsa)
return dims == 2;
case GL_PROXY_TEXTURE_2D_MULTISAMPLE:
return dims == 2 && !dsa;
-
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
return dims == 3;
case GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY:
return dims == 3 && !dsa;
-
default:
return GL_FALSE;
}
}
-void
+static void
_mesa_texture_image_multisample(struct gl_context *ctx, GLuint dims,
struct gl_texture_object *texObj,
GLenum target, GLsizei samples,
@@ -5581,8 +5610,8 @@ _mesa_texture_image_multisample(struct gl_context *ctx, GLuint dims,
GLenum sample_count_error;
bool dsa = strstr(func, "ture") ? true : false;
- if (!(ctx->Extensions.ARB_texture_multisample
- && _mesa_is_desktop_gl(ctx))) {
+ if (!((ctx->Extensions.ARB_texture_multisample
+ && _mesa_is_desktop_gl(ctx))) && !_mesa_is_gles31(ctx)) {
_mesa_error(ctx, GL_INVALID_OPERATION, "%s(unsupported)", func);
return;
}
@@ -5605,14 +5634,21 @@ _mesa_texture_image_multisample(struct gl_context *ctx, GLuint dims,
if (immutable && !_mesa_is_legal_tex_storage_format(ctx, internalformat)) {
_mesa_error(ctx, GL_INVALID_ENUM,
"%s(internalformat=%s not legal for immutable-format)",
- func, _mesa_lookup_enum_by_nr(internalformat));
+ func, _mesa_enum_to_string(internalformat));
return;
}
if (!is_renderable_texture_format(ctx, internalformat)) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "%s(internalformat=%s)",
- func, _mesa_lookup_enum_by_nr(internalformat));
+ /* Page 172 of OpenGL ES 3.1 spec says:
+ * "An INVALID_ENUM error is generated if sizedinternalformat is not
+ * color-renderable, depth-renderable, or stencil-renderable (as
+ * defined in section 9.4)."
+ *
+ * (Same error is also defined for desktop OpenGL for multisample
+ * teximage/texstorage functions.)
+ */
+ _mesa_error(ctx, GL_INVALID_ENUM, "%s(internalformat=%s)", func,
+ _mesa_enum_to_string(internalformat));
return;
}
@@ -5671,13 +5707,12 @@ _mesa_texture_image_multisample(struct gl_context *ctx, GLuint dims,
else {
if (!dimensionsOK) {
_mesa_error(ctx, GL_INVALID_VALUE,
- "%s(invalid width or height)", func);
+ "%s(invalid width or height)", func);
return;
}
if (!sizeOK) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY,
- "%s(texture too large)", func);
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s(texture too large)", func);
return;
}
@@ -5695,7 +5730,7 @@ _mesa_texture_image_multisample(struct gl_context *ctx, GLuint dims,
if (width > 0 && height > 0 && depth > 0) {
if (!ctx->Driver.AllocTextureStorage(ctx, texObj, 1,
- width, height, depth)) {
+ width, height, depth)) {
/* tidy up the texture image state. strictly speaking,
* we're allowed to just leave this in whatever state we
* like, but being tidy is good.
diff --git a/src/mesa/main/teximage.h b/src/mesa/main/teximage.h
index 1eebaa8b631..bf729daf534 100644
--- a/src/mesa/main/teximage.h
+++ b/src/mesa/main/teximage.h
@@ -200,15 +200,6 @@ _mesa_copy_texture_sub_image(struct gl_context *ctx, GLuint dims,
const char *caller);
extern void
-_mesa_texture_image_multisample(struct gl_context *ctx, GLuint dims,
- struct gl_texture_object *texObj,
- GLenum target, GLsizei samples,
- GLint internalformat, GLsizei width,
- GLsizei height, GLsizei depth,
- GLboolean fixedsamplelocations,
- GLboolean immutable, const char *func);
-
-extern void
_mesa_texture_buffer_range(struct gl_context *ctx,
struct gl_texture_object *texObj,
GLenum internalFormat,
diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c
index c563f1e7434..cd7cfd6a4fb 100644
--- a/src/mesa/main/texobj.c
+++ b/src/mesa/main/texobj.c
@@ -1255,7 +1255,7 @@ create_textures(struct gl_context *ctx, GLenum target,
if (targetIndex < 0) { /* Bad Target */
mtx_unlock(&ctx->Shared->Mutex);
_mesa_error(ctx, GL_INVALID_ENUM, "gl%sTextures(target = %s)",
- func, _mesa_lookup_enum_by_nr(texObj->Target));
+ func, _mesa_enum_to_string(texObj->Target));
return;
}
assert(targetIndex < NUM_TEXTURE_TARGETS);
@@ -1606,8 +1606,8 @@ _mesa_tex_target_to_index(const struct gl_context *ctx, GLenum target)
return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_cube_map_array
? TEXTURE_CUBE_ARRAY_INDEX : -1;
case GL_TEXTURE_2D_MULTISAMPLE:
- return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample
- ? TEXTURE_2D_MULTISAMPLE_INDEX: -1;
+ return ((_mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample) ||
+ _mesa_is_gles31(ctx)) ? TEXTURE_2D_MULTISAMPLE_INDEX: -1;
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample
? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX: -1;
@@ -1642,7 +1642,7 @@ _mesa_BindTexture( GLenum target, GLuint texName )
if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
_mesa_debug(ctx, "glBindTexture %s %d\n",
- _mesa_lookup_enum_by_nr(target), (GLint) texName);
+ _mesa_enum_to_string(target), (GLint) texName);
targetIndex = _mesa_tex_target_to_index(ctx, target);
if (targetIndex < 0) {
@@ -1806,7 +1806,7 @@ _mesa_BindTextureUnit(GLuint unit, GLuint texture)
if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
_mesa_debug(ctx, "glBindTextureUnit %s %d\n",
- _mesa_lookup_enum_by_nr(GL_TEXTURE0+unit), (GLint) texture);
+ _mesa_enum_to_string(GL_TEXTURE0+unit), (GLint) texture);
/* Section 8.1 (Texture Objects) of the OpenGL 4.5 core profile spec
* (20141030) says:
diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index d74134f41b1..c0611c3e489 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -381,7 +381,7 @@ set_tex_parameteri(struct gl_context *ctx,
if (texObj->Target == GL_TEXTURE_RECTANGLE_ARB && params[0] != 0) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glTex%sParameter(target=%s, param=%d)", suffix,
- _mesa_lookup_enum_by_nr(texObj->Target), params[0]);
+ _mesa_enum_to_string(texObj->Target), params[0]);
return GL_FALSE;
}
incomplete(ctx, texObj);
@@ -500,7 +500,9 @@ set_tex_parameteri(struct gl_context *ctx,
goto invalid_pname;
case GL_DEPTH_STENCIL_TEXTURE_MODE:
- if (_mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_stencil_texturing) {
+ if ((_mesa_is_desktop_gl(ctx) &&
+ ctx->Extensions.ARB_stencil_texturing) ||
+ _mesa_is_gles31(ctx)) {
bool stencil = params[0] == GL_STENCIL_INDEX;
if (!stencil && params[0] != GL_DEPTH_COMPONENT)
goto invalid_param;
@@ -610,22 +612,22 @@ set_tex_parameteri(struct gl_context *ctx,
invalid_pname:
_mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(pname=%s)",
- suffix, _mesa_lookup_enum_by_nr(pname));
+ suffix, _mesa_enum_to_string(pname));
return GL_FALSE;
invalid_param:
_mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(param=%s)",
- suffix, _mesa_lookup_enum_by_nr(params[0]));
+ suffix, _mesa_enum_to_string(params[0]));
return GL_FALSE;
invalid_operation:
_mesa_error(ctx, GL_INVALID_OPERATION, "glTex%sParameter(pname=%s)",
- suffix, _mesa_lookup_enum_by_nr(pname));
+ suffix, _mesa_enum_to_string(pname));
return GL_FALSE;
invalid_enum:
_mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(pname=%s)",
- suffix, _mesa_lookup_enum_by_nr(pname));
+ suffix, _mesa_enum_to_string(pname));
return GL_FALSE;
}
@@ -683,7 +685,7 @@ set_tex_parameterf(struct gl_context *ctx,
if (texObj->Sampler.MaxAnisotropy == params[0])
return GL_FALSE;
- if (params[0] < 1.0) {
+ if (params[0] < 1.0F) {
_mesa_error(ctx, GL_INVALID_VALUE, "glTex%sParameter(param)",
suffix);
return GL_FALSE;
@@ -745,12 +747,12 @@ set_tex_parameterf(struct gl_context *ctx,
invalid_pname:
_mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(pname=%s)",
- suffix, _mesa_lookup_enum_by_nr(pname));
+ suffix, _mesa_enum_to_string(pname));
return GL_FALSE;
invalid_enum:
_mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(pname=%s)",
- suffix, _mesa_lookup_enum_by_nr(pname));
+ suffix, _mesa_enum_to_string(pname));
return GL_FALSE;
}
@@ -1395,7 +1397,7 @@ get_tex_level_parameter_image(struct gl_context *ctx,
else {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glGetTex%sLevelParameter[if]v(pname=%s)", suffix,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
}
break;
case GL_TEXTURE_COMPRESSED:
@@ -1444,7 +1446,7 @@ get_tex_level_parameter_image(struct gl_context *ctx,
invalid_pname:
_mesa_error(ctx, GL_INVALID_ENUM,
"glGetTex%sLevelParameter[if]v(pname=%s)", suffix,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
}
@@ -1528,7 +1530,7 @@ get_tex_level_parameter_buffer(struct gl_context *ctx,
/* Always illegal for GL_TEXTURE_BUFFER */
_mesa_error(ctx, GL_INVALID_OPERATION,
"glGetTex%sLevelParameter[if]v(pname=%s)", suffix,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
break;
/* GL_ARB_texture_float */
@@ -1557,7 +1559,7 @@ get_tex_level_parameter_buffer(struct gl_context *ctx,
invalid_pname:
_mesa_error(ctx, GL_INVALID_ENUM,
"glGetTex%sLevelParameter[if]v(pname=%s)", suffix,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
}
@@ -1586,7 +1588,7 @@ get_tex_level_parameteriv(struct gl_context *ctx,
if (!legal_get_tex_level_parameter_target(ctx, target, dsa)) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glGetTex%sLevelParameter[if]v(target=%s)", suffix,
- _mesa_lookup_enum_by_nr(target));
+ _mesa_enum_to_string(target));
return;
}
diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c
index 1af9d47f030..9b5928c4306 100644
--- a/src/mesa/main/texstate.c
+++ b/src/mesa/main/texstate.c
@@ -123,21 +123,21 @@ _mesa_print_texunit_state( struct gl_context *ctx, GLuint unit )
{
const struct gl_texture_unit *texUnit = ctx->Texture.Unit + unit;
printf("Texture Unit %d\n", unit);
- printf(" GL_TEXTURE_ENV_MODE = %s\n", _mesa_lookup_enum_by_nr(texUnit->EnvMode));
- printf(" GL_COMBINE_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.ModeRGB));
- printf(" GL_COMBINE_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.ModeA));
- printf(" GL_SOURCE0_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.SourceRGB[0]));
- printf(" GL_SOURCE1_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.SourceRGB[1]));
- printf(" GL_SOURCE2_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.SourceRGB[2]));
- printf(" GL_SOURCE0_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.SourceA[0]));
- printf(" GL_SOURCE1_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.SourceA[1]));
- printf(" GL_SOURCE2_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.SourceA[2]));
- printf(" GL_OPERAND0_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.OperandRGB[0]));
- printf(" GL_OPERAND1_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.OperandRGB[1]));
- printf(" GL_OPERAND2_RGB = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.OperandRGB[2]));
- printf(" GL_OPERAND0_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.OperandA[0]));
- printf(" GL_OPERAND1_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.OperandA[1]));
- printf(" GL_OPERAND2_ALPHA = %s\n", _mesa_lookup_enum_by_nr(texUnit->Combine.OperandA[2]));
+ printf(" GL_TEXTURE_ENV_MODE = %s\n", _mesa_enum_to_string(texUnit->EnvMode));
+ printf(" GL_COMBINE_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.ModeRGB));
+ printf(" GL_COMBINE_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.ModeA));
+ printf(" GL_SOURCE0_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.SourceRGB[0]));
+ printf(" GL_SOURCE1_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.SourceRGB[1]));
+ printf(" GL_SOURCE2_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.SourceRGB[2]));
+ printf(" GL_SOURCE0_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.SourceA[0]));
+ printf(" GL_SOURCE1_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.SourceA[1]));
+ printf(" GL_SOURCE2_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.SourceA[2]));
+ printf(" GL_OPERAND0_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.OperandRGB[0]));
+ printf(" GL_OPERAND1_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.OperandRGB[1]));
+ printf(" GL_OPERAND2_RGB = %s\n", _mesa_enum_to_string(texUnit->Combine.OperandRGB[2]));
+ printf(" GL_OPERAND0_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.OperandA[0]));
+ printf(" GL_OPERAND1_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.OperandA[1]));
+ printf(" GL_OPERAND2_ALPHA = %s\n", _mesa_enum_to_string(texUnit->Combine.OperandA[2]));
printf(" GL_RGB_SCALE = %d\n", 1 << texUnit->Combine.ScaleShiftRGB);
printf(" GL_ALPHA_SCALE = %d\n", 1 << texUnit->Combine.ScaleShiftA);
printf(" GL_TEXTURE_ENV_COLOR = (%f, %f, %f, %f)\n", texUnit->EnvColor[0], texUnit->EnvColor[1], texUnit->EnvColor[2], texUnit->EnvColor[3]);
@@ -289,23 +289,23 @@ _mesa_ActiveTexture(GLenum texture)
GLuint k;
GET_CURRENT_CONTEXT(ctx);
+ if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
+ _mesa_debug(ctx, "glActiveTexture %s\n",
+ _mesa_enum_to_string(texture));
+
+ if (ctx->Texture.CurrentUnit == texUnit)
+ return;
+
k = _mesa_max_tex_unit(ctx);
assert(k <= ARRAY_SIZE(ctx->Texture.Unit));
- if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
- _mesa_debug(ctx, "glActiveTexture %s\n",
- _mesa_lookup_enum_by_nr(texture));
-
if (texUnit >= k) {
_mesa_error(ctx, GL_INVALID_ENUM, "glActiveTexture(texture=%s)",
- _mesa_lookup_enum_by_nr(texture));
+ _mesa_enum_to_string(texture));
return;
}
- if (ctx->Texture.CurrentUnit == texUnit)
- return;
-
FLUSH_VERTICES(ctx, _NEW_TEXTURE);
ctx->Texture.CurrentUnit = texUnit;
@@ -325,16 +325,16 @@ _mesa_ClientActiveTexture(GLenum texture)
if (MESA_VERBOSE & (VERBOSE_API | VERBOSE_TEXTURE))
_mesa_debug(ctx, "glClientActiveTexture %s\n",
- _mesa_lookup_enum_by_nr(texture));
+ _mesa_enum_to_string(texture));
+
+ if (ctx->Array.ActiveTexture == texUnit)
+ return;
if (texUnit >= ctx->Const.MaxTextureCoordUnits) {
_mesa_error(ctx, GL_INVALID_ENUM, "glClientActiveTexture(texture)");
return;
}
- if (ctx->Array.ActiveTexture == texUnit)
- return;
-
FLUSH_VERTICES(ctx, _NEW_ARRAY);
ctx->Array.ActiveTexture = texUnit;
}
diff --git a/src/mesa/main/texstate.h b/src/mesa/main/texstate.h
index 662435b47cc..bee8c9c3316 100644
--- a/src/mesa/main/texstate.h
+++ b/src/mesa/main/texstate.h
@@ -77,7 +77,7 @@ _mesa_get_tex_unit_err(struct gl_context *ctx, GLuint unit, const char *func)
* implementation."
*/
_mesa_error(ctx, GL_INVALID_OPERATION, "%s(unit=%s)", func,
- _mesa_lookup_enum_by_nr(GL_TEXTURE0+unit));
+ _mesa_enum_to_string(GL_TEXTURE0+unit));
return NULL;
}
diff --git a/src/mesa/main/texstorage.c b/src/mesa/main/texstorage.c
index 53cb2c091f8..4a2cc6065df 100644
--- a/src/mesa/main/texstorage.c
+++ b/src/mesa/main/texstorage.c
@@ -308,7 +308,8 @@ tex_storage_error_check(struct gl_context *ctx,
_mesa_error(ctx, _mesa_is_desktop_gl(ctx)?
GL_INVALID_ENUM : GL_INVALID_OPERATION,
"glTex%sStorage%dD(internalformat = %s)", suffix, dims,
- _mesa_lookup_enum_by_nr(internalformat));
+ _mesa_enum_to_string(internalformat));
+ return GL_TRUE;
}
/* levels check */
@@ -464,21 +465,21 @@ texstorage(GLuint dims, GLenum target, GLsizei levels, GLenum internalformat,
if (!legal_texobj_target(ctx, dims, target)) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glTexStorage%uD(illegal target=%s)",
- dims, _mesa_lookup_enum_by_nr(target));
+ dims, _mesa_enum_to_string(target));
return;
}
if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
_mesa_debug(ctx, "glTexStorage%uD %s %d %s %d %d %d\n",
dims,
- _mesa_lookup_enum_by_nr(target), levels,
- _mesa_lookup_enum_by_nr(internalformat),
+ _mesa_enum_to_string(target), levels,
+ _mesa_enum_to_string(internalformat),
width, height, depth);
/* Check the format to make sure it is sized. */
if (!_mesa_is_legal_tex_storage_format(ctx, internalformat)) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glTexStorage%uD(internalformat = %s)", dims,
- _mesa_lookup_enum_by_nr(internalformat));
+ _mesa_enum_to_string(internalformat));
return;
}
@@ -504,14 +505,14 @@ texturestorage(GLuint dims, GLuint texture, GLsizei levels,
if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
_mesa_debug(ctx, "glTextureStorage%uD %d %d %s %d %d %d\n",
dims, texture, levels,
- _mesa_lookup_enum_by_nr(internalformat),
+ _mesa_enum_to_string(internalformat),
width, height, depth);
/* Check the format to make sure it is sized. */
if (!_mesa_is_legal_tex_storage_format(ctx, internalformat)) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glTextureStorage%uD(internalformat = %s)", dims,
- _mesa_lookup_enum_by_nr(internalformat));
+ _mesa_enum_to_string(internalformat));
return;
}
@@ -529,7 +530,7 @@ texturestorage(GLuint dims, GLuint texture, GLsizei levels,
if (!legal_texobj_target(ctx, dims, texObj->Target)) {
_mesa_error(ctx, GL_INVALID_ENUM,
"glTextureStorage%uD(illegal target=%s)",
- dims, _mesa_lookup_enum_by_nr(texObj->Target));
+ dims, _mesa_enum_to_string(texObj->Target));
return;
}
diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index 1525205981b..37c05690091 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -787,6 +787,7 @@ texstore_rgba(TEXSTORE_PARAMS)
srcType = GL_FLOAT;
srcRowStride = srcWidth * 4 * sizeof(float);
srcMesaFormat = RGBA32_FLOAT;
+ srcPacking = &ctx->DefaultPacking;
}
src = (GLubyte *)
diff --git a/src/mesa/main/textureview.c b/src/mesa/main/textureview.c
index 6b0aed4ea1a..5a3282a40c1 100644
--- a/src/mesa/main/textureview.c
+++ b/src/mesa/main/textureview.c
@@ -313,7 +313,7 @@ target_valid(struct gl_context *ctx, GLenum origTarget, GLenum newTarget)
}
_mesa_error(ctx, GL_INVALID_OPERATION,
"glTextureView(illegal target=%s)",
- _mesa_lookup_enum_by_nr(newTarget));
+ _mesa_enum_to_string(newTarget));
return false;
}
#undef RETURN_IF_SUPPORTED
@@ -435,8 +435,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture,
if (MESA_VERBOSE & (VERBOSE_API | VERBOSE_TEXTURE))
_mesa_debug(ctx, "glTextureView %d %s %d %s %d %d %d %d\n",
- texture, _mesa_lookup_enum_by_nr(target), origtexture,
- _mesa_lookup_enum_by_nr(internalformat),
+ texture, _mesa_enum_to_string(target), origtexture,
+ _mesa_enum_to_string(internalformat),
minlevel, numlevels, minlayer, numlayers);
if (origtexture == 0) {
@@ -523,8 +523,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture,
internalformat)) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glTextureView(internalformat %s not compatible with origtexture %s)",
- _mesa_lookup_enum_by_nr(internalformat),
- _mesa_lookup_enum_by_nr(origTexObj->Image[0][0]->InternalFormat));
+ _mesa_enum_to_string(internalformat),
+ _mesa_enum_to_string(origTexObj->Image[0][0]->InternalFormat));
return;
}
diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp
index cab5083e81b..036530e91b6 100644
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -978,81 +978,6 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
}
-/**
- * Called via glGetUniformLocation().
- *
- * Returns the uniform index into UniformStorage (also the
- * glGetActiveUniformsiv uniform index), and stores the referenced
- * array offset in *offset, or GL_INVALID_INDEX (-1).
- */
-extern "C" unsigned
-_mesa_get_uniform_location(struct gl_shader_program *shProg,
- const GLchar *name,
- unsigned *out_offset)
-{
- /* Page 80 (page 94 of the PDF) of the OpenGL 2.1 spec says:
- *
- * "The first element of a uniform array is identified using the
- * name of the uniform array appended with "[0]". Except if the last
- * part of the string name indicates a uniform array, then the
- * location of the first element of that array can be retrieved by
- * either using the name of the uniform array, or the name of the
- * uniform array appended with "[0]"."
- *
- * Note: since uniform names are not allowed to use whitespace, and array
- * indices within uniform names are not allowed to use "+", "-", or leading
- * zeros, it follows that each uniform has a unique name up to the possible
- * ambiguity with "[0]" noted above. Therefore we don't need to worry
- * about mal-formed inputs--they will properly fail when we try to look up
- * the uniform name in shProg->UniformHash.
- */
-
- const GLchar *base_name_end;
- long offset = parse_program_resource_name(name, &base_name_end);
- bool array_lookup = offset >= 0;
- char *name_copy;
-
- if (array_lookup) {
- name_copy = (char *) malloc(base_name_end - name + 1);
- memcpy(name_copy, name, base_name_end - name);
- name_copy[base_name_end - name] = '\0';
- } else {
- name_copy = (char *) name;
- offset = 0;
- }
-
- unsigned location = 0;
- const bool found = shProg->UniformHash->get(location, name_copy);
-
- assert(!found
- || strcmp(name_copy, shProg->UniformStorage[location].name) == 0);
-
- /* Free the temporary buffer *before* possibly returning an error.
- */
- if (name_copy != name)
- free(name_copy);
-
- if (!found)
- return GL_INVALID_INDEX;
-
- /* If the uniform is built-in, fail. */
- if (shProg->UniformStorage[location].builtin)
- return GL_INVALID_INDEX;
-
- /* If the uniform is an array, fail if the index is out of bounds.
- * (A negative index is caught above.) This also fails if the uniform
- * is not an array, but the user is trying to index it, because
- * array_elements is zero and offset >= 0.
- */
- if (array_lookup
- && offset >= (long) shProg->UniformStorage[location].array_elements) {
- return GL_INVALID_INDEX;
- }
-
- *out_offset = offset;
- return location;
-}
-
extern "C" bool
_mesa_sampler_uniforms_are_valid(const struct gl_shader_program *shProg,
char *errMsg, size_t errMsgLength)
@@ -1101,18 +1026,23 @@ _mesa_sampler_uniforms_pipeline_are_valid(struct gl_pipeline_object *pipeline)
for (unsigned i = 0; i < shProg[idx]->NumUniformStorage; i++) {
const struct gl_uniform_storage *const storage =
&shProg[idx]->UniformStorage[i];
- const glsl_type *const t = (storage->type->is_array())
- ? storage->type->fields.array : storage->type;
- if (!t->is_sampler())
+ if (!storage->type->is_sampler())
continue;
active_samplers++;
- const unsigned count = MAX2(1, storage->type->array_size());
+ const unsigned count = MAX2(1, storage->array_elements);
for (unsigned j = 0; j < count; j++) {
const unsigned unit = storage->storage[j].i;
+ /* FIXME: Samplers are initialized to 0 and Mesa doesn't do a
+ * great job of eliminating unused uniforms currently so for now
+ * don't throw an error if two sampler types both point to 0.
+ */
+ if (unit == 0)
+ continue;
+
/* The types of the samplers associated with a particular texture
* unit must be an exact match. Page 74 (page 89 of the PDF) of
* the OpenGL 3.3 core spec says:
@@ -1122,13 +1052,14 @@ _mesa_sampler_uniforms_pipeline_are_valid(struct gl_pipeline_object *pipeline)
* program object."
*/
if (unit_types[unit] == NULL) {
- unit_types[unit] = t;
- } else if (unit_types[unit] != t) {
+ unit_types[unit] = storage->type;
+ } else if (unit_types[unit] != storage->type) {
pipeline->InfoLog =
ralloc_asprintf(pipeline,
"Texture unit %d is accessed both as %s "
"and %s",
- unit, unit_types[unit]->name, t->name);
+ unit, unit_types[unit]->name,
+ storage->type->name);
return false;
}
}
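
Note on the uniform_query.cpp hunks above: the removed helper documented the "[0]" array-suffix rule for uniform names, which the resource-list lookup now handles instead. A minimal standalone sketch of that splitting rule, with a hypothetical split_array_suffix() helper (not Mesa's parse_program_resource_name(), and omitting the real code's rejection of '+', '-', and leading zeros):

/* "colors[3]" yields base name "colors" and offset 3; a bare "colors"
 * (or "colors[0]") refers to element 0.  Hypothetical helper, sketch only. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static long
split_array_suffix(const char *name, char *base, size_t base_size)
{
   const char *bracket = strrchr(name, '[');
   if (!bracket || name[strlen(name) - 1] != ']') {
      snprintf(base, base_size, "%s", name);
      return -1;                      /* no index: caller treats it as 0 */
   }
   snprintf(base, base_size, "%.*s", (int)(bracket - name), name);
   return strtol(bracket + 1, NULL, 10);
}

int
main(void)
{
   char base[64];
   long off = split_array_suffix("colors[3]", base, sizeof(base));
   printf("colors[3] -> base \"%s\", offset %ld\n", base, off);
   return 0;
}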
diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index 5548d1d026f..ff1df72e1d6 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -952,7 +952,7 @@ _mesa_GetUniformBlockIndex(GLuint program,
struct gl_program_resource *res =
_mesa_program_resource_find_name(shProg, GL_UNIFORM_BLOCK,
- uniformBlockName);
+ uniformBlockName, NULL);
if (!res)
return GL_INVALID_INDEX;
@@ -987,7 +987,8 @@ _mesa_GetUniformIndices(GLuint program,
for (i = 0; i < uniformCount; i++) {
struct gl_program_resource *res =
- _mesa_program_resource_find_name(shProg, GL_UNIFORM, uniformNames[i]);
+ _mesa_program_resource_find_name(shProg, GL_UNIFORM, uniformNames[i],
+ NULL);
uniformIndices[i] = _mesa_program_resource_index(shProg, res);
}
}
@@ -1092,6 +1093,21 @@ mesa_bufferiv(struct gl_shader_program *shProg, GLenum type,
GL_REFERENCED_BY_VERTEX_SHADER, params,
caller);
return;
+
+ case GL_UNIFORM_BLOCK_REFERENCED_BY_TESS_CONTROL_SHADER:
+ case GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_CONTROL_SHADER:
+ _mesa_program_resource_prop(shProg, res, index,
+ GL_REFERENCED_BY_TESS_CONTROL_SHADER, params,
+ caller);
+ return;
+
+ case GL_UNIFORM_BLOCK_REFERENCED_BY_TESS_EVALUATION_SHADER:
+ case GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_EVALUATION_SHADER:
+ _mesa_program_resource_prop(shProg, res, index,
+ GL_REFERENCED_BY_TESS_EVALUATION_SHADER, params,
+ caller);
+ return;
+
case GL_UNIFORM_BLOCK_REFERENCED_BY_GEOMETRY_SHADER:
case GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_GEOMETRY_SHADER:
_mesa_program_resource_prop(shProg, res, index,
@@ -1104,16 +1120,10 @@ mesa_bufferiv(struct gl_shader_program *shProg, GLenum type,
GL_REFERENCED_BY_FRAGMENT_SHADER, params,
caller);
return;
- case GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_CONTROL_SHADER:
- params[0] = GL_FALSE;
- return;
- case GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_EVALUATION_SHADER:
- params[0] = GL_FALSE;
- return;
default:
_mesa_error(ctx, GL_INVALID_ENUM,
"%s(pname 0x%x (%s))", caller, pname,
- _mesa_lookup_enum_by_nr(pname));
+ _mesa_enum_to_string(pname));
return;
}
}
diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h
index bd7b05e207a..e62eaa53ccc 100644
--- a/src/mesa/main/uniforms.h
+++ b/src/mesa/main/uniforms.h
@@ -343,10 +343,6 @@ void GLAPIENTRY
_mesa_ProgramUniformMatrix4x3dv(GLuint program, GLint location, GLsizei count,
GLboolean transpose, const GLdouble *value);
-unsigned
-_mesa_get_uniform_location(struct gl_shader_program *shProg,
- const GLchar *name, unsigned *offset);
-
void
_mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shader_program,
GLint location, GLsizei count,
diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c
index ebdd9eaf02e..3bab9850588 100644
--- a/src/mesa/main/varray.c
+++ b/src/mesa/main/varray.c
@@ -300,7 +300,7 @@ update_array_format(struct gl_context *ctx,
typeBit = type_to_bit(ctx, type);
if (typeBit == 0x0 || (typeBit & legalTypesMask) == 0x0) {
_mesa_error(ctx, GL_INVALID_ENUM, "%s(type = %s)",
- func, _mesa_lookup_enum_by_nr(type));
+ func, _mesa_enum_to_string(type));
return false;
}
@@ -333,7 +333,7 @@ update_array_format(struct gl_context *ctx,
if (bgra_error) {
_mesa_error(ctx, GL_INVALID_OPERATION, "%s(size=GL_BGRA and type=%s)",
- func, _mesa_lookup_enum_by_nr(type));
+ func, _mesa_enum_to_string(type));
return false;
}
@@ -2310,7 +2310,7 @@ print_array(const char *name, GLint index, const struct gl_client_array *array)
else
fprintf(stderr, " %s: ", name);
fprintf(stderr, "Ptr=%p, Type=%s, Size=%d, ElemSize=%u, Stride=%d, Buffer=%u(Size %lu)\n",
- array->Ptr, _mesa_lookup_enum_by_nr(array->Type), array->Size,
+ array->Ptr, _mesa_enum_to_string(array->Type), array->Size,
array->_ElementSize, array->StrideB, array->BufferObj->Name,
(unsigned long) array->BufferObj->Size);
}
diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c
index 8bc00ace5c4..fd7ae53ccbd 100644
--- a/src/mesa/main/version.c
+++ b/src/mesa/main/version.c
@@ -309,7 +309,7 @@ compute_version(const struct gl_extensions *extensions,
extensions->ARB_gpu_shader5 &&
extensions->ARB_gpu_shader_fp64 &&
extensions->ARB_sample_shading &&
- false /*extensions->ARB_shader_subroutine*/ &&
+ extensions->ARB_shader_subroutine &&
extensions->ARB_tessellation_shader &&
extensions->ARB_texture_buffer_object_rgb32 &&
extensions->ARB_texture_cube_map_array &&
diff --git a/src/mesa/main/viewport.c b/src/mesa/main/viewport.c
index b27063031c4..7d8914291c3 100644
--- a/src/mesa/main/viewport.c
+++ b/src/mesa/main/viewport.c
@@ -391,8 +391,8 @@ _mesa_ClipControl(GLenum origin, GLenum depth)
if (MESA_VERBOSE&VERBOSE_API)
_mesa_debug(ctx, "glClipControl(%s, %s)\n",
- _mesa_lookup_enum_by_nr(origin),
- _mesa_lookup_enum_by_nr(depth));
+ _mesa_enum_to_string(origin),
+ _mesa_enum_to_string(depth));
ASSERT_OUTSIDE_BEGIN_END(ctx);
@@ -443,12 +443,12 @@ _mesa_ClipControl(GLenum origin, GLenum depth)
*/
void
_mesa_get_viewport_xform(struct gl_context *ctx, unsigned i,
- double scale[3], double translate[3])
+ float scale[3], float translate[3])
{
- double x = ctx->ViewportArray[i].X;
- double y = ctx->ViewportArray[i].Y;
- double half_width = 0.5*ctx->ViewportArray[i].Width;
- double half_height = 0.5*ctx->ViewportArray[i].Height;
+ float x = ctx->ViewportArray[i].X;
+ float y = ctx->ViewportArray[i].Y;
+ float half_width = 0.5f * ctx->ViewportArray[i].Width;
+ float half_height = 0.5f * ctx->ViewportArray[i].Height;
double n = ctx->ViewportArray[i].Near;
double f = ctx->ViewportArray[i].Far;
@@ -462,8 +462,8 @@ _mesa_get_viewport_xform(struct gl_context *ctx, unsigned i,
translate[1] = half_height + y;
}
if (ctx->Transform.ClipDepthMode == GL_NEGATIVE_ONE_TO_ONE) {
- scale[2] = 0.5*(f - n);
- translate[2] = 0.5*(n + f);
+ scale[2] = 0.5 * (f - n);
+ translate[2] = 0.5 * (n + f);
} else {
scale[2] = f - n;
translate[2] = n;
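
For context on the double-to-float change in _mesa_get_viewport_xform(): the scale/translate terms computed above map normalized device coordinates to window coordinates. A minimal sketch of how a consumer applies them; apply_viewport() is a hypothetical helper and the viewport values are for illustration only:

/* Applies the scale/translate produced by _mesa_get_viewport_xform()
 * to a normalized device coordinate.  Sketch only, not a Mesa function. */
#include <stdio.h>

static void
apply_viewport(const float scale[3], const float translate[3],
               const float ndc[3], float win[3])
{
   for (int i = 0; i < 3; i++)
      win[i] = ndc[i] * scale[i] + translate[i];
}

int
main(void)
{
   /* e.g. a 640x480 viewport at (0,0), near=0, far=1, GL_NEGATIVE_ONE_TO_ONE */
   const float scale[3]     = { 320.0f, 240.0f, 0.5f };
   const float translate[3] = { 320.0f, 240.0f, 0.5f };
   const float ndc[3]       = { 0.0f, 0.0f, 0.0f };   /* center, mid-depth */
   float win[3];

   apply_viewport(scale, translate, ndc, win);
   printf("window coords: %.1f %.1f %.2f\n", win[0], win[1], win[2]);
   return 0;
}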
diff --git a/src/mesa/main/viewport.h b/src/mesa/main/viewport.h
index 899dc2d0bcc..b0675db1096 100644
--- a/src/mesa/main/viewport.h
+++ b/src/mesa/main/viewport.h
@@ -73,6 +73,6 @@ _mesa_ClipControl(GLenum origin, GLenum depth);
extern void
_mesa_get_viewport_xform(struct gl_context *ctx, unsigned i,
- double scale[3], double translate[3]);
+ float scale[3], float translate[3]);
#endif
diff --git a/src/mesa/math/m_clip_tmp.h b/src/mesa/math/m_clip_tmp.h
index e289be7b302..60c00043725 100644
--- a/src/mesa/math/m_clip_tmp.h
+++ b/src/mesa/math/m_clip_tmp.h
@@ -194,13 +194,13 @@ static GLvector4f * TAG(cliptest_points3)( GLvector4f *clip_vec,
STRIDE_LOOP {
const GLfloat cx = from[0], cy = from[1], cz = from[2];
GLubyte mask = 0;
- if (cx > 1.0) mask |= CLIP_RIGHT_BIT;
- else if (cx < -1.0) mask |= CLIP_LEFT_BIT;
- if (cy > 1.0) mask |= CLIP_TOP_BIT;
- else if (cy < -1.0) mask |= CLIP_BOTTOM_BIT;
+ if (cx > 1.0F) mask |= CLIP_RIGHT_BIT;
+ else if (cx < -1.0F) mask |= CLIP_LEFT_BIT;
+ if (cy > 1.0F) mask |= CLIP_TOP_BIT;
+ else if (cy < -1.0F) mask |= CLIP_BOTTOM_BIT;
if (viewport_z_clip) {
- if (cz > 1.0) mask |= CLIP_FAR_BIT;
- else if (cz < -1.0) mask |= CLIP_NEAR_BIT;
+ if (cz > 1.0F) mask |= CLIP_FAR_BIT;
+ else if (cz < -1.0F) mask |= CLIP_NEAR_BIT;
}
clipMask[i] = mask;
tmpOrMask |= mask;
@@ -230,10 +230,10 @@ static GLvector4f * TAG(cliptest_points2)( GLvector4f *clip_vec,
STRIDE_LOOP {
const GLfloat cx = from[0], cy = from[1];
GLubyte mask = 0;
- if (cx > 1.0) mask |= CLIP_RIGHT_BIT;
- else if (cx < -1.0) mask |= CLIP_LEFT_BIT;
- if (cy > 1.0) mask |= CLIP_TOP_BIT;
- else if (cy < -1.0) mask |= CLIP_BOTTOM_BIT;
+ if (cx > 1.0F) mask |= CLIP_RIGHT_BIT;
+ else if (cx < -1.0F) mask |= CLIP_LEFT_BIT;
+ if (cy > 1.0F) mask |= CLIP_TOP_BIT;
+ else if (cy < -1.0F) mask |= CLIP_BOTTOM_BIT;
clipMask[i] = mask;
tmpOrMask |= mask;
tmpAndMask &= mask;
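
The OR/AND mask accumulation in these cliptest loops is what lets callers classify a whole vertex run at once. A minimal sketch of that classification; classify_run() and the enum are hypothetical names for illustration:

/* orMask == 0  : every vertex inside all planes, no clipping needed
 * andMask != 0 : every vertex outside the same plane, the run can be culled
 * otherwise    : the run straddles a plane and must be clipped. */
#include <stdio.h>

enum run_class { RUN_TRIVIAL_ACCEPT, RUN_TRIVIAL_REJECT, RUN_NEEDS_CLIP };

static enum run_class
classify_run(unsigned char orMask, unsigned char andMask)
{
   if (orMask == 0)
      return RUN_TRIVIAL_ACCEPT;
   if (andMask != 0)
      return RUN_TRIVIAL_REJECT;
   return RUN_NEEDS_CLIP;
}

int
main(void)
{
   printf("%d %d %d\n",
          classify_run(0x00, 0x00),     /* all inside               */
          classify_run(0x02, 0x02),     /* all outside one plane    */
          classify_run(0x03, 0x00));    /* straddles a plane        */
   return 0;
}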
diff --git a/src/mesa/math/m_matrix.c b/src/mesa/math/m_matrix.c
index ecf564c0089..6522200b345 100644
--- a/src/mesa/math/m_matrix.c
+++ b/src/mesa/math/m_matrix.c
@@ -380,7 +380,7 @@ static GLboolean invert_matrix_general( GLmatrix *mat )
if (fabsf(r3[0])>fabsf(r2[0])) SWAP_ROWS(r3, r2);
if (fabsf(r2[0])>fabsf(r1[0])) SWAP_ROWS(r2, r1);
if (fabsf(r1[0])>fabsf(r0[0])) SWAP_ROWS(r1, r0);
- if (0.0 == r0[0]) return GL_FALSE;
+ if (0.0F == r0[0]) return GL_FALSE;
/* eliminate first variable */
m1 = r1[0]/r0[0]; m2 = r2[0]/r0[0]; m3 = r3[0]/r0[0];
@@ -388,31 +388,31 @@ static GLboolean invert_matrix_general( GLmatrix *mat )
s = r0[2]; r1[2] -= m1 * s; r2[2] -= m2 * s; r3[2] -= m3 * s;
s = r0[3]; r1[3] -= m1 * s; r2[3] -= m2 * s; r3[3] -= m3 * s;
s = r0[4];
- if (s != 0.0) { r1[4] -= m1 * s; r2[4] -= m2 * s; r3[4] -= m3 * s; }
+ if (s != 0.0F) { r1[4] -= m1 * s; r2[4] -= m2 * s; r3[4] -= m3 * s; }
s = r0[5];
- if (s != 0.0) { r1[5] -= m1 * s; r2[5] -= m2 * s; r3[5] -= m3 * s; }
+ if (s != 0.0F) { r1[5] -= m1 * s; r2[5] -= m2 * s; r3[5] -= m3 * s; }
s = r0[6];
- if (s != 0.0) { r1[6] -= m1 * s; r2[6] -= m2 * s; r3[6] -= m3 * s; }
+ if (s != 0.0F) { r1[6] -= m1 * s; r2[6] -= m2 * s; r3[6] -= m3 * s; }
s = r0[7];
- if (s != 0.0) { r1[7] -= m1 * s; r2[7] -= m2 * s; r3[7] -= m3 * s; }
+ if (s != 0.0F) { r1[7] -= m1 * s; r2[7] -= m2 * s; r3[7] -= m3 * s; }
/* choose pivot - or die */
if (fabsf(r3[1])>fabsf(r2[1])) SWAP_ROWS(r3, r2);
if (fabsf(r2[1])>fabsf(r1[1])) SWAP_ROWS(r2, r1);
- if (0.0 == r1[1]) return GL_FALSE;
+ if (0.0F == r1[1]) return GL_FALSE;
/* eliminate second variable */
m2 = r2[1]/r1[1]; m3 = r3[1]/r1[1];
r2[2] -= m2 * r1[2]; r3[2] -= m3 * r1[2];
r2[3] -= m2 * r1[3]; r3[3] -= m3 * r1[3];
- s = r1[4]; if (0.0 != s) { r2[4] -= m2 * s; r3[4] -= m3 * s; }
- s = r1[5]; if (0.0 != s) { r2[5] -= m2 * s; r3[5] -= m3 * s; }
- s = r1[6]; if (0.0 != s) { r2[6] -= m2 * s; r3[6] -= m3 * s; }
- s = r1[7]; if (0.0 != s) { r2[7] -= m2 * s; r3[7] -= m3 * s; }
+ s = r1[4]; if (0.0F != s) { r2[4] -= m2 * s; r3[4] -= m3 * s; }
+ s = r1[5]; if (0.0F != s) { r2[5] -= m2 * s; r3[5] -= m3 * s; }
+ s = r1[6]; if (0.0F != s) { r2[6] -= m2 * s; r3[6] -= m3 * s; }
+ s = r1[7]; if (0.0F != s) { r2[7] -= m2 * s; r3[7] -= m3 * s; }
/* choose pivot - or die */
if (fabsf(r3[2])>fabsf(r2[2])) SWAP_ROWS(r3, r2);
- if (0.0 == r2[2]) return GL_FALSE;
+ if (0.0F == r2[2]) return GL_FALSE;
/* eliminate third variable */
m3 = r3[2]/r2[2];
@@ -421,7 +421,7 @@ static GLboolean invert_matrix_general( GLmatrix *mat )
r3[7] -= m3 * r2[7];
/* last check */
- if (0.0 == r3[3]) return GL_FALSE;
+ if (0.0F == r3[3]) return GL_FALSE;
s = 1.0F/r3[3]; /* now back substitute row 3 */
r3[4] *= s; r3[5] *= s; r3[6] *= s; r3[7] *= s;
@@ -490,26 +490,26 @@ static GLboolean invert_matrix_3d_general( GLmatrix *mat )
*/
pos = neg = 0.0;
t = MAT(in,0,0) * MAT(in,1,1) * MAT(in,2,2);
- if (t >= 0.0) pos += t; else neg += t;
+ if (t >= 0.0F) pos += t; else neg += t;
t = MAT(in,1,0) * MAT(in,2,1) * MAT(in,0,2);
- if (t >= 0.0) pos += t; else neg += t;
+ if (t >= 0.0F) pos += t; else neg += t;
t = MAT(in,2,0) * MAT(in,0,1) * MAT(in,1,2);
- if (t >= 0.0) pos += t; else neg += t;
+ if (t >= 0.0F) pos += t; else neg += t;
t = -MAT(in,2,0) * MAT(in,1,1) * MAT(in,0,2);
- if (t >= 0.0) pos += t; else neg += t;
+ if (t >= 0.0F) pos += t; else neg += t;
t = -MAT(in,1,0) * MAT(in,0,1) * MAT(in,2,2);
- if (t >= 0.0) pos += t; else neg += t;
+ if (t >= 0.0F) pos += t; else neg += t;
t = -MAT(in,0,0) * MAT(in,2,1) * MAT(in,1,2);
- if (t >= 0.0) pos += t; else neg += t;
+ if (t >= 0.0F) pos += t; else neg += t;
det = pos + neg;
- if (fabsf(det) < 1e-25)
+ if (fabsf(det) < 1e-25F)
return GL_FALSE;
det = 1.0F / det;
@@ -564,7 +564,7 @@ static GLboolean invert_matrix_3d( GLmatrix *mat )
MAT(in,0,1) * MAT(in,0,1) +
MAT(in,0,2) * MAT(in,0,2));
- if (scale == 0.0)
+ if (scale == 0.0F)
return GL_FALSE;
scale = 1.0F / scale;
@@ -799,8 +799,8 @@ _math_matrix_rotate( GLmatrix *mat,
GLfloat m[16];
GLboolean optimized;
- s = (GLfloat) sin( angle * M_PI / 180.0 );
- c = (GLfloat) cos( angle * M_PI / 180.0 );
+ s = sinf( angle * M_PI / 180.0 );
+ c = cosf( angle * M_PI / 180.0 );
memcpy(m, Identity, sizeof(GLfloat)*16);
optimized = GL_FALSE;
@@ -859,7 +859,7 @@ _math_matrix_rotate( GLmatrix *mat,
if (!optimized) {
const GLfloat mag = sqrtf(x * x + y * y + z * z);
- if (mag <= 1.0e-4) {
+ if (mag <= 1.0e-4F) {
/* no rotation, leave mat as-is */
return;
}
@@ -1070,7 +1070,7 @@ _math_matrix_scale( GLmatrix *mat, GLfloat x, GLfloat y, GLfloat z )
m[2] *= x; m[6] *= y; m[10] *= z;
m[3] *= x; m[7] *= y; m[11] *= z;
- if (fabsf(x - y) < 1e-8 && fabsf(x - z) < 1e-8)
+ if (fabsf(x - y) < 1e-8F && fabsf(x - z) < 1e-8F)
mat->flags |= MAT_FLAG_UNIFORM_SCALE;
else
mat->flags |= MAT_FLAG_GENERAL_SCALE;
@@ -1111,8 +1111,8 @@ _math_matrix_translate( GLmatrix *mat, GLfloat x, GLfloat y, GLfloat z )
* Transforms Normalized Device Coords to window/Z values.
*/
void
-_math_matrix_viewport(GLmatrix *m, const double scale[3],
- const double translate[3], double depthMax)
+_math_matrix_viewport(GLmatrix *m, const float scale[3],
+ const float translate[3], double depthMax)
{
m->m[MAT_SX] = scale[0];
m->m[MAT_TX] = translate[0];
@@ -1206,7 +1206,7 @@ static void analyse_from_scratch( GLmatrix *mat )
GLuint i;
for (i = 0 ; i < 16 ; i++) {
- if (m[i] == 0.0) mask |= (1<<i);
+ if (m[i] == 0.0F) mask |= (1<<i);
}
if (m[0] == 1.0F) mask |= (1<<16);
@@ -1240,12 +1240,12 @@ static void analyse_from_scratch( GLmatrix *mat )
mat->type = MATRIX_2D;
/* Check for scale */
- if (SQ(mm-1) > SQ(1e-6) ||
- SQ(m4m4-1) > SQ(1e-6))
+ if (SQ(mm-1) > SQ(1e-6F) ||
+ SQ(m4m4-1) > SQ(1e-6F))
mat->flags |= MAT_FLAG_GENERAL_SCALE;
/* Check for rotation */
- if (SQ(mm4) > SQ(1e-6))
+ if (SQ(mm4) > SQ(1e-6F))
mat->flags |= MAT_FLAG_GENERAL_3D;
else
mat->flags |= MAT_FLAG_ROTATION;
@@ -1255,9 +1255,9 @@ static void analyse_from_scratch( GLmatrix *mat )
mat->type = MATRIX_3D_NO_ROT;
/* Check for scale */
- if (SQ(m[0]-m[5]) < SQ(1e-6) &&
- SQ(m[0]-m[10]) < SQ(1e-6)) {
- if (SQ(m[0]-1.0) > SQ(1e-6)) {
+ if (SQ(m[0]-m[5]) < SQ(1e-6F) &&
+ SQ(m[0]-m[10]) < SQ(1e-6F)) {
+ if (SQ(m[0]-1.0F) > SQ(1e-6F)) {
mat->flags |= MAT_FLAG_UNIFORM_SCALE;
}
}
@@ -1275,8 +1275,8 @@ static void analyse_from_scratch( GLmatrix *mat )
mat->type = MATRIX_3D;
/* Check for scale */
- if (SQ(c1-c2) < SQ(1e-6) && SQ(c1-c3) < SQ(1e-6)) {
- if (SQ(c1-1.0) > SQ(1e-6))
+ if (SQ(c1-c2) < SQ(1e-6F) && SQ(c1-c3) < SQ(1e-6F)) {
+ if (SQ(c1-1.0F) > SQ(1e-6F))
mat->flags |= MAT_FLAG_UNIFORM_SCALE;
/* else no scale at all */
}
@@ -1285,10 +1285,10 @@ static void analyse_from_scratch( GLmatrix *mat )
}
/* Check for rotation */
- if (SQ(d1) < SQ(1e-6)) {
+ if (SQ(d1) < SQ(1e-6F)) {
CROSS3( cp, m, m+4 );
SUB_3V( cp, cp, (m+8) );
- if (LEN_SQUARED_3FV(cp) < SQ(1e-6))
+ if (LEN_SQUARED_3FV(cp) < SQ(1e-6F))
mat->flags |= MAT_FLAG_ROTATION;
else
mat->flags |= MAT_FLAG_GENERAL_3D;
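
The literal-suffix changes throughout m_matrix.c are purely about keeping the GLfloat arithmetic in single precision. A minimal standalone illustration of the implicit promotion an unsuffixed literal causes (values are arbitrary):

/* With an unsuffixed literal the float operand is promoted and the expression
 * is evaluated in double precision; the F suffix keeps it in float. */
#include <stdio.h>

int
main(void)
{
   float s = 0.3f;

   double promoted = s * 0.5;    /* float * double -> double */
   float  kept     = s * 0.5f;   /* float * float  -> float  */

   printf("result sizes: %zu (double) vs %zu (float)\n",
          sizeof(s * 0.5), sizeof(s * 0.5f));
   printf("%f %f\n", promoted, (double) kept);
   return 0;
}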
diff --git a/src/mesa/math/m_matrix.h b/src/mesa/math/m_matrix.h
index 778d716dce7..c34d9e3022f 100644
--- a/src/mesa/math/m_matrix.h
+++ b/src/mesa/math/m_matrix.h
@@ -122,8 +122,8 @@ _math_matrix_frustum( GLmatrix *mat,
GLfloat nearval, GLfloat farval );
extern void
-_math_matrix_viewport( GLmatrix *m, const double scale[3],
- const double translate[3], double depthMax );
+_math_matrix_viewport( GLmatrix *m, const float scale[3],
+ const float translate[3], double depthMax );
extern void
_math_matrix_set_identity( GLmatrix *dest );
diff --git a/src/mesa/math/m_norm_tmp.h b/src/mesa/math/m_norm_tmp.h
index d3ec1c22ecd..6f1db8d0bd0 100644
--- a/src/mesa/math/m_norm_tmp.h
+++ b/src/mesa/math/m_norm_tmp.h
@@ -80,7 +80,7 @@ TAG(transform_normalize_normals)( const GLmatrix *mat,
}
}
else {
- if (scale != 1.0) {
+ if (scale != 1.0f) {
m0 *= scale, m4 *= scale, m8 *= scale;
m1 *= scale, m5 *= scale, m9 *= scale;
m2 *= scale, m6 *= scale, m10 *= scale;
diff --git a/src/mesa/math/m_vector.h b/src/mesa/math/m_vector.h
index 8551ee7520e..5bd76b8987d 100644
--- a/src/mesa/math/m_vector.h
+++ b/src/mesa/math/m_vector.h
@@ -51,7 +51,7 @@
/**
* Wrap all the information about vectors up in a struct. Has
- * additional fields compared to the other vectors to help us track of
+ * additional fields compared to the other vectors to help us track
* different vertex sizes, and whether we need to clean columns out
* because they contain non-(0,0,0,1) values.
*
@@ -61,7 +61,7 @@
*/
typedef struct {
GLfloat (*data)[4]; /**< may be malloc'd or point to client data */
- GLfloat *start; /**< points somewhere inside of <data> */
+ GLfloat *start; /**< points somewhere inside of GLvector4f::data */
GLuint count; /**< size of the vector (in elements) */
GLuint stride; /**< stride from one element to the next (in bytes) */
GLuint size; /**< 2-4 for vertices and 1-4 for texcoords */
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 3bffe90ff1f..b8b082e2a59 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -534,6 +534,7 @@ type_size(const struct glsl_type *type)
return size;
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_SUBROUTINE:
/* Samplers take up one slot in UNIFORMS[], but they're baked in
* at link time.
*/
@@ -1343,6 +1344,7 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
case ir_unop_dFdx_fine:
case ir_unop_dFdy_coarse:
case ir_unop_dFdy_fine:
+ case ir_unop_subroutine_to_int:
assert(!"not supported");
break;
@@ -2385,7 +2387,7 @@ _mesa_generate_parameters_list_for_uniforms(struct gl_shader_program
ir_variable *var = node->as_variable();
if ((var == NULL) || (var->data.mode != ir_var_uniform)
- || var->is_in_uniform_block() || (strncmp(var->name, "gl_", 3) == 0))
+ || var->is_in_buffer_block() || (strncmp(var->name, "gl_", 3) == 0))
continue;
add.process(var);
@@ -2452,6 +2454,7 @@ _mesa_associate_uniform_storage(struct gl_context *ctx,
break;
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_SUBROUTINE:
format = uniform_native;
columns = 1;
break;
@@ -2912,7 +2915,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
|| options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
progress =
- lower_variable_index_to_cond_assign(ir,
+ lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
options->EmitNoIndirectInput,
options->EmitNoIndirectOutput,
options->EmitNoIndirectTemp,
@@ -2977,6 +2980,8 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
if (prog->LinkStatus) {
if (!ctx->Driver.LinkShader(ctx, prog)) {
prog->LinkStatus = GL_FALSE;
+ } else {
+ build_program_resource_list(ctx, prog);
}
}
diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c
index 46260b54882..2c52d0db508 100644
--- a/src/mesa/program/prog_execute.c
+++ b/src/mesa/program/prog_execute.c
@@ -623,7 +623,7 @@ _mesa_execute_program(struct gl_context * ctx,
GLfloat a[4], result[4];
fetch_vector1(&inst->SrcReg[0], machine, a);
result[0] = result[1] = result[2] = result[3]
- = (GLfloat) cos(a[0]);
+ = cosf(a[0]);
store_vector4(inst, machine, result);
}
break;
@@ -723,7 +723,7 @@ _mesa_execute_program(struct gl_context * ctx,
* result.z = result.x * APPX(result.y)
* We do what the ARB extension says.
*/
- q[2] = (GLfloat) pow(2.0, t[0]);
+ q[2] = exp2f(t[0]);
}
q[1] = t[0] - floor_t0;
q[3] = 1.0F;
@@ -734,7 +734,7 @@ _mesa_execute_program(struct gl_context * ctx,
{
GLfloat a[4], result[4], val;
fetch_vector1(&inst->SrcReg[0], machine, a);
- val = (GLfloat) pow(2.0, a[0]);
+ val = exp2f(a[0]);
/*
if (IS_INF_OR_NAN(val))
val = 1.0e10;
@@ -776,7 +776,7 @@ _mesa_execute_program(struct gl_context * ctx,
if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
GLfloat a[4];
fetch_vector1(&inst->SrcReg[0], machine, a);
- cond = (a[0] != 0.0);
+ cond = (a[0] != 0.0F);
}
else {
cond = eval_condition(machine, inst);
@@ -834,7 +834,7 @@ _mesa_execute_program(struct gl_context * ctx,
val = -FLT_MAX;
}
else {
- val = (float)(log(a[0]) * 1.442695F);
+ val = logf(a[0]) * 1.442695F;
}
result[0] = result[1] = result[2] = result[3] = val;
store_vector4(inst, machine, result);
@@ -853,10 +853,10 @@ _mesa_execute_program(struct gl_context * ctx,
result[1] = a[0];
/* XXX we could probably just use pow() here */
if (a[0] > 0.0F) {
- if (a[1] == 0.0 && a[3] == 0.0)
+ if (a[1] == 0.0F && a[3] == 0.0F)
result[2] = 1.0F;
else
- result[2] = (GLfloat) pow(a[1], a[3]);
+ result[2] = powf(a[1], a[3]);
}
else {
result[2] = 0.0F;
@@ -886,12 +886,12 @@ _mesa_execute_program(struct gl_context * ctx,
int exponent;
GLfloat mantissa = frexpf(t[0], &exponent);
q[0] = (GLfloat) (exponent - 1);
- q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
+ q[1] = 2.0F * mantissa; /* map [.5, 1) -> [1, 2) */
/* The fast LOG2 macro doesn't meet the precision
* requirements.
*/
- q[2] = (float)(log(t[0]) * 1.442695F);
+ q[2] = logf(t[0]) * 1.442695F;
}
}
else {
@@ -1051,7 +1051,7 @@ _mesa_execute_program(struct gl_context * ctx,
fetch_vector1(&inst->SrcReg[0], machine, a);
fetch_vector1(&inst->SrcReg[1], machine, b);
result[0] = result[1] = result[2] = result[3]
- = (GLfloat) pow(a[0], b[0]);
+ = powf(a[0], b[0]);
store_vector4(inst, machine, result);
}
break;
@@ -1095,10 +1095,10 @@ _mesa_execute_program(struct gl_context * ctx,
{
GLfloat a[4], result[4];
fetch_vector1(&inst->SrcReg[0], machine, a);
- result[0] = (GLfloat) cos(a[0]);
- result[1] = (GLfloat) sin(a[0]);
- result[2] = 0.0; /* undefined! */
- result[3] = 0.0; /* undefined! */
+ result[0] = cosf(a[0]);
+ result[1] = sinf(a[0]);
+ result[2] = 0.0F; /* undefined! */
+ result[3] = 0.0F; /* undefined! */
store_vector4(inst, machine, result);
}
break;
@@ -1161,7 +1161,7 @@ _mesa_execute_program(struct gl_context * ctx,
GLfloat a[4], result[4];
fetch_vector1(&inst->SrcReg[0], machine, a);
result[0] = result[1] = result[2] = result[3]
- = (GLfloat) sin(a[0]);
+ = sinf(a[0]);
store_vector4(inst, machine, result);
}
break;
@@ -1360,7 +1360,7 @@ _mesa_execute_program(struct gl_context * ctx,
* zero, we'd probably be fine except for an assert in
* IROUND_POS() which gets triggered by the inf values created.
*/
- if (texcoord[3] != 0.0) {
+ if (texcoord[3] != 0.0F) {
texcoord[0] /= texcoord[3];
texcoord[1] /= texcoord[3];
texcoord[2] /= texcoord[3];
@@ -1380,7 +1380,7 @@ _mesa_execute_program(struct gl_context * ctx,
fetch_vector4(&inst->SrcReg[0], machine, texcoord);
if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
- texcoord[3] != 0.0) {
+ texcoord[3] != 0.0F) {
texcoord[0] /= texcoord[3];
texcoord[1] /= texcoord[3];
texcoord[2] /= texcoord[3];
diff --git a/src/mesa/program/prog_opt_constant_fold.c b/src/mesa/program/prog_opt_constant_fold.c
index 3811c0d8aa6..e2518e660e6 100644
--- a/src/mesa/program/prog_opt_constant_fold.c
+++ b/src/mesa/program/prog_opt_constant_fold.c
@@ -38,6 +38,8 @@ src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs)
for (i = 0; i < num_srcs; i++) {
if (inst->SrcReg[i].File != PROGRAM_CONSTANT)
return false;
+ if (inst->SrcReg[i].RelAddr)
+ return false;
}
return true;
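
The new RelAddr check matters because a relatively-addressed operand only names its constant at run time. A minimal sketch of the guard's intent; the struct and src_is_foldable() are hypothetical stand-ins, not Mesa's prog_instruction types:

/* With rel_addr set the effective index is base + address register, so the
 * folder cannot know which constant the operand reads and must not fold. */
#include <stdbool.h>
#include <stdio.h>

struct src_reg {
   bool is_constant;   /* file == PROGRAM_CONSTANT in the real code */
   bool rel_addr;      /* index is offset by the address register   */
};

static bool
src_is_foldable(const struct src_reg *src)
{
   return src->is_constant && !src->rel_addr;
}

int
main(void)
{
   struct src_reg direct   = { true, false };
   struct src_reg relative = { true, true  };
   printf("direct: %d, relative: %d\n",
          src_is_foldable(&direct), src_is_foldable(&relative));
   return 0;
}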
diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c
index e4faa63c06f..bb7c2c6e527 100644
--- a/src/mesa/program/prog_print.c
+++ b/src/mesa/program/prog_print.c
@@ -147,6 +147,8 @@ arb_input_attrib_string(GLuint index, GLenum progType)
"fragment.(twenty-one)", /* VARYING_SLOT_VIEWPORT */
"fragment.(twenty-two)", /* VARYING_SLOT_FACE */
"fragment.(twenty-three)", /* VARYING_SLOT_PNTC */
+ "fragment.(twenty-four)", /* VARYING_SLOT_TESS_LEVEL_OUTER */
+ "fragment.(twenty-five)", /* VARYING_SLOT_TESS_LEVEL_INNER */
"fragment.varying[0]",
"fragment.varying[1]",
"fragment.varying[2]",
@@ -272,6 +274,8 @@ arb_output_attrib_string(GLuint index, GLenum progType)
"result.(twenty-one)", /* VARYING_SLOT_VIEWPORT */
"result.(twenty-two)", /* VARYING_SLOT_FACE */
"result.(twenty-three)", /* VARYING_SLOT_PNTC */
+ "result.(twenty-four)", /* VARYING_SLOT_TESS_LEVEL_OUTER */
+ "result.(twenty-five)", /* VARYING_SLOT_TESS_LEVEL_INNER */
"result.varying[0]",
"result.varying[1]",
"result.varying[2]",
@@ -1015,6 +1019,12 @@ _mesa_write_shader_to_file(const struct gl_shader *shader)
case MESA_SHADER_FRAGMENT:
type = "frag";
break;
+ case MESA_SHADER_TESS_CTRL:
+ type = "tesc";
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ type = "tese";
+ break;
case MESA_SHADER_VERTEX:
type = "vert";
break;
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index c13e61b1630..2d03bba3d12 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -286,6 +286,38 @@ _mesa_init_compute_program(struct gl_context *ctx,
/**
+ * Initialize a new tessellation control program object.
+ */
+struct gl_program *
+_mesa_init_tess_ctrl_program(struct gl_context *ctx,
+ struct gl_tess_ctrl_program *prog,
+ GLenum target, GLuint id)
+{
+ if (prog) {
+ init_program_struct(&prog->Base, target, id);
+ return &prog->Base;
+ }
+ return NULL;
+}
+
+
+/**
+ * Initialize a new tessellation evaluation program object.
+ */
+struct gl_program *
+_mesa_init_tess_eval_program(struct gl_context *ctx,
+ struct gl_tess_eval_program *prog,
+ GLenum target, GLuint id)
+{
+ if (prog) {
+ init_program_struct(&prog->Base, target, id);
+ return &prog->Base;
+ }
+ return NULL;
+}
+
+
+/**
* Initialize a new geometry program object.
*/
struct gl_program *
@@ -333,6 +365,16 @@ _mesa_new_program(struct gl_context *ctx, GLenum target, GLuint id)
CALLOC_STRUCT(gl_geometry_program),
target, id);
break;
+ case GL_TESS_CONTROL_PROGRAM_NV:
+ prog = _mesa_init_tess_ctrl_program(ctx,
+ CALLOC_STRUCT(gl_tess_ctrl_program),
+ target, id);
+ break;
+ case GL_TESS_EVALUATION_PROGRAM_NV:
+ prog = _mesa_init_tess_eval_program(ctx,
+ CALLOC_STRUCT(gl_tess_eval_program),
+ target, id);
+ break;
case GL_COMPUTE_PROGRAM_NV:
prog = _mesa_init_compute_program(ctx,
CALLOC_STRUCT(gl_compute_program),
@@ -554,6 +596,23 @@ _mesa_clone_program(struct gl_context *ctx, const struct gl_program *prog)
gpc->UsesStreams = gp->UsesStreams;
}
break;
+ case GL_TESS_CONTROL_PROGRAM_NV:
+ {
+ const struct gl_tess_ctrl_program *tcp = gl_tess_ctrl_program_const(prog);
+ struct gl_tess_ctrl_program *tcpc = gl_tess_ctrl_program(clone);
+ tcpc->VerticesOut = tcp->VerticesOut;
+ }
+ break;
+ case GL_TESS_EVALUATION_PROGRAM_NV:
+ {
+ const struct gl_tess_eval_program *tep = gl_tess_eval_program_const(prog);
+ struct gl_tess_eval_program *tepc = gl_tess_eval_program(clone);
+ tepc->PrimitiveMode = tep->PrimitiveMode;
+ tepc->Spacing = tep->Spacing;
+ tepc->VertexOrder = tep->VertexOrder;
+ tepc->PointMode = tep->PointMode;
+ }
+ break;
default:
_mesa_problem(NULL, "Unexpected target in _mesa_clone_program");
}
diff --git a/src/mesa/program/program.h b/src/mesa/program/program.h
index 2d92ab2f118..a894147cafd 100644
--- a/src/mesa/program/program.h
+++ b/src/mesa/program/program.h
@@ -79,6 +79,16 @@ _mesa_init_fragment_program(struct gl_context *ctx,
GLenum target, GLuint id);
extern struct gl_program *
+_mesa_init_tess_ctrl_program(struct gl_context *ctx,
+ struct gl_tess_ctrl_program *prog,
+ GLenum target, GLuint id);
+
+extern struct gl_program *
+_mesa_init_tess_eval_program(struct gl_context *ctx,
+ struct gl_tess_eval_program *prog,
+ GLenum target, GLuint id);
+
+extern struct gl_program *
_mesa_init_geometry_program(struct gl_context *ctx,
struct gl_geometry_program *prog,
GLenum target, GLuint id);
@@ -147,6 +157,25 @@ _mesa_reference_compprog(struct gl_context *ctx,
(struct gl_program *) prog);
}
+
+static inline void
+_mesa_reference_tesscprog(struct gl_context *ctx,
+ struct gl_tess_ctrl_program **ptr,
+ struct gl_tess_ctrl_program *prog)
+{
+ _mesa_reference_program(ctx, (struct gl_program **) ptr,
+ (struct gl_program *) prog);
+}
+
+static inline void
+_mesa_reference_tesseprog(struct gl_context *ctx,
+ struct gl_tess_eval_program **ptr,
+ struct gl_tess_eval_program *prog)
+{
+ _mesa_reference_program(ctx, (struct gl_program **) ptr,
+ (struct gl_program *) prog);
+}
+
extern struct gl_program *
_mesa_clone_program(struct gl_context *ctx, const struct gl_program *prog);
@@ -157,6 +186,20 @@ _mesa_clone_vertex_program(struct gl_context *ctx,
return (struct gl_vertex_program *) _mesa_clone_program(ctx, &prog->Base);
}
+static inline struct gl_tess_ctrl_program *
+_mesa_clone_tess_ctrl_program(struct gl_context *ctx,
+ const struct gl_tess_ctrl_program *prog)
+{
+ return (struct gl_tess_ctrl_program *) _mesa_clone_program(ctx, &prog->Base);
+}
+
+static inline struct gl_tess_eval_program *
+_mesa_clone_tess_eval_program(struct gl_context *ctx,
+ const struct gl_tess_eval_program *prog)
+{
+ return (struct gl_tess_eval_program *) _mesa_clone_program(ctx, &prog->Base);
+}
+
static inline struct gl_geometry_program *
_mesa_clone_geometry_program(struct gl_context *ctx,
const struct gl_geometry_program *prog)
@@ -216,6 +259,10 @@ _mesa_program_enum_to_shader_stage(GLenum v)
return MESA_SHADER_FRAGMENT;
case GL_GEOMETRY_PROGRAM_NV:
return MESA_SHADER_GEOMETRY;
+ case GL_TESS_CONTROL_PROGRAM_NV:
+ return MESA_SHADER_TESS_CTRL;
+ case GL_TESS_EVALUATION_PROGRAM_NV:
+ return MESA_SHADER_TESS_EVAL;
case GL_COMPUTE_PROGRAM_NV:
return MESA_SHADER_COMPUTE;
default:
@@ -235,6 +282,10 @@ _mesa_shader_stage_to_program(unsigned stage)
return GL_FRAGMENT_PROGRAM_ARB;
case MESA_SHADER_GEOMETRY:
return GL_GEOMETRY_PROGRAM_NV;
+ case MESA_SHADER_TESS_CTRL:
+ return GL_TESS_CONTROL_PROGRAM_NV;
+ case MESA_SHADER_TESS_EVAL:
+ return GL_TESS_EVALUATION_PROGRAM_NV;
case MESA_SHADER_COMPUTE:
return GL_COMPUTE_PROGRAM_NV;
}
@@ -244,7 +295,9 @@ _mesa_shader_stage_to_program(unsigned stage)
}
-/* Cast wrappers from gl_program to gl_vertex/geometry/fragment_program */
+/* Cast wrappers from gl_program to derived program types.
+ * (e.g. gl_vertex_program)
+ */
static inline struct gl_fragment_program *
gl_fragment_program(struct gl_program *prog)
@@ -297,6 +350,31 @@ gl_compute_program_const(const struct gl_program *prog)
return (const struct gl_compute_program *) prog;
}
+static inline struct gl_tess_ctrl_program *
+gl_tess_ctrl_program(struct gl_program *prog)
+{
+ return (struct gl_tess_ctrl_program *) prog;
+}
+
+static inline const struct gl_tess_ctrl_program *
+gl_tess_ctrl_program_const(const struct gl_program *prog)
+{
+ return (const struct gl_tess_ctrl_program *) prog;
+}
+
+
+static inline struct gl_tess_eval_program *
+gl_tess_eval_program(struct gl_program *prog)
+{
+ return (struct gl_tess_eval_program *) prog;
+}
+
+static inline const struct gl_tess_eval_program *
+gl_tess_eval_program_const(const struct gl_program *prog)
+{
+ return (const struct gl_tess_eval_program *) prog;
+}
+
#ifdef __cplusplus
} /* extern "C" */
diff --git a/src/mesa/program/program_parse_extra.c b/src/mesa/program/program_parse_extra.c
index 32b54afc57b..71f86d13ace 100644
--- a/src/mesa/program/program_parse_extra.c
+++ b/src/mesa/program/program_parse_extra.c
@@ -163,6 +163,8 @@ _mesa_ARBvp_parse_option(struct asm_parser_state *state, const char *option)
int
_mesa_ARBfp_parse_option(struct asm_parser_state *state, const char *option)
{
+ unsigned fog_option;
+
/* All of the options currently supported start with "ARB_". The code is
* currently structured with nested if-statements because eventually options
* that start with "NV_" will be supported. This structure will result in
@@ -177,20 +179,42 @@ _mesa_ARBfp_parse_option(struct asm_parser_state *state, const char *option)
if (strncmp(option, "fog_", 4) == 0) {
option += 4;
- if (state->option.Fog == OPTION_NONE) {
- if (strcmp(option, "exp") == 0) {
- state->option.Fog = OPTION_FOG_EXP;
- return 1;
- } else if (strcmp(option, "exp2") == 0) {
- state->option.Fog = OPTION_FOG_EXP2;
- return 1;
- } else if (strcmp(option, "linear") == 0) {
- state->option.Fog = OPTION_FOG_LINEAR;
- return 1;
- }
- }
+ if (strcmp(option, "exp") == 0) {
+ fog_option = OPTION_FOG_EXP;
+ } else if (strcmp(option, "exp2") == 0) {
+ fog_option = OPTION_FOG_EXP2;
+ } else if (strcmp(option, "linear") == 0) {
+ fog_option = OPTION_FOG_LINEAR;
+ } else {
+ /* invalid option */
+ return 0;
+ }
- return 0;
+ if (state->option.Fog == OPTION_NONE) {
+ state->option.Fog = fog_option;
+ return 1;
+ }
+
+ /* The ARB_fragment_program specification instructs us to handle
+ * redundant options in two seemingly contradictory ways:
+ *
+ * Section 3.11.4.5.1 says:
+ * "Only one fog application option may be specified by any given
+ * fragment program. A fragment program that specifies more than one
+ * of the program options "ARB_fog_exp", "ARB_fog_exp2", and
+ * "ARB_fog_linear", will fail to load."
+ *
+ * Issue 27 says:
+ * "The three mandatory options are ARB_fog_exp, ARB_fog_exp2, and
+ * ARB_fog_linear. As these options are mutually exclusive by
+ * nature, specifying more than one is not useful. If more than one
+ * is specified, the last one encountered in the <optionSequence>
+ * will be the one to actually modify the execution environment."
+ *
+ * We choose to allow programs to specify the same OPTION redundantly,
+ * but fail to load programs that specify contradictory options.
+ */
+ return state->option.Fog == fog_option ? 1 : 0;
} else if (strncmp(option, "precision_hint_", 15) == 0) {
option += 15;
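
The spec discussion in the fog-option hunk above resolves an apparent contradiction by accepting redundant OPTIONs and rejecting contradictory ones. A minimal standalone sketch of that rule; accept_fog_option() is a hypothetical helper, not Mesa's _mesa_ARBfp_parse_option():

/* Repeating the same fog OPTION is accepted; requesting a different fog
 * OPTION after one has been recorded makes the program fail to load. */
#include <stdio.h>

enum fog_opt { FOG_NONE, FOG_EXP, FOG_EXP2, FOG_LINEAR };

static int
accept_fog_option(enum fog_opt *current, enum fog_opt requested)
{
   if (*current == FOG_NONE) {
      *current = requested;      /* first fog option seen: record it */
      return 1;
   }
   return *current == requested; /* redundant: ok; contradictory: fail */
}

int
main(void)
{
   enum fog_opt fog = FOG_NONE;
   printf("%d ",  accept_fog_option(&fog, FOG_EXP));     /* 1             */
   printf("%d ",  accept_fog_option(&fog, FOG_EXP));     /* 1, redundant  */
   printf("%d\n", accept_fog_option(&fog, FOG_LINEAR));  /* 0, contradicts */
   return 0;
}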
diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c
index 428f2d9d7d7..43dbadd4a7e 100644
--- a/src/mesa/state_tracker/st_atom.c
+++ b/src/mesa/state_tracker/st_atom.c
@@ -46,9 +46,10 @@ static const struct st_tracked_state *atoms[] =
&st_update_depth_stencil_alpha,
&st_update_clip,
- &st_finalize_textures,
&st_update_fp,
&st_update_gp,
+ &st_update_tep,
+ &st_update_tcp,
&st_update_vp,
&st_update_rasterizer,
@@ -59,17 +60,24 @@ static const struct st_tracked_state *atoms[] =
&st_update_vertex_texture,
&st_update_fragment_texture,
&st_update_geometry_texture,
+ &st_update_tessctrl_texture,
+ &st_update_tesseval_texture,
&st_update_sampler, /* depends on update_*_texture for swizzle */
&st_update_framebuffer,
&st_update_msaa,
&st_update_sample_shading,
&st_update_vs_constants,
+ &st_update_tcs_constants,
+ &st_update_tes_constants,
&st_update_gs_constants,
&st_update_fs_constants,
&st_bind_vs_ubos,
+ &st_bind_tcs_ubos,
+ &st_bind_tes_ubos,
&st_bind_fs_ubos,
&st_bind_gs_ubos,
&st_update_pixel_transfer,
+ &st_update_tess,
/* this must be done after the vertex program update */
&st_update_array
diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h
index c50111d501f..a24842baa4f 100644
--- a/src/mesa/state_tracker/st_atom.h
+++ b/src/mesa/state_tracker/st_atom.h
@@ -52,6 +52,8 @@ extern const struct st_tracked_state st_update_clip;
extern const struct st_tracked_state st_update_depth_stencil_alpha;
extern const struct st_tracked_state st_update_fp;
extern const struct st_tracked_state st_update_gp;
+extern const struct st_tracked_state st_update_tep;
+extern const struct st_tracked_state st_update_tcp;
extern const struct st_tracked_state st_update_vp;
extern const struct st_tracked_state st_update_rasterizer;
extern const struct st_tracked_state st_update_polygon_stipple;
@@ -64,14 +66,20 @@ extern const struct st_tracked_state st_update_sampler;
extern const struct st_tracked_state st_update_fragment_texture;
extern const struct st_tracked_state st_update_vertex_texture;
extern const struct st_tracked_state st_update_geometry_texture;
-extern const struct st_tracked_state st_finalize_textures;
+extern const struct st_tracked_state st_update_tessctrl_texture;
+extern const struct st_tracked_state st_update_tesseval_texture;
extern const struct st_tracked_state st_update_fs_constants;
extern const struct st_tracked_state st_update_gs_constants;
+extern const struct st_tracked_state st_update_tes_constants;
+extern const struct st_tracked_state st_update_tcs_constants;
extern const struct st_tracked_state st_update_vs_constants;
extern const struct st_tracked_state st_bind_fs_ubos;
extern const struct st_tracked_state st_bind_vs_ubos;
extern const struct st_tracked_state st_bind_gs_ubos;
+extern const struct st_tracked_state st_bind_tcs_ubos;
+extern const struct st_tracked_state st_bind_tes_ubos;
extern const struct st_tracked_state st_update_pixel_transfer;
+extern const struct st_tracked_state st_update_tess;
GLuint st_compare_func_to_pipe(GLenum func);
diff --git a/src/mesa/state_tracker/st_atom_clip.c b/src/mesa/state_tracker/st_atom_clip.c
index f82c1332afc..506a770499f 100644
--- a/src/mesa/state_tracker/st_atom_clip.c
+++ b/src/mesa/state_tracker/st_atom_clip.c
@@ -59,8 +59,11 @@ static void update_clip( struct st_context *st )
memcpy(clip.ucp,
use_eye ? ctx->Transform.EyeUserPlane
: ctx->Transform._ClipUserPlane, sizeof(clip.ucp));
- st->state.clip = clip;
- cso_set_clip(st->cso_context, &clip);
+
+ if (memcmp(&st->state.clip, &clip, sizeof(clip)) != 0) {
+ st->state.clip = clip;
+ st->pipe->set_clip_state(st->pipe, &clip);
+ }
}
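
The memcmp guard added to update_clip() is the usual filter-before-set pattern for skipping redundant driver calls. A minimal generic sketch of the pattern; the struct, driver_set_clip(), and maybe_set_clip() are hypothetical stand-ins for pipe_clip_state and pipe->set_clip_state():

/* Keep a cached copy of the last state handed to the driver and only call
 * down when the newly computed state actually differs. */
#include <stdio.h>
#include <string.h>

struct clip_state { float ucp[8][4]; };

static struct clip_state cached;
static unsigned driver_calls;

static void
driver_set_clip(const struct clip_state *cs)
{
   (void) cs;
   driver_calls++;               /* stands in for pipe->set_clip_state() */
}

static void
maybe_set_clip(const struct clip_state *next)
{
   if (memcmp(&cached, next, sizeof(*next)) != 0) {
      cached = *next;
      driver_set_clip(next);
   }
}

int
main(void)
{
   struct clip_state cs;
   memset(&cs, 0, sizeof(cs));

   cs.ucp[0][0] = 1.0f;
   maybe_set_clip(&cs);          /* new state: reaches the driver */
   maybe_set_clip(&cs);          /* identical: filtered out       */
   cs.ucp[0][0] = 2.0f;
   maybe_set_clip(&cs);          /* changed: reaches the driver   */

   printf("driver calls: %u\n", driver_calls);   /* prints 2 */
   return 0;
}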
diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c
index a54e0d9dbf5..6affb4d84d5 100644
--- a/src/mesa/state_tracker/st_atom_constbuf.c
+++ b/src/mesa/state_tracker/st_atom_constbuf.c
@@ -59,7 +59,9 @@ void st_upload_constants( struct st_context *st,
{
assert(shader_type == PIPE_SHADER_VERTEX ||
shader_type == PIPE_SHADER_FRAGMENT ||
- shader_type == PIPE_SHADER_GEOMETRY);
+ shader_type == PIPE_SHADER_GEOMETRY ||
+ shader_type == PIPE_SHADER_TESS_CTRL ||
+ shader_type == PIPE_SHADER_TESS_EVAL);
/* update constants */
if (params && params->NumParameters) {
@@ -178,6 +180,50 @@ const struct st_tracked_state st_update_gs_constants = {
update_gs_constants /* update */
};
+/* Tessellation control shader:
+ */
+static void update_tcs_constants(struct st_context *st )
+{
+ struct st_tessctrl_program *tcp = st->tcp;
+ struct gl_program_parameter_list *params;
+
+ if (tcp) {
+ params = tcp->Base.Base.Parameters;
+ st_upload_constants( st, params, PIPE_SHADER_TESS_CTRL );
+ }
+}
+
+const struct st_tracked_state st_update_tcs_constants = {
+ "st_update_tcs_constants", /* name */
+ { /* dirty */
+ _NEW_PROGRAM_CONSTANTS, /* mesa */
+ ST_NEW_TESSCTRL_PROGRAM, /* st */
+ },
+ update_tcs_constants /* update */
+};
+
+/* Tessellation evaluation shader:
+ */
+static void update_tes_constants(struct st_context *st )
+{
+ struct st_tesseval_program *tep = st->tep;
+ struct gl_program_parameter_list *params;
+
+ if (tep) {
+ params = tep->Base.Base.Parameters;
+ st_upload_constants( st, params, PIPE_SHADER_TESS_EVAL );
+ }
+}
+
+const struct st_tracked_state st_update_tes_constants = {
+ "st_update_tes_constants", /* name */
+ { /* dirty */
+ _NEW_PROGRAM_CONSTANTS, /* mesa */
+ ST_NEW_TESSEVAL_PROGRAM, /* st */
+ },
+ update_tes_constants /* update */
+};
+
static void st_bind_ubos(struct st_context *st,
struct gl_shader *shader,
unsigned shader_type)
@@ -275,3 +321,43 @@ const struct st_tracked_state st_bind_gs_ubos = {
},
bind_gs_ubos
};
+
+static void bind_tcs_ubos(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL];
+
+ if (!prog)
+ return;
+
+ st_bind_ubos(st, prog->_LinkedShaders[MESA_SHADER_TESS_CTRL], PIPE_SHADER_TESS_CTRL);
+}
+
+const struct st_tracked_state st_bind_tcs_ubos = {
+ "st_bind_tcs_ubos",
+ {
+ 0,
+ ST_NEW_TESSCTRL_PROGRAM | ST_NEW_UNIFORM_BUFFER,
+ },
+ bind_tcs_ubos
+};
+
+static void bind_tes_ubos(struct st_context *st)
+{
+ struct gl_shader_program *prog =
+ st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
+
+ if (!prog)
+ return;
+
+ st_bind_ubos(st, prog->_LinkedShaders[MESA_SHADER_TESS_EVAL], PIPE_SHADER_TESS_EVAL);
+}
+
+const struct st_tracked_state st_bind_tes_ubos = {
+ "st_bind_tes_ubos",
+ {
+ 0,
+ ST_NEW_TESSEVAL_PROGRAM | ST_NEW_UNIFORM_BUFFER,
+ },
+ bind_tes_ubos
+};
diff --git a/src/mesa/state_tracker/st_atom_depth.c b/src/mesa/state_tracker/st_atom_depth.c
index c4bca8d09b5..d9cc97029fb 100644
--- a/src/mesa/state_tracker/st_atom_depth.c
+++ b/src/mesa/state_tracker/st_atom_depth.c
@@ -105,10 +105,17 @@ update_depth_stencil_alpha(struct st_context *st)
memset(dsa, 0, sizeof(*dsa));
memset(&sr, 0, sizeof(sr));
- if (ctx->Depth.Test && ctx->DrawBuffer->Visual.depthBits > 0) {
- dsa->depth.enabled = 1;
- dsa->depth.writemask = ctx->Depth.Mask;
- dsa->depth.func = st_compare_func_to_pipe(ctx->Depth.Func);
+ if (ctx->DrawBuffer->Visual.depthBits > 0) {
+ if (ctx->Depth.Test) {
+ dsa->depth.enabled = 1;
+ dsa->depth.writemask = ctx->Depth.Mask;
+ dsa->depth.func = st_compare_func_to_pipe(ctx->Depth.Func);
+ }
+ if (ctx->Depth.BoundsTest) {
+ dsa->depth.bounds_test = 1;
+ dsa->depth.bounds_min = ctx->Depth.BoundsMin;
+ dsa->depth.bounds_max = ctx->Depth.BoundsMax;
+ }
}
if (ctx->Stencil.Enabled && ctx->DrawBuffer->Visual.stencilBits > 0) {
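
The new bounds_test plumbing above corresponds to EXT_depth_bounds_test, which discards a fragment when the depth already stored at its window position lies outside a user-specified range. A minimal sketch of that test; depth_bounds_pass() is a hypothetical helper for illustration:

/* The comparison is against the depth value already in the depth buffer at
 * the fragment's position, not the incoming fragment's own depth. */
#include <stdbool.h>
#include <stdio.h>

static bool
depth_bounds_pass(bool enabled, float stored_depth, float zmin, float zmax)
{
   if (!enabled)
      return true;
   return stored_depth >= zmin && stored_depth <= zmax;
}

int
main(void)
{
   printf("%d %d\n",
          depth_bounds_pass(true, 0.4f, 0.25f, 0.75f),   /* 1: inside  */
          depth_bounds_pass(true, 0.9f, 0.25f, 0.75f));  /* 0: outside */
   return 0;
}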
diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c
index b68eb16d7be..4252c27962e 100644
--- a/src/mesa/state_tracker/st_atom_sampler.c
+++ b/src/mesa/state_tracker/st_atom_sampler.c
@@ -245,6 +245,7 @@ update_shader_samplers(struct st_context *st,
GLuint unit;
GLbitfield samplers_used;
const GLuint old_max = *num_samplers;
+ const struct pipe_sampler_state *states[PIPE_MAX_SAMPLERS];
samplers_used = prog->SamplersUsed;
@@ -261,13 +262,11 @@ update_shader_samplers(struct st_context *st,
const GLuint texUnit = prog->SamplerUnits[unit];
convert_sampler(st, sampler, texUnit);
-
+ states[unit] = sampler;
*num_samplers = unit + 1;
-
- cso_single_sampler(st->cso_context, shader_stage, unit, sampler);
}
else if (samplers_used != 0 || unit < old_max) {
- cso_single_sampler(st->cso_context, shader_stage, unit, NULL);
+ states[unit] = NULL;
}
else {
/* if we've reset all the old samplers and we have no more new ones */
@@ -275,7 +274,7 @@ update_shader_samplers(struct st_context *st,
}
}
- cso_single_sampler_done(st->cso_context, shader_stage);
+ cso_set_samplers(st->cso_context, shader_stage, *num_samplers, states);
}
@@ -306,6 +305,22 @@ update_samplers(struct st_context *st)
st->state.samplers[PIPE_SHADER_GEOMETRY],
&st->state.num_samplers[PIPE_SHADER_GEOMETRY]);
}
+ if (ctx->TessCtrlProgram._Current) {
+ update_shader_samplers(st,
+ PIPE_SHADER_TESS_CTRL,
+ &ctx->TessCtrlProgram._Current->Base,
+ ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits,
+ st->state.samplers[PIPE_SHADER_TESS_CTRL],
+ &st->state.num_samplers[PIPE_SHADER_TESS_CTRL]);
+ }
+ if (ctx->TessEvalProgram._Current) {
+ update_shader_samplers(st,
+ PIPE_SHADER_TESS_EVAL,
+ &ctx->TessEvalProgram._Current->Base,
+ ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits,
+ st->state.samplers[PIPE_SHADER_TESS_EVAL],
+ &st->state.num_samplers[PIPE_SHADER_TESS_EVAL]);
+ }
}
diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c
index ad8d2624fc9..fee15a980f3 100644
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -50,24 +50,6 @@
/**
- * Return pointer to a pass-through fragment shader.
- * This shader is used when a texture is missing/incomplete.
- */
-static void *
-get_passthrough_fs(struct st_context *st)
-{
- if (!st->passthrough_fs) {
- st->passthrough_fs =
- util_make_fragment_passthrough_shader(st->pipe, TGSI_SEMANTIC_COLOR,
- TGSI_INTERPOLATE_PERSPECTIVE,
- TRUE);
- }
-
- return st->passthrough_fs;
-}
-
-
-/**
* Update fragment program state/atom. This involves translating the
* Mesa fragment program into a gallium fragment program and binding it.
*/
@@ -96,15 +78,8 @@ update_fp( struct st_context *st )
st_reference_fragprog(st, &st->fp, stfp);
- if (st->missing_textures) {
- /* use a pass-through frag shader that uses no textures */
- void *fs = get_passthrough_fs(st);
- cso_set_fragment_shader_handle(st->cso_context, fs);
- }
- else {
- cso_set_fragment_shader_handle(st->cso_context,
- st->fp_variant->driver_shader);
- }
+ cso_set_fragment_shader_handle(st->cso_context,
+ st->fp_variant->driver_shader);
}
@@ -210,3 +185,75 @@ const struct st_tracked_state st_update_gp = {
},
update_gp /* update */
};
+
+
+
+static void
+update_tcp( struct st_context *st )
+{
+ struct st_tessctrl_program *sttcp;
+ struct st_tcp_variant_key key;
+
+ if (!st->ctx->TessCtrlProgram._Current) {
+ cso_set_tessctrl_shader_handle(st->cso_context, NULL);
+ return;
+ }
+
+ sttcp = st_tessctrl_program(st->ctx->TessCtrlProgram._Current);
+ assert(sttcp->Base.Base.Target == GL_TESS_CONTROL_PROGRAM_NV);
+
+ memset(&key, 0, sizeof(key));
+ key.st = st;
+
+ st->tcp_variant = st_get_tcp_variant(st, sttcp, &key);
+
+ st_reference_tesscprog(st, &st->tcp, sttcp);
+
+ cso_set_tessctrl_shader_handle(st->cso_context,
+ st->tcp_variant->driver_shader);
+}
+
+const struct st_tracked_state st_update_tcp = {
+ "st_update_tcp", /* name */
+ { /* dirty */
+ 0, /* mesa */
+ ST_NEW_TESSCTRL_PROGRAM /* st */
+ },
+ update_tcp /* update */
+};
+
+
+
+static void
+update_tep( struct st_context *st )
+{
+ struct st_tesseval_program *sttep;
+ struct st_tep_variant_key key;
+
+ if (!st->ctx->TessEvalProgram._Current) {
+ cso_set_tesseval_shader_handle(st->cso_context, NULL);
+ return;
+ }
+
+ sttep = st_tesseval_program(st->ctx->TessEvalProgram._Current);
+ assert(sttep->Base.Base.Target == GL_TESS_EVALUATION_PROGRAM_NV);
+
+ memset(&key, 0, sizeof(key));
+ key.st = st;
+
+ st->tep_variant = st_get_tep_variant(st, sttep, &key);
+
+ st_reference_tesseprog(st, &st->tep, sttep);
+
+ cso_set_tesseval_shader_handle(st->cso_context,
+ st->tep_variant->driver_shader);
+}
+
+const struct st_tracked_state st_update_tep = {
+ "st_update_tep", /* name */
+ { /* dirty */
+ 0, /* mesa */
+ ST_NEW_TESSEVAL_PROGRAM /* st */
+ },
+ update_tep /* update */
+};
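
The two new tracked-state entries above follow the state tracker's atom pattern: a debug name, the dirty bits that trigger the atom, and an update callback. A minimal sketch of how such entries are consumed, assuming a simplified validate loop (the real code is st_validate_state operating on st_state_flags; the types and names below are illustrative only):

/* Sketch of the tracked-state ("atom") pattern, under simplified assumptions:
 * struct names and the validate loop are illustrative, not Mesa's actual
 * st_validate_state() / st_state_flags code.
 */
#include <stdint.h>

struct context;                            /* stands in for struct st_context */

struct tracked_state {
   const char *name;                       /* debug name, e.g. "st_update_tcp" */
   uint64_t st_dirty_bits;                 /* e.g. ST_NEW_TESSCTRL_PROGRAM */
   void (*update)(struct context *ctx);    /* e.g. update_tcp() */
};

static void
validate_state(struct context *ctx, const struct tracked_state *const *atoms,
               unsigned num_atoms, uint64_t dirty)
{
   for (unsigned i = 0; i < num_atoms; i++) {
      if (atoms[i]->st_dirty_bits & dirty)
         atoms[i]->update(ctx);            /* runs only when its bits are set */
   }
}
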
diff --git a/src/mesa/state_tracker/st_atom_tess.c b/src/mesa/state_tracker/st_atom_tess.c
new file mode 100644
index 00000000000..8e6287a900c
--- /dev/null
+++ b/src/mesa/state_tracker/st_atom_tess.c
@@ -0,0 +1,62 @@
+/**************************************************************************
+ *
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Marek Olšák <[email protected]>
+ */
+
+
+#include "main/macros.h"
+#include "st_context.h"
+#include "pipe/p_context.h"
+#include "st_atom.h"
+
+
+static void
+update_tess(struct st_context *st)
+{
+ const struct gl_context *ctx = st->ctx;
+ struct pipe_context *pipe = st->pipe;
+
+ if (!pipe->set_tess_state)
+ return;
+
+ pipe->set_tess_state(pipe,
+ ctx->TessCtrlProgram.patch_default_outer_level,
+ ctx->TessCtrlProgram.patch_default_inner_level);
+}
+
+
+const struct st_tracked_state st_update_tess = {
+ "update_tess", /* name */
+ { /* dirty */
+ 0, /* mesa */
+ ST_NEW_TESS_STATE, /* st */
+ },
+ update_tess /* update */
+};
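
The new st_atom_tess.c atom forwards the GL default patch levels to the driver whenever ST_NEW_TESS_STATE is flagged. For reference, a hedged application-side example of where those values originate (arbitrary levels, GL 4.x function loader assumed); they are only consulted when no tessellation control shader is bound:

/* Application-side source of the values update_tess() forwards through
 * pipe->set_tess_state().  These defaults only matter when no tessellation
 * control shader is bound; the levels are arbitrary examples.
 */
#include <GL/glcorearb.h>

static void
set_default_tess_levels(void)
{
   const GLfloat outer[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
   const GLfloat inner[2] = { 2.0f, 2.0f };

   glPatchParameterfv(GL_PATCH_DEFAULT_OUTER_LEVEL, outer);
   glPatchParameterfv(GL_PATCH_DEFAULT_INNER_LEVEL, inner);
}
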
diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c
index 04ba86448fc..31e0f6ba06c 100644
--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -103,7 +103,8 @@ swizzle_swizzle(unsigned swizzle1, unsigned swizzle2)
*/
static unsigned
compute_texture_format_swizzle(GLenum baseFormat, GLenum depthMode,
- enum pipe_format actualFormat)
+ enum pipe_format actualFormat,
+ unsigned glsl_version)
{
switch (baseFormat) {
case GL_RGBA:
@@ -157,8 +158,26 @@ compute_texture_format_swizzle(GLenum baseFormat, GLenum depthMode,
case GL_INTENSITY:
return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
case GL_ALPHA:
- return MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO,
- SWIZZLE_ZERO, SWIZZLE_X);
+ /* The texture(sampler*Shadow) functions from GLSL 1.30 ignore
+ * the depth mode and return float, while older shadow* functions
+ * and ARB_fp instructions return vec4 according to the depth mode.
+ *
+ * The problem with the GLSL 1.30 functions is that GL_ALPHA forces
+ * them to return 0, breaking them completely.
+ *
+ * A proper fix would increase code complexity and that's not worth
+ * it for a rarely used feature such as the GL_ALPHA depth mode
+ * in GL3. Therefore, change GL_ALPHA to GL_INTENSITY for all
+ * shaders that use GLSL 1.30 or later.
+ *
+ * BTW, it's required that sampler views are updated when
+ * shaders change (check_sampler_swizzle takes care of that).
+ */
+ if (glsl_version && glsl_version >= 130)
+ return SWIZZLE_XXXX;
+ else
+ return MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO,
+ SWIZZLE_ZERO, SWIZZLE_X);
case GL_RED:
return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO,
SWIZZLE_ZERO, SWIZZLE_ONE);
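
The comment above motivates the version-dependent swizzle; a stand-alone sketch of the resulting choice, using an ad-hoc per-channel notation rather than Mesa's SWIZZLE_* encoding:

/* Stand-alone sketch of the GL_ALPHA depth-mode decision above.  The
 * per-channel strings are an ad-hoc notation, not MAKE_SWIZZLE4()/SWIZZLE_*.
 */
static const char *
alpha_depth_swizzle(unsigned glsl_version)
{
   if (glsl_version >= 130)
      return "xxxx";   /* replicate depth so texture(sampler*Shadow) keeps working */

   return "000x";      /* literal GL_ALPHA behaviour for older shaders / ARB_fp */
}
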
@@ -174,7 +193,8 @@ compute_texture_format_swizzle(GLenum baseFormat, GLenum depthMode,
static unsigned
-get_texture_format_swizzle(const struct st_texture_object *stObj)
+get_texture_format_swizzle(const struct st_texture_object *stObj,
+ unsigned glsl_version)
{
GLenum baseFormat = _mesa_texture_base_format(&stObj->base);
unsigned tex_swizzle;
@@ -182,7 +202,8 @@ get_texture_format_swizzle(const struct st_texture_object *stObj)
if (baseFormat != GL_NONE) {
tex_swizzle = compute_texture_format_swizzle(baseFormat,
stObj->base.DepthMode,
- stObj->pt->format);
+ stObj->pt->format,
+ glsl_version);
}
else {
tex_swizzle = SWIZZLE_XYZW;
@@ -201,9 +222,9 @@ get_texture_format_swizzle(const struct st_texture_object *stObj)
*/
static boolean
check_sampler_swizzle(const struct st_texture_object *stObj,
- struct pipe_sampler_view *sv)
+ struct pipe_sampler_view *sv, unsigned glsl_version)
{
- unsigned swizzle = get_texture_format_swizzle(stObj);
+ unsigned swizzle = get_texture_format_swizzle(stObj, glsl_version);
return ((sv->swizzle_r != GET_SWZ(swizzle, 0)) ||
(sv->swizzle_g != GET_SWZ(swizzle, 1)) ||
@@ -232,11 +253,11 @@ static unsigned last_layer(struct st_texture_object *stObj)
static struct pipe_sampler_view *
st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe,
struct st_texture_object *stObj,
- const struct gl_sampler_object *samp,
- enum pipe_format format)
+ enum pipe_format format,
+ unsigned glsl_version)
{
struct pipe_sampler_view templ;
- unsigned swizzle = get_texture_format_swizzle(stObj);
+ unsigned swizzle = get_texture_format_swizzle(stObj, glsl_version);
u_sampler_view_default_template(&templ,
stObj->pt,
@@ -283,8 +304,8 @@ st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe,
static struct pipe_sampler_view *
st_get_texture_sampler_view_from_stobj(struct st_context *st,
struct st_texture_object *stObj,
- const struct gl_sampler_object *samp,
- enum pipe_format format)
+ enum pipe_format format,
+ unsigned glsl_version)
{
struct pipe_sampler_view **sv;
const struct st_texture_image *firstImage;
@@ -306,7 +327,7 @@ st_get_texture_sampler_view_from_stobj(struct st_context *st,
/* if sampler view has changed dereference it */
if (*sv) {
- if (check_sampler_swizzle(stObj, *sv) ||
+ if (check_sampler_swizzle(stObj, *sv, glsl_version) ||
(format != (*sv)->format) ||
gl_target_to_pipe(stObj->base.Target) != (*sv)->target ||
stObj->base.MinLevel + stObj->base.BaseLevel != (*sv)->u.tex.first_level ||
@@ -318,7 +339,8 @@ st_get_texture_sampler_view_from_stobj(struct st_context *st,
}
if (!*sv) {
- *sv = st_create_texture_sampler_view_from_stobj(st->pipe, stObj, samp, format);
+ *sv = st_create_texture_sampler_view_from_stobj(st->pipe, stObj,
+ format, glsl_version);
} else if ((*sv)->context != st->pipe) {
/* Recreate view in correct context, use existing view as template */
@@ -334,7 +356,7 @@ st_get_texture_sampler_view_from_stobj(struct st_context *st,
static GLboolean
update_single_texture(struct st_context *st,
struct pipe_sampler_view **sampler_view,
- GLuint texUnit)
+ GLuint texUnit, unsigned glsl_version)
{
struct gl_context *ctx = st->ctx;
const struct gl_sampler_object *samp;
@@ -374,8 +396,9 @@ update_single_texture(struct st_context *st,
}
}
- *sampler_view = st_get_texture_sampler_view_from_stobj(st, stObj, samp,
- view_format);
+ *sampler_view =
+ st_get_texture_sampler_view_from_stobj(st, stObj, view_format,
+ glsl_version);
return GL_TRUE;
}
@@ -383,7 +406,7 @@ update_single_texture(struct st_context *st,
static void
update_textures(struct st_context *st,
- unsigned shader_stage,
+ gl_shader_stage mesa_shader,
const struct gl_program *prog,
unsigned max_units,
struct pipe_sampler_view **sampler_views,
@@ -392,6 +415,10 @@ update_textures(struct st_context *st,
const GLuint old_max = *num_textures;
GLbitfield samplers_used = prog->SamplersUsed;
GLuint unit;
+ struct gl_shader_program *shader =
+ st->ctx->_Shader->CurrentProgram[mesa_shader];
+ unsigned glsl_version = shader ? shader->Version : 0;
+ unsigned shader_stage = st_shader_stage_to_ptarget(mesa_shader);
if (samplers_used == 0x0 && old_max == 0)
return;
@@ -406,7 +433,8 @@ update_textures(struct st_context *st,
const GLuint texUnit = prog->SamplerUnits[unit];
GLboolean retval;
- retval = update_single_texture(st, &sampler_view, texUnit);
+ retval = update_single_texture(st, &sampler_view, texUnit,
+ glsl_version);
if (retval == GL_FALSE)
continue;
@@ -435,7 +463,7 @@ update_vertex_textures(struct st_context *st)
if (ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits > 0) {
update_textures(st,
- PIPE_SHADER_VERTEX,
+ MESA_SHADER_VERTEX,
&ctx->VertexProgram._Current->Base,
ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits,
st->state.sampler_views[PIPE_SHADER_VERTEX],
@@ -450,7 +478,7 @@ update_fragment_textures(struct st_context *st)
const struct gl_context *ctx = st->ctx;
update_textures(st,
- PIPE_SHADER_FRAGMENT,
+ MESA_SHADER_FRAGMENT,
&ctx->FragmentProgram._Current->Base,
ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits,
st->state.sampler_views[PIPE_SHADER_FRAGMENT],
@@ -465,7 +493,7 @@ update_geometry_textures(struct st_context *st)
if (ctx->GeometryProgram._Current) {
update_textures(st,
- PIPE_SHADER_GEOMETRY,
+ MESA_SHADER_GEOMETRY,
&ctx->GeometryProgram._Current->Base,
ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits,
st->state.sampler_views[PIPE_SHADER_GEOMETRY],
@@ -474,11 +502,43 @@ update_geometry_textures(struct st_context *st)
}
+static void
+update_tessctrl_textures(struct st_context *st)
+{
+ const struct gl_context *ctx = st->ctx;
+
+ if (ctx->TessCtrlProgram._Current) {
+ update_textures(st,
+ MESA_SHADER_TESS_CTRL,
+ &ctx->TessCtrlProgram._Current->Base,
+ ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits,
+ st->state.sampler_views[PIPE_SHADER_TESS_CTRL],
+ &st->state.num_sampler_views[PIPE_SHADER_TESS_CTRL]);
+ }
+}
+
+
+static void
+update_tesseval_textures(struct st_context *st)
+{
+ const struct gl_context *ctx = st->ctx;
+
+ if (ctx->TessEvalProgram._Current) {
+ update_textures(st,
+ MESA_SHADER_TESS_EVAL,
+ &ctx->TessEvalProgram._Current->Base,
+ ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits,
+ st->state.sampler_views[PIPE_SHADER_TESS_EVAL],
+ &st->state.num_sampler_views[PIPE_SHADER_TESS_EVAL]);
+ }
+}
+
+
const struct st_tracked_state st_update_fragment_texture = {
"st_update_texture", /* name */
{ /* dirty */
_NEW_TEXTURE, /* mesa */
- ST_NEW_FRAGMENT_PROGRAM, /* st */
+ ST_NEW_FRAGMENT_PROGRAM | ST_NEW_SAMPLER_VIEWS, /* st */
},
update_fragment_textures /* update */
};
@@ -488,7 +548,7 @@ const struct st_tracked_state st_update_vertex_texture = {
"st_update_vertex_texture", /* name */
{ /* dirty */
_NEW_TEXTURE, /* mesa */
- ST_NEW_VERTEX_PROGRAM, /* st */
+ ST_NEW_VERTEX_PROGRAM | ST_NEW_SAMPLER_VIEWS, /* st */
},
update_vertex_textures /* update */
};
@@ -498,52 +558,27 @@ const struct st_tracked_state st_update_geometry_texture = {
"st_update_geometry_texture", /* name */
{ /* dirty */
_NEW_TEXTURE, /* mesa */
- ST_NEW_GEOMETRY_PROGRAM, /* st */
+ ST_NEW_GEOMETRY_PROGRAM | ST_NEW_SAMPLER_VIEWS, /* st */
},
update_geometry_textures /* update */
};
-
-static void
-finalize_textures(struct st_context *st)
-{
- struct gl_context *ctx = st->ctx;
- struct gl_fragment_program *fprog = ctx->FragmentProgram._Current;
- const GLboolean prev_missing_textures = st->missing_textures;
- GLuint su;
-
- st->missing_textures = GL_FALSE;
-
- for (su = 0; su < ctx->Const.MaxTextureCoordUnits; su++) {
- if (fprog->Base.SamplersUsed & (1 << su)) {
- const GLuint texUnit = fprog->Base.SamplerUnits[su];
- struct gl_texture_object *texObj
- = ctx->Texture.Unit[texUnit]._Current;
-
- if (texObj) {
- GLboolean retval;
-
- retval = st_finalize_texture(ctx, st->pipe, texObj);
- if (!retval) {
- /* out of mem */
- st->missing_textures = GL_TRUE;
- continue;
- }
- }
- }
- }
-
- if (prev_missing_textures != st->missing_textures)
- st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM;
-}
+const struct st_tracked_state st_update_tessctrl_texture = {
+ "st_update_tessctrl_texture", /* name */
+ { /* dirty */
+ _NEW_TEXTURE, /* mesa */
+ ST_NEW_TESSCTRL_PROGRAM | ST_NEW_SAMPLER_VIEWS, /* st */
+ },
+ update_tessctrl_textures /* update */
+};
-const struct st_tracked_state st_finalize_textures = {
- "st_finalize_textures", /* name */
- { /* dirty */
- _NEW_TEXTURE, /* mesa */
- 0, /* st */
+const struct st_tracked_state st_update_tesseval_texture = {
+ "st_update_tesseval_texture", /* name */
+ { /* dirty */
+ _NEW_TEXTURE, /* mesa */
+ ST_NEW_TESSEVAL_PROGRAM | ST_NEW_SAMPLER_VIEWS, /* st */
},
- finalize_textures /* update */
+ update_tesseval_textures /* update */
};
diff --git a/src/mesa/state_tracker/st_atom_viewport.c b/src/mesa/state_tracker/st_atom_viewport.c
index 2f62590c4f1..9a692cecade 100644
--- a/src/mesa/state_tracker/st_atom_viewport.c
+++ b/src/mesa/state_tracker/st_atom_viewport.c
@@ -64,7 +64,7 @@ update_viewport( struct st_context *st )
*/
for (i = 0; i < ctx->Const.MaxViewports; i++)
{
- double scale[3], translate[3];
+ float scale[3], translate[3];
_mesa_get_viewport_xform(ctx, i, scale, translate);
st->state.viewport[i].scale[0] = scale[0];
diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index c881e194f70..01a96c18264 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -446,8 +446,8 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
assert(height <= (GLsizei)maxSize);
cso_save_rasterizer(cso);
- cso_save_samplers(cso, PIPE_SHADER_FRAGMENT);
- cso_save_sampler_views(cso, PIPE_SHADER_FRAGMENT);
+ cso_save_fragment_samplers(cso);
+ cso_save_fragment_sampler_views(cso);
cso_save_viewport(cso);
cso_save_fragment_shader(cso);
cso_save_stream_outputs(cso);
@@ -535,8 +535,8 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
/* restore state */
cso_restore_rasterizer(cso);
- cso_restore_samplers(cso, PIPE_SHADER_FRAGMENT);
- cso_restore_sampler_views(cso, PIPE_SHADER_FRAGMENT);
+ cso_restore_fragment_samplers(cso);
+ cso_restore_fragment_sampler_views(cso);
cso_restore_viewport(cso);
cso_restore_fragment_shader(cso);
cso_restore_vertex_shader(cso);
diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c
index 6d9371852c5..139690615d6 100644
--- a/src/mesa/state_tracker/st_cb_blit.c
+++ b/src/mesa/state_tracker/st_cb_blit.c
@@ -39,7 +39,7 @@
#include "st_cb_bitmap.h"
#include "st_cb_blit.h"
#include "st_cb_fbo.h"
-#include "st_atom.h"
+#include "st_manager.h"
#include "util/u_format.h"
@@ -92,7 +92,7 @@ st_BlitFramebuffer(struct gl_context *ctx,
} clip;
struct pipe_blit_info blit;
- st_validate_state(st);
+ st_manager_validate_framebuffers(st);
/* Make sure bitmap rendering has landed in the framebuffers */
st_flush_bitmap_cache(st);
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index a6a98c83aa6..b372697026b 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -33,6 +33,7 @@
#include "main/imports.h"
#include "main/image.h"
#include "main/bufferobj.h"
+#include "main/blit.h"
#include "main/format_pack.h"
#include "main/macros.h"
#include "main/mtypes.h"
@@ -688,8 +689,8 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
cso_save_rasterizer(cso);
cso_save_viewport(cso);
- cso_save_samplers(cso, PIPE_SHADER_FRAGMENT);
- cso_save_sampler_views(cso, PIPE_SHADER_FRAGMENT);
+ cso_save_fragment_samplers(cso);
+ cso_save_fragment_sampler_views(cso);
cso_save_fragment_shader(cso);
cso_save_stream_outputs(cso);
cso_save_vertex_shader(cso);
@@ -756,6 +757,8 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
/* texture sampling state: */
{
struct pipe_sampler_state sampler;
+ const struct pipe_sampler_state *states[2] = {&sampler, &sampler};
+
memset(&sampler, 0, sizeof(sampler));
sampler.wrap_s = PIPE_TEX_WRAP_CLAMP;
sampler.wrap_t = PIPE_TEX_WRAP_CLAMP;
@@ -765,11 +768,8 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
sampler.normalized_coords = normalized;
- cso_single_sampler(cso, PIPE_SHADER_FRAGMENT, 0, &sampler);
- if (num_sampler_view > 1) {
- cso_single_sampler(cso, PIPE_SHADER_FRAGMENT, 1, &sampler);
- }
- cso_single_sampler_done(cso, PIPE_SHADER_FRAGMENT);
+ cso_set_samplers(cso, PIPE_SHADER_FRAGMENT,
+ num_sampler_view > 1 ? 2 : 1, states);
}
/* viewport state: viewport matching window dims */
@@ -816,8 +816,8 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
/* restore state */
cso_restore_rasterizer(cso);
cso_restore_viewport(cso);
- cso_restore_samplers(cso, PIPE_SHADER_FRAGMENT);
- cso_restore_sampler_views(cso, PIPE_SHADER_FRAGMENT);
+ cso_restore_fragment_samplers(cso);
+ cso_restore_fragment_sampler_views(cso);
cso_restore_fragment_shader(cso);
cso_restore_vertex_shader(cso);
cso_restore_tessctrl_shader(cso);
@@ -1313,31 +1313,6 @@ st_get_color_read_renderbuffer(struct gl_context *ctx)
/**
- * \return TRUE if two regions overlap, FALSE otherwise
- */
-static boolean
-regions_overlap(int srcX0, int srcY0,
- int srcX1, int srcY1,
- int dstX0, int dstY0,
- int dstX1, int dstY1)
-{
- if (MAX2(srcX0, srcX1) < MIN2(dstX0, dstX1))
- return FALSE; /* src completely left of dst */
-
- if (MAX2(dstX0, dstX1) < MIN2(srcX0, srcX1))
- return FALSE; /* dst completely left of src */
-
- if (MAX2(srcY0, srcY1) < MIN2(dstY0, dstY1))
- return FALSE; /* src completely above dst */
-
- if (MAX2(dstY0, dstY1) < MIN2(srcY0, srcY1))
- return FALSE; /* dst completely above src */
-
- return TRUE; /* some overlap */
-}
-
-
-/**
* Try to do a glCopyPixels for simple cases with a blit by calling
* pipe->blit().
*
@@ -1420,8 +1395,8 @@ blit_copy_pixels(struct gl_context *ctx, GLint srcx, GLint srcy,
}
if (rbRead != rbDraw ||
- !regions_overlap(readX, readY, readX + readW, readY + readH,
- drawX, drawY, drawX + drawW, drawY + drawH)) {
+ !_mesa_regions_overlap(readX, readY, readX + readW, readY + readH,
+ drawX, drawY, drawX + drawW, drawY + drawH)) {
struct pipe_blit_info blit;
memset(&blit, 0, sizeof(blit));
diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index 0399eef7204..57075904450 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -511,8 +511,6 @@ st_render_texture(struct gl_context *ctx,
strb->rtt_layered = att->Layered;
pipe_resource_reference(&strb->texture, pt);
- pipe_surface_release(pipe, &strb->surface);
-
st_update_renderbuffer_surface(st, strb);
strb->Base.Format = st_pipe_format_to_mesa_format(pt->format);
diff --git a/src/mesa/state_tracker/st_cb_perfmon.h b/src/mesa/state_tracker/st_cb_perfmon.h
index 13d3627de5d..0b195de47fe 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.h
+++ b/src/mesa/state_tracker/st_cb_perfmon.h
@@ -46,7 +46,7 @@ struct st_perf_counter_object
/**
* Cast wrapper
*/
-static INLINE struct st_perf_monitor_object *
+static inline struct st_perf_monitor_object *
st_perf_monitor_object(struct gl_perf_monitor_object *q)
{
return (struct st_perf_monitor_object *)q;
diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c
index 6aa7d5796d9..3029909d12d 100644
--- a/src/mesa/state_tracker/st_cb_program.c
+++ b/src/mesa/state_tracker/st_cb_program.c
@@ -68,6 +68,12 @@ st_bind_program(struct gl_context *ctx, GLenum target, struct gl_program *prog)
case GL_GEOMETRY_PROGRAM_NV:
st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM;
break;
+ case GL_TESS_CONTROL_PROGRAM_NV:
+ st->dirty.st |= ST_NEW_TESSCTRL_PROGRAM;
+ break;
+ case GL_TESS_EVALUATION_PROGRAM_NV:
+ st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM;
+ break;
}
}
@@ -84,6 +90,8 @@ st_use_program(struct gl_context *ctx, struct gl_shader_program *shProg)
st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM;
st->dirty.st |= ST_NEW_VERTEX_PROGRAM;
st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM;
+ st->dirty.st |= ST_NEW_TESSCTRL_PROGRAM;
+ st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM;
}
@@ -110,6 +118,16 @@ st_new_program(struct gl_context *ctx, GLenum target, GLuint id)
return _mesa_init_geometry_program(ctx, &prog->Base, target, id);
}
+ case GL_TESS_CONTROL_PROGRAM_NV: {
+ struct st_tessctrl_program *prog = ST_CALLOC_STRUCT(st_tessctrl_program);
+ return _mesa_init_tess_ctrl_program(ctx, &prog->Base, target, id);
+ }
+
+ case GL_TESS_EVALUATION_PROGRAM_NV: {
+ struct st_tesseval_program *prog = ST_CALLOC_STRUCT(st_tesseval_program);
+ return _mesa_init_tess_eval_program(ctx, &prog->Base, target, id);
+ }
+
default:
assert(0);
return NULL;
@@ -157,6 +175,28 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog)
free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
}
break;
+ case GL_TESS_CONTROL_PROGRAM_NV:
+ {
+ struct st_tessctrl_program *sttcp =
+ (struct st_tessctrl_program *) prog;
+
+ st_release_tcp_variants(st, sttcp);
+
+ if (sttcp->glsl_to_tgsi)
+ free_glsl_to_tgsi_visitor(sttcp->glsl_to_tgsi);
+ }
+ break;
+ case GL_TESS_EVALUATION_PROGRAM_NV:
+ {
+ struct st_tesseval_program *sttep =
+ (struct st_tesseval_program *) prog;
+
+ st_release_tep_variants(st, sttep);
+
+ if (sttep->glsl_to_tgsi)
+ free_glsl_to_tgsi_visitor(sttep->glsl_to_tgsi);
+ }
+ break;
default:
assert(0); /* problem */
}
@@ -214,6 +254,24 @@ st_program_string_notify( struct gl_context *ctx,
if (st->vp == stvp)
st->dirty.st |= ST_NEW_VERTEX_PROGRAM;
}
+ else if (target == GL_TESS_CONTROL_PROGRAM_NV) {
+ struct st_tessctrl_program *sttcp =
+ (struct st_tessctrl_program *) prog;
+
+ st_release_tcp_variants(st, sttcp);
+
+ if (st->tcp == sttcp)
+ st->dirty.st |= ST_NEW_TESSCTRL_PROGRAM;
+ }
+ else if (target == GL_TESS_EVALUATION_PROGRAM_NV) {
+ struct st_tesseval_program *sttep =
+ (struct st_tesseval_program *) prog;
+
+ st_release_tep_variants(st, sttep);
+
+ if (st->tep == sttep)
+ st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM;
+ }
if (ST_DEBUG & DEBUG_PRECOMPILE)
st_precompile_shader_variant(st, prog);
diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c
index 272cbb91d52..b9997dacfd2 100644
--- a/src/mesa/state_tracker/st_cb_rasterpos.c
+++ b/src/mesa/state_tracker/st_cb_rasterpos.c
@@ -254,7 +254,7 @@ st_RasterPos(struct gl_context *ctx, const GLfloat v[4])
* st_feedback_draw_vbo doesn't check for that flag. */
ctx->Array._DrawArrays = rs->arrays;
st_feedback_draw_vbo(ctx, &rs->prim, 1, NULL, GL_TRUE, 0, 1,
- NULL, NULL);
+ NULL, 0, NULL);
ctx->Array._DrawArrays = saved_arrays;
/* restore draw's rasterization stage depending on rendermode */
diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c
index d95a608d32e..18ea43fa71a 100644
--- a/src/mesa/state_tracker/st_cb_readpixels.c
+++ b/src/mesa/state_tracker/st_cb_readpixels.c
@@ -43,6 +43,30 @@
#include "state_tracker/st_format.h"
#include "state_tracker/st_texture.h"
+static boolean
+needs_integer_signed_unsigned_conversion(const struct gl_context *ctx,
+ GLenum format, GLenum type)
+{
+ struct gl_renderbuffer *rb =
+ _mesa_get_read_renderbuffer_for_format(ctx, format);
+
+ assert(rb);
+
+ GLenum srcType = _mesa_get_format_datatype(rb->Format);
+
+ if ((srcType == GL_INT &&
+ (type == GL_UNSIGNED_INT ||
+ type == GL_UNSIGNED_SHORT ||
+ type == GL_UNSIGNED_BYTE)) ||
+ (srcType == GL_UNSIGNED_INT &&
+ (type == GL_INT ||
+ type == GL_SHORT ||
+ type == GL_BYTE))) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
/**
* This uses a blit to copy the read buffer to a texture format which matches
@@ -123,6 +147,10 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y,
goto fallback;
}
+ if (needs_integer_signed_unsigned_conversion(ctx, format, type)) {
+ goto fallback;
+ }
+
/* Convert the source format to what is expected by ReadPixels
* and see if it's supported. */
src_format = util_format_linear(src->format);
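
The new check above sends sign-mismatched integer reads to the software path, since the blit-based fast path cannot convert between signed and unsigned integer formats. A hedged example of a request that would take that fallback (a signed-integer read color buffer such as GL_RGBA32I is assumed to be bound):

/* Hypothetical read that needs_integer_signed_unsigned_conversion() rejects:
 * the read buffer format gives srcType == GL_INT while the requested type is
 * unsigned.  Framebuffer setup is assumed elsewhere.
 */
#include <GL/glcorearb.h>

static GLuint pixels[16 * 16 * 4];

static void
read_signed_as_unsigned(void)
{
   /* GL_INT source + GL_UNSIGNED_INT destination -> software fallback */
   glReadPixels(0, 0, 16, 16, GL_RGBA_INTEGER, GL_UNSIGNED_INT, pixels);
}
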
diff --git a/src/mesa/state_tracker/st_cb_syncobj.c b/src/mesa/state_tracker/st_cb_syncobj.c
index 6d875b851a2..ec2687fba53 100644
--- a/src/mesa/state_tracker/st_cb_syncobj.c
+++ b/src/mesa/state_tracker/st_cb_syncobj.c
@@ -81,7 +81,13 @@ static void st_check_sync(struct gl_context *ctx, struct gl_sync_object *obj)
struct pipe_screen *screen = st_context(ctx)->pipe->screen;
struct st_sync_object *so = (struct st_sync_object*)obj;
- if (so->fence && screen->fence_signalled(screen, so->fence)) {
+ /* If the fence doesn't exist, assume it's signalled. */
+ if (!so->fence) {
+ so->b.StatusFlag = GL_TRUE;
+ return;
+ }
+
+ if (screen->fence_finish(screen, so->fence, 0)) {
screen->fence_reference(screen, &so->fence, NULL);
so->b.StatusFlag = GL_TRUE;
}
@@ -94,6 +100,12 @@ static void st_client_wait_sync(struct gl_context *ctx,
struct pipe_screen *screen = st_context(ctx)->pipe->screen;
struct st_sync_object *so = (struct st_sync_object*)obj;
+ /* If the fence doesn't exist, assume it's signalled. */
+ if (!so->fence) {
+ so->b.StatusFlag = GL_TRUE;
+ return;
+ }
+
/* We don't care about GL_SYNC_FLUSH_COMMANDS_BIT, because flush is
* already called when creating a fence. */
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 7ea3846fff1..715d69c0c68 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -896,7 +896,7 @@ st_CompressedTexImage(struct gl_context *ctx, GLuint dims,
/**
- * Called via ctx->Driver.GetTexImage()
+ * Called via ctx->Driver.GetTexSubImage()
*
* This uses a blit to copy the texture to a texture format which matches
* the format and type combo and then a fast read-back is done using memcpy.
@@ -910,16 +910,15 @@ st_CompressedTexImage(struct gl_context *ctx, GLuint dims,
* we do here should be free in such cases.
*/
static void
-st_GetTexImage(struct gl_context * ctx,
- GLenum format, GLenum type, GLvoid * pixels,
- struct gl_texture_image *texImage)
+st_GetTexSubImage(struct gl_context * ctx,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLsizei width, GLsizei height, GLint depth,
+ GLenum format, GLenum type, GLvoid * pixels,
+ struct gl_texture_image *texImage)
{
struct st_context *st = st_context(ctx);
struct pipe_context *pipe = st->pipe;
struct pipe_screen *screen = pipe->screen;
- GLuint width = texImage->Width;
- GLuint height = texImage->Height;
- GLuint depth = texImage->Depth;
struct st_texture_image *stImage = st_texture_image(texImage);
struct st_texture_object *stObj = st_texture_object(texImage->TexObject);
struct pipe_resource *src = stObj->pt;
@@ -1054,7 +1053,7 @@ st_GetTexImage(struct gl_context * ctx,
}
}
- /* create the destination texture */
+ /* create the destination texture of size (width X height X depth) */
memset(&dst_templ, 0, sizeof(dst_templ));
dst_templ.target = pipe_target;
dst_templ.format = dst_format;
@@ -1076,6 +1075,10 @@ st_GetTexImage(struct gl_context * ctx,
height = 1;
}
+ assert(texImage->Face == 0 ||
+ texImage->TexObject->MinLayer == 0 ||
+ zoffset == 0);
+
memset(&blit, 0, sizeof(blit));
blit.src.resource = src;
blit.src.level = texImage->Level + texImage->TexObject->MinLevel;
@@ -1083,9 +1086,11 @@ st_GetTexImage(struct gl_context * ctx,
blit.dst.resource = dst;
blit.dst.level = 0;
blit.dst.format = dst->format;
- blit.src.box.x = blit.dst.box.x = 0;
- blit.src.box.y = blit.dst.box.y = 0;
- blit.src.box.z = texImage->Face + texImage->TexObject->MinLayer;
+ blit.src.box.x = xoffset;
+ blit.dst.box.x = 0;
+ blit.src.box.y = yoffset;
+ blit.dst.box.y = 0;
+ blit.src.box.z = texImage->Face + texImage->TexObject->MinLayer + zoffset;
blit.dst.box.z = 0;
blit.src.box.width = blit.dst.box.width = width;
blit.src.box.height = blit.dst.box.height = height;
@@ -1206,7 +1211,9 @@ end:
fallback:
if (!done) {
- _mesa_GetTexImage_sw(ctx, format, type, pixels, texImage);
+ _mesa_GetTexSubImage_sw(ctx, xoffset, yoffset, zoffset,
+ width, height, depth,
+ format, type, pixels, texImage);
}
}
@@ -1876,11 +1883,11 @@ st_init_texture_functions(struct dd_function_table *functions)
functions->CopyTexSubImage = st_CopyTexSubImage;
functions->GenerateMipmap = st_generate_mipmap;
- functions->GetTexImage = st_GetTexImage;
+ functions->GetTexSubImage = st_GetTexSubImage;
/* compressed texture functions */
functions->CompressedTexImage = st_CompressedTexImage;
- functions->GetCompressedTexImage = _mesa_GetCompressedTexImage_sw;
+ functions->GetCompressedTexSubImage = _mesa_GetCompressedTexSubImage_sw;
functions->NewTextureObject = st_NewTextureObject;
functions->NewTextureImage = st_NewTextureImage;
diff --git a/src/mesa/state_tracker/st_cb_xformfb.c b/src/mesa/state_tracker/st_cb_xformfb.c
index 07c118e227b..0c01cd5ab78 100644
--- a/src/mesa/state_tracker/st_cb_xformfb.c
+++ b/src/mesa/state_tracker/st_cb_xformfb.c
@@ -54,9 +54,9 @@ struct st_transform_feedback_object {
struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
/* This encapsulates the count that can be used as a source for draw_vbo.
- * It contains a stream output target from the last call of
- * EndTransformFeedback. */
- struct pipe_stream_output_target *draw_count;
+ * It contains stream output targets from the last call of
+ * EndTransformFeedback for each stream. */
+ struct pipe_stream_output_target *draw_count[MAX_VERTEX_STREAMS];
};
static inline struct st_transform_feedback_object *
@@ -88,7 +88,8 @@ st_delete_transform_feedback(struct gl_context *ctx,
st_transform_feedback_object(obj);
unsigned i;
- pipe_so_target_reference(&sobj->draw_count, NULL);
+ for (i = 0; i < ARRAY_SIZE(sobj->draw_count); i++)
+ pipe_so_target_reference(&sobj->draw_count[i], NULL);
/* Unreference targets. */
for (i = 0; i < sobj->num_targets; i++) {
@@ -123,9 +124,12 @@ st_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
struct st_buffer_object *bo = st_buffer_object(sobj->base.Buffers[i]);
if (bo && bo->buffer) {
+ unsigned stream =
+ obj->shader_program->LinkedTransformFeedback.BufferStream[i];
+
/* Check whether we need to recreate the target. */
if (!sobj->targets[i] ||
- sobj->targets[i] == sobj->draw_count ||
+ sobj->targets[i] == sobj->draw_count[stream] ||
sobj->targets[i]->buffer != bo->buffer ||
sobj->targets[i]->buffer_offset != sobj->base.Offset[i] ||
sobj->targets[i]->buffer_size != sobj->base.Size[i]) {
@@ -178,24 +182,6 @@ st_resume_transform_feedback(struct gl_context *ctx,
}
-static struct pipe_stream_output_target *
-st_transform_feedback_get_draw_target(struct gl_transform_feedback_object *obj)
-{
- struct st_transform_feedback_object *sobj =
- st_transform_feedback_object(obj);
- unsigned i;
-
- for (i = 0; i < ARRAY_SIZE(sobj->targets); i++) {
- if (sobj->targets[i]) {
- return sobj->targets[i];
- }
- }
-
- assert(0);
- return NULL;
-}
-
-
static void
st_end_transform_feedback(struct gl_context *ctx,
struct gl_transform_feedback_object *obj)
@@ -203,22 +189,41 @@ st_end_transform_feedback(struct gl_context *ctx,
struct st_context *st = st_context(ctx);
struct st_transform_feedback_object *sobj =
st_transform_feedback_object(obj);
+ unsigned i;
cso_set_stream_outputs(st->cso_context, 0, NULL, NULL);
- pipe_so_target_reference(&sobj->draw_count,
- st_transform_feedback_get_draw_target(obj));
+ /* The next call to glDrawTransformFeedbackStream should use the vertex
+ * count from the last call to glEndTransformFeedback.
+ * Therefore, save the targets for each stream.
+ *
+ * NULL means the vertex counter is 0 (initial state).
+ */
+ for (i = 0; i < ARRAY_SIZE(sobj->draw_count); i++)
+ pipe_so_target_reference(&sobj->draw_count[i], NULL);
+
+ for (i = 0; i < ARRAY_SIZE(sobj->targets); i++) {
+ unsigned stream =
+ obj->shader_program->LinkedTransformFeedback.BufferStream[i];
+
+ /* Is it not bound or already set for this stream? */
+ if (!sobj->targets[i] || sobj->draw_count[stream])
+ continue;
+
+ pipe_so_target_reference(&sobj->draw_count[stream], sobj->targets[i]);
+ }
}
-void
+bool
st_transform_feedback_draw_init(struct gl_transform_feedback_object *obj,
- struct pipe_draw_info *out)
+ unsigned stream, struct pipe_draw_info *out)
{
struct st_transform_feedback_object *sobj =
st_transform_feedback_object(obj);
- out->count_from_stream_output = sobj->draw_count;
+ out->count_from_stream_output = sobj->draw_count[stream];
+ return out->count_from_stream_output != NULL;
}
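
With draw_count[] now kept per stream, the saved target is looked up by the stream index passed down from the draw call. A hedged application-side example of the path that reaches st_transform_feedback_draw_init with a non-zero stream (buffer setup and a geometry shader emitting to stream 1 are assumed):

/* Hedged example of a draw that replays the vertex count captured on
 * stream 1.  The transform feedback object and the geometry shader writing
 * to stream 1 are assumed to exist elsewhere.
 */
#include <GL/glcorearb.h>

extern GLuint tfo;   /* transform feedback object, created elsewhere */

static void
replay_stream_one(void)
{
   glBeginTransformFeedback(GL_POINTS);
   /* ... draw; the geometry shader calls EmitStreamVertex(1) ... */
   glEndTransformFeedback();   /* st saves one target per stream in draw_count[] */

   /* Draws exactly the number of vertices captured on stream 1. */
   glDrawTransformFeedbackStream(GL_POINTS, tfo, 1);
}
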
diff --git a/src/mesa/state_tracker/st_cb_xformfb.h b/src/mesa/state_tracker/st_cb_xformfb.h
index 998c418257b..444d11842c5 100644
--- a/src/mesa/state_tracker/st_cb_xformfb.h
+++ b/src/mesa/state_tracker/st_cb_xformfb.h
@@ -38,9 +38,9 @@ struct pipe_draw_info;
extern void
st_init_xformfb_functions(struct dd_function_table *functions);
-extern void
+extern bool
st_transform_feedback_draw_init(struct gl_transform_feedback_object *obj,
- struct pipe_draw_info *out);
+ unsigned stream, struct pipe_draw_info *out);
#endif /* ST_CB_XFORMFB_H */
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index ed9ed0f1b6c..72c23cad4bc 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -287,6 +287,11 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
/* For vertex shaders, make sure not to emit saturate when SM 3.0 is not supported */
ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoSat = !st->has_shader_model3;
+ if (!ctx->Extensions.ARB_gpu_shader5) {
+ for (i = 0; i < MESA_SHADER_STAGES; i++)
+ ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler = true;
+ }
+
_mesa_compute_version(ctx);
if (ctx->Version == 0) {
@@ -308,6 +313,8 @@ static void st_init_driver_flags(struct gl_driver_flags *f)
f->NewArray = ST_NEW_VERTEX_ARRAYS;
f->NewRasterizerDiscard = ST_NEW_RASTERIZER;
f->NewUniformBuffer = ST_NEW_UNIFORM_BUFFER;
+ f->NewDefaultTessLevels = ST_NEW_TESS_STATE;
+ f->NewTextureBuffer = ST_NEW_SAMPLER_VIEWS;
}
struct st_context *st_create_context(gl_api api, struct pipe_context *pipe,
@@ -369,6 +376,8 @@ void st_destroy_context( struct st_context *st )
st_reference_fragprog(st, &st->fp, NULL);
st_reference_geomprog(st, &st->gp, NULL);
st_reference_vertprog(st, &st->vp, NULL);
+ st_reference_tesscprog(st, &st->tcp, NULL);
+ st_reference_tesseprog(st, &st->tep, NULL);
/* release framebuffer surfaces */
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index dac5a4b9006..81d5480431a 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -53,11 +53,14 @@ struct u_upload_mgr;
#define ST_NEW_FRAGMENT_PROGRAM (1 << 1)
#define ST_NEW_VERTEX_PROGRAM (1 << 2)
#define ST_NEW_FRAMEBUFFER (1 << 3)
-/* gap, re-use it */
+#define ST_NEW_TESS_STATE (1 << 4)
#define ST_NEW_GEOMETRY_PROGRAM (1 << 5)
#define ST_NEW_VERTEX_ARRAYS (1 << 6)
#define ST_NEW_RASTERIZER (1 << 7)
#define ST_NEW_UNIFORM_BUFFER (1 << 8)
+#define ST_NEW_TESSCTRL_PROGRAM (1 << 9)
+#define ST_NEW_TESSEVAL_PROGRAM (1 << 10)
+#define ST_NEW_SAMPLER_VIEWS (1 << 11)
struct st_state_flags {
@@ -137,7 +140,6 @@ struct st_context
struct st_state_flags dirty;
- GLboolean missing_textures;
GLboolean vertdata_edgeflags;
GLboolean edgeflag_culls_prims;
@@ -147,10 +149,14 @@ struct st_context
struct st_vertex_program *vp; /**< Currently bound vertex program */
struct st_fragment_program *fp; /**< Currently bound fragment program */
struct st_geometry_program *gp; /**< Currently bound geometry program */
+ struct st_tessctrl_program *tcp; /**< Currently bound tess control program */
+ struct st_tesseval_program *tep; /**< Currently bound tess eval program */
struct st_vp_variant *vp_variant;
struct st_fp_variant *fp_variant;
struct st_gp_variant *gp_variant;
+ struct st_tcp_variant *tcp_variant;
+ struct st_tep_variant *tep_variant;
struct gl_texture_object *default_texture;
@@ -272,6 +278,29 @@ st_fb_orientation(const struct gl_framebuffer *fb)
}
+static inline unsigned
+st_shader_stage_to_ptarget(gl_shader_stage stage)
+{
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ return PIPE_SHADER_VERTEX;
+ case MESA_SHADER_FRAGMENT:
+ return PIPE_SHADER_FRAGMENT;
+ case MESA_SHADER_GEOMETRY:
+ return PIPE_SHADER_GEOMETRY;
+ case MESA_SHADER_TESS_CTRL:
+ return PIPE_SHADER_TESS_CTRL;
+ case MESA_SHADER_TESS_EVAL:
+ return PIPE_SHADER_TESS_EVAL;
+ case MESA_SHADER_COMPUTE:
+ return PIPE_SHADER_COMPUTE;
+ }
+
+ assert(!"should not be reached");
+ return PIPE_SHADER_VERTEX;
+}
+
+
/** clear-alloc a struct-sized object, with casting */
#define ST_CALLOC_STRUCT(T) (struct T *) calloc(1, sizeof(struct T))
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 8b43582c14b..957fcfd410e 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -164,6 +164,7 @@ translate_prim(const struct gl_context *ctx, unsigned prim)
STATIC_ASSERT(GL_POINTS == PIPE_PRIM_POINTS);
STATIC_ASSERT(GL_QUADS == PIPE_PRIM_QUADS);
STATIC_ASSERT(GL_TRIANGLE_STRIP_ADJACENCY == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY);
+ STATIC_ASSERT(GL_PATCHES == PIPE_PRIM_PATCHES);
return prim;
}
@@ -183,6 +184,7 @@ st_draw_vbo(struct gl_context *ctx,
GLuint min_index,
GLuint max_index,
struct gl_transform_feedback_object *tfb_vertcount,
+ unsigned stream,
struct gl_buffer_object *indirect)
{
struct st_context *st = st_context(ctx);
@@ -241,7 +243,8 @@ st_draw_vbo(struct gl_context *ctx,
/* Transform feedback drawing is always non-indexed. */
/* Set info.count_from_stream_output. */
if (tfb_vertcount) {
- st_transform_feedback_draw_init(tfb_vertcount, &info);
+ if (!st_transform_feedback_draw_init(tfb_vertcount, stream, &info))
+ return;
}
}
@@ -260,6 +263,7 @@ st_draw_vbo(struct gl_context *ctx,
info.count = prims[i].count;
info.start_instance = prims[i].base_instance;
info.instance_count = prims[i].num_instances;
+ info.vertices_per_patch = ctx->TessCtrlProgram.patch_vertices;
info.index_bias = prims[i].basevertex;
if (!ib) {
info.min_index = info.start;
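
st_draw_vbo now forwards the patch size to the driver via info.vertices_per_patch. A hedged application-side example of where that value comes from (VAO, tessellation program and vertex data assumed; the patch size is an arbitrary example):

/* Application-side origin of info.vertices_per_patch above. */
#include <GL/glcorearb.h>

static void
draw_patches(GLuint vao, GLsizei num_patches)
{
   const GLint verts_per_patch = 3;   /* ends up in ctx->TessCtrlProgram.patch_vertices */

   glBindVertexArray(vao);
   glPatchParameteri(GL_PATCH_VERTICES, verts_per_patch);
   glDrawArrays(GL_PATCHES, 0, verts_per_patch * num_patches);
}
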
diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h
index 780d4bde713..a973c8a4a5d 100644
--- a/src/mesa/state_tracker/st_draw.h
+++ b/src/mesa/state_tracker/st_draw.h
@@ -56,6 +56,7 @@ st_draw_vbo(struct gl_context *ctx,
GLuint min_index,
GLuint max_index,
struct gl_transform_feedback_object *tfb_vertcount,
+ unsigned stream,
struct gl_buffer_object *indirect);
extern void
@@ -67,6 +68,7 @@ st_feedback_draw_vbo(struct gl_context *ctx,
GLuint min_index,
GLuint max_index,
struct gl_transform_feedback_object *tfb_vertcount,
+ unsigned stream,
struct gl_buffer_object *indirect);
/**
diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c
index 177f6b5aefa..88c10a8f150 100644
--- a/src/mesa/state_tracker/st_draw_feedback.c
+++ b/src/mesa/state_tracker/st_draw_feedback.c
@@ -117,6 +117,7 @@ st_feedback_draw_vbo(struct gl_context *ctx,
GLuint min_index,
GLuint max_index,
struct gl_transform_feedback_object *tfb_vertcount,
+ unsigned stream,
struct gl_buffer_object *indirect)
{
struct st_context *st = st_context(ctx);
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index b1057f3eadd..17f572f80fb 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -165,6 +165,14 @@ void st_init_limits(struct pipe_screen *screen,
pc = &c->Program[MESA_SHADER_GEOMETRY];
options = &c->ShaderCompilerOptions[MESA_SHADER_GEOMETRY];
break;
+ case PIPE_SHADER_TESS_CTRL:
+ pc = &c->Program[MESA_SHADER_TESS_CTRL];
+ options = &c->ShaderCompilerOptions[MESA_SHADER_TESS_CTRL];
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ pc = &c->Program[MESA_SHADER_TESS_EVAL];
+ options = &c->ShaderCompilerOptions[MESA_SHADER_TESS_EVAL];
+ break;
default:
/* compute shader, etc. */
continue;
@@ -245,8 +253,12 @@ void st_init_limits(struct pipe_screen *screen,
options->LowerClipDistance = true;
}
+ c->LowerTessLevel = true;
+
c->MaxCombinedTextureImageUnits =
_min(c->Program[MESA_SHADER_VERTEX].MaxTextureImageUnits +
+ c->Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits +
+ c->Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits +
c->Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
c->Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits,
MAX_COMBINED_TEXTURE_IMAGE_UNITS);
@@ -266,6 +278,9 @@ void st_init_limits(struct pipe_screen *screen,
c->MaxVarying = MIN2(c->MaxVarying, MAX_VARYING);
c->MaxGeometryOutputVertices = screen->get_param(screen, PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES);
c->MaxGeometryTotalOutputComponents = screen->get_param(screen, PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS);
+ c->MaxTessPatchComponents =
+ MAX2(screen->get_param(screen, PIPE_CAP_MAX_SHADER_PATCH_VARYINGS),
+ MAX_VARYING) * 4;
c->MinProgramTexelOffset = screen->get_param(screen, PIPE_CAP_MIN_TEXEL_OFFSET);
c->MaxProgramTexelOffset = screen->get_param(screen, PIPE_CAP_MAX_TEXEL_OFFSET);
@@ -301,6 +316,8 @@ void st_init_limits(struct pipe_screen *screen,
screen->get_param(screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT);
c->MaxCombinedUniformBlocks = c->MaxUniformBufferBindings =
c->Program[MESA_SHADER_VERTEX].MaxUniformBlocks +
+ c->Program[MESA_SHADER_TESS_CTRL].MaxUniformBlocks +
+ c->Program[MESA_SHADER_TESS_EVAL].MaxUniformBlocks +
c->Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks +
c->Program[MESA_SHADER_FRAGMENT].MaxUniformBlocks;
assert(c->MaxCombinedUniformBlocks <= MAX_COMBINED_UNIFORM_BUFFERS);
@@ -417,12 +434,14 @@ void st_init_extensions(struct pipe_screen *screen,
static const struct st_extension_cap_mapping cap_mapping[] = {
{ o(ARB_base_instance), PIPE_CAP_START_INSTANCE },
- { o(ARB_buffer_storage), PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT },
+ { o(ARB_buffer_storage), PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT },
+ { o(ARB_color_buffer_float), PIPE_CAP_VERTEX_COLOR_UNCLAMPED },
{ o(ARB_depth_clamp), PIPE_CAP_DEPTH_CLIP_DISABLE },
{ o(ARB_depth_texture), PIPE_CAP_TEXTURE_SHADOW_MAP },
{ o(ARB_draw_buffers_blend), PIPE_CAP_INDEP_BLEND_FUNC },
{ o(ARB_draw_instanced), PIPE_CAP_TGSI_INSTANCEID },
{ o(ARB_fragment_program_shadow), PIPE_CAP_TEXTURE_SHADOW_MAP },
+ { o(ARB_framebuffer_object), PIPE_CAP_MIXED_FRAMEBUFFER_SIZES },
{ o(ARB_instanced_arrays), PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR },
{ o(ARB_occlusion_query), PIPE_CAP_OCCLUSION_QUERY },
{ o(ARB_occlusion_query2), PIPE_CAP_OCCLUSION_QUERY },
@@ -432,6 +451,8 @@ void st_init_extensions(struct pipe_screen *screen,
{ o(ARB_shader_stencil_export), PIPE_CAP_SHADER_STENCIL_EXPORT },
{ o(ARB_shader_texture_lod), PIPE_CAP_SM3 },
{ o(ARB_shadow), PIPE_CAP_TEXTURE_SHADOW_MAP },
+ { o(ARB_texture_buffer_object), PIPE_CAP_TEXTURE_BUFFER_OBJECTS },
+ { o(ARB_texture_gather), PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS },
{ o(ARB_texture_mirror_clamp_to_edge), PIPE_CAP_TEXTURE_MIRROR_CLAMP },
{ o(ARB_texture_non_power_of_two), PIPE_CAP_NPOT_TEXTURES },
{ o(ARB_timer_query), PIPE_CAP_QUERY_TIMESTAMP },
@@ -452,11 +473,14 @@ void st_init_extensions(struct pipe_screen *screen,
{ o(ATI_separate_stencil), PIPE_CAP_TWO_SIDED_STENCIL },
{ o(ATI_texture_mirror_once), PIPE_CAP_TEXTURE_MIRROR_CLAMP },
{ o(NV_conditional_render), PIPE_CAP_CONDITIONAL_RENDER },
+ { o(NV_primitive_restart), PIPE_CAP_PRIMITIVE_RESTART },
{ o(NV_texture_barrier), PIPE_CAP_TEXTURE_BARRIER },
/* GL_NV_point_sprite is not supported by gallium because we don't
* support the GL_POINT_SPRITE_R_MODE_NV option. */
{ o(OES_standard_derivatives), PIPE_CAP_SM3 },
+ { o(OES_texture_float_linear), PIPE_CAP_TEXTURE_FLOAT_LINEAR },
+ { o(OES_texture_half_float_linear), PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR },
{ o(ARB_texture_cube_map_array), PIPE_CAP_CUBE_MAP_ARRAY },
{ o(ARB_texture_multisample), PIPE_CAP_TEXTURE_MULTISAMPLE },
{ o(ARB_texture_query_lod), PIPE_CAP_TEXTURE_QUERY_LOD },
@@ -467,6 +491,7 @@ void st_init_extensions(struct pipe_screen *screen,
{ o(ARB_texture_view), PIPE_CAP_SAMPLER_VIEW_TARGET },
{ o(ARB_clip_control), PIPE_CAP_CLIP_HALFZ },
{ o(EXT_polygon_offset_clamp), PIPE_CAP_POLYGON_OFFSET_CLAMP },
+ { o(EXT_depth_bounds_test), PIPE_CAP_DEPTH_BOUNDS_TEST },
};
/* Required: render target and sampler support */
@@ -475,6 +500,12 @@ void st_init_extensions(struct pipe_screen *screen,
{ PIPE_FORMAT_R32G32B32A32_FLOAT,
PIPE_FORMAT_R16G16B16A16_FLOAT } },
+ { { o(OES_texture_float) },
+ { PIPE_FORMAT_R32G32B32A32_FLOAT } },
+
+ { { o(OES_texture_half_float) },
+ { PIPE_FORMAT_R16G16B16A16_FLOAT } },
+
{ { o(ARB_texture_rgb10_a2ui) },
{ PIPE_FORMAT_R10G10B10A2_UINT,
PIPE_FORMAT_B10G10R10A2_UINT },
@@ -556,7 +587,8 @@ void st_init_extensions(struct pipe_screen *screen,
PIPE_FORMAT_R8G8B8A8_UNORM },
GL_TRUE }, /* at least one format must be supported */
- { { o(ARB_stencil_texturing) },
+ { { o(ARB_stencil_texturing),
+ o(ARB_texture_stencil8) },
{ PIPE_FORMAT_X24S8_UINT,
PIPE_FORMAT_S8X24_UINT },
GL_TRUE }, /* at least one format must be supported */
@@ -650,9 +682,6 @@ void st_init_extensions(struct pipe_screen *screen,
ARRAY_SIZE(vertex_mapping), PIPE_BUFFER,
PIPE_BIND_VERTEX_BUFFER);
- if (extensions->ARB_stencil_texturing)
- extensions->ARB_texture_stencil8 = GL_TRUE;
-
/* Figure out GLSL support. */
glsl_feature_level = screen->get_param(screen, PIPE_CAP_GLSL_FEATURE_LEVEL);
@@ -693,6 +722,7 @@ void st_init_extensions(struct pipe_screen *screen,
extensions->OES_depth_texture_cube_map = GL_TRUE;
extensions->ARB_shading_language_420pack = GL_TRUE;
extensions->ARB_texture_query_levels = GL_TRUE;
+ extensions->ARB_shader_subroutine = GL_TRUE;
if (!options->disable_shader_bit_encoding) {
extensions->ARB_shader_bit_encoding = GL_TRUE;
@@ -723,20 +753,9 @@ void st_init_extensions(struct pipe_screen *screen,
extensions->ANGLE_texture_compression_dxt = GL_FALSE;
}
- if (screen->get_shader_param(screen, PIPE_SHADER_GEOMETRY,
+ if (screen->get_shader_param(screen, PIPE_SHADER_TESS_CTRL,
PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) {
-#if 0 /* XXX re-enable when GLSL compiler again supports geometry shaders */
- extensions->ARB_geometry_shader4 = GL_TRUE;
-#endif
- }
-
- if (screen->get_param(screen, PIPE_CAP_PRIMITIVE_RESTART)) {
- extensions->NV_primitive_restart = GL_TRUE;
- }
-
- /* ARB_color_buffer_float. */
- if (screen->get_param(screen, PIPE_CAP_VERTEX_COLOR_UNCLAMPED)) {
- extensions->ARB_color_buffer_float = GL_TRUE;
+ extensions->ARB_tessellation_shader = GL_TRUE;
}
if (screen->fence_finish) {
@@ -823,9 +842,7 @@ void st_init_extensions(struct pipe_screen *screen,
consts->MinMapBufferAlignment =
screen->get_param(screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT);
- if (screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OBJECTS)) {
- extensions->ARB_texture_buffer_object = GL_TRUE;
-
+ if (extensions->ARB_texture_buffer_object) {
consts->MaxTextureBufferSize =
_min(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE),
(1u << 31) - 1);
@@ -840,10 +857,6 @@ void st_init_extensions(struct pipe_screen *screen,
PIPE_BIND_SAMPLER_VIEW);
}
- if (screen->get_param(screen, PIPE_CAP_MIXED_FRAMEBUFFER_SIZES)) {
- extensions->ARB_framebuffer_object = GL_TRUE;
- }
-
/* Unpacking a varying in the fragment shader costs 1 texture indirection.
* If the number of available texture indirections is very limited, then we
* prefer to disable varying packing rather than run the risk of varying
@@ -868,9 +881,6 @@ void st_init_extensions(struct pipe_screen *screen,
extensions->AMD_vertex_shader_viewport_index = GL_TRUE;
}
- if (consts->MaxProgramTextureGatherComponents > 0)
- extensions->ARB_texture_gather = GL_TRUE;
-
/* GL_ARB_ES3_compatibility.
*
* Assume that ES3 is supported if GLSL 3.30 is supported.
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 25e30c7deb2..6f007273c73 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -158,9 +158,12 @@ public:
{
this->file = file;
this->index = index;
+ this->index2D = 0;
this->writemask = writemask;
this->cond_mask = COND_TR;
this->reladdr = NULL;
+ this->reladdr2 = NULL;
+ this->has_index2 = false;
this->type = type;
this->array_id = 0;
}
@@ -169,9 +172,12 @@ public:
{
this->file = file;
this->index = 0;
+ this->index2D = 0;
this->writemask = writemask;
this->cond_mask = COND_TR;
this->reladdr = NULL;
+ this->reladdr2 = NULL;
+ this->has_index2 = false;
this->type = type;
this->array_id = 0;
}
@@ -181,9 +187,12 @@ public:
this->type = GLSL_TYPE_ERROR;
this->file = PROGRAM_UNDEFINED;
this->index = 0;
+ this->index2D = 0;
this->writemask = 0;
this->cond_mask = COND_TR;
this->reladdr = NULL;
+ this->reladdr2 = NULL;
+ this->has_index2 = false;
this->array_id = 0;
}
@@ -191,11 +200,14 @@ public:
gl_register_file file; /**< PROGRAM_* from Mesa */
int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
+ int index2D;
int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
GLuint cond_mask:4;
int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
/** Register index should be offset by the integer in this reg. */
st_src_reg *reladdr;
+ st_src_reg *reladdr2;
+ bool has_index2;
unsigned array_id;
};
@@ -207,9 +219,9 @@ st_src_reg::st_src_reg(st_dst_reg reg)
this->swizzle = SWIZZLE_XYZW;
this->negate = 0;
this->reladdr = reg.reladdr;
- this->index2D = 0;
- this->reladdr2 = NULL;
- this->has_index2 = false;
+ this->index2D = reg.index2D;
+ this->reladdr2 = reg.reladdr2;
+ this->has_index2 = reg.has_index2;
this->double_reg2 = false;
this->array_id = reg.array_id;
}
@@ -222,6 +234,9 @@ st_dst_reg::st_dst_reg(st_src_reg reg)
this->writemask = WRITEMASK_XYZW;
this->cond_mask = COND_TR;
this->reladdr = reg.reladdr;
+ this->index2D = reg.index2D;
+ this->reladdr2 = reg.reladdr2;
+ this->has_index2 = reg.has_index2;
this->array_id = reg.array_id;
}
@@ -551,8 +566,8 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
* reg directly for one of the regs, and preload the other reladdr
* sources into temps.
*/
- num_reladdr += dst.reladdr != NULL;
- num_reladdr += dst1.reladdr != NULL;
+ num_reladdr += dst.reladdr != NULL || dst.reladdr2;
+ num_reladdr += dst1.reladdr != NULL || dst1.reladdr2;
num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL;
num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL;
num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL;
@@ -563,8 +578,11 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
reladdr_to_temp(ir, &src1, &num_reladdr);
reladdr_to_temp(ir, &src0, &num_reladdr);
- if (dst.reladdr) {
- emit_arl(ir, address_reg, *dst.reladdr);
+ if (dst.reladdr || dst.reladdr2) {
+ if (dst.reladdr)
+ emit_arl(ir, address_reg, *dst.reladdr);
+ if (dst.reladdr2)
+ emit_arl(ir, address_reg2, *dst.reladdr2);
num_reladdr--;
}
if (dst1.reladdr) {
@@ -590,7 +608,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
inst->function = NULL;
/* Update indirect addressing status used by TGSI */
- if (dst.reladdr) {
+ if (dst.reladdr || dst.reladdr2) {
switch(dst.file) {
case PROGRAM_STATE_VAR:
case PROGRAM_CONSTANT:
@@ -797,7 +815,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
case TGSI_OPCODE_##c: \
if (type == GLSL_TYPE_DOUBLE) \
op = TGSI_OPCODE_##d; \
- else if (type == GLSL_TYPE_INT) \
+ else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE) \
op = TGSI_OPCODE_##i; \
else if (type == GLSL_TYPE_UINT) \
op = TGSI_OPCODE_##u; \
@@ -1090,6 +1108,7 @@ type_size(const struct glsl_type *type)
return size;
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
+ case GLSL_TYPE_SUBROUTINE:
/* Samplers take up one slot in UNIFORMS[], but they're baked in
* at link time.
*/
@@ -1470,6 +1489,9 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
result_src = op[0];
}
break;
+ case ir_unop_subroutine_to_int:
+ emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
+ break;
case ir_unop_abs:
emit_asm(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
break;
@@ -2243,7 +2265,10 @@ is_inout_array(unsigned stage, ir_variable *var, bool *is_2d)
*is_2d = false;
- if (stage == MESA_SHADER_GEOMETRY && var->data.mode == ir_var_shader_in) {
+ if (((stage == MESA_SHADER_GEOMETRY && var->data.mode == ir_var_shader_in) ||
+ (stage == MESA_SHADER_TESS_EVAL && var->data.mode == ir_var_shader_in) ||
+ stage == MESA_SHADER_TESS_CTRL) &&
+ !var->data.patch) {
if (!var->type->is_array())
return false; /* a system value probably */
@@ -2355,7 +2380,8 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
static void
shrink_array_declarations(struct array_decl *arrays, unsigned count,
- GLbitfield64 usage_mask)
+ GLbitfield64 usage_mask,
+ GLbitfield patch_usage_mask)
{
unsigned i, j;
@@ -2367,8 +2393,15 @@ shrink_array_declarations(struct array_decl *arrays, unsigned count,
/* Shrink the beginning. */
for (j = 0; j < decl->array_size; j++) {
- if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
- break;
+ if (decl->mesa_index >= VARYING_SLOT_PATCH0) {
+ if (patch_usage_mask &
+ BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j))
+ break;
+ }
+ else {
+ if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
+ break;
+ }
decl->mesa_index++;
decl->array_size--;
@@ -2377,8 +2410,15 @@ shrink_array_declarations(struct array_decl *arrays, unsigned count,
/* Shrink the end. */
for (j = decl->array_size-1; j >= 0; j--) {
- if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
- break;
+ if (decl->mesa_index >= VARYING_SLOT_PATCH0) {
+ if (patch_usage_mask &
+ BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j))
+ break;
+ }
+ else {
+ if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
+ break;
+ }
decl->array_size--;
}
@@ -2391,22 +2431,34 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
ir_constant *index;
st_src_reg src;
int element_size = type_size(ir->type);
- bool is_2D_input;
+ bool is_2D = false;
index = ir->array_index->constant_expression_value();
ir->array->accept(this);
src = this->result;
- is_2D_input = this->prog->Target == GL_GEOMETRY_PROGRAM_NV &&
- src.file == PROGRAM_INPUT &&
- ir->array->ir_type != ir_type_dereference_array;
+ if (ir->array->ir_type != ir_type_dereference_array) {
+ switch (this->prog->Target) {
+ case GL_TESS_CONTROL_PROGRAM_NV:
+ is_2D = (src.file == PROGRAM_INPUT || src.file == PROGRAM_OUTPUT) &&
+ !ir->variable_referenced()->data.patch;
+ break;
+ case GL_TESS_EVALUATION_PROGRAM_NV:
+ is_2D = src.file == PROGRAM_INPUT &&
+ !ir->variable_referenced()->data.patch;
+ break;
+ case GL_GEOMETRY_PROGRAM_NV:
+ is_2D = src.file == PROGRAM_INPUT;
+ break;
+ }
+ }
- if (is_2D_input)
+ if (is_2D)
element_size = 1;
if (index) {
- if (is_2D_input) {
+ if (is_2D) {
src.index2D = index->value.i[0];
src.has_index2 = true;
} else
@@ -2433,7 +2485,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
/* If there was already a relative address register involved, add the
* new and the old together to get the new offset.
*/
- if (!is_2D_input && src.reladdr != NULL) {
+ if (!is_2D && src.reladdr != NULL) {
st_src_reg accum_reg = get_temp(native_integers ?
glsl_type::int_type : glsl_type::float_type);
@@ -2443,7 +2495,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
index_reg = accum_reg;
}
- if (is_2D_input) {
+ if (is_2D) {
src.reladdr2 = ralloc(mem_ctx, st_src_reg);
memcpy(src.reladdr2, &index_reg, sizeof(index_reg));
src.index2D = 0;
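
The hunks above switch per-vertex tessellation and geometry arrays to two-dimensional register addressing: the vertex index becomes the second dimension (index2D for constants, reladdr2 for dynamic indices) instead of scaling the flat offset by element_size. A simplified model, with field names mirroring st_src_reg but the struct and helper purely illustrative:

/* Simplified model of the two-dimensional addressing used above. */
#include <stdbool.h>

struct reg_2d {
   int index;                       /* attribute slot (second dimension)      */
   int index2D;                     /* constant vertex index, if any          */
   bool has_index2;
   const struct reg_2d *reladdr2;   /* address reg for a dynamic vertex index */
};

static void
set_vertex_index(struct reg_2d *src, int const_index,
                 const struct reg_2d *dyn_index)
{
   if (dyn_index) {                 /* e.g. gl_in[gl_InvocationID] */
      src->reladdr2 = dyn_index;
      src->index2D = 0;
   } else {                         /* e.g. gl_in[2] */
      src->index2D = const_index;
   }
   src->has_index2 = true;
}
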
@@ -3430,7 +3482,10 @@ glsl_to_tgsi_visitor::visit(ir_end_primitive *ir)
void
glsl_to_tgsi_visitor::visit(ir_barrier *ir)
{
- unreachable("Not implemented!");
+ assert(this->prog->Target == GL_TESS_CONTROL_PROGRAM_NV ||
+ this->prog->Target == GL_COMPUTE_PROGRAM_NV);
+
+ emit_asm(ir, TGSI_OPCODE_BARRIER);
}
glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
@@ -3553,7 +3608,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
{
int tempWritesSize = 0;
unsigned *tempWrites = NULL;
- unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
+ unsigned outputWrites[VARYING_SLOT_TESS_MAX];
memset(outputWrites, 0, sizeof(outputWrites));
@@ -3561,8 +3616,8 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
unsigned prevWriteMask = 0;
/* Give up if we encounter relative addressing or flow control. */
- if (inst->dst[0].reladdr ||
- inst->dst[1].reladdr ||
+ if (inst->dst[0].reladdr || inst->dst[0].reladdr2 ||
+ inst->dst[1].reladdr || inst->dst[1].reladdr2 ||
tgsi_get_opcode_info(inst->op)->is_branch ||
inst->op == TGSI_OPCODE_BGNSUB ||
inst->op == TGSI_OPCODE_CONT ||
@@ -3573,7 +3628,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
}
if (inst->dst[0].file == PROGRAM_OUTPUT) {
- assert(inst->dst[0].index < MAX_PROGRAM_OUTPUTS);
+ assert(inst->dst[0].index < (signed)ARRAY_SIZE(outputWrites));
prevWriteMask = outputWrites[inst->dst[0].index];
outputWrites[inst->dst[0].index] |= inst->dst[0].writemask;
} else if (inst->dst[0].file == PROGRAM_TEMPORARY) {
@@ -3940,6 +3995,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
!(inst->dst[0].file == inst->src[0].file &&
inst->dst[0].index == inst->src[0].index) &&
!inst->dst[0].reladdr &&
+ !inst->dst[0].reladdr2 &&
!inst->saturate &&
inst->src[0].file != PROGRAM_ARRAY &&
!inst->src[0].reladdr &&
@@ -4527,6 +4583,14 @@ const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
TGSI_SEMANTIC_SAMPLEID,
TGSI_SEMANTIC_SAMPLEPOS,
TGSI_SEMANTIC_SAMPLEMASK,
+
+ /* Tessellation shaders
+ */
+ TGSI_SEMANTIC_TESSCOORD,
+ TGSI_SEMANTIC_VERTICESIN,
+ TGSI_SEMANTIC_PRIMID,
+ TGSI_SEMANTIC_TESSOUTER,
+ TGSI_SEMANTIC_TESSINNER,
};
/**
@@ -4651,6 +4715,9 @@ dst_register(struct st_translate *t, gl_register_file file, unsigned index,
if (!array_id) {
if (t->procType == TGSI_PROCESSOR_FRAGMENT)
assert(index < FRAG_RESULT_MAX);
+ else if (t->procType == TGSI_PROCESSOR_TESS_CTRL ||
+ t->procType == TGSI_PROCESSOR_TESS_EVAL)
+ assert(index < VARYING_SLOT_TESS_MAX);
else
assert(index < VARYING_SLOT_MAX);
@@ -4790,6 +4857,14 @@ translate_dst(struct st_translate *t,
dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
}
+ if (dst_reg->has_index2) {
+ if (dst_reg->reladdr2)
+ dst = ureg_dst_dimension_indirect(dst, ureg_src(t->address[1]),
+ dst_reg->index2D);
+ else
+ dst = ureg_dst_dimension(dst, dst_reg->index2D);
+ }
+
return dst;
}
@@ -5271,6 +5346,8 @@ st_translate_program(
TGSI_SEMANTIC_VERTEXID_NOBASE);
assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_BASE_VERTEX] ==
TGSI_SEMANTIC_BASEVERTEX);
+ assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_TESS_COORD] ==
+ TGSI_SEMANTIC_TESSCOORD);
t = CALLOC_STRUCT(st_translate);
if (!t) {
@@ -5313,6 +5390,8 @@ st_translate_program(
}
break;
case TGSI_PROCESSOR_GEOMETRY:
+ case TGSI_PROCESSOR_TESS_EVAL:
+ case TGSI_PROCESSOR_TESS_CTRL:
for (i = 0; i < numInputs; i++) {
unsigned array_id = 0;
unsigned array_size;
@@ -5347,6 +5426,8 @@ st_translate_program(
case TGSI_PROCESSOR_FRAGMENT:
break;
case TGSI_PROCESSOR_GEOMETRY:
+ case TGSI_PROCESSOR_TESS_EVAL:
+ case TGSI_PROCESSOR_TESS_CTRL:
case TGSI_PROCESSOR_VERTEX:
for (i = 0; i < numOutputs; i++) {
unsigned array_id = 0;
@@ -5461,6 +5542,7 @@ st_translate_program(
struct pipe_screen *pscreen = st->pipe->screen;
assert(procType == TGSI_PROCESSOR_VERTEX);
assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS));
+ (void) pscreen;
if (!ctx->Const.NativeIntegers) {
struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
@@ -5611,25 +5693,6 @@ out:
/* ----------------------------- End TGSI code ------------------------------ */
-static unsigned
-shader_stage_to_ptarget(gl_shader_stage stage)
-{
- switch (stage) {
- case MESA_SHADER_VERTEX:
- return PIPE_SHADER_VERTEX;
- case MESA_SHADER_FRAGMENT:
- return PIPE_SHADER_FRAGMENT;
- case MESA_SHADER_GEOMETRY:
- return PIPE_SHADER_GEOMETRY;
- case MESA_SHADER_COMPUTE:
- return PIPE_SHADER_COMPUTE;
- }
-
- assert(!"should not be reached");
- return PIPE_SHADER_VERTEX;
-}
-
-
/**
* Convert a shader's GLSL IR into a Mesa gl_program, although without
* generating Mesa IR.
@@ -5646,7 +5709,7 @@ get_mesa_program(struct gl_context *ctx,
struct gl_shader_compiler_options *options =
&ctx->Const.ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(shader->Type)];
struct pipe_screen *pscreen = ctx->st->pipe->screen;
- unsigned ptarget = shader_stage_to_ptarget(shader->Stage);
+ unsigned ptarget = st_shader_stage_to_ptarget(shader->Stage);
validate_ir_tree(shader->ir);
@@ -5673,7 +5736,7 @@ get_mesa_program(struct gl_context *ctx,
prog->Parameters);
/* Remove reads from output registers. */
- lower_output_reads(shader->ir);
+ lower_output_reads(shader->Stage, shader->ir);
/* Emit intermediate IR for main(). */
visit_exec_list(shader->ir, v);
@@ -5721,7 +5784,11 @@ get_mesa_program(struct gl_context *ctx,
/* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
v->simplify_cmp();
- v->copy_propagate();
+
+ if (shader->Type != GL_TESS_CONTROL_SHADER &&
+ shader->Type != GL_TESS_EVALUATION_SHADER)
+ v->copy_propagate();
+
while (v->eliminate_dead_code());
v->merge_two_dsts();
@@ -5745,9 +5812,9 @@ get_mesa_program(struct gl_context *ctx,
do_set_program_inouts(shader->ir, prog, shader->Stage);
shrink_array_declarations(v->input_arrays, v->num_input_arrays,
- prog->InputsRead);
+ prog->InputsRead, prog->PatchInputsRead);
shrink_array_declarations(v->output_arrays, v->num_output_arrays,
- prog->OutputsWritten);
+ prog->OutputsWritten, prog->PatchOutputsWritten);
count_resources(v, prog);
/* This must be done before the uniform storage is associated. */
@@ -5776,6 +5843,8 @@ get_mesa_program(struct gl_context *ctx,
struct st_vertex_program *stvp;
struct st_fragment_program *stfp;
struct st_geometry_program *stgp;
+ struct st_tessctrl_program *sttcp;
+ struct st_tesseval_program *sttep;
switch (shader->Type) {
case GL_VERTEX_SHADER:
@@ -5790,6 +5859,14 @@ get_mesa_program(struct gl_context *ctx,
stgp = (struct st_geometry_program *)prog;
stgp->glsl_to_tgsi = v;
break;
+ case GL_TESS_CONTROL_SHADER:
+ sttcp = (struct st_tessctrl_program *)prog;
+ sttcp->glsl_to_tgsi = v;
+ break;
+ case GL_TESS_EVALUATION_SHADER:
+ sttep = (struct st_tesseval_program *)prog;
+ sttep->glsl_to_tgsi = v;
+ break;
default:
assert(!"should not be reached");
return NULL;
@@ -5800,6 +5877,71 @@ get_mesa_program(struct gl_context *ctx,
extern "C" {
+static void
+st_dump_program_for_shader_db(struct gl_context *ctx,
+ struct gl_shader_program *prog)
+{
+ /* Dump only successfully compiled and linked shaders to the specified
+ * file. This is for shader-db.
+ *
+ * These options allow some pre-processing of shaders while dumping,
+ * because some apps have ill-formed shaders.
+ */
+ const char *dump_filename = os_get_option("ST_DUMP_SHADERS");
+ const char *insert_directives = os_get_option("ST_DUMP_INSERT");
+
+ if (dump_filename && prog->Name != 0) {
+ FILE *f = fopen(dump_filename, "a");
+
+ if (f) {
+ for (unsigned i = 0; i < prog->NumShaders; i++) {
+ const struct gl_shader *sh = prog->Shaders[i];
+ const char *source;
+ bool skip_version = false;
+
+ if (!sh)
+ continue;
+
+ source = sh->Source;
+
+ /* This string mustn't be changed. shader-db uses it to find
+ * where the shader begins.
+ */
+ fprintf(f, "GLSL %s shader %d source for linked program %d:\n",
+ _mesa_shader_stage_to_string(sh->Stage),
+ i, prog->Name);
+
+ /* Dump the forced version if set. */
+ if (ctx->Const.ForceGLSLVersion) {
+ fprintf(f, "#version %i\n", ctx->Const.ForceGLSLVersion);
+ skip_version = true;
+ }
+
+ /* Insert directives (optional). */
+ if (insert_directives) {
+ if (!ctx->Const.ForceGLSLVersion && prog->Version)
+ fprintf(f, "#version %i\n", prog->Version);
+ fprintf(f, "%s\n", insert_directives);
+ skip_version = true;
+ }
+
+ if (skip_version && strncmp(source, "#version ", 9) == 0) {
+ const char *next_line = strstr(source, "\n");
+
+ if (next_line)
+ source = next_line + 1;
+ else
+ continue;
+ }
+
+ fprintf(f, "%s", source);
+ fprintf(f, "\n");
+ }
+ fclose(f);
+ }
+ }
+}
+
/**
* Link a shader.
* Called via ctx->Driver.LinkShader()
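
Usage note, not part of the patch: st_dump_program_for_shader_db() above is driven entirely by two environment options read through os_get_option(), which on typical platforms is a thin getenv() wrapper. A sketch of the intended use; the helper name, application name, and parameters below are hypothetical:

   /* Hypothetical invocation for collecting a shader-db corpus:
    *   ST_DUMP_SHADERS=/tmp/shaders.glsl ST_DUMP_INSERT="#define HACK 1" ./app
    * The control flow reduces to roughly this (assumes <stdio.h>, <stdlib.h>):
    */
   static void dump_linked_source(const char *stage, unsigned i,
                                  unsigned prog_name, const char *source)
   {
      const char *dump_filename = getenv("ST_DUMP_SHADERS");
      const char *insert_directives = getenv("ST_DUMP_INSERT");
      FILE *f;

      if (!dump_filename || !(f = fopen(dump_filename, "a")))
         return;

      /* shader-db keys on this exact header line */
      fprintf(f, "GLSL %s shader %d source for linked program %d:\n",
              stage, i, prog_name);
      if (insert_directives)
         fprintf(f, "%s\n", insert_directives);
      fprintf(f, "%s\n", source);
      fclose(f);
   }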
@@ -5821,7 +5963,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
gl_shader_stage stage = _mesa_shader_enum_to_shader_stage(prog->_LinkedShaders[i]->Type);
const struct gl_shader_compiler_options *options =
&ctx->Const.ShaderCompilerOptions[stage];
- unsigned ptarget = shader_stage_to_ptarget(stage);
+ unsigned ptarget = st_shader_stage_to_ptarget(stage);
bool have_dround = pscreen->get_shader_param(pscreen, ptarget,
PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED);
bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget,
@@ -5832,7 +5974,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
*/
if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput ||
options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) {
- lower_variable_index_to_cond_assign(ir,
+ lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
options->EmitNoIndirectInput,
options->EmitNoIndirectOutput,
options->EmitNoIndirectTemp,
@@ -5920,6 +6062,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
_mesa_reference_program(ctx, &linked_prog, NULL);
}
+ st_dump_program_for_shader_db(ctx, prog);
return GL_TRUE;
}
diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c
index a2dee6298fa..2e2c8ffaed9 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -368,6 +368,7 @@ st_visual_to_context_mode(const struct st_visual *visual,
mode->rgbBits = mode->redBits +
mode->greenBits + mode->blueBits + mode->alphaBits;
+ mode->sRGBCapable = util_format_is_srgb(visual->color_format);
}
if (visual->depth_stencil_format != PIPE_FORMAT_NONE) {
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index fa792bc349b..e62dd7aab80 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -163,6 +163,68 @@ st_release_gp_variants(struct st_context *st, struct st_geometry_program *stgp)
}
+/**
+ * Delete a tessellation control program variant. Note the caller must unlink
+ * the variant from the linked list.
+ */
+static void
+delete_tcp_variant(struct st_context *st, struct st_tcp_variant *tcpv)
+{
+ if (tcpv->driver_shader)
+ cso_delete_tessctrl_shader(st->cso_context, tcpv->driver_shader);
+
+ free(tcpv);
+}
+
+
+/**
+ * Free all variants of a tessellation control program.
+ */
+void
+st_release_tcp_variants(struct st_context *st, struct st_tessctrl_program *sttcp)
+{
+ struct st_tcp_variant *tcpv;
+
+ for (tcpv = sttcp->variants; tcpv; ) {
+ struct st_tcp_variant *next = tcpv->next;
+ delete_tcp_variant(st, tcpv);
+ tcpv = next;
+ }
+
+ sttcp->variants = NULL;
+}
+
+
+/**
+ * Delete a tessellation evaluation program variant. Note the caller must
+ * unlink the variant from the linked list.
+ */
+static void
+delete_tep_variant(struct st_context *st, struct st_tep_variant *tepv)
+{
+ if (tepv->driver_shader)
+ cso_delete_tesseval_shader(st->cso_context, tepv->driver_shader);
+
+ free(tepv);
+}
+
+
+/**
+ * Free all variants of a tessellation evaluation program.
+ */
+void
+st_release_tep_variants(struct st_context *st, struct st_tesseval_program *sttep)
+{
+ struct st_tep_variant *tepv;
+
+ for (tepv = sttep->variants; tepv; ) {
+ struct st_tep_variant *next = tepv->next;
+ delete_tep_variant(st, tepv);
+ tepv = next;
+ }
+
+ sttep->variants = NULL;
+}
/**
@@ -870,61 +932,52 @@ st_get_fp_variant(struct st_context *st,
/**
- * Translate a geometry program to create a new variant.
+ * Translate a program. This is common code for geometry and tessellation
+ * shaders.
*/
-static struct st_gp_variant *
-st_translate_geometry_program(struct st_context *st,
- struct st_geometry_program *stgp,
- const struct st_gp_variant_key *key)
+static void
+st_translate_program_common(struct st_context *st,
+ struct gl_program *prog,
+ struct glsl_to_tgsi_visitor *glsl_to_tgsi,
+ struct ureg_program *ureg,
+ unsigned tgsi_processor,
+ struct pipe_shader_state *out_state)
{
- GLuint inputSlotToAttr[VARYING_SLOT_MAX];
- GLuint inputMapping[VARYING_SLOT_MAX];
- GLuint outputSlotToAttr[VARYING_SLOT_MAX];
- GLuint outputMapping[VARYING_SLOT_MAX];
- struct pipe_context *pipe = st->pipe;
+ GLuint inputSlotToAttr[VARYING_SLOT_TESS_MAX];
+ GLuint inputMapping[VARYING_SLOT_TESS_MAX];
+ GLuint outputSlotToAttr[VARYING_SLOT_TESS_MAX];
+ GLuint outputMapping[VARYING_SLOT_TESS_MAX];
GLuint attr;
- uint gs_num_inputs = 0;
-
ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
+ uint num_inputs = 0;
- ubyte gs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
- ubyte gs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
- uint gs_num_outputs = 0;
+ ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
+ ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
+ uint num_outputs = 0;
GLint i;
- struct ureg_program *ureg;
- struct pipe_shader_state state = {0};
- struct st_gp_variant *gpv;
-
- gpv = CALLOC_STRUCT(st_gp_variant);
- if (!gpv)
- return NULL;
-
- ureg = ureg_create_with_screen(TGSI_PROCESSOR_GEOMETRY, st->pipe->screen);
- if (ureg == NULL) {
- free(gpv);
- return NULL;
- }
memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
memset(inputMapping, 0, sizeof(inputMapping));
memset(outputSlotToAttr, 0, sizeof(outputSlotToAttr));
memset(outputMapping, 0, sizeof(outputMapping));
+ memset(out_state, 0, sizeof(*out_state));
/*
* Convert Mesa program inputs to TGSI input register semantics.
*/
for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
- if ((stgp->Base.Base.InputsRead & BITFIELD64_BIT(attr)) != 0) {
- const GLuint slot = gs_num_inputs++;
+ if ((prog->InputsRead & BITFIELD64_BIT(attr)) != 0) {
+ const GLuint slot = num_inputs++;
inputMapping[attr] = slot;
inputSlotToAttr[slot] = attr;
switch (attr) {
case VARYING_SLOT_PRIMITIVE_ID:
+ assert(tgsi_processor == TGSI_PROCESSOR_GEOMETRY);
input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
input_semantic_index[slot] = 0;
break;
@@ -976,19 +1029,33 @@ st_translate_geometry_program(struct st_context *st,
/* fall through */
case VARYING_SLOT_VAR0:
default:
- assert(attr >= VARYING_SLOT_VAR0 && attr < VARYING_SLOT_MAX);
+ assert(attr >= VARYING_SLOT_VAR0 ||
+ (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
input_semantic_index[slot] =
st_get_generic_varying_index(st, attr);
- break;
+ break;
}
}
}
+ /* Also add patch inputs. */
+ for (attr = 0; attr < 32; attr++) {
+ if (prog->PatchInputsRead & (1 << attr)) {
+ GLuint slot = num_inputs++;
+ GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
+
+ inputMapping[patch_attr] = slot;
+ inputSlotToAttr[slot] = patch_attr;
+ input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
+ input_semantic_index[slot] = attr;
+ }
+ }
+
/* initialize output semantics to defaults */
for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
- gs_output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
- gs_output_semantic_index[i] = 0;
+ output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
+ output_semantic_index[i] = 0;
}
/*
@@ -996,8 +1063,8 @@ st_translate_geometry_program(struct st_context *st,
* mapping and the semantic information for each output.
*/
for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
- if (stgp->Base.Base.OutputsWritten & BITFIELD64_BIT(attr)) {
- GLuint slot = gs_num_outputs++;
+ if (prog->OutputsWritten & BITFIELD64_BIT(attr)) {
+ GLuint slot = num_outputs++;
outputMapping[attr] = slot;
outputSlotToAttr[slot] = attr;
@@ -1005,56 +1072,64 @@ st_translate_geometry_program(struct st_context *st,
switch (attr) {
case VARYING_SLOT_POS:
assert(slot == 0);
- gs_output_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
- gs_output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_COL0:
- gs_output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
- gs_output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_COL1:
- gs_output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
- gs_output_semantic_index[slot] = 1;
+ output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+ output_semantic_index[slot] = 1;
break;
case VARYING_SLOT_BFC0:
- gs_output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
- gs_output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_BFC1:
- gs_output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
- gs_output_semantic_index[slot] = 1;
+ output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
+ output_semantic_index[slot] = 1;
break;
case VARYING_SLOT_FOGC:
- gs_output_semantic_name[slot] = TGSI_SEMANTIC_FOG;
- gs_output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_FOG;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_PSIZ:
- gs_output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
- gs_output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_CLIP_VERTEX:
- gs_output_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX;
- gs_output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_CLIP_DIST0:
- gs_output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
- gs_output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_CLIP_DIST1:
- gs_output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
- gs_output_semantic_index[slot] = 1;
+ output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
+ output_semantic_index[slot] = 1;
break;
case VARYING_SLOT_LAYER:
- gs_output_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
- gs_output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_PRIMITIVE_ID:
- gs_output_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
- gs_output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_VIEWPORT:
- gs_output_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
- gs_output_semantic_index[slot] = 0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
+ output_semantic_index[slot] = 0;
+ break;
+ case VARYING_SLOT_TESS_LEVEL_OUTER:
+ output_semantic_name[slot] = TGSI_SEMANTIC_TESSOUTER;
+ output_semantic_index[slot] = 0;
+ break;
+ case VARYING_SLOT_TESS_LEVEL_INNER:
+ output_semantic_name[slot] = TGSI_SEMANTIC_TESSINNER;
+ output_semantic_index[slot] = 0;
break;
case VARYING_SLOT_TEX0:
case VARYING_SLOT_TEX1:
@@ -1065,36 +1140,44 @@ st_translate_geometry_program(struct st_context *st,
case VARYING_SLOT_TEX6:
case VARYING_SLOT_TEX7:
if (st->needs_texcoord_semantic) {
- gs_output_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
- gs_output_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
+ output_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
+ output_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
break;
}
/* fall through */
case VARYING_SLOT_VAR0:
default:
- assert(slot < ARRAY_SIZE(gs_output_semantic_name));
- assert(attr >= VARYING_SLOT_VAR0);
- gs_output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
- gs_output_semantic_index[slot] =
+ assert(slot < ARRAY_SIZE(output_semantic_name));
+ assert(attr >= VARYING_SLOT_VAR0 ||
+ (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
+ output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
+ output_semantic_index[slot] =
st_get_generic_varying_index(st, attr);
- break;
+ break;
}
}
}
- ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, stgp->Base.InputType);
- ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, stgp->Base.OutputType);
- ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
- stgp->Base.VerticesOut);
- ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS, stgp->Base.Invocations);
+ /* Also add patch outputs. */
+ for (attr = 0; attr < 32; attr++) {
+ if (prog->PatchOutputsWritten & (1 << attr)) {
+ GLuint slot = num_outputs++;
+ GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
+
+ outputMapping[patch_attr] = slot;
+ outputSlotToAttr[slot] = patch_attr;
+ output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
+ output_semantic_index[slot] = attr;
+ }
+ }
st_translate_program(st->ctx,
- TGSI_PROCESSOR_GEOMETRY,
+ tgsi_processor,
ureg,
- stgp->glsl_to_tgsi,
- &stgp->Base.Base,
+ glsl_to_tgsi,
+ prog,
/* inputs */
- gs_num_inputs,
+ num_inputs,
inputMapping,
inputSlotToAttr,
input_semantic_name,
@@ -1102,30 +1185,64 @@ st_translate_geometry_program(struct st_context *st,
NULL,
NULL,
/* outputs */
- gs_num_outputs,
+ num_outputs,
outputMapping,
outputSlotToAttr,
- gs_output_semantic_name,
- gs_output_semantic_index,
+ output_semantic_name,
+ output_semantic_index,
FALSE,
FALSE);
- state.tokens = ureg_get_tokens(ureg, NULL);
+ out_state->tokens = ureg_get_tokens(ureg, NULL);
ureg_destroy(ureg);
- st_translate_stream_output_info(stgp->glsl_to_tgsi,
+ st_translate_stream_output_info(glsl_to_tgsi,
outputMapping,
- &state.stream_output);
+ &out_state->stream_output);
if ((ST_DEBUG & DEBUG_TGSI) && (ST_DEBUG & DEBUG_MESA)) {
- _mesa_print_program(&stgp->Base.Base);
+ _mesa_print_program(prog);
debug_printf("\n");
}
if (ST_DEBUG & DEBUG_TGSI) {
- tgsi_dump(state.tokens, 0);
+ tgsi_dump(out_state->tokens, 0);
debug_printf("\n");
}
+}
+
+
+/**
+ * Translate a geometry program to create a new variant.
+ */
+static struct st_gp_variant *
+st_translate_geometry_program(struct st_context *st,
+ struct st_geometry_program *stgp,
+ const struct st_gp_variant_key *key)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct ureg_program *ureg;
+ struct st_gp_variant *gpv;
+ struct pipe_shader_state state;
+
+ ureg = ureg_create_with_screen(TGSI_PROCESSOR_GEOMETRY, st->pipe->screen);
+ if (ureg == NULL)
+ return NULL;
+
+ ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, stgp->Base.InputType);
+ ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, stgp->Base.OutputType);
+ ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
+ stgp->Base.VerticesOut);
+ ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS, stgp->Base.Invocations);
+
+ st_translate_program_common(st, &stgp->Base.Base, stgp->glsl_to_tgsi, ureg,
+ TGSI_PROCESSOR_GEOMETRY, &state);
+
+ gpv = CALLOC_STRUCT(st_gp_variant);
+ if (!gpv) {
+ ureg_free_tokens(state.tokens);
+ return NULL;
+ }
/* fill in new variant */
gpv->driver_shader = pipe->create_gs_state(pipe, &state);
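
Illustration, not part of the patch: the two per-patch loops added to st_translate_program_common() above map bit i of PatchInputsRead / PatchOutputsWritten to TGSI semantic PATCH[i]. For example, a per-patch input occupying bit 3 is effectively declared as:

   unsigned attr = 3;                                /* bit 3 of PatchInputsRead */
   unsigned slot = num_inputs++;                     /* next packed TGSI input   */

   inputMapping[VARYING_SLOT_PATCH0 + attr] = slot;  /* Mesa slot -> TGSI slot   */
   inputSlotToAttr[slot] = VARYING_SLOT_PATCH0 + attr;
   input_semantic_name[slot]  = TGSI_SEMANTIC_PATCH;
   input_semantic_index[slot] = attr;                /* i.e. PATCH[3] in TGSI    */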
@@ -1168,6 +1285,168 @@ st_get_gp_variant(struct st_context *st,
/**
+ * Translate a tessellation control program to create a new variant.
+ */
+static struct st_tcp_variant *
+st_translate_tessctrl_program(struct st_context *st,
+ struct st_tessctrl_program *sttcp,
+ const struct st_tcp_variant_key *key)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct ureg_program *ureg;
+ struct st_tcp_variant *tcpv;
+ struct pipe_shader_state state;
+
+ ureg = ureg_create_with_screen(TGSI_PROCESSOR_TESS_CTRL, pipe->screen);
+ if (ureg == NULL) {
+ return NULL;
+ }
+
+ ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
+ sttcp->Base.VerticesOut);
+
+ st_translate_program_common(st, &sttcp->Base.Base, sttcp->glsl_to_tgsi,
+ ureg, TGSI_PROCESSOR_TESS_CTRL, &state);
+
+ tcpv = CALLOC_STRUCT(st_tcp_variant);
+ if (!tcpv) {
+ ureg_free_tokens(state.tokens);
+ return NULL;
+ }
+
+ /* fill in new variant */
+ tcpv->driver_shader = pipe->create_tcs_state(pipe, &state);
+ tcpv->key = *key;
+
+ ureg_free_tokens(state.tokens);
+ return tcpv;
+}
+
+
+/**
+ * Get/create tessellation control program variant.
+ */
+struct st_tcp_variant *
+st_get_tcp_variant(struct st_context *st,
+ struct st_tessctrl_program *sttcp,
+ const struct st_tcp_variant_key *key)
+{
+ struct st_tcp_variant *tcpv;
+
+ /* Search for existing variant */
+ for (tcpv = sttcp->variants; tcpv; tcpv = tcpv->next) {
+ if (memcmp(&tcpv->key, key, sizeof(*key)) == 0) {
+ break;
+ }
+ }
+
+ if (!tcpv) {
+ /* create new */
+ tcpv = st_translate_tessctrl_program(st, sttcp, key);
+ if (tcpv) {
+ /* insert into list */
+ tcpv->next = sttcp->variants;
+ sttcp->variants = tcpv;
+ }
+ }
+
+ return tcpv;
+}
+
+
+/**
+ * Translate a tessellation evaluation program to create a new variant.
+ */
+static struct st_tep_variant *
+st_translate_tesseval_program(struct st_context *st,
+ struct st_tesseval_program *sttep,
+ const struct st_tep_variant_key *key)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct ureg_program *ureg;
+ struct st_tep_variant *tepv;
+ struct pipe_shader_state state;
+
+ ureg = ureg_create_with_screen(TGSI_PROCESSOR_TESS_EVAL, pipe->screen);
+ if (ureg == NULL) {
+ return NULL;
+ }
+
+ if (sttep->Base.PrimitiveMode == GL_ISOLINES)
+ ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
+ else
+ ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, sttep->Base.PrimitiveMode);
+
+ switch (sttep->Base.Spacing) {
+ case GL_EQUAL:
+ ureg_property(ureg, TGSI_PROPERTY_TES_SPACING, PIPE_TESS_SPACING_EQUAL);
+ break;
+ case GL_FRACTIONAL_EVEN:
+ ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
+ PIPE_TESS_SPACING_FRACTIONAL_EVEN);
+ break;
+ case GL_FRACTIONAL_ODD:
+ ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
+ PIPE_TESS_SPACING_FRACTIONAL_ODD);
+ break;
+ default:
+ assert(0);
+ }
+
+ ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
+ sttep->Base.VertexOrder == GL_CW);
+ ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE, sttep->Base.PointMode);
+
+ st_translate_program_common(st, &sttep->Base.Base, sttep->glsl_to_tgsi,
+ ureg, TGSI_PROCESSOR_TESS_EVAL, &state);
+
+ tepv = CALLOC_STRUCT(st_tep_variant);
+ if (!tepv) {
+ ureg_free_tokens(state.tokens);
+ return NULL;
+ }
+
+ /* fill in new variant */
+ tepv->driver_shader = pipe->create_tes_state(pipe, &state);
+ tepv->key = *key;
+
+ ureg_free_tokens(state.tokens);
+ return tepv;
+}
+
+
+/**
+ * Get/create tessellation evaluation program variant.
+ */
+struct st_tep_variant *
+st_get_tep_variant(struct st_context *st,
+ struct st_tesseval_program *sttep,
+ const struct st_tep_variant_key *key)
+{
+ struct st_tep_variant *tepv;
+
+ /* Search for existing variant */
+ for (tepv = sttep->variants; tepv; tepv = tepv->next) {
+ if (memcmp(&tepv->key, key, sizeof(*key)) == 0) {
+ break;
+ }
+ }
+
+ if (!tepv) {
+ /* create new */
+ tepv = st_translate_tesseval_program(st, sttep, key);
+ if (tepv) {
+ /* insert into list */
+ tepv->next = sttep->variants;
+ sttep->variants = tepv;
+ }
+ }
+
+ return tepv;
+}
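
Usage sketch, not part of the patch: a state-tracker atom would obtain and bind a tessellation variant roughly as below. st_get_tcp_variant() comes from this patch; the cso_set_tessctrl_shader_handle() call is an assumption, named by analogy with the existing geometry-shader binding helper:

   struct st_tcp_variant_key key;
   struct st_tcp_variant *tcpv;

   memset(&key, 0, sizeof(key));
   key.st = st;                                 /* variants are per-context */

   tcpv = st_get_tcp_variant(st, sttcp, &key);  /* reuse or translate       */

   /* assumed binding helper, mirroring cso_set_geometry_shader_handle() */
   if (tcpv)
      cso_set_tessctrl_shader_handle(st->cso_context, tcpv->driver_shader);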
+
+
+/**
* Vert/Geom/Frag programs have per-context variants. Free all the
* variants attached to the given program which match the given context.
*/
@@ -1240,6 +1519,48 @@ destroy_program_variants(struct st_context *st, struct gl_program *program)
}
}
break;
+ case GL_TESS_CONTROL_PROGRAM_NV:
+ {
+ struct st_tessctrl_program *sttcp =
+ (struct st_tessctrl_program *) program;
+ struct st_tcp_variant *tcpv, **prevPtr = &sttcp->variants;
+
+ for (tcpv = sttcp->variants; tcpv; ) {
+ struct st_tcp_variant *next = tcpv->next;
+ if (tcpv->key.st == st) {
+ /* unlink from list */
+ *prevPtr = next;
+ /* destroy this variant */
+ delete_tcp_variant(st, tcpv);
+ }
+ else {
+ prevPtr = &tcpv->next;
+ }
+ tcpv = next;
+ }
+ }
+ break;
+ case GL_TESS_EVALUATION_PROGRAM_NV:
+ {
+ struct st_tesseval_program *sttep =
+ (struct st_tesseval_program *) program;
+ struct st_tep_variant *tepv, **prevPtr = &sttep->variants;
+
+ for (tepv = sttep->variants; tepv; ) {
+ struct st_tep_variant *next = tepv->next;
+ if (tepv->key.st == st) {
+ /* unlink from list */
+ *prevPtr = next;
+ /* destroy this variant */
+ delete_tep_variant(st, tepv);
+ }
+ else {
+ prevPtr = &tepv->next;
+ }
+ tepv = next;
+ }
+ }
+ break;
default:
_mesa_problem(NULL, "Unexpected program target 0x%x in "
"destroy_program_variants_cb()", program->Target);
@@ -1276,6 +1597,8 @@ destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
case GL_VERTEX_SHADER:
case GL_FRAGMENT_SHADER:
case GL_GEOMETRY_SHADER:
+ case GL_TESS_CONTROL_SHADER:
+ case GL_TESS_EVALUATION_SHADER:
{
destroy_program_variants(st, shader->Program);
}
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index bb77eb6ed65..7013993fe38 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -188,7 +188,7 @@ struct st_gp_variant_key
*/
struct st_gp_variant
{
- /* Parameters which generated this translated version of a vertex */
+ /* Parameters which generated this variant. */
struct st_gp_variant_key key;
void *driver_shader;
@@ -210,6 +210,76 @@ struct st_geometry_program
+/** Tessellation control program variant key */
+struct st_tcp_variant_key
+{
+ struct st_context *st; /**< variants are per-context */
+ /* no other fields yet */
+};
+
+
+/**
+ * Tessellation control program variant.
+ */
+struct st_tcp_variant
+{
+ /* Parameters which generated this variant. */
+ struct st_tcp_variant_key key;
+
+ void *driver_shader;
+
+ struct st_tcp_variant *next;
+};
+
+
+/**
+ * Derived from Mesa gl_tess_ctrl_program:
+ */
+struct st_tessctrl_program
+{
+ struct gl_tess_ctrl_program Base; /**< The Mesa tess ctrl program */
+ struct glsl_to_tgsi_visitor* glsl_to_tgsi;
+
+ struct st_tcp_variant *variants;
+};
+
+
+
+/** Tessellation evaluation program variant key */
+struct st_tep_variant_key
+{
+ struct st_context *st; /**< variants are per-context */
+ /* no other fields yet */
+};
+
+
+/**
+ * Tessellation evaluation program variant.
+ */
+struct st_tep_variant
+{
+ /* Parameters which generated this variant. */
+ struct st_tep_variant_key key;
+
+ void *driver_shader;
+
+ struct st_tep_variant *next;
+};
+
+
+/**
+ * Derived from Mesa gl_tess_eval_program:
+ */
+struct st_tesseval_program
+{
+ struct gl_tess_eval_program Base; /**< The Mesa tess eval program */
+ struct glsl_to_tgsi_visitor* glsl_to_tgsi;
+
+ struct st_tep_variant *variants;
+};
+
+
+
static inline struct st_fragment_program *
st_fragment_program( struct gl_fragment_program *fp )
{
@@ -229,6 +299,18 @@ st_geometry_program( struct gl_geometry_program *gp )
return (struct st_geometry_program *)gp;
}
+static inline struct st_tessctrl_program *
+st_tessctrl_program( struct gl_tess_ctrl_program *tcp )
+{
+ return (struct st_tessctrl_program *)tcp;
+}
+
+static inline struct st_tesseval_program *
+st_tesseval_program( struct gl_tess_eval_program *tep )
+{
+ return (struct st_tesseval_program *)tep;
+}
+
static inline void
st_reference_vertprog(struct st_context *st,
struct st_vertex_program **ptr,
@@ -259,6 +341,26 @@ st_reference_fragprog(struct st_context *st,
(struct gl_program *) prog);
}
+static inline void
+st_reference_tesscprog(struct st_context *st,
+ struct st_tessctrl_program **ptr,
+ struct st_tessctrl_program *prog)
+{
+ _mesa_reference_program(st->ctx,
+ (struct gl_program **) ptr,
+ (struct gl_program *) prog);
+}
+
+static inline void
+st_reference_tesseprog(struct st_context *st,
+ struct st_tesseval_program **ptr,
+ struct st_tesseval_program *prog)
+{
+ _mesa_reference_program(st->ctx,
+ (struct gl_program **) ptr,
+ (struct gl_program *) prog);
+}
+
/**
* This defines mapping from Mesa VARYING_SLOTs to TGSI GENERIC slots.
*/
@@ -302,6 +404,16 @@ st_get_gp_variant(struct st_context *st,
struct st_geometry_program *stgp,
const struct st_gp_variant_key *key);
+extern struct st_tcp_variant *
+st_get_tcp_variant(struct st_context *st,
+ struct st_tessctrl_program *stgp,
+ const struct st_tcp_variant_key *key);
+
+extern struct st_tep_variant *
+st_get_tep_variant(struct st_context *st,
+ struct st_tesseval_program *stgp,
+ const struct st_tep_variant_key *key);
+
extern void
st_prepare_vertex_program(struct gl_context *ctx,
@@ -325,6 +437,14 @@ st_release_gp_variants(struct st_context *st,
struct st_geometry_program *stgp);
extern void
+st_release_tcp_variants(struct st_context *st,
+ struct st_tessctrl_program *stgp);
+
+extern void
+st_release_tep_variants(struct st_context *st,
+ struct st_tesseval_program *stgp);
+
+extern void
st_destroy_program_variants(struct st_context *st);
diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c
index 6beb21e3389..52b094330b9 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -462,6 +462,11 @@ st_texture_get_sampler_view(struct st_context *st,
return free;
}
+
+/**
+ * For the given texture object, release any sampler views which belong
+ * to the calling context.
+ */
void
st_texture_release_sampler_view(struct st_context *st,
struct st_texture_object *stObj)
@@ -478,6 +483,11 @@ st_texture_release_sampler_view(struct st_context *st,
}
}
+
+/**
+ * Release all sampler views attached to the given texture object, regardless
+ * of the context.
+ */
void
st_texture_release_all_sampler_views(struct st_context *st,
struct st_texture_object *stObj)
diff --git a/src/mesa/swrast/s_aaline.c b/src/mesa/swrast/s_aaline.c
index f3258e813a6..de5b42b9f6b 100644
--- a/src/mesa/swrast/s_aaline.c
+++ b/src/mesa/swrast/s_aaline.c
@@ -116,11 +116,11 @@ compute_plane(GLfloat x0, GLfloat y0, GLfloat x1, GLfloat y1,
const GLfloat b = pz * py;
const GLfloat c = px * px + py * py;
const GLfloat d = -(a * x0 + b * y0 + c * z0);
- if (a == 0.0 && b == 0.0 && c == 0.0 && d == 0.0) {
- plane[0] = 0.0;
- plane[1] = 0.0;
- plane[2] = 1.0;
- plane[3] = 0.0;
+ if (a == 0.0F && b == 0.0F && c == 0.0F && d == 0.0F) {
+ plane[0] = 0.0F;
+ plane[1] = 0.0F;
+ plane[2] = 1.0F;
+ plane[3] = 0.0F;
}
else {
plane[0] = a;
@@ -135,9 +135,9 @@ compute_plane(GLfloat x0, GLfloat y0, GLfloat x1, GLfloat y1,
static inline void
constant_plane(GLfloat value, GLfloat plane[4])
{
- plane[0] = 0.0;
- plane[1] = 0.0;
- plane[2] = -1.0;
+ plane[0] = 0.0F;
+ plane[1] = 0.0F;
+ plane[2] = -1.0F;
plane[3] = value;
}
@@ -160,8 +160,8 @@ static inline GLfloat
solve_plane_recip(GLfloat x, GLfloat y, const GLfloat plane[4])
{
const GLfloat denom = plane[3] + plane[0] * x + plane[1] * y;
- if (denom == 0.0)
- return 0.0;
+ if (denom == 0.0F)
+ return 0.0F;
else
return -plane[2] / denom;
}
@@ -374,7 +374,7 @@ segment(struct gl_context *ctx,
if (x0 < x1) {
xLeft = x0 - line->halfWidth;
xRight = x1 + line->halfWidth;
- if (line->dy >= 0.0) {
+ if (line->dy >= 0.0F) {
yBot = y0 - 3.0F * line->halfWidth;
yTop = y0 + line->halfWidth;
}
@@ -386,7 +386,7 @@ segment(struct gl_context *ctx,
else {
xLeft = x1 - line->halfWidth;
xRight = x0 + line->halfWidth;
- if (line->dy <= 0.0) {
+ if (line->dy <= 0.0F) {
yBot = y1 - 3.0F * line->halfWidth;
yTop = y1 + line->halfWidth;
}
@@ -420,7 +420,7 @@ segment(struct gl_context *ctx,
if (y0 < y1) {
yBot = y0 - line->halfWidth;
yTop = y1 + line->halfWidth;
- if (line->dx >= 0.0) {
+ if (line->dx >= 0.0F) {
xLeft = x0 - 3.0F * line->halfWidth;
xRight = x0 + line->halfWidth;
}
@@ -432,7 +432,7 @@ segment(struct gl_context *ctx,
else {
yBot = y1 - line->halfWidth;
yTop = y0 + line->halfWidth;
- if (line->dx <= 0.0) {
+ if (line->dx <= 0.0F) {
xLeft = x1 - 3.0F * line->halfWidth;
xRight = x1 + line->halfWidth;
}
diff --git a/src/mesa/swrast/s_aalinetemp.h b/src/mesa/swrast/s_aalinetemp.h
index f1d078fd89b..bebb131a5d1 100644
--- a/src/mesa/swrast/s_aalinetemp.h
+++ b/src/mesa/swrast/s_aalinetemp.h
@@ -44,7 +44,7 @@ NAME(plot)(struct gl_context *ctx, struct LineInfo *line, int ix, int iy)
(void) swrast;
- if (coverage == 0.0)
+ if (coverage == 0.0F)
return;
line->span.end++;
@@ -123,7 +123,7 @@ NAME(line)(struct gl_context *ctx, const SWvertex *v0, const SWvertex *v1)
ctx->Const.MinLineWidthAA,
ctx->Const.MaxLineWidthAA);
- if (line.len == 0.0 || IS_INF_OR_NAN(line.len))
+ if (line.len == 0.0F || IS_INF_OR_NAN(line.len))
return;
INIT_SPAN(line.span, GL_LINE);
diff --git a/src/mesa/swrast/s_atifragshader.c b/src/mesa/swrast/s_atifragshader.c
index 9e029db25ce..2974deed41b 100644
--- a/src/mesa/swrast/s_atifragshader.c
+++ b/src/mesa/swrast/s_atifragshader.c
@@ -436,13 +436,13 @@ execute_shader(struct gl_context *ctx, const struct ati_fragment_shader *shader,
for (i = 0; i < 3; i++) {
dst[optype][i] =
(src[optype][2][i] >
- 0.5) ? src[optype][0][i] : src[optype][1][i];
+ 0.5F) ? src[optype][0][i] : src[optype][1][i];
}
}
else {
dst[optype][3] =
(src[optype][2][3] >
- 0.5) ? src[optype][0][3] : src[optype][1][3];
+ 0.5F) ? src[optype][0][3] : src[optype][1][3];
}
break;
diff --git a/src/mesa/swrast/s_copypix.c b/src/mesa/swrast/s_copypix.c
index 68c83e44e12..0dbccc0f61d 100644
--- a/src/mesa/swrast/s_copypix.c
+++ b/src/mesa/swrast/s_copypix.c
@@ -27,6 +27,7 @@
#include "main/context.h"
#include "main/condrender.h"
#include "main/macros.h"
+#include "main/blit.h"
#include "main/pixeltransfer.h"
#include "main/imports.h"
@@ -51,20 +52,9 @@ regions_overlap(GLint srcx, GLint srcy,
GLint width, GLint height,
GLfloat zoomX, GLfloat zoomY)
{
- if (zoomX == 1.0 && zoomY == 1.0) {
- /* no zoom */
- if (srcx >= dstx + width || (srcx + width <= dstx)) {
- return GL_FALSE;
- }
- else if (srcy < dsty) { /* this is OK */
- return GL_FALSE;
- }
- else if (srcy > dsty + height) {
- return GL_FALSE;
- }
- else {
- return GL_TRUE;
- }
+ if (zoomX == 1.0F && zoomY == 1.0F) {
+ return _mesa_regions_overlap(srcx, srcy, srcx + width, srcy + height,
+ dstx, dsty, dstx + width, dsty + height);
}
else {
/* add one pixel of slop when zooming, just to be safe */
@@ -211,8 +201,8 @@ scale_and_bias_z(struct gl_context *ctx, GLuint width,
GLuint i;
if (depthMax <= 0xffffff &&
- ctx->Pixel.DepthScale == 1.0 &&
- ctx->Pixel.DepthBias == 0.0) {
+ ctx->Pixel.DepthScale == 1.0F &&
+ ctx->Pixel.DepthBias == 0.0F) {
/* no scale or bias and no clamping and no worry of overflow */
const GLfloat depthMaxF = ctx->DrawBuffer->_DepthMaxF;
for (i = 0; i < width; i++) {
diff --git a/src/mesa/swrast/s_depth.c b/src/mesa/swrast/s_depth.c
index 134f897c039..ffadc05a732 100644
--- a/src/mesa/swrast/s_depth.c
+++ b/src/mesa/swrast/s_depth.c
@@ -419,8 +419,8 @@ _swrast_depth_bounds_test( struct gl_context *ctx, SWspan *span )
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct gl_renderbuffer *rb = fb->Attachment[BUFFER_DEPTH].Renderbuffer;
GLubyte *zStart;
- GLuint zMin = (GLuint) (ctx->Depth.BoundsMin * fb->_DepthMaxF + 0.5F);
- GLuint zMax = (GLuint) (ctx->Depth.BoundsMax * fb->_DepthMaxF + 0.5F);
+ GLuint zMin = (GLuint)((double)ctx->Depth.BoundsMin * 0xffffffff);
+ GLuint zMax = (GLuint)((double)ctx->Depth.BoundsMax * 0xffffffff);
GLubyte *mask = span->array->mask;
const GLuint count = span->end;
GLuint i;
@@ -444,6 +444,16 @@ _swrast_depth_bounds_test( struct gl_context *ctx, SWspan *span )
zBufferVals = (const GLuint *) zStart;
}
else {
+ /* Round the bounds to the precision of the zbuffer. */
+ if (rb->Format == MESA_FORMAT_Z_UNORM16) {
+ zMin = (zMin & 0xffff0000) | (zMin >> 16);
+ zMax = (zMax & 0xffff0000) | (zMax >> 16);
+ } else {
+ /* 24 bits */
+ zMin = (zMin & 0xffffff00) | (zMin >> 24);
+ zMax = (zMax & 0xffffff00) | (zMax >> 24);
+ }
+
/* unpack Z values into a temporary array */
if (span->arrayMask & SPAN_XY) {
get_z32_values(ctx, rb, count, span->array->x, span->array->y,
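
Worked example, not part of the patch: with a 16-bit depth buffer, the masking above keeps the top 16 bits of the scaled bound and replicates them into the bottom 16, presumably matching how the stored 16-bit Z values are widened to 32 bits before the comparison:

   GLuint   zMin     = 0x8000abcd;                            /* bound scaled to 32 bits     */
   GLuint   zRounded = (zMin & 0xffff0000) | (zMin >> 16);    /* -> 0x80008000               */

   GLushort z16      = 0x8000;                                /* value stored in the zbuffer */
   GLuint   z32      = ((GLuint)z16 << 16) | z16;             /* widened -> 0x80008000       */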
diff --git a/src/mesa/swrast/s_drawpix.c b/src/mesa/swrast/s_drawpix.c
index fb677ee1b16..dc6827ede9f 100644
--- a/src/mesa/swrast/s_drawpix.c
+++ b/src/mesa/swrast/s_drawpix.c
@@ -264,7 +264,7 @@ draw_stencil_pixels( struct gl_context *ctx, GLint x, GLint y,
const struct gl_pixelstore_attrib *unpack,
const GLvoid *pixels )
{
- const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
+ const GLboolean zoom = ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F;
const GLenum destType = GL_UNSIGNED_BYTE;
GLint row;
GLubyte *values;
@@ -309,8 +309,8 @@ draw_depth_pixels( struct gl_context *ctx, GLint x, GLint y,
const GLvoid *pixels )
{
const GLboolean scaleOrBias
- = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
- const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
+ = ctx->Pixel.DepthScale != 1.0f || ctx->Pixel.DepthBias != 0.0f;
+ const GLboolean zoom = ctx->Pixel.ZoomX != 1.0f || ctx->Pixel.ZoomY != 1.0f;
SWspan span;
INIT_SPAN(span, GL_BITMAP);
@@ -415,7 +415,7 @@ draw_rgba_pixels( struct gl_context *ctx, GLint x, GLint y,
const GLvoid *pixels )
{
const GLint imgX = x, imgY = y;
- const GLboolean zoom = ctx->Pixel.ZoomX!=1.0 || ctx->Pixel.ZoomY!=1.0;
+ const GLboolean zoom = ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F;
GLbitfield transferOps = ctx->_ImageTransferState;
SWspan span;
@@ -601,10 +601,10 @@ draw_depth_stencil_pixels(struct gl_context *ctx, GLint x, GLint y,
{
const GLint imgX = x, imgY = y;
const GLboolean scaleOrBias
- = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
+ = ctx->Pixel.DepthScale != 1.0F || ctx->Pixel.DepthBias != 0.0F;
const GLuint stencilMask = ctx->Stencil.WriteMask[0];
const GLenum stencilType = GL_UNSIGNED_BYTE;
- const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
+ const GLboolean zoom = ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F;
struct gl_renderbuffer *depthRb, *stencilRb;
struct gl_pixelstore_attrib clippedUnpack = *unpack;
diff --git a/src/mesa/swrast/s_fragprog.c b/src/mesa/swrast/s_fragprog.c
index 175915a5a0b..4fbf66b9db7 100644
--- a/src/mesa/swrast/s_fragprog.c
+++ b/src/mesa/swrast/s_fragprog.c
@@ -243,9 +243,9 @@ run_program(struct gl_context *ctx, SWspan *span, GLuint start, GLuint end)
/* Store result depth/z */
if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
const GLfloat depth = machine->Outputs[FRAG_RESULT_DEPTH][2];
- if (depth <= 0.0)
+ if (depth <= 0.0F)
span->array->z[i] = 0;
- else if (depth >= 1.0)
+ else if (depth >= 1.0F)
span->array->z[i] = ctx->DrawBuffer->_DepthMax;
else
span->array->z[i] =
diff --git a/src/mesa/swrast/s_lines.c b/src/mesa/swrast/s_lines.c
index 58bd2fc720a..ab8da7db289 100644
--- a/src/mesa/swrast/s_lines.c
+++ b/src/mesa/swrast/s_lines.c
@@ -241,7 +241,7 @@ _swrast_choose_line( struct gl_context *ctx )
USE(general_line);
}
else if (ctx->Depth.Test
- || ctx->Line.Width != 1.0
+ || ctx->Line.Width != 1.0F
|| ctx->Line.StippleFlag) {
/* no texture, but Z, fog, width>1, stipple, etc. */
#if CHAN_BITS == 32
@@ -252,7 +252,7 @@ _swrast_choose_line( struct gl_context *ctx )
}
else {
assert(!ctx->Depth.Test);
- assert(ctx->Line.Width == 1.0);
+ assert(ctx->Line.Width == 1.0F);
/* simple lines */
USE(simple_no_z_rgba_line);
}
diff --git a/src/mesa/swrast/s_points.c b/src/mesa/swrast/s_points.c
index 2212c95fa9a..d9aae73302c 100644
--- a/src/mesa/swrast/s_points.c
+++ b/src/mesa/swrast/s_points.c
@@ -208,9 +208,9 @@ sprite_point(struct gl_context *ctx, const SWvertex *vert)
else {
/* even size */
/* 0.501 factor allows conformance to pass */
- xmin = (GLint) (x + 0.501) - iRadius;
+ xmin = (GLint) (x + 0.501F) - iRadius;
xmax = xmin + iSize - 1;
- ymin = (GLint) (y + 0.501) - iRadius;
+ ymin = (GLint) (y + 0.501F) - iRadius;
ymax = ymin + iSize - 1;
}
@@ -423,9 +423,9 @@ large_point(struct gl_context *ctx, const SWvertex *vert)
else {
/* even size */
/* 0.501 factor allows conformance to pass */
- xmin = (GLint) (x + 0.501) - iRadius;
+ xmin = (GLint) (x + 0.501F) - iRadius;
xmax = xmin + iSize - 1;
- ymin = (GLint) (y + 0.501) - iRadius;
+ ymin = (GLint) (y + 0.501F) - iRadius;
ymax = ymin + iSize - 1;
}
@@ -552,7 +552,7 @@ _swrast_choose_point(struct gl_context *ctx)
else if (ctx->Point.SmoothFlag) {
swrast->Point = smooth_point;
}
- else if (size > 1.0 ||
+ else if (size > 1.0F ||
ctx->Point._Attenuated ||
ctx->VertexProgram.PointSizeEnabled) {
swrast->Point = large_point;
diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c
index 3db10e163d7..cd939ba9510 100644
--- a/src/mesa/swrast/s_span.c
+++ b/src/mesa/swrast/s_span.c
@@ -506,7 +506,7 @@ interpolate_texcoords(struct gl_context *ctx, SWspan *span)
/* LOD is calculated directly in the anisotropic filter, so we can
* skip the normal lambda function as the result is ignored.
*/
- if (samp->MaxAnisotropy > 1.0 &&
+ if (samp->MaxAnisotropy > 1.0F &&
samp->MinFilter == GL_LINEAR_MIPMAP_LINEAR) {
needLambda = GL_FALSE;
}
@@ -886,16 +886,16 @@ apply_aa_coverage(SWspan *span)
GLubyte (*rgba)[4] = span->array->rgba8;
for (i = 0; i < span->end; i++) {
const GLfloat a = rgba[i][ACOMP] * coverage[i];
- rgba[i][ACOMP] = (GLubyte) CLAMP(a, 0.0, 255.0);
- assert(coverage[i] >= 0.0);
- assert(coverage[i] <= 1.0);
+ rgba[i][ACOMP] = (GLubyte) CLAMP(a, 0.0F, 255.0F);
+ assert(coverage[i] >= 0.0F);
+ assert(coverage[i] <= 1.0F);
}
}
else if (span->array->ChanType == GL_UNSIGNED_SHORT) {
GLushort (*rgba)[4] = span->array->rgba16;
for (i = 0; i < span->end; i++) {
const GLfloat a = rgba[i][ACOMP] * coverage[i];
- rgba[i][ACOMP] = (GLushort) CLAMP(a, 0.0, 65535.0);
+ rgba[i][ACOMP] = (GLushort) CLAMP(a, 0.0F, 65535.0F);
}
}
else {
diff --git a/src/mesa/swrast/s_texcombine.c b/src/mesa/swrast/s_texcombine.c
index 453bd36367b..da4a013634c 100644
--- a/src/mesa/swrast/s_texcombine.c
+++ b/src/mesa/swrast/s_texcombine.c
@@ -670,8 +670,8 @@ _swrast_texture_span( struct gl_context *ctx, SWspan *span )
}
}
- if (samp->MinLod != -1000.0 ||
- samp->MaxLod != 1000.0) {
+ if (samp->MinLod != -1000.0F ||
+ samp->MaxLod != 1000.0F) {
/* apply LOD clamping to lambda */
const GLfloat min = samp->MinLod;
const GLfloat max = samp->MaxLod;
@@ -682,7 +682,7 @@ _swrast_texture_span( struct gl_context *ctx, SWspan *span )
}
}
}
- else if (samp->MaxAnisotropy > 1.0 &&
+ else if (samp->MaxAnisotropy > 1.0F &&
samp->MinFilter == GL_LINEAR_MIPMAP_LINEAR) {
* sample_lambda_2d_aniso is being used as texture_sample_func;
* it requires the current SWspan *span as an additional parameter.
diff --git a/src/mesa/swrast/s_texfilter.c b/src/mesa/swrast/s_texfilter.c
index abc1727cf29..314170fc751 100644
--- a/src/mesa/swrast/s_texfilter.c
+++ b/src/mesa/swrast/s_texfilter.c
@@ -1902,7 +1902,7 @@ sample_lambda_2d_aniso(struct gl_context *ctx,
const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
const GLboolean adjustLOD =
(texUnit->LodBias + samp->LodBias != 0.0F)
- || (samp->MinLod != -1000.0 || samp->MaxLod != 1000.0);
+ || (samp->MinLod != -1000.0F || samp->MaxLod != 1000.0F);
GLuint i;
@@ -1973,8 +1973,8 @@ sample_lambda_2d_aniso(struct gl_context *ctx,
ctx->Const.MaxTextureLodBias);
lod += bias;
- if (samp->MinLod != -1000.0 ||
- samp->MaxLod != 1000.0) {
+ if (samp->MinLod != -1000.0F ||
+ samp->MaxLod != 1000.0F) {
/* apply LOD clamping to lambda */
lod = CLAMP(lod, samp->MinLod, samp->MaxLod);
}
@@ -3713,7 +3713,7 @@ _swrast_choose_texture_sample_func( struct gl_context *ctx,
const struct gl_sampler_object *sampler)
{
if (!t || !_mesa_is_texture_complete(t, sampler)) {
- return &null_sample_func;
+ return null_sample_func;
}
else {
const GLboolean needLambda =
@@ -3722,32 +3722,32 @@ _swrast_choose_texture_sample_func( struct gl_context *ctx,
switch (t->Target) {
case GL_TEXTURE_1D:
if (is_depth_texture(t)) {
- return &sample_depth_texture;
+ return sample_depth_texture;
}
else if (needLambda) {
- return &sample_lambda_1d;
+ return sample_lambda_1d;
}
else if (sampler->MinFilter == GL_LINEAR) {
- return &sample_linear_1d;
+ return sample_linear_1d;
}
else {
assert(sampler->MinFilter == GL_NEAREST);
- return &sample_nearest_1d;
+ return sample_nearest_1d;
}
case GL_TEXTURE_2D:
if (is_depth_texture(t)) {
- return &sample_depth_texture;
+ return sample_depth_texture;
}
else if (needLambda) {
/* Anisotropic filtering extension. Activated only if mipmaps are used */
- if (sampler->MaxAnisotropy > 1.0 &&
+ if (sampler->MaxAnisotropy > 1.0F &&
sampler->MinFilter == GL_LINEAR_MIPMAP_LINEAR) {
- return &sample_lambda_2d_aniso;
+ return sample_lambda_2d_aniso;
}
- return &sample_lambda_2d;
+ return sample_lambda_2d;
}
else if (sampler->MinFilter == GL_LINEAR) {
- return &sample_linear_2d;
+ return sample_linear_2d;
}
else {
/* check for a few optimized cases */
@@ -3772,72 +3772,72 @@ _swrast_choose_texture_sample_func( struct gl_context *ctx,
}
case GL_TEXTURE_3D:
if (needLambda) {
- return &sample_lambda_3d;
+ return sample_lambda_3d;
}
else if (sampler->MinFilter == GL_LINEAR) {
- return &sample_linear_3d;
+ return sample_linear_3d;
}
else {
assert(sampler->MinFilter == GL_NEAREST);
- return &sample_nearest_3d;
+ return sample_nearest_3d;
}
case GL_TEXTURE_CUBE_MAP:
if (needLambda) {
- return &sample_lambda_cube;
+ return sample_lambda_cube;
}
else if (sampler->MinFilter == GL_LINEAR) {
- return &sample_linear_cube;
+ return sample_linear_cube;
}
else {
assert(sampler->MinFilter == GL_NEAREST);
- return &sample_nearest_cube;
+ return sample_nearest_cube;
}
case GL_TEXTURE_RECTANGLE_NV:
if (is_depth_texture(t)) {
- return &sample_depth_texture;
+ return sample_depth_texture;
}
else if (needLambda) {
- return &sample_lambda_rect;
+ return sample_lambda_rect;
}
else if (sampler->MinFilter == GL_LINEAR) {
- return &sample_linear_rect;
+ return sample_linear_rect;
}
else {
assert(sampler->MinFilter == GL_NEAREST);
- return &sample_nearest_rect;
+ return sample_nearest_rect;
}
case GL_TEXTURE_1D_ARRAY_EXT:
if (is_depth_texture(t)) {
- return &sample_depth_texture;
+ return sample_depth_texture;
}
else if (needLambda) {
- return &sample_lambda_1d_array;
+ return sample_lambda_1d_array;
}
else if (sampler->MinFilter == GL_LINEAR) {
- return &sample_linear_1d_array;
+ return sample_linear_1d_array;
}
else {
assert(sampler->MinFilter == GL_NEAREST);
- return &sample_nearest_1d_array;
+ return sample_nearest_1d_array;
}
case GL_TEXTURE_2D_ARRAY_EXT:
if (is_depth_texture(t)) {
- return &sample_depth_texture;
+ return sample_depth_texture;
}
else if (needLambda) {
- return &sample_lambda_2d_array;
+ return sample_lambda_2d_array;
}
else if (sampler->MinFilter == GL_LINEAR) {
- return &sample_linear_2d_array;
+ return sample_linear_2d_array;
}
else {
assert(sampler->MinFilter == GL_NEAREST);
- return &sample_nearest_2d_array;
+ return sample_nearest_2d_array;
}
default:
_mesa_problem(ctx,
"invalid target in _swrast_choose_texture_sample_func");
- return &null_sample_func;
+ return null_sample_func;
}
}
}
diff --git a/src/mesa/swrast/s_tritemp.h b/src/mesa/swrast/s_tritemp.h
index fddbbfd99d6..1d71839713c 100644
--- a/src/mesa/swrast/s_tritemp.h
+++ b/src/mesa/swrast/s_tritemp.h
@@ -242,7 +242,7 @@ static void NAME(struct gl_context *ctx, const SWvertex *v0,
if (IS_INF_OR_NAN(area) || area == 0.0F)
return;
- if (area * bf * swrast->_BackfaceCullSign < 0.0)
+ if (area * bf * swrast->_BackfaceCullSign < 0.0F)
return;
oneOverArea = 1.0F / area;
diff --git a/src/mesa/swrast/s_zoom.c b/src/mesa/swrast/s_zoom.c
index 9879e2a5f10..34b8eb19657 100644
--- a/src/mesa/swrast/s_zoom.c
+++ b/src/mesa/swrast/s_zoom.c
@@ -114,7 +114,7 @@ unzoom_x(GLfloat zoomX, GLint imageX, GLint zx)
(zx - imageX) / zoomX = x - imageX;
*/
GLint x;
- if (zoomX < 0.0)
+ if (zoomX < 0.0F)
zx++;
x = imageX + (GLint) ((zx - imageX) / zoomX);
return x;
diff --git a/src/mesa/swrast_setup/ss_tritmp.h b/src/mesa/swrast_setup/ss_tritmp.h
index c38c76a4adb..adb77bd3247 100644
--- a/src/mesa/swrast_setup/ss_tritmp.h
+++ b/src/mesa/swrast_setup/ss_tritmp.h
@@ -58,7 +58,7 @@ static void TAG(triangle)(struct gl_context *ctx, GLuint e0, GLuint e1, GLuint e
if (IND & (SS_TWOSIDE_BIT | SS_UNFILLED_BIT))
{
- facing = (cc < 0.0) ^ ctx->Polygon._FrontBit;
+ facing = (cc < 0.0F) ^ ctx->Polygon._FrontBit;
if (IND & SS_UNFILLED_BIT)
mode = facing ? ctx->Polygon.BackMode : ctx->Polygon.FrontMode;
@@ -138,7 +138,7 @@ static void TAG(triangle)(struct gl_context *ctx, GLuint e0, GLuint e1, GLuint e
* so no MRD value is used here.
*/
offset = ctx->Polygon.OffsetUnits;
- if (cc * cc > 1e-16) {
+ if (cc * cc > 1e-16F) {
const GLfloat ez = z[0] - z[2];
const GLfloat fz = z[1] - z[2];
const GLfloat oneOverArea = 1.0F / cc;
diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c
index bc77ba8bf95..b5c0b3e1f5b 100644
--- a/src/mesa/tnl/t_context.c
+++ b/src/mesa/tnl/t_context.c
@@ -190,7 +190,7 @@ _tnl_InvalidateState( struct gl_context *ctx, GLuint new_state )
}
if (new_state & (_NEW_VIEWPORT | _NEW_BUFFERS)) {
- double scale[3], translate[3];
+ float scale[3], translate[3];
_mesa_get_viewport_xform(ctx, 0, scale, translate);
_math_matrix_viewport(&tnl->_WindowMap, scale, translate,
ctx->DrawBuffer->_DepthMaxF);
diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c
index 6adf1dce676..c130ab3f93d 100644
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -257,7 +257,7 @@ static GLboolean *_tnl_import_edgeflag( struct gl_context *ctx,
GLuint i;
for (i = 0; i < count; i++) {
- *bptr++ = ((GLfloat *)ptr)[0] == 1.0;
+ *bptr++ = ((GLfloat *)ptr)[0] == 1.0F;
ptr += stride;
}
@@ -425,6 +425,7 @@ void _tnl_draw_prims(struct gl_context *ctx,
GLuint min_index,
GLuint max_index,
struct gl_transform_feedback_object *tfb_vertcount,
+ unsigned stream,
struct gl_buffer_object *indirect)
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
@@ -451,7 +452,7 @@ void _tnl_draw_prims(struct gl_context *ctx,
printf("%s %d..%d\n", __func__, min_index, max_index);
for (i = 0; i < nr_prims; i++)
printf("prim %d: %s start %d count %d\n", i,
- _mesa_lookup_enum_by_nr(prim[i].mode),
+ _mesa_enum_to_string(prim[i].mode),
prim[i].start,
prim[i].count);
}
diff --git a/src/mesa/tnl/t_rasterpos.c b/src/mesa/tnl/t_rasterpos.c
index d4b45bac9ac..4bd9ac8539e 100644
--- a/src/mesa/tnl/t_rasterpos.c
+++ b/src/mesa/tnl/t_rasterpos.c
@@ -148,7 +148,7 @@ shade_rastpos(struct gl_context *ctx,
SUB_3V(VP, light->_Position, vertex);
/* d = length(VP) */
d = (GLfloat) LEN_3FV( VP );
- if (d > 1.0e-6) {
+ if (d > 1.0e-6F) {
/* normalize VP */
GLfloat invd = 1.0F / d;
SELF_SCALE_SCALAR_3V(VP, invd);
@@ -172,7 +172,7 @@ shade_rastpos(struct gl_context *ctx,
}
}
- if (attenuation < 1e-3)
+ if (attenuation < 1e-3F)
continue;
n_dot_VP = DOT3( normal, VP );
@@ -219,7 +219,7 @@ shade_rastpos(struct gl_context *ctx,
shine = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS][0];
spec_coef = powf(n_dot_h, shine);
- if (spec_coef > 1.0e-10) {
+ if (spec_coef > 1.0e-10F) {
if (ctx->Light.Model.ColorControl==GL_SEPARATE_SPECULAR_COLOR) {
ACC_SCALE_SCALAR_3V( specularContrib, spec_coef,
light->_MatSpecular[0]);
@@ -378,7 +378,7 @@ _tnl_RasterPos(struct gl_context *ctx, const GLfloat vObj[4])
GLfloat eye[4], clip[4], ndc[3], d;
GLfloat *norm, eyenorm[3];
GLfloat *objnorm = ctx->Current.Attrib[VERT_ATTRIB_NORMAL];
- double scale[3], translate[3];
+ float scale[3], translate[3];
/* apply modelview matrix: eye = MV * obj */
TRANSFORM_POINT( eye, ctx->ModelviewMatrixStack.Top->m, vObj );
diff --git a/src/mesa/tnl/t_vb_fog.c b/src/mesa/tnl/t_vb_fog.c
index 1ca72f866b7..5489ed6857f 100644
--- a/src/mesa/tnl/t_vb_fog.c
+++ b/src/mesa/tnl/t_vb_fog.c
@@ -45,8 +45,8 @@ struct fog_stage_data {
#define FOG_STAGE_DATA(stage) ((struct fog_stage_data *)stage->privatePtr)
#define FOG_EXP_TABLE_SIZE 256
-#define FOG_MAX (10.0)
-#define EXP_FOG_MAX .0006595
+#define FOG_MAX (10.0F)
+#define EXP_FOG_MAX .0006595F
#define FOG_INCR (FOG_MAX/FOG_EXP_TABLE_SIZE)
static GLfloat exp_table[FOG_EXP_TABLE_SIZE];
static GLfloat inited = 0;
@@ -54,7 +54,7 @@ static GLfloat inited = 0;
#if 1
#define NEG_EXP( result, narg ) \
do { \
- GLfloat f = (GLfloat) (narg * (1.0/FOG_INCR)); \
+ GLfloat f = (GLfloat) (narg * (1.0F / FOG_INCR)); \
GLint k = (GLint) f; \
if (k > FOG_EXP_TABLE_SIZE-2) \
result = (GLfloat) EXP_FOG_MAX; \
diff --git a/src/mesa/tnl/t_vb_light.c b/src/mesa/tnl/t_vb_light.c
index dbd57fa6bfe..029265a4f83 100644
--- a/src/mesa/tnl/t_vb_light.c
+++ b/src/mesa/tnl/t_vb_light.c
@@ -137,23 +137,23 @@ validate_shine_table( struct gl_context *ctx, GLuint side, GLfloat shininess )
break;
m = s->tab;
- m[0] = 0.0;
- if (shininess == 0.0) {
+ m[0] = 0.0F;
+ if (shininess == 0.0F) {
for (j = 1 ; j <= SHINE_TABLE_SIZE ; j++)
- m[j] = 1.0;
+ m[j] = 1.0F;
}
else {
for (j = 1 ; j < SHINE_TABLE_SIZE ; j++) {
- GLdouble t, x = j / (GLfloat) (SHINE_TABLE_SIZE - 1);
- if (x < 0.005) /* underflow check */
- x = 0.005;
- t = pow(x, shininess);
- if (t > 1e-20)
- m[j] = (GLfloat) t;
+ GLfloat t, x = j / (GLfloat) (SHINE_TABLE_SIZE - 1);
+ if (x < 0.005F) /* underflow check */
+ x = 0.005F;
+ t = powf(x, shininess);
+ if (t > 1e-20F)
+ m[j] = t;
else
- m[j] = 0.0;
+ m[j] = 0.0F;
}
- m[SHINE_TABLE_SIZE] = 1.0;
+ m[SHINE_TABLE_SIZE] = 1.0F;
}
s->shininess = shininess;
diff --git a/src/mesa/tnl/t_vb_lighttmp.h b/src/mesa/tnl/t_vb_lighttmp.h
index f8786accbbb..3aebcd4b799 100644
--- a/src/mesa/tnl/t_vb_lighttmp.h
+++ b/src/mesa/tnl/t_vb_lighttmp.h
@@ -112,7 +112,7 @@ static void TAG(light_rgba_spec)( struct gl_context *ctx,
GLint side;
GLfloat contrib[3];
GLfloat attenuation;
- GLfloat VP[3]; /* unit vector from vertex to light */
+ GLfloat VP[3]; /* unit vector from vertex to light */
GLfloat n_dot_VP; /* n dot VP */
GLfloat *h;
@@ -129,7 +129,7 @@ static void TAG(light_rgba_spec)( struct gl_context *ctx,
d = (GLfloat) LEN_3FV( VP );
- if (d > 1e-6) {
+ if (d > 1e-6F) {
GLfloat invd = 1.0F / d;
SELF_SCALE_SCALAR_3V(VP, invd);
}
@@ -152,7 +152,7 @@ static void TAG(light_rgba_spec)( struct gl_context *ctx,
}
}
- if (attenuation < 1e-3)
+ if (attenuation < 1e-3F)
continue; /* this light makes no contribution */
/* Compute dot product or normal and vector from V to light pos */
@@ -204,7 +204,7 @@ static void TAG(light_rgba_spec)( struct gl_context *ctx,
if (n_dot_h > 0.0F) {
GLfloat spec_coef = lookup_shininess(ctx, side, n_dot_h);
- if (spec_coef > 1.0e-10) {
+ if (spec_coef > 1.0e-10F) {
spec_coef *= attenuation;
ACC_SCALE_SCALAR_3V( spec[side], spec_coef,
light->_MatSpecular[side]);
@@ -283,12 +283,11 @@ static void TAG(light_rgba)( struct gl_context *ctx,
/* Add contribution from each enabled light source */
foreach (light, &ctx->Light.EnabledList) {
-
GLfloat n_dot_h;
GLfloat correction;
GLint side;
GLfloat contrib[3];
- GLfloat attenuation = 1.0;
+ GLfloat attenuation;
GLfloat VP[3]; /* unit vector from vertex to light */
GLfloat n_dot_VP; /* n dot VP */
GLfloat *h;
@@ -302,12 +301,11 @@ static void TAG(light_rgba)( struct gl_context *ctx,
else {
GLfloat d; /* distance from vertex to light */
-
SUB_3V(VP, light->_Position, vertex);
d = (GLfloat) LEN_3FV( VP );
- if ( d > 1e-6) {
+ if (d > 1e-6F) {
GLfloat invd = 1.0F / d;
SELF_SCALE_SCALAR_3V(VP, invd);
}
@@ -330,7 +328,7 @@ static void TAG(light_rgba)( struct gl_context *ctx,
}
}
- if (attenuation < 1e-3)
+ if (attenuation < 1e-3F)
continue; /* this light makes no contribution */
/* Compute dot product of normal and vector from V to light pos */
diff --git a/src/mesa/tnl/t_vb_normals.c b/src/mesa/tnl/t_vb_normals.c
index 9aee1a2fb0b..6fc89c23b33 100644
--- a/src/mesa/tnl/t_vb_normals.c
+++ b/src/mesa/tnl/t_vb_normals.c
@@ -114,7 +114,7 @@ validate_normal_stage(struct gl_context *ctx, struct tnl_pipeline_stage *stage)
store->NormalTransform = _mesa_normal_tab[transform | NORM_NORMALIZE];
}
else if (ctx->Transform.RescaleNormals &&
- ctx->_ModelViewInvScale != 1.0) {
+ ctx->_ModelViewInvScale != 1.0F) {
store->NormalTransform = _mesa_normal_tab[transform | NORM_RESCALE];
}
else {
@@ -131,7 +131,7 @@ validate_normal_stage(struct gl_context *ctx, struct tnl_pipeline_stage *stage)
store->NormalTransform = _mesa_normal_tab[NORM_NORMALIZE];
}
else if (!ctx->Transform.RescaleNormals &&
- ctx->_ModelViewInvScale != 1.0) {
+ ctx->_ModelViewInvScale != 1.0F) {
store->NormalTransform = _mesa_normal_tab[NORM_RESCALE];
}
else {
diff --git a/src/mesa/tnl/t_vb_render.c b/src/mesa/tnl/t_vb_render.c
index 4960ac0969e..03e8fcfa196 100644
--- a/src/mesa/tnl/t_vb_render.c
+++ b/src/mesa/tnl/t_vb_render.c
@@ -315,7 +315,7 @@ static GLboolean run_render( struct gl_context *ctx,
if (MESA_VERBOSE & VERBOSE_PRIMS)
_mesa_debug(NULL, "MESA prim %s %d..%d\n",
- _mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK),
+ _mesa_enum_to_string(prim & PRIM_MODE_MASK),
start, start+length);
if (length)
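
The _mesa_lookup_enum_by_nr() -> _mesa_enum_to_string() changes scattered through the rest of this patch are a pure rename; both take a GLenum and return a printable name, so call sites only change the identifier. For reference, the updated debug call from the hunk above (a fragment in Mesa's debug style, not a stand-alone program):

if (MESA_VERBOSE & VERBOSE_PRIMS)
   _mesa_debug(NULL, "MESA prim %s %d..%d\n",
               _mesa_enum_to_string(prim & PRIM_MODE_MASK),
               start, start + length);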
diff --git a/src/mesa/tnl/t_vertex_generic.c b/src/mesa/tnl/t_vertex_generic.c
index 2a25a96928f..6c40c868363 100644
--- a/src/mesa/tnl/t_vertex_generic.c
+++ b/src/mesa/tnl/t_vertex_generic.c
@@ -1026,7 +1026,7 @@ void _tnl_generic_interp( struct gl_context *ctx,
if (tnl->NeedNdcCoords) {
const GLfloat *dstclip = VB->ClipPtr->data[edst];
- if (dstclip[3] != 0.0) {
+ if (dstclip[3] != 0.0f) {
const GLfloat w = 1.0f / dstclip[3];
GLfloat pos[4];
diff --git a/src/mesa/tnl/t_vertex_sse.c b/src/mesa/tnl/t_vertex_sse.c
index 30dc1a72080..14e7812ec78 100644
--- a/src/mesa/tnl/t_vertex_sse.c
+++ b/src/mesa/tnl/t_vertex_sse.c
@@ -592,7 +592,7 @@ static GLboolean build_vertex_emit( struct x86_program *p )
break;
case GL_UNSIGNED_SHORT:
default:
- printf("unknown CHAN_TYPE %s\n", _mesa_lookup_enum_by_nr(CHAN_TYPE));
+ printf("unknown CHAN_TYPE %s\n", _mesa_enum_to_string(CHAN_TYPE));
return GL_FALSE;
}
break;
diff --git a/src/mesa/tnl/tnl.h b/src/mesa/tnl/tnl.h
index 8c59ff9e58f..5a9938e7afb 100644
--- a/src/mesa/tnl/tnl.h
+++ b/src/mesa/tnl/tnl.h
@@ -76,7 +76,7 @@ struct _mesa_prim;
struct _mesa_index_buffer;
void
-_tnl_draw_prims( struct gl_context *ctx,
+_tnl_draw_prims(struct gl_context *ctx,
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
@@ -84,6 +84,7 @@ _tnl_draw_prims( struct gl_context *ctx,
GLuint min_index,
GLuint max_index,
struct gl_transform_feedback_object *tfb_vertcount,
+ unsigned stream,
struct gl_buffer_object *indirect );
extern void
diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h
index 667e2a6e5d5..7be39541e43 100644
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -1256,7 +1256,7 @@ static GLboolean TAG(validate_render)( struct gl_context *ctx,
}
if (!ok) {
-/* fprintf(stderr, "not ok %s\n", _mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK)); */
+/* fprintf(stderr, "not ok %s\n", _mesa_enum_to_string(prim & PRIM_MODE_MASK)); */
return GL_FALSE;
}
}
diff --git a/src/mesa/tnl_dd/t_dd_unfilled.h b/src/mesa/tnl_dd/t_dd_unfilled.h
index 82190c08916..ee15e773c88 100644
--- a/src/mesa/tnl_dd/t_dd_unfilled.h
+++ b/src/mesa/tnl_dd/t_dd_unfilled.h
@@ -60,7 +60,7 @@ static void TAG(unfilled_tri)( struct gl_context *ctx,
}
/* fprintf(stderr, "%s %s %d %d %d\n", __func__, */
-/* _mesa_lookup_enum_by_nr( mode ), */
+/* _mesa_enum_to_string( mode ), */
/* ef[e0], ef[e1], ef[e2]); */
if (mode == GL_POINT) {
diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h
index 54dee6c464f..2aaff5df019 100644
--- a/src/mesa/vbo/vbo.h
+++ b/src/mesa/vbo/vbo.h
@@ -97,7 +97,8 @@ typedef void (*vbo_draw_func)( struct gl_context *ctx,
GLuint min_index,
GLuint max_index,
struct gl_transform_feedback_object *tfb_vertcount,
- struct gl_buffer_object *indirect );
+ unsigned stream,
+ struct gl_buffer_object *indirect);
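
With this typedef (and _tnl_draw_prims in tnl.h earlier), every draw_prims call in the patch gains one argument between the transform-feedback counter object and the indirect buffer. The two call shapes that appear below in vbo_exec_array.c, reproduced here purely as an illustration of the new parameter order:

/* ordinary draw: no transform-feedback counter, stream 0 */
vbo->draw_prims(ctx, prim, primCount, NULL,
                GL_TRUE, start, start + count - 1,
                NULL /* tfb_vertcount */, 0 /* stream */,
                NULL /* indirect */);

/* glDrawTransformFeedbackStream() path: forward the counter object
 * together with the stream index it belongs to */
vbo->draw_prims(ctx, prim, 1, NULL,
                GL_TRUE, 0, 0,
                obj /* tfb_vertcount */, stream,
                NULL /* indirect */);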
diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
index fd1ffe2f76d..e3eb286e482 100644
--- a/src/mesa/vbo/vbo_context.c
+++ b/src/mesa/vbo/vbo_context.c
@@ -37,9 +37,9 @@
static GLuint check_size( const GLfloat *attr )
{
- if (attr[3] != 1.0) return 4;
- if (attr[2] != 0.0) return 3;
- if (attr[1] != 0.0) return 2;
+ if (attr[3] != 1.0F) return 4;
+ if (attr[2] != 0.0F) return 3;
+ if (attr[1] != 0.0F) return 2;
return 1;
}
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 72b8206ec23..34d2c1d3d6b 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -255,7 +255,7 @@ check_array_data(struct gl_context *ctx, struct gl_client_array *array,
GLint k;
for (k = 0; k < array->Size; k++) {
if (IS_INF_OR_NAN(f[k]) ||
- f[k] >= 1.0e20 || f[k] <= -1.0e10) {
+ f[k] >= 1.0e20F || f[k] <= -1.0e10F) {
printf("Bad array data:\n");
printf(" Element[%u].%u = %f\n", j, k, f[k]);
printf(" Array %u at %p\n", attrib, (void* ) array);
@@ -263,7 +263,7 @@ check_array_data(struct gl_context *ctx, struct gl_client_array *array,
array->Type, array->Size, array->Stride);
printf(" Address/offset %p in Buffer Object %u\n",
array->Ptr, array->BufferObj->Name);
- f[k] = 1.0; /* XXX replace the bad value! */
+ f[k] = 1.0F; /* XXX replace the bad value! */
}
/*assert(!IS_INF_OR_NAN(f[k]));*/
}
@@ -633,7 +633,7 @@ vbo_draw_arrays(struct gl_context *ctx, GLenum mode, GLint start,
/* draw one or two prims */
check_buffers_are_unmapped(exec->array.inputs);
vbo->draw_prims(ctx, prim, primCount, NULL,
- GL_TRUE, start, start + count - 1, NULL, NULL);
+ GL_TRUE, start, start + count - 1, NULL, 0, NULL);
}
}
else {
@@ -644,7 +644,7 @@ vbo_draw_arrays(struct gl_context *ctx, GLenum mode, GLint start,
check_buffers_are_unmapped(exec->array.inputs);
vbo->draw_prims(ctx, prim, 1, NULL,
GL_TRUE, start, start + count - 1,
- NULL, NULL);
+ NULL, 0, NULL);
}
if (MESA_DEBUG_FLAGS & DEBUG_ALWAYS_FLUSH) {
@@ -786,7 +786,7 @@ vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count)
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx, "glDrawArrays(%s, %d, %d)\n",
- _mesa_lookup_enum_by_nr(mode), start, count);
+ _mesa_enum_to_string(mode), start, count);
if (!_mesa_validate_DrawArrays(ctx, mode, count))
return;
@@ -813,7 +813,7 @@ vbo_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count,
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx, "glDrawArraysInstanced(%s, %d, %d, %d)\n",
- _mesa_lookup_enum_by_nr(mode), start, count, numInstances);
+ _mesa_enum_to_string(mode), start, count, numInstances);
if (!_mesa_validate_DrawArraysInstanced(ctx, mode, start, count, numInstances))
return;
@@ -839,7 +839,7 @@ vbo_exec_DrawArraysInstancedBaseInstance(GLenum mode, GLint first, GLsizei count
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx, "glDrawArraysInstancedBaseInstance(%s, %d, %d, %d, %d)\n",
- _mesa_lookup_enum_by_nr(mode), first, count,
+ _mesa_enum_to_string(mode), first, count,
numInstances, baseInstance);
if (!_mesa_validate_DrawArraysInstanced(ctx, mode, first, count,
@@ -990,7 +990,7 @@ vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode,
check_buffers_are_unmapped(exec->array.inputs);
vbo->draw_prims(ctx, prim, 1, &ib,
- index_bounds_valid, start, end, NULL, NULL);
+ index_bounds_valid, start, end, NULL, 0, NULL);
if (MESA_DEBUG_FLAGS & DEBUG_ALWAYS_FLUSH) {
_mesa_flush(ctx);
@@ -1021,8 +1021,8 @@ vbo_exec_DrawRangeElementsBaseVertex(GLenum mode,
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx,
"glDrawRangeElementsBaseVertex(%s, %u, %u, %d, %s, %p, %d)\n",
- _mesa_lookup_enum_by_nr(mode), start, end, count,
- _mesa_lookup_enum_by_nr(type), indices, basevertex);
+ _mesa_enum_to_string(mode), start, end, count,
+ _mesa_enum_to_string(type), indices, basevertex);
if (!_mesa_validate_DrawRangeElements(ctx, mode, start, end, count,
type, indices))
@@ -1099,8 +1099,8 @@ vbo_exec_DrawRangeElements(GLenum mode, GLuint start, GLuint end,
GET_CURRENT_CONTEXT(ctx);
_mesa_debug(ctx,
"glDrawRangeElements(%s, %u, %u, %d, %s, %p)\n",
- _mesa_lookup_enum_by_nr(mode), start, end, count,
- _mesa_lookup_enum_by_nr(type), indices);
+ _mesa_enum_to_string(mode), start, end, count,
+ _mesa_enum_to_string(type), indices);
}
vbo_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type,
@@ -1119,8 +1119,8 @@ vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type,
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx, "glDrawElements(%s, %u, %s, %p)\n",
- _mesa_lookup_enum_by_nr(mode), count,
- _mesa_lookup_enum_by_nr(type), indices);
+ _mesa_enum_to_string(mode), count,
+ _mesa_enum_to_string(type), indices);
if (!_mesa_validate_DrawElements(ctx, mode, count, type, indices))
return;
@@ -1141,8 +1141,8 @@ vbo_exec_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx, "glDrawElementsBaseVertex(%s, %d, %s, %p, %d)\n",
- _mesa_lookup_enum_by_nr(mode), count,
- _mesa_lookup_enum_by_nr(type), indices, basevertex);
+ _mesa_enum_to_string(mode), count,
+ _mesa_enum_to_string(type), indices, basevertex);
if (!_mesa_validate_DrawElements(ctx, mode, count, type, indices))
return;
@@ -1163,8 +1163,8 @@ vbo_exec_DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx, "glDrawElementsInstanced(%s, %d, %s, %p, %d)\n",
- _mesa_lookup_enum_by_nr(mode), count,
- _mesa_lookup_enum_by_nr(type), indices, numInstances);
+ _mesa_enum_to_string(mode), count,
+ _mesa_enum_to_string(type), indices, numInstances);
if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices,
numInstances))
@@ -1187,8 +1187,8 @@ vbo_exec_DrawElementsInstancedBaseVertex(GLenum mode, GLsizei count, GLenum type
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx, "glDrawElementsInstancedBaseVertex(%s, %d, %s, %p, %d; %d)\n",
- _mesa_lookup_enum_by_nr(mode), count,
- _mesa_lookup_enum_by_nr(type), indices,
+ _mesa_enum_to_string(mode), count,
+ _mesa_enum_to_string(type), indices,
numInstances, basevertex);
if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices,
@@ -1212,8 +1212,8 @@ vbo_exec_DrawElementsInstancedBaseInstance(GLenum mode, GLsizei count, GLenum ty
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx, "glDrawElementsInstancedBaseInstance(%s, %d, %s, %p, %d, %d)\n",
- _mesa_lookup_enum_by_nr(mode), count,
- _mesa_lookup_enum_by_nr(type), indices,
+ _mesa_enum_to_string(mode), count,
+ _mesa_enum_to_string(type), indices,
numInstances, baseInstance);
if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices,
@@ -1238,8 +1238,8 @@ vbo_exec_DrawElementsInstancedBaseVertexBaseInstance(GLenum mode, GLsizei count,
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx, "glDrawElementsInstancedBaseVertexBaseInstance(%s, %d, %s, %p, %d, %d, %d)\n",
- _mesa_lookup_enum_by_nr(mode), count,
- _mesa_lookup_enum_by_nr(type), indices,
+ _mesa_enum_to_string(mode), count,
+ _mesa_enum_to_string(type), indices,
numInstances, basevertex, baseInstance);
if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices,
@@ -1350,7 +1350,7 @@ vbo_validated_multidrawelements(struct gl_context *ctx, GLenum mode,
check_buffers_are_unmapped(exec->array.inputs);
vbo->draw_prims(ctx, prim, primcount, &ib,
- false, ~0, ~0, NULL, NULL);
+ false, ~0, ~0, NULL, 0, NULL);
} else {
/* render one prim at a time */
for (i = 0; i < primcount; i++) {
@@ -1379,7 +1379,7 @@ vbo_validated_multidrawelements(struct gl_context *ctx, GLenum mode,
check_buffers_are_unmapped(exec->array.inputs);
vbo->draw_prims(ctx, prim, 1, &ib,
- false, ~0, ~0, NULL, NULL);
+ false, ~0, ~0, NULL, 0, NULL);
}
}
@@ -1464,7 +1464,7 @@ vbo_draw_transform_feedback(struct gl_context *ctx, GLenum mode,
check_buffers_are_unmapped(exec->array.inputs);
vbo->draw_prims(ctx, prim, 1, NULL,
- GL_TRUE, 0, 0, obj, NULL);
+ GL_TRUE, 0, 0, obj, stream, NULL);
if (MESA_DEBUG_FLAGS & DEBUG_ALWAYS_FLUSH) {
_mesa_flush(ctx);
@@ -1488,7 +1488,7 @@ vbo_exec_DrawTransformFeedback(GLenum mode, GLuint name)
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx, "glDrawTransformFeedback(%s, %d)\n",
- _mesa_lookup_enum_by_nr(mode), name);
+ _mesa_enum_to_string(mode), name);
vbo_draw_transform_feedback(ctx, mode, obj, 0, 1);
}
@@ -1502,7 +1502,7 @@ vbo_exec_DrawTransformFeedbackStream(GLenum mode, GLuint name, GLuint stream)
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx, "glDrawTransformFeedbackStream(%s, %u, %u)\n",
- _mesa_lookup_enum_by_nr(mode), name, stream);
+ _mesa_enum_to_string(mode), name, stream);
vbo_draw_transform_feedback(ctx, mode, obj, stream, 1);
}
@@ -1517,7 +1517,7 @@ vbo_exec_DrawTransformFeedbackInstanced(GLenum mode, GLuint name,
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx, "glDrawTransformFeedbackInstanced(%s, %d)\n",
- _mesa_lookup_enum_by_nr(mode), name);
+ _mesa_enum_to_string(mode), name);
vbo_draw_transform_feedback(ctx, mode, obj, 0, primcount);
}
@@ -1533,7 +1533,7 @@ vbo_exec_DrawTransformFeedbackStreamInstanced(GLenum mode, GLuint name,
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx, "glDrawTransformFeedbackStreamInstanced"
"(%s, %u, %u, %i)\n",
- _mesa_lookup_enum_by_nr(mode), name, stream, primcount);
+ _mesa_enum_to_string(mode), name, stream, primcount);
vbo_draw_transform_feedback(ctx, mode, obj, stream, primcount);
}
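
On the receiving side, a driver's draw callback can now tell which vertex stream the tfb_vertcount object was recorded on instead of assuming stream 0. A hypothetical receiver, shown only to make the contract concrete: the function name and body are invented, and the parameter list is reconstructed from the vbo.h/tnl.h hunks and the call sites above, so treat it as a sketch rather than any driver's actual implementation.

static void
example_draw_prims(struct gl_context *ctx,
                   const struct _mesa_prim *prims, GLuint nr_prims,
                   const struct _mesa_index_buffer *ib,
                   GLboolean index_bounds_valid,
                   GLuint min_index, GLuint max_index,
                   struct gl_transform_feedback_object *tfb_vertcount,
                   unsigned stream, struct gl_buffer_object *indirect)
{
   if (tfb_vertcount) {
      /* The vertex count must come from the primitives-written counter
       * of 'stream', not implicitly from stream 0 as before. */
   }
   /* ... issue the draw ... */
}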
@@ -1563,7 +1563,7 @@ vbo_validated_drawarraysindirect(struct gl_context *ctx,
check_buffers_are_unmapped(exec->array.inputs);
vbo->draw_prims(ctx, prim, 1,
NULL, GL_TRUE, 0, ~0,
- NULL,
+ NULL, 0,
ctx->DrawIndirectBuffer);
if (MESA_DEBUG_FLAGS & DEBUG_ALWAYS_FLUSH)
@@ -1603,7 +1603,7 @@ vbo_validated_multidrawarraysindirect(struct gl_context *ctx,
check_buffers_are_unmapped(exec->array.inputs);
vbo->draw_prims(ctx, prim, primcount,
NULL, GL_TRUE, 0, ~0,
- NULL,
+ NULL, 0,
ctx->DrawIndirectBuffer);
free(prim);
@@ -1640,7 +1640,7 @@ vbo_validated_drawelementsindirect(struct gl_context *ctx,
check_buffers_are_unmapped(exec->array.inputs);
vbo->draw_prims(ctx, prim, 1,
&ib, GL_TRUE, 0, ~0,
- NULL,
+ NULL, 0,
ctx->DrawIndirectBuffer);
if (MESA_DEBUG_FLAGS & DEBUG_ALWAYS_FLUSH)
@@ -1689,7 +1689,7 @@ vbo_validated_multidrawelementsindirect(struct gl_context *ctx,
check_buffers_are_unmapped(exec->array.inputs);
vbo->draw_prims(ctx, prim, primcount,
&ib, GL_TRUE, 0, ~0,
- NULL,
+ NULL, 0,
ctx->DrawIndirectBuffer);
free(prim);
@@ -1709,7 +1709,7 @@ vbo_exec_DrawArraysIndirect(GLenum mode, const GLvoid *indirect)
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx, "glDrawArraysIndirect(%s, %p)\n",
- _mesa_lookup_enum_by_nr(mode), indirect);
+ _mesa_enum_to_string(mode), indirect);
if (!_mesa_validate_DrawArraysIndirect(ctx, mode, indirect))
return;
@@ -1725,8 +1725,8 @@ vbo_exec_DrawElementsIndirect(GLenum mode, GLenum type,
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx, "glDrawElementsIndirect(%s, %s, %p)\n",
- _mesa_lookup_enum_by_nr(mode),
- _mesa_lookup_enum_by_nr(type), indirect);
+ _mesa_enum_to_string(mode),
+ _mesa_enum_to_string(type), indirect);
if (!_mesa_validate_DrawElementsIndirect(ctx, mode, type, indirect))
return;
@@ -1743,7 +1743,7 @@ vbo_exec_MultiDrawArraysIndirect(GLenum mode,
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx, "glMultiDrawArraysIndirect(%s, %p, %i, %i)\n",
- _mesa_lookup_enum_by_nr(mode), indirect, primcount, stride);
+ _mesa_enum_to_string(mode), indirect, primcount, stride);
/* If <stride> is zero, the array elements are treated as tightly packed. */
if (stride == 0)
@@ -1768,8 +1768,8 @@ vbo_exec_MultiDrawElementsIndirect(GLenum mode, GLenum type,
if (MESA_VERBOSE & VERBOSE_DRAW)
_mesa_debug(ctx, "glMultiDrawElementsIndirect(%s, %s, %p, %i, %i)\n",
- _mesa_lookup_enum_by_nr(mode),
- _mesa_lookup_enum_by_nr(type), indirect, primcount, stride);
+ _mesa_enum_to_string(mode),
+ _mesa_enum_to_string(type), indirect, primcount, stride);
/* If <stride> is zero, the array elements are treated as tightly packed. */
if (stride == 0)
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 37b53a8309d..2bfb0c32b73 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -412,7 +412,7 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, GLboolean keepUnmapped)
GL_TRUE,
0,
exec->vtx.vert_count - 1,
- NULL, NULL);
+ NULL, 0, NULL);
/* If using a real VBO, get new storage -- unless asked not to.
*/
diff --git a/src/mesa/vbo/vbo_primitive_restart.c b/src/mesa/vbo/vbo_primitive_restart.c
index dafc4fd2a9a..0662c5cd4ef 100644
--- a/src/mesa/vbo/vbo_primitive_restart.c
+++ b/src/mesa/vbo/vbo_primitive_restart.c
@@ -251,11 +251,11 @@ vbo_sw_primitive_restart(struct gl_context *ctx,
(temp_prim.count == sub_prim->count)) {
draw_prims_func(ctx, &temp_prim, 1, ib,
GL_TRUE, sub_prim->min_index, sub_prim->max_index,
- NULL, NULL);
+ NULL, 0, NULL);
} else {
draw_prims_func(ctx, &temp_prim, 1, ib,
GL_FALSE, -1, -1,
- NULL, NULL);
+ NULL, 0, NULL);
}
}
if (sub_end_index >= end_index) {
diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/vbo/vbo_rebase.c
index c3c4b64e65c..24c04ca7e6a 100644
--- a/src/mesa/vbo/vbo_rebase.c
+++ b/src/mesa/vbo/vbo_rebase.c
@@ -258,7 +258,7 @@ void vbo_rebase_prims( struct gl_context *ctx,
GL_TRUE,
0,
max_index - min_index,
- NULL, NULL );
+ NULL, 0, NULL );
ctx->Array._DrawArrays = saved_arrays;
ctx->NewDriverState |= ctx->DriverFlags.NewArray;
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
index de744e0c763..b1fd6892026 100644
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -314,7 +314,7 @@ vbo_save_playback_vertex_list(struct gl_context *ctx, void *data)
GL_TRUE,
0, /* Node is a VBO, so this is ok */
node->count - 1,
- NULL, NULL);
+ NULL, 0, NULL);
}
}
diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c
index 7b1e20b18d2..cb27ef961ab 100644
--- a/src/mesa/vbo/vbo_split_copy.c
+++ b/src/mesa/vbo/vbo_split_copy.c
@@ -203,7 +203,7 @@ flush( struct copy_context *copy )
GL_TRUE,
0,
copy->dstbuf_nr - 1,
- NULL, NULL );
+ NULL, 0, NULL );
ctx->Array._DrawArrays = saved_arrays;
ctx->NewDriverState |= ctx->DriverFlags.NewArray;
diff --git a/src/mesa/vbo/vbo_split_inplace.c b/src/mesa/vbo/vbo_split_inplace.c
index 5887b74d829..cff4bcd30ff 100644
--- a/src/mesa/vbo/vbo_split_inplace.c
+++ b/src/mesa/vbo/vbo_split_inplace.c
@@ -94,7 +94,7 @@ static void flush_vertex( struct split_context *split )
!split->ib,
split->min_index,
split->max_index,
- NULL, NULL);
+ NULL, 0, NULL);
ctx->Array._DrawArrays = saved_arrays;
ctx->NewDriverState |= ctx->DriverFlags.NewArray;