diff options
Diffstat (limited to 'src/mesa')
139 files changed, 2990 insertions, 3911 deletions
diff --git a/src/mesa/Android.libmesa_dricore.mk b/src/mesa/Android.libmesa_dricore.mk index 2e308b83733..cd31e148222 100644 --- a/src/mesa/Android.libmesa_dricore.mk +++ b/src/mesa/Android.libmesa_dricore.mk @@ -50,7 +50,7 @@ endif # MESA_ENABLE_ASM ifeq ($(ARCH_X86_HAVE_SSE4_1),true) LOCAL_SRC_FILES += \ main/streaming-load-memcpy.c \ - mesa/main/sse_minmax.c + main/sse_minmax.c LOCAL_CFLAGS := \ -msse4.1 \ -DUSE_SSE41 @@ -60,6 +60,7 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/mesa/main \ $(MESA_TOP)/src/glsl \ + $(MESA_TOP)/src/glsl/nir \ $(MESA_TOP)/src/gallium/include \ $(MESA_TOP)/src/gallium/auxiliary diff --git a/src/mesa/Android.libmesa_glsl_utils.mk b/src/mesa/Android.libmesa_glsl_utils.mk index ed620ac648c..9e150eaa3c0 100644 --- a/src/mesa/Android.libmesa_glsl_utils.mk +++ b/src/mesa/Android.libmesa_glsl_utils.mk @@ -37,6 +37,7 @@ LOCAL_MODULE := libmesa_glsl_utils LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/glsl \ + $(MESA_TOP)/src/glsl/nir \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/gallium/include \ $(MESA_TOP)/src/gallium/auxiliary @@ -62,6 +63,7 @@ LOCAL_CFLAGS := -D_POSIX_C_SOURCE=199309L LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/glsl \ + $(MESA_TOP)/src/glsl/nir \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/gallium/include \ $(MESA_TOP)/src/gallium/auxiliary diff --git a/src/mesa/Android.libmesa_st_mesa.mk b/src/mesa/Android.libmesa_st_mesa.mk index b4b7fd97722..427a35f4f6e 100644 --- a/src/mesa/Android.libmesa_st_mesa.mk +++ b/src/mesa/Android.libmesa_st_mesa.mk @@ -55,6 +55,7 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/mesa/main \ $(MESA_TOP)/src/glsl \ + $(MESA_TOP)/src/glsl/nir \ $(MESA_TOP)/src/gallium/auxiliary \ $(MESA_TOP)/src/gallium/include diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index 0915594cea6..34fb4461985 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -415,6 +415,7 @@ STATETRACKER_FILES = \ state_tracker/st_cache.h \ state_tracker/st_cb_bitmap.c \ 
state_tracker/st_cb_bitmap.h \ + state_tracker/st_cb_bitmap_shader.c \ state_tracker/st_cb_blit.c \ state_tracker/st_cb_blit.h \ state_tracker/st_cb_bufferobjects.c \ @@ -425,6 +426,7 @@ STATETRACKER_FILES = \ state_tracker/st_cb_condrender.h \ state_tracker/st_cb_drawpixels.c \ state_tracker/st_cb_drawpixels.h \ + state_tracker/st_cb_drawpixels_shader.c \ state_tracker/st_cb_drawtex.c \ state_tracker/st_cb_drawtex.h \ state_tracker/st_cb_eglimage.c \ @@ -525,9 +527,7 @@ PROGRAM_FILES = \ program/sampler.h \ program/string_to_uint_map.cpp \ program/symbol_table.c \ - program/symbol_table.h \ - ../glsl/shader_enums.c \ - ../glsl/shader_enums.h + program/symbol_table.h PROGRAM_NIR_FILES = \ program/prog_to_nir.c \ @@ -620,6 +620,7 @@ INCLUDE_DIRS = \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src \ -I$(top_srcdir)/src/glsl \ + -I$(top_srcdir)/src/glsl/nir \ -I$(top_builddir)/src/glsl \ -I$(top_builddir)/src/glsl/nir \ -I$(top_srcdir)/src/glsl/glcpp \ diff --git a/src/mesa/SConscript b/src/mesa/SConscript index 5b80a216fef..c986326d2bf 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -16,6 +16,7 @@ env.Append(CPPPATH = [ '#/src', '#/src/mapi', '#/src/glsl', + '#/src/glsl/nir', '#/src/mesa', '#/src/gallium/include', '#/src/gallium/auxiliary', diff --git a/src/mesa/drivers/common/meta_copy_image.c b/src/mesa/drivers/common/meta_copy_image.c index 33490ee6615..04b9cafe308 100644 --- a/src/mesa/drivers/common/meta_copy_image.c +++ b/src/mesa/drivers/common/meta_copy_image.c @@ -108,7 +108,11 @@ make_view(struct gl_context *ctx, struct gl_texture_image *tex_image, return false; } + assert(tex_obj->Target != 0); + assert(tex_obj->TargetIndex < NUM_TEXTURE_TARGETS); + view_tex_obj->Target = tex_obj->Target; + view_tex_obj->TargetIndex = tex_obj->TargetIndex; *view_tex_image = _mesa_get_tex_image(ctx, view_tex_obj, tex_obj->Target, 0); @@ -129,7 +133,6 @@ make_view(struct gl_context *ctx, struct gl_texture_image *tex_image, view_tex_obj->NumLayers = 
tex_obj->NumLayers; view_tex_obj->Immutable = tex_obj->Immutable; view_tex_obj->ImmutableLevels = tex_obj->ImmutableLevels; - view_tex_obj->Target = tex_obj->Target; if (ctx->Driver.TextureView != NULL && !ctx->Driver.TextureView(ctx, view_tex_obj, tex_obj)) { diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 1a5943c87fb..59d795998c6 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -1315,9 +1315,10 @@ static struct gl_program * i915NewProgram(struct gl_context * ctx, GLenum target, GLuint id) { switch (target) { - case GL_VERTEX_PROGRAM_ARB: - return _mesa_init_vertex_program(ctx, CALLOC_STRUCT(gl_vertex_program), - target, id); + case GL_VERTEX_PROGRAM_ARB: { + struct gl_vertex_program *prog = CALLOC_STRUCT(gl_vertex_program); + return _mesa_init_gl_program(&prog->Base, target, id); + } case GL_FRAGMENT_PROGRAM_ARB:{ struct i915_fragment_program *prog = @@ -1325,8 +1326,7 @@ i915NewProgram(struct gl_context * ctx, GLenum target, GLuint id) if (prog) { i915_init_program(I915_CONTEXT(ctx), prog); - return _mesa_init_fragment_program(ctx, &prog->FragProg, - target, id); + return _mesa_init_gl_program(&prog->FragProg.Base, target, id); } else return NULL; diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index a9b963a9eca..d30a053e10f 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -48,6 +48,7 @@ LOCAL_C_INCLUDES := \ $(MESA_DRI_C_INCLUDES) LOCAL_SRC_FILES := \ + $(i965_compiler_FILES) \ $(i965_FILES) LOCAL_WHOLE_STATIC_LIBRARIES := \ diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index 2e241511049..04b3f9cc8ce 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -33,6 +33,7 @@ AM_CFLAGS = \ -I$(top_srcdir)/src/mesa/drivers/dri/common \ -I$(top_srcdir)/src/mesa/drivers/dri/intel/server 
\ -I$(top_srcdir)/src/gtest/include \ + -I$(top_srcdir)/src/glsl/nir \ -I$(top_builddir)/src/glsl/nir \ -I$(top_builddir)/src/mesa/drivers/dri/common \ $(DEFINES) \ diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index b242ab55aae..ccd540dabca 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -1,6 +1,7 @@ i965_compiler_FILES = \ brw_cfg.cpp \ brw_cfg.h \ + brw_compiler.h \ brw_cubemap_normalize.cpp \ brw_dead_control_flow.cpp \ brw_dead_control_flow.h \ diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp index d458ad846bf..5308d175416 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp @@ -32,7 +32,7 @@ brw_blorp_eu_emitter::brw_blorp_eu_emitter(struct brw_context *brw, generator(brw->intelScreen->compiler, brw, mem_ctx, (void *) rzalloc(mem_ctx, struct brw_wm_prog_key), (struct brw_stage_prog_data *) rzalloc(mem_ctx, struct brw_wm_prog_data), - NULL, 0, false, "BLORP") + 0, false, "BLORP") { if (debug_flag) generator.enable_debug("blorp"); diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp index 91d53eff5a7..10bcd4bafd4 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.cpp +++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp @@ -305,6 +305,10 @@ cfg_t::cfg_t(exec_list *instructions) assert(cur_do != NULL && cur_while != NULL); cur->add_successor(mem_ctx, cur_do); + + if (inst->predicate) + cur->add_successor(mem_ctx, cur_while); + set_next_block(&cur, cur_while, ip); /* Pop the stack so we're in the previous loop */ @@ -422,7 +426,11 @@ cfg_t::dump(backend_shader *s) calculate_idom(); foreach_block (block, this) { - fprintf(stderr, "START B%d IDOM(B%d)", block->num, block->idom->num); + if (block->idom) + fprintf(stderr, "START B%d IDOM(B%d)", block->num, block->idom->num); + else + fprintf(stderr, "START 
B%d IDOM(none)", block->num); + foreach_list_typed(bblock_link, link, link, &block->parents) { fprintf(stderr, " <-B%d", link->block->num); diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c index 17a745d0373..b0119558c3a 100644 --- a/src/mesa/drivers/dri/i965/brw_clear.c +++ b/src/mesa/drivers/dri/i965/brw_clear.c @@ -241,7 +241,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask) } /* Clear color buffers with fast clear or at least rep16 writes. */ - if (brw->gen >= 6 && brw->gen < 9 && (mask & BUFFER_BITS_COLOR)) { + if (brw->gen >= 6 && (mask & BUFFER_BITS_COLOR)) { if (brw_meta_fast_clear(brw, fb, mask, partial_clear)) { debug_mask("blorp color", mask & BUFFER_BITS_COLOR); mask &= ~BUFFER_BITS_COLOR; diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h new file mode 100644 index 00000000000..11c485d2f08 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -0,0 +1,661 @@ +/* + * Copyright © 2010 - 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include "brw_device_info.h" +#include "main/mtypes.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct ra_regs; +struct nir_shader; +struct brw_geometry_program; +union gl_constant_value; + +struct brw_compiler { + const struct brw_device_info *devinfo; + + struct { + struct ra_regs *regs; + + /** + * Array of the ra classes for the unaligned contiguous register + * block sizes used. + */ + int *classes; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + } vec4_reg_set; + + struct { + struct ra_regs *regs; + + /** + * Array of the ra classes for the unaligned contiguous register + * block sizes used, indexed by register size. + */ + int classes[16]; + + /** + * Mapping from classes to ra_reg ranges. Each of the per-size + * classes corresponds to a range of ra_reg nodes. This array stores + * those ranges in the form of first ra_reg in each class and the + * total number of ra_reg elements in the last array element. This + * way the range of the i'th class is given by: + * [ class_to_ra_reg_range[i], class_to_ra_reg_range[i+1] ) + */ + int class_to_ra_reg_range[17]; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + + /** + * ra class for the aligned pairs we use for PLN, which doesn't + * appear in *classes. + */ + int aligned_pairs_class; + } fs_reg_sets[2]; + + void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3); + void (*shader_perf_log)(void *, const char *str, ...) 
PRINTFLIKE(2, 3); + + bool scalar_vs; + struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES]; +}; + + +/** + * Program key structures. + * + * When drawing, we look for the currently bound shaders in the program + * cache. This is essentially a hash table lookup, and these are the keys. + * + * Sometimes OpenGL features specified as state need to be simulated via + * shader code, due to a mismatch between the API and the hardware. This + * is often referred to as "non-orthogonal state" or "NOS". We store NOS + * in the program key so it's considered when searching for a program. If + * we haven't seen a particular combination before, we have to recompile a + * new specialized version. + * + * Shader compilation should not look up state in gl_context directly, but + * instead use the copy in the program key. This guarantees recompiles will + * happen correctly. + * + * @{ + */ + +enum PACKED gen6_gather_sampler_wa { + WA_SIGN = 1, /* whether we need to sign extend */ + WA_8BIT = 2, /* if we have an 8bit format needing wa */ + WA_16BIT = 4, /* if we have a 16bit format needing wa */ +}; + +/** + * Sampler information needed by VS, WM, and GS program cache keys. + */ +struct brw_sampler_prog_key_data { + /** + * EXT_texture_swizzle and DEPTH_TEXTURE_MODE swizzles. + */ + uint16_t swizzles[MAX_SAMPLERS]; + + uint32_t gl_clamp_mask[3]; + + /** + * For RG32F, gather4's channel select is broken. + */ + uint32_t gather_channel_quirk_mask; + + /** + * Whether this sampler uses the compressed multisample surface layout. + */ + uint32_t compressed_multisample_layout_mask; + + /** + * For Sandybridge, which shader w/a we need for gather quirks. + */ + enum gen6_gather_sampler_wa gen6_gather_wa[MAX_SAMPLERS]; +}; + + +/** The program key for Vertex Shaders. 
*/ +struct brw_vs_prog_key { + unsigned program_string_id; + + /* + * Per-attribute workaround flags + */ + uint8_t gl_attrib_wa_flags[VERT_ATTRIB_MAX]; + + bool copy_edgeflag:1; + + bool clamp_vertex_color:1; + + /** + * How many user clipping planes are being uploaded to the vertex shader as + * push constants. + * + * These are used for lowering legacy gl_ClipVertex/gl_Position clipping to + * clip distances. + */ + unsigned nr_userclip_plane_consts:4; + + /** + * For pre-Gen6 hardware, a bitfield indicating which texture coordinates + * are going to be replaced with point coordinates (as a consequence of a + * call to glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)). Because + * our SF thread requires exact matching between VS outputs and FS inputs, + * these texture coordinates will need to be unconditionally included in + * the VUE, even if they aren't written by the vertex shader. + */ + uint8_t point_coord_replace; + + struct brw_sampler_prog_key_data tex; +}; + +/** The program key for Geometry Shaders. */ +struct brw_gs_prog_key +{ + unsigned program_string_id; + + struct brw_sampler_prog_key_data tex; +}; + +/** The program key for Fragment/Pixel Shaders. */ +struct brw_wm_prog_key { + uint8_t iz_lookup; + bool stats_wm:1; + bool flat_shade:1; + bool persample_shading:1; + bool persample_2x:1; + unsigned nr_color_regions:5; + bool replicate_alpha:1; + bool render_to_fbo:1; + bool clamp_fragment_color:1; + bool compute_pos_offset:1; + bool compute_sample_id:1; + unsigned line_aa:2; + bool high_quality_derivatives:1; + + uint16_t drawable_height; + uint64_t input_slots_valid; + unsigned program_string_id; + GLenum alpha_test_func; /**< For Gen4/5 MRT alpha test */ + float alpha_test_ref; + + struct brw_sampler_prog_key_data tex; +}; + +struct brw_cs_prog_key { + uint32_t program_string_id; + struct brw_sampler_prog_key_data tex; +}; + +/* + * Image metadata structure as laid out in the shader parameter + * buffer. 
Entries have to be 16B-aligned for the vec4 back-end to be + * able to use them. That's okay because the padding and any unused + * entries [most of them except when we're doing untyped surface + * access] will be removed by the uniform packing pass. + */ +#define BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET 0 +#define BRW_IMAGE_PARAM_OFFSET_OFFSET 4 +#define BRW_IMAGE_PARAM_SIZE_OFFSET 8 +#define BRW_IMAGE_PARAM_STRIDE_OFFSET 12 +#define BRW_IMAGE_PARAM_TILING_OFFSET 16 +#define BRW_IMAGE_PARAM_SWIZZLING_OFFSET 20 +#define BRW_IMAGE_PARAM_SIZE 24 + +struct brw_image_param { + /** Surface binding table index. */ + uint32_t surface_idx; + + /** Offset applied to the X and Y surface coordinates. */ + uint32_t offset[2]; + + /** Surface X, Y and Z dimensions. */ + uint32_t size[3]; + + /** X-stride in bytes, Y-stride in pixels, horizontal slice stride in + * pixels, vertical slice stride in pixels. + */ + uint32_t stride[4]; + + /** Log2 of the tiling modulus in the X, Y and Z dimension. */ + uint32_t tiling[3]; + + /** + * Right shift to apply for bit 6 address swizzling. Two different + * swizzles can be specified and will be applied one after the other. The + * resulting address will be: + * + * addr' = addr ^ ((1 << 6) & ((addr >> swizzling[0]) ^ + * (addr >> swizzling[1]))) + * + * Use \c 0xff if any of the swizzles is not required. + */ + uint32_t swizzling[2]; +}; + +struct brw_stage_prog_data { + struct { + /** size of our binding table. 
*/ + uint32_t size_bytes; + + /** @{ + * surface indices for the various groups of surfaces + */ + uint32_t pull_constants_start; + uint32_t texture_start; + uint32_t gather_texture_start; + uint32_t ubo_start; + uint32_t ssbo_start; + uint32_t abo_start; + uint32_t image_start; + uint32_t shader_time_start; + /** @} */ + } binding_table; + + GLuint nr_params; /**< number of float params/constants */ + GLuint nr_pull_params; + unsigned nr_image_params; + + unsigned curb_read_length; + unsigned total_scratch; + + /** + * Register where the thread expects to find input data from the URB + * (typically uniforms, followed by vertex or fragment attributes). + */ + unsigned dispatch_grf_start_reg; + + bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */ + + /* Pointers to tracked values (only valid once + * _mesa_load_state_parameters has been called at runtime). + */ + const union gl_constant_value **param; + const union gl_constant_value **pull_param; + + /** Image metadata passed to the shader as uniforms. */ + struct brw_image_param *image_param; +}; + +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs. + */ +struct brw_wm_prog_data { + struct brw_stage_prog_data base; + + GLuint num_varying_inputs; + + GLuint dispatch_grf_start_reg_16; + GLuint reg_blocks; + GLuint reg_blocks_16; + + struct { + /** @{ + * surface indices the WM-specific surfaces + */ + uint32_t render_target_start; + /** @} */ + } binding_table; + + uint8_t computed_depth_mode; + + bool early_fragment_tests; + bool no_8; + bool dual_src_blend; + bool uses_pos_offset; + bool uses_omask; + bool uses_kill; + bool pulls_bary; + uint32_t prog_offset_16; + + /** + * Mask of which interpolation modes are required by the fragment shader. + * Used in hardware setup on gen6+. 
+ */ + uint32_t barycentric_interp_modes; + + /** + * Map from gl_varying_slot to the position within the FS setup data + * payload where the varying's attribute vertex deltas should be delivered. + * For varying slots that are not used by the FS, the value is -1. + */ + int urb_setup[VARYING_SLOT_MAX]; +}; + +struct brw_cs_prog_data { + struct brw_stage_prog_data base; + + GLuint dispatch_grf_start_reg_16; + unsigned local_size[3]; + unsigned simd_size; + bool uses_barrier; + bool uses_num_work_groups; + unsigned local_invocation_id_regs; + + struct { + /** @{ + * surface indices the CS-specific surfaces + */ + uint32_t work_groups_start; + /** @} */ + } binding_table; +}; + +/** + * Enum representing the i965-specific vertex results that don't correspond + * exactly to any element of gl_varying_slot. The values of this enum are + * assigned such that they don't conflict with gl_varying_slot. + */ +typedef enum +{ + BRW_VARYING_SLOT_NDC = VARYING_SLOT_MAX, + BRW_VARYING_SLOT_PAD, + /** + * Technically this is not a varying but just a placeholder that + * compile_sf_prog() inserts into its VUE map to cause the gl_PointCoord + * builtin variable to be compiled correctly. see compile_sf_prog() for + * more info. + */ + BRW_VARYING_SLOT_PNTC, + BRW_VARYING_SLOT_COUNT +} brw_varying_slot; + +/** + * Data structure recording the relationship between the gl_varying_slot enum + * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a + * single octaword within the VUE (128 bits). + * + * Note that each BRW register contains 256 bits (2 octawords), so when + * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two + * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as + * in a vertex shader), each register corresponds to a single VUE slot, since + * it contains data for two separate vertices. 
+ */ +struct brw_vue_map { + /** + * Bitfield representing all varying slots that are (a) stored in this VUE + * map, and (b) actually written by the shader. Does not include any of + * the additional varying slots defined in brw_varying_slot. + */ + GLbitfield64 slots_valid; + + /** + * Is this VUE map for a separate shader pipeline? + * + * Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched + * without the linker having a chance to dead code eliminate unused varyings. + * + * This means that we have to use a fixed slot layout, based on the output's + * location field, rather than assigning slots in a compact contiguous block. + */ + bool separate; + + /** + * Map from gl_varying_slot value to VUE slot. For gl_varying_slots that are + * not stored in a slot (because they are not written, or because + * additional processing is applied before storing them in the VUE), the + * value is -1. + */ + signed char varying_to_slot[BRW_VARYING_SLOT_COUNT]; + + /** + * Map from VUE slot to gl_varying_slot value. For slots that do not + * directly correspond to a gl_varying_slot, the value comes from + * brw_varying_slot. + * + * For slots that are not in use, the value is BRW_VARYING_SLOT_COUNT (this + * simplifies code that uses the value stored in slot_to_varying to + * create a bit mask). + */ + signed char slot_to_varying[BRW_VARYING_SLOT_COUNT]; + + /** + * Total number of VUE slots in use + */ + int num_slots; +}; + +/** + * Convert a VUE slot number into a byte offset within the VUE. + */ +static inline GLuint brw_vue_slot_to_offset(GLuint slot) +{ + return 16*slot; +} + +/** + * Convert a vertex output (brw_varying_slot) into a byte offset within the + * VUE. 
+ */ +static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map, + GLuint varying) +{ + return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]); +} + +void brw_compute_vue_map(const struct brw_device_info *devinfo, + struct brw_vue_map *vue_map, + GLbitfield64 slots_valid, + bool separate_shader); + +enum shader_dispatch_mode { + DISPATCH_MODE_4X1_SINGLE = 0, + DISPATCH_MODE_4X2_DUAL_INSTANCE = 1, + DISPATCH_MODE_4X2_DUAL_OBJECT = 2, + DISPATCH_MODE_SIMD8 = 3, +}; + +struct brw_vue_prog_data { + struct brw_stage_prog_data base; + struct brw_vue_map vue_map; + + GLuint urb_read_length; + GLuint total_grf; + + /* Used for calculating urb partitions. In the VS, this is the size of the + * URB entry used for both input and output to the thread. In the GS, this + * is the size of the URB entry used for output. + */ + GLuint urb_entry_size; + + enum shader_dispatch_mode dispatch_mode; +}; + +struct brw_vs_prog_data { + struct brw_vue_prog_data base; + + GLbitfield64 inputs_read; + + unsigned nr_attributes; + + bool uses_vertexid; + bool uses_instanceid; +}; + +struct brw_gs_prog_data +{ + struct brw_vue_prog_data base; + + /** + * Size of an output vertex, measured in HWORDS (32 bytes). + */ + unsigned output_vertex_size_hwords; + + unsigned output_topology; + + /** + * Size of the control data (cut bits or StreamID bits), in hwords (32 + * bytes). 0 if there is no control data. + */ + unsigned control_data_header_size_hwords; + + /** + * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID + * if the control data is StreamID bits, or + * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits). + * Ignored if control_data_header_size is 0. + */ + unsigned control_data_format; + + bool include_primitive_id; + + /** + * The number of vertices emitted, if constant - otherwise -1. + */ + int static_vertex_count; + + int invocations; + + /** + * Gen6 transform feedback enabled flag. 
+ */ + bool gen6_xfb_enabled; + + /** + * Gen6: Provoking vertex convention for odd-numbered triangles + * in tristrips. + */ + GLuint pv_first:1; + + /** + * Gen6: Number of varyings that are output to transform feedback. + */ + GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */ + + /** + * Gen6: Map from the index of a transform feedback binding table entry to the + * gl_varying_slot that should be streamed out through that binding table + * entry. + */ + unsigned char transform_feedback_bindings[64 /* BRW_MAX_SOL_BINDINGS */]; + + /** + * Gen6: Map from the index of a transform feedback binding table entry to the + * swizzles that should be used when streaming out data through that + * binding table entry. + */ + unsigned char transform_feedback_swizzles[64 /* BRW_MAX_SOL_BINDINGS */]; +}; + + +/** @} */ + +/** + * Compile a vertex shader. + * + * Returns the final assembly and the program's size. + */ +const unsigned * +brw_compile_vs(const struct brw_compiler *compiler, void *log_data, + void *mem_ctx, + const struct brw_vs_prog_key *key, + struct brw_vs_prog_data *prog_data, + const struct nir_shader *shader, + gl_clip_plane *clip_planes, + bool use_legacy_snorm_formula, + int shader_time_index, + unsigned *final_assembly_size, + char **error_str); + +/** + * Scratch data used when compiling a GLSL geometry shader. + */ +struct brw_gs_compile +{ + struct brw_gs_prog_key key; + struct brw_gs_prog_data prog_data; + struct brw_vue_map input_vue_map; + + struct brw_geometry_program *gp; + + unsigned control_data_bits_per_vertex; + unsigned control_data_header_size_bits; +}; + +/** + * Compile a geometry shader. + * + * Returns the final assembly and the program's size. 
+ */ +const unsigned * +brw_compile_gs(const struct brw_compiler *compiler, void *log_data, + struct brw_gs_compile *c, + const struct nir_shader *shader, + struct gl_shader_program *shader_prog, + void *mem_ctx, + int shader_time_index, + unsigned *final_assembly_size, + char **error_str); + +/** + * Compile a fragment shader. + * + * Returns the final assembly and the program's size. + */ +const unsigned * +brw_compile_fs(const struct brw_compiler *compiler, void *log_data, + void *mem_ctx, + const struct brw_wm_prog_key *key, + struct brw_wm_prog_data *prog_data, + const struct nir_shader *shader, + struct gl_program *prog, + int shader_time_index8, + int shader_time_index16, + bool use_rep_send, + unsigned *final_assembly_size, + char **error_str); + +/** + * Compile a compute shader. + * + * Returns the final assembly and the program's size. + */ +const unsigned * +brw_compile_cs(const struct brw_compiler *compiler, void *log_data, + void *mem_ctx, + const struct brw_cs_prog_key *key, + struct brw_cs_prog_data *prog_data, + const struct nir_shader *shader, + int shader_time_index, + unsigned *final_assembly_size, + char **error_str); + +#ifdef __cplusplus +} /* extern "C" */ +#endif diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 6b2bbd21703..3b125448e14 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -51,7 +51,7 @@ #include "brw_context.h" #include "brw_defines.h" -#include "brw_shader.h" +#include "brw_compiler.h" #include "brw_draw.h" #include "brw_state.h" diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index aa1284db3ce..4f503ae4869 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -40,6 +40,7 @@ #include "main/mm.h" #include "main/mtypes.h" #include "brw_structs.h" +#include "brw_compiler.h" #include "intel_aub.h" #include "program/prog_parameter.h" @@ 
-340,260 +341,6 @@ struct brw_shader { bool compiled_once; }; -struct brw_stage_prog_data { - struct { - /** size of our binding table. */ - uint32_t size_bytes; - - /** @{ - * surface indices for the various groups of surfaces - */ - uint32_t pull_constants_start; - uint32_t texture_start; - uint32_t gather_texture_start; - uint32_t ubo_start; - uint32_t abo_start; - uint32_t image_start; - uint32_t shader_time_start; - /** @} */ - } binding_table; - - GLuint nr_params; /**< number of float params/constants */ - GLuint nr_pull_params; - unsigned nr_image_params; - - unsigned curb_read_length; - unsigned total_scratch; - - /** - * Register where the thread expects to find input data from the URB - * (typically uniforms, followed by vertex or fragment attributes). - */ - unsigned dispatch_grf_start_reg; - - bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */ - - /* Pointers to tracked values (only valid once - * _mesa_load_state_parameters has been called at runtime). - */ - const gl_constant_value **param; - const gl_constant_value **pull_param; - - /** Image metadata passed to the shader as uniforms. */ - struct brw_image_param *image_param; -}; - -/* - * Image metadata structure as laid out in the shader parameter - * buffer. Entries have to be 16B-aligned for the vec4 back-end to be - * able to use them. That's okay because the padding and any unused - * entries [most of them except when we're doing untyped surface - * access] will be removed by the uniform packing pass. - */ -#define BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET 0 -#define BRW_IMAGE_PARAM_OFFSET_OFFSET 4 -#define BRW_IMAGE_PARAM_SIZE_OFFSET 8 -#define BRW_IMAGE_PARAM_STRIDE_OFFSET 12 -#define BRW_IMAGE_PARAM_TILING_OFFSET 16 -#define BRW_IMAGE_PARAM_SWIZZLING_OFFSET 20 -#define BRW_IMAGE_PARAM_SIZE 24 - -struct brw_image_param { - /** Surface binding table index. */ - uint32_t surface_idx; - - /** Offset applied to the X and Y surface coordinates. 
*/ - uint32_t offset[2]; - - /** Surface X, Y and Z dimensions. */ - uint32_t size[3]; - - /** X-stride in bytes, Y-stride in pixels, horizontal slice stride in - * pixels, vertical slice stride in pixels. - */ - uint32_t stride[4]; - - /** Log2 of the tiling modulus in the X, Y and Z dimension. */ - uint32_t tiling[3]; - - /** - * Right shift to apply for bit 6 address swizzling. Two different - * swizzles can be specified and will be applied one after the other. The - * resulting address will be: - * - * addr' = addr ^ ((1 << 6) & ((addr >> swizzling[0]) ^ - * (addr >> swizzling[1]))) - * - * Use \c 0xff if any of the swizzles is not required. - */ - uint32_t swizzling[2]; -}; - -/* Data about a particular attempt to compile a program. Note that - * there can be many of these, each in a different GL state - * corresponding to a different brw_wm_prog_key struct, with different - * compiled programs. - */ -struct brw_wm_prog_data { - struct brw_stage_prog_data base; - - GLuint num_varying_inputs; - - GLuint dispatch_grf_start_reg_16; - GLuint reg_blocks; - GLuint reg_blocks_16; - - struct { - /** @{ - * surface indices the WM-specific surfaces - */ - uint32_t render_target_start; - /** @} */ - } binding_table; - - uint8_t computed_depth_mode; - - bool early_fragment_tests; - bool no_8; - bool dual_src_blend; - bool uses_pos_offset; - bool uses_omask; - bool uses_kill; - bool pulls_bary; - uint32_t prog_offset_16; - - /** - * Mask of which interpolation modes are required by the fragment shader. - * Used in hardware setup on gen6+. - */ - uint32_t barycentric_interp_modes; - - /** - * Map from gl_varying_slot to the position within the FS setup data - * payload where the varying's attribute vertex deltas should be delivered. - * For varying slots that are not used by the FS, the value is -1. 
- */ - int urb_setup[VARYING_SLOT_MAX]; -}; - -struct brw_cs_prog_data { - struct brw_stage_prog_data base; - - GLuint dispatch_grf_start_reg_16; - unsigned local_size[3]; - unsigned simd_size; - bool uses_barrier; - bool uses_num_work_groups; - unsigned local_invocation_id_regs; - - struct { - /** @{ - * surface indices the CS-specific surfaces - */ - uint32_t work_groups_start; - /** @} */ - } binding_table; -}; - -/** - * Enum representing the i965-specific vertex results that don't correspond - * exactly to any element of gl_varying_slot. The values of this enum are - * assigned such that they don't conflict with gl_varying_slot. - */ -typedef enum -{ - BRW_VARYING_SLOT_NDC = VARYING_SLOT_MAX, - BRW_VARYING_SLOT_PAD, - /** - * Technically this is not a varying but just a placeholder that - * compile_sf_prog() inserts into its VUE map to cause the gl_PointCoord - * builtin variable to be compiled correctly. see compile_sf_prog() for - * more info. - */ - BRW_VARYING_SLOT_PNTC, - BRW_VARYING_SLOT_COUNT -} brw_varying_slot; - - -/** - * Data structure recording the relationship between the gl_varying_slot enum - * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a - * single octaword within the VUE (128 bits). - * - * Note that each BRW register contains 256 bits (2 octawords), so when - * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two - * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as - * in a vertex shader), each register corresponds to a single VUE slot, since - * it contains data for two separate vertices. - */ -struct brw_vue_map { - /** - * Bitfield representing all varying slots that are (a) stored in this VUE - * map, and (b) actually written by the shader. Does not include any of - * the additional varying slots defined in brw_varying_slot. - */ - GLbitfield64 slots_valid; - - /** - * Is this VUE map for a separate shader pipeline? 
- * - * Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched - * without the linker having a chance to dead code eliminate unused varyings. - * - * This means that we have to use a fixed slot layout, based on the output's - * location field, rather than assigning slots in a compact contiguous block. - */ - bool separate; - - /** - * Map from gl_varying_slot value to VUE slot. For gl_varying_slots that are - * not stored in a slot (because they are not written, or because - * additional processing is applied before storing them in the VUE), the - * value is -1. - */ - signed char varying_to_slot[BRW_VARYING_SLOT_COUNT]; - - /** - * Map from VUE slot to gl_varying_slot value. For slots that do not - * directly correspond to a gl_varying_slot, the value comes from - * brw_varying_slot. - * - * For slots that are not in use, the value is BRW_VARYING_SLOT_COUNT (this - * simplifies code that uses the value stored in slot_to_varying to - * create a bit mask). - */ - signed char slot_to_varying[BRW_VARYING_SLOT_COUNT]; - - /** - * Total number of VUE slots in use - */ - int num_slots; -}; - -/** - * Convert a VUE slot number into a byte offset within the VUE. - */ -static inline GLuint brw_vue_slot_to_offset(GLuint slot) -{ - return 16*slot; -} - -/** - * Convert a vertex output (brw_varying_slot) into a byte offset within the - * VUE. - */ -static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map, - GLuint varying) -{ - return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]); -} - -void brw_compute_vue_map(const struct brw_device_info *devinfo, - struct brw_vue_map *vue_map, - GLbitfield64 slots_valid, - bool separate_shader); - - /** * Bitmask indicating which fragment shader inputs represent varyings (and * hence have to be delivered to the fragment shader by the SF/SBE stage). 
@@ -670,39 +417,6 @@ struct brw_ff_gs_prog_data { unsigned svbi_postincrement_value; }; -enum shader_dispatch_mode { - DISPATCH_MODE_4X1_SINGLE = 0, - DISPATCH_MODE_4X2_DUAL_INSTANCE = 1, - DISPATCH_MODE_4X2_DUAL_OBJECT = 2, - DISPATCH_MODE_SIMD8 = 3, -}; - -struct brw_vue_prog_data { - struct brw_stage_prog_data base; - struct brw_vue_map vue_map; - - GLuint urb_read_length; - GLuint total_grf; - - /* Used for calculating urb partitions. In the VS, this is the size of the - * URB entry used for both input and output to the thread. In the GS, this - * is the size of the URB entry used for output. - */ - GLuint urb_entry_size; - - enum shader_dispatch_mode dispatch_mode; -}; - - -struct brw_vs_prog_data { - struct brw_vue_prog_data base; - - GLbitfield64 inputs_read; - - bool uses_vertexid; - bool uses_instanceid; -}; - /** Number of texture sampler units */ #define BRW_MAX_TEX_UNIT 32 @@ -715,9 +429,6 @@ struct brw_vs_prog_data { /** Max number of SSBOs in a shader */ #define BRW_MAX_SSBO 12 -/** Max number of combined UBOs and SSBOs in a shader */ -#define BRW_MAX_COMBINED_UBO_SSBO (BRW_MAX_UBO + BRW_MAX_SSBO) - /** Max number of atomic counter buffer objects in a shader */ #define BRW_MAX_ABO 16 @@ -763,71 +474,6 @@ struct brw_vs_prog_data { #define SURF_INDEX_GEN6_SOL_BINDING(t) (t) -struct brw_gs_prog_data -{ - struct brw_vue_prog_data base; - - /** - * Size of an output vertex, measured in HWORDS (32 bytes). - */ - unsigned output_vertex_size_hwords; - - unsigned output_topology; - - /** - * Size of the control data (cut bits or StreamID bits), in hwords (32 - * bytes). 0 if there is no control data. - */ - unsigned control_data_header_size_hwords; - - /** - * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID - * if the control data is StreamID bits, or - * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits). - * Ignored if control_data_header_size is 0. 
- */ - unsigned control_data_format; - - bool include_primitive_id; - - /** - * The number of vertices emitted, if constant - otherwise -1. - */ - int static_vertex_count; - - int invocations; - - /** - * Gen6 transform feedback enabled flag. - */ - bool gen6_xfb_enabled; - - /** - * Gen6: Provoking vertex convention for odd-numbered triangles - * in tristrips. - */ - GLuint pv_first:1; - - /** - * Gen6: Number of varyings that are output to transform feedback. - */ - GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */ - - /** - * Gen6: Map from the index of a transform feedback binding table entry to the - * gl_varying_slot that should be streamed out through that binding table - * entry. - */ - unsigned char transform_feedback_bindings[BRW_MAX_SOL_BINDINGS]; - - /** - * Gen6: Map from the index of a transform feedback binding table entry to the - * swizzles that should be used when streaming out data through that - * binding table entry. - */ - unsigned char transform_feedback_swizzles[BRW_MAX_SOL_BINDINGS]; -}; - /** * Stride in bytes between shader_time entries. 
* @@ -953,6 +599,8 @@ struct intel_batchbuffer { } saved; }; +#define MAX_GS_INPUT_VERTICES 6 + #define BRW_MAX_XFB_STREAMS 4 struct brw_transform_feedback_object { diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c index 45fb816c160..263d224e882 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.c +++ b/src/mesa/drivers/dri/i965/brw_cs.c @@ -105,9 +105,15 @@ brw_codegen_cs_prog(struct brw_context *brw, if (INTEL_DEBUG & DEBUG_SHADER_TIME) st_index = brw_get_shader_time_index(brw, prog, &cp->program.Base, ST_CS); - program = brw_cs_emit(brw, mem_ctx, key, &prog_data, - &cp->program, prog, st_index, &program_size); + char *error_str; + program = brw_compile_cs(brw->intelScreen->compiler, brw, mem_ctx, + key, &prog_data, cp->program.Base.nir, + st_index, &program_size, &error_str); if (program == NULL) { + prog->LinkStatus = false; + ralloc_strcat(&prog->InfoLog, error_str); + _mesa_problem(NULL, "Failed to compile compute shader: %s\n", error_str); + ralloc_free(mem_ctx); return false; } diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h index 17c2ff9871a..899e340f14e 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.h +++ b/src/mesa/drivers/dri/i965/brw_cs.h @@ -27,11 +27,6 @@ #include "brw_program.h" -struct brw_cs_prog_key { - uint32_t program_string_id; - struct brw_sampler_prog_key_data tex; -}; - #ifdef __cplusplus extern "C" { #endif @@ -39,16 +34,6 @@ extern "C" { void brw_upload_cs_prog(struct brw_context *brw); -const unsigned * -brw_cs_emit(struct brw_context *brw, - void *mem_ctx, - const struct brw_cs_prog_key *key, - struct brw_cs_prog_data *prog_data, - struct gl_compute_program *cp, - struct gl_shader_program *prog, - int shader_time_index, - unsigned *final_assembly_size); - void brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data, void *buffer, uint32_t threads, uint32_t stride); diff --git a/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp 
b/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp index 33571292007..33d2048e657 100644 --- a/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp +++ b/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp @@ -30,7 +30,7 @@ * \author Eric Anholt <[email protected]> */ -#include "glsl/glsl_types.h" +#include "glsl/nir/glsl_types.h" #include "glsl/ir.h" #include "program/prog_instruction.h" /* For WRITEMASK_* */ diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 761aa0ec5fa..0ac1ad9378b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -461,7 +461,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p, struct brw_reg mrf, bool noperspective, unsigned mode, - unsigned data, + struct brw_reg data, unsigned msg_length, unsigned response_length); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index dc699bb6321..bf2fee9ed48 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -3212,26 +3212,29 @@ brw_pixel_interpolator_query(struct brw_codegen *p, struct brw_reg mrf, bool noperspective, unsigned mode, - unsigned data, + struct brw_reg data, unsigned msg_length, unsigned response_length) { const struct brw_device_info *devinfo = p->devinfo; - struct brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); - - brw_set_dest(p, insn, dest); - brw_set_src0(p, insn, mrf); - brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR, - msg_length, response_length, - false /* header is never present for PI */, - false); + struct brw_inst *insn; + const uint16_t exec_size = brw_inst_exec_size(devinfo, p->current); - brw_inst_set_pi_simd_mode( - devinfo, insn, brw_inst_exec_size(devinfo, insn) == BRW_EXECUTE_16); + /* brw_send_indirect_message will automatically use a direct send message + * if data is actually immediate. 
+ */ + insn = brw_send_indirect_message(p, + GEN7_SFID_PIXEL_INTERPOLATOR, + dest, + mrf, + vec1(data)); + brw_inst_set_mlen(devinfo, insn, msg_length); + brw_inst_set_rlen(devinfo, insn, response_length); + + brw_inst_set_pi_simd_mode(devinfo, insn, exec_size == BRW_EXECUTE_16); brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px dispatch */ brw_inst_set_pi_nopersp(devinfo, insn, noperspective); brw_inst_set_pi_message_type(devinfo, insn, mode); - brw_inst_set_pi_message_data(devinfo, insn, data); } void diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 5049851c617..0562c5a9981 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -47,7 +47,7 @@ #include "brw_dead_control_flow.h" #include "main/uniforms.h" #include "brw_fs_live_variables.h" -#include "glsl/glsl_types.h" +#include "glsl/nir/glsl_types.h" #include "program/sampler.h" using namespace brw; @@ -338,6 +338,18 @@ fs_inst::can_do_source_mods(const struct brw_device_info *devinfo) } bool +fs_inst::can_change_types() const +{ + return dst.type == src[0].type && + !src[0].abs && !src[0].negate && !saturate && + (opcode == BRW_OPCODE_MOV || + (opcode == BRW_OPCODE_SEL && + dst.type == src[1].type && + predicate != BRW_PREDICATE_NONE && + !src[1].abs && !src[1].negate)); +} + +bool fs_inst::has_side_effects() const { return this->eot || backend_instruction::has_side_effects(); @@ -1049,11 +1061,11 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name, unsigned int array_elements; if (type->is_array()) { - array_elements = type->length; + array_elements = type->arrays_of_arrays_size(); if (array_elements == 0) { fail("dereferenced array '%s' has length 0\n", name); } - type = type->fields.array; + type = type->without_array(); } else { array_elements = 1; } @@ -1509,25 +1521,14 @@ void fs_visitor::assign_vs_urb_setup() { brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data; - int 
grf, count, slot, channel, attr; assert(stage == MESA_SHADER_VERTEX); - count = _mesa_bitcount_64(vs_prog_data->inputs_read); + int count = _mesa_bitcount_64(vs_prog_data->inputs_read); if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) count++; /* Each attribute is 4 regs. */ - this->first_non_payload_grf += count * 4; - - unsigned vue_entries = - MAX2(count, vs_prog_data->base.vue_map.num_slots); - - /* URB entry size is counted in units of 64 bytes (for the 3DSTATE_URB_VS - * command). Each attribute is 16 bytes (4 floats/dwords), so each unit - * fits four attributes. - */ - vs_prog_data->base.urb_entry_size = ALIGN(vue_entries, 4) / 4; - vs_prog_data->base.urb_read_length = (count + 1) / 2; + this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes; assert(vs_prog_data->base.urb_read_length <= 15); @@ -1535,25 +1536,10 @@ fs_visitor::assign_vs_urb_setup() foreach_block_and_inst(block, fs_inst, inst, cfg) { for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == ATTR) { - - if (inst->src[i].reg == VERT_ATTRIB_MAX) { - slot = count - 1; - } else { - /* Attributes come in in a contiguous block, ordered by their - * gl_vert_attrib value. That means we can compute the slot - * number for an attribute by masking out the enabled - * attributes before it and counting the bits. - */ - attr = inst->src[i].reg + inst->src[i].reg_offset / 4; - slot = _mesa_bitcount_64(vs_prog_data->inputs_read & - BITFIELD64_MASK(attr)); - } - - channel = inst->src[i].reg_offset & 3; - - grf = payload.num_regs + - prog_data->curb_read_length + - slot * 4 + channel; + int grf = payload.num_regs + + prog_data->curb_read_length + + inst->src[i].reg + + inst->src[i].reg_offset; inst->src[i].file = HW_REG; inst->src[i].fixed_hw_reg = @@ -5134,41 +5120,140 @@ fs_visitor::run_cs() return !failed; } +/** + * Return a bitfield where bit n is set if barycentric interpolation mode n + * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader. 
+ */ +static unsigned +brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo, + bool shade_model_flat, + bool persample_shading, + const nir_shader *shader) +{ + unsigned barycentric_interp_modes = 0; + + nir_foreach_variable(var, &shader->inputs) { + enum glsl_interp_qualifier interp_qualifier = + (enum glsl_interp_qualifier)var->data.interpolation; + bool is_centroid = var->data.centroid && !persample_shading; + bool is_sample = var->data.sample || persample_shading; + bool is_gl_Color = (var->data.location == VARYING_SLOT_COL0) || + (var->data.location == VARYING_SLOT_COL1); + + /* Ignore WPOS and FACE, because they don't require interpolation. */ + if (var->data.location == VARYING_SLOT_POS || + var->data.location == VARYING_SLOT_FACE) + continue; + + /* Determine the set (or sets) of barycentric coordinates needed to + * interpolate this variable. Note that when + * brw->needs_unlit_centroid_workaround is set, centroid interpolation + * uses PIXEL interpolation for unlit pixels and CENTROID interpolation + * for lit pixels, so we need both sets of barycentric coordinates. 
+ */ + if (interp_qualifier == INTERP_QUALIFIER_NOPERSPECTIVE) { + if (is_centroid) { + barycentric_interp_modes |= + 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC; + } else if (is_sample) { + barycentric_interp_modes |= + 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC; + } + if ((!is_centroid && !is_sample) || + devinfo->needs_unlit_centroid_workaround) { + barycentric_interp_modes |= + 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC; + } + } else if (interp_qualifier == INTERP_QUALIFIER_SMOOTH || + (!(shade_model_flat && is_gl_Color) && + interp_qualifier == INTERP_QUALIFIER_NONE)) { + if (is_centroid) { + barycentric_interp_modes |= + 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC; + } else if (is_sample) { + barycentric_interp_modes |= + 1 << BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC; + } + if ((!is_centroid && !is_sample) || + devinfo->needs_unlit_centroid_workaround) { + barycentric_interp_modes |= + 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC; + } + } + } + + return barycentric_interp_modes; +} + +static uint8_t +computed_depth_mode(const nir_shader *shader) +{ + if (shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + switch (shader->info.fs.depth_layout) { + case FRAG_DEPTH_LAYOUT_NONE: + case FRAG_DEPTH_LAYOUT_ANY: + return BRW_PSCDEPTH_ON; + case FRAG_DEPTH_LAYOUT_GREATER: + return BRW_PSCDEPTH_ON_GE; + case FRAG_DEPTH_LAYOUT_LESS: + return BRW_PSCDEPTH_ON_LE; + case FRAG_DEPTH_LAYOUT_UNCHANGED: + return BRW_PSCDEPTH_OFF; + } + } + return BRW_PSCDEPTH_OFF; +} + const unsigned * -brw_wm_fs_emit(struct brw_context *brw, +brw_compile_fs(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const struct brw_wm_prog_key *key, struct brw_wm_prog_data *prog_data, - struct gl_fragment_program *fp, - struct gl_shader_program *prog, + const nir_shader *shader, + struct gl_program *prog, int shader_time_index8, int shader_time_index16, - unsigned *final_assembly_size) + bool use_rep_send, + unsigned *final_assembly_size, + char **error_str) { - 
/* Now the main event: Visit the shader IR and generate our FS IR for it. + /* key->alpha_test_func means simulating alpha testing via discards, + * so the shader definitely kills pixels. */ - fs_visitor v(brw->intelScreen->compiler, brw, mem_ctx, key, - &prog_data->base, &fp->Base, fp->Base.nir, 8, shader_time_index8); + prog_data->uses_kill = shader->info.fs.uses_discard || key->alpha_test_func; + prog_data->uses_omask = + shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); + prog_data->computed_depth_mode = computed_depth_mode(shader); + + prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests; + + prog_data->barycentric_interp_modes = + brw_compute_barycentric_interp_modes(compiler->devinfo, + key->flat_shade, + key->persample_shading, + shader); + + fs_visitor v(compiler, log_data, mem_ctx, key, + &prog_data->base, prog, shader, 8, + shader_time_index8); if (!v.run_fs(false /* do_rep_send */)) { - if (prog) { - prog->LinkStatus = false; - ralloc_strcat(&prog->InfoLog, v.fail_msg); - } - - _mesa_problem(NULL, "Failed to compile fragment shader: %s\n", - v.fail_msg); + if (error_str) + *error_str = ralloc_strdup(mem_ctx, v.fail_msg); return NULL; } cfg_t *simd16_cfg = NULL; - fs_visitor v2(brw->intelScreen->compiler, brw, mem_ctx, key, - &prog_data->base, &fp->Base, fp->Base.nir, 16, shader_time_index16); - if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) { + fs_visitor v2(compiler, log_data, mem_ctx, key, + &prog_data->base, prog, shader, 16, + shader_time_index16); + if (likely(!(INTEL_DEBUG & DEBUG_NO16) || use_rep_send)) { if (!v.simd16_unsupported) { /* Try a SIMD16 compile */ v2.import_uniforms(&v); - if (!v2.run_fs(brw->use_rep_send)) { - perf_debug("SIMD16 shader failed to compile: %s", v2.fail_msg); + if (!v2.run_fs(use_rep_send)) { + compiler->shader_perf_log(log_data, + "SIMD16 shader failed to compile: %s", + v2.fail_msg); } else { simd16_cfg = v2.cfg; } @@ -5176,8 +5261,8 @@ brw_wm_fs_emit(struct 
brw_context *brw, } cfg_t *simd8_cfg; - int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || brw->no_simd8; - if ((no_simd8 || brw->gen < 5) && simd16_cfg) { + int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || use_rep_send; + if ((no_simd8 || compiler->devinfo->gen < 5) && simd16_cfg) { simd8_cfg = NULL; prog_data->no_8 = true; } else { @@ -5185,20 +5270,14 @@ brw_wm_fs_emit(struct brw_context *brw, prog_data->no_8 = false; } - fs_generator g(brw->intelScreen->compiler, brw, - mem_ctx, (void *) key, &prog_data->base, - &fp->Base, v.promoted_constants, v.runtime_check_aads_emit, "FS"); + fs_generator g(compiler, log_data, mem_ctx, (void *) key, &prog_data->base, + v.promoted_constants, v.runtime_check_aads_emit, "FS"); if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - char *name; - if (prog) - name = ralloc_asprintf(mem_ctx, "%s fragment shader %d", - prog->Label ? prog->Label : "unnamed", - prog->Name); - else - name = ralloc_asprintf(mem_ctx, "fragment program %d", fp->Base.Id); - - g.enable_debug(name); + g.enable_debug(ralloc_asprintf(mem_ctx, "%s fragment shader %s", + shader->info.label ? 
shader->info.label : + "unnamed", + shader->info.name)); } if (simd8_cfg) @@ -5283,29 +5362,32 @@ fs_visitor::emit_cs_work_group_id_setup() } const unsigned * -brw_cs_emit(struct brw_context *brw, - void *mem_ctx, - const struct brw_cs_prog_key *key, - struct brw_cs_prog_data *prog_data, - struct gl_compute_program *cp, - struct gl_shader_program *prog, - int shader_time_index, - unsigned *final_assembly_size) +brw_compile_cs(const struct brw_compiler *compiler, void *log_data, + void *mem_ctx, + const struct brw_cs_prog_key *key, + struct brw_cs_prog_data *prog_data, + const nir_shader *shader, + int shader_time_index, + unsigned *final_assembly_size, + char **error_str) { - prog_data->local_size[0] = cp->LocalSize[0]; - prog_data->local_size[1] = cp->LocalSize[1]; - prog_data->local_size[2] = cp->LocalSize[2]; + prog_data->local_size[0] = shader->info.cs.local_size[0]; + prog_data->local_size[1] = shader->info.cs.local_size[1]; + prog_data->local_size[2] = shader->info.cs.local_size[2]; unsigned local_workgroup_size = - cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2]; - unsigned max_cs_threads = brw->intelScreen->compiler->devinfo->max_cs_threads; + shader->info.cs.local_size[0] * shader->info.cs.local_size[1] * + shader->info.cs.local_size[2]; + + unsigned max_cs_threads = compiler->devinfo->max_cs_threads; cfg_t *cfg = NULL; const char *fail_msg = NULL; /* Now the main event: Visit the shader IR and generate our CS IR for it. 
*/ - fs_visitor v8(brw->intelScreen->compiler, brw, mem_ctx, key, - &prog_data->base, &cp->Base, cp->Base.nir, 8, shader_time_index); + fs_visitor v8(compiler, log_data, mem_ctx, key, &prog_data->base, + NULL, /* Never used in core profile */ + shader, 8, shader_time_index); if (!v8.run_cs()) { fail_msg = v8.fail_msg; } else if (local_workgroup_size <= 8 * max_cs_threads) { @@ -5313,15 +5395,18 @@ brw_cs_emit(struct brw_context *brw, prog_data->simd_size = 8; } - fs_visitor v16(brw->intelScreen->compiler, brw, mem_ctx, key, - &prog_data->base, &cp->Base, cp->Base.nir, 16, shader_time_index); + fs_visitor v16(compiler, log_data, mem_ctx, key, &prog_data->base, + NULL, /* Never used in core profile */ + shader, 16, shader_time_index); if (likely(!(INTEL_DEBUG & DEBUG_NO16)) && !fail_msg && !v8.simd16_unsupported && local_workgroup_size <= 16 * max_cs_threads) { /* Try a SIMD16 compile */ v16.import_uniforms(&v8); if (!v16.run_cs()) { - perf_debug("SIMD16 shader failed to compile: %s", v16.fail_msg); + compiler->shader_perf_log(log_data, + "SIMD16 shader failed to compile: %s", + v16.fail_msg); if (!cfg) { fail_msg = "Couldn't generate SIMD16 program and not " @@ -5335,20 +5420,19 @@ brw_cs_emit(struct brw_context *brw, if (unlikely(cfg == NULL)) { assert(fail_msg); - prog->LinkStatus = false; - ralloc_strcat(&prog->InfoLog, fail_msg); - _mesa_problem(NULL, "Failed to compile compute shader: %s\n", - fail_msg); + if (error_str) + *error_str = ralloc_strdup(mem_ctx, fail_msg); + return NULL; } - fs_generator g(brw->intelScreen->compiler, brw, - mem_ctx, (void*) key, &prog_data->base, &cp->Base, + fs_generator g(compiler, log_data, mem_ctx, (void*) key, &prog_data->base, v8.promoted_constants, v8.runtime_check_aads_emit, "CS"); if (INTEL_DEBUG & DEBUG_CS) { - char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d", - prog->Label ? prog->Label : "unnamed", - prog->Name); + char *name = ralloc_asprintf(mem_ctx, "%s compute shader %s", + shader->info.label ? 
shader->info.label : + "unnamed", + shader->info.name); g.enable_debug(name); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index e8b511f9ce6..171338dcc0b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -48,7 +48,7 @@ extern "C" { #include "brw_wm.h" #include "intel_asm_annotation.h" } -#include "glsl/glsl_types.h" +#include "glsl/nir/glsl_types.h" #include "glsl/ir.h" #include "glsl/nir/nir.h" #include "program/sampler.h" @@ -96,7 +96,7 @@ public: const void *key, struct brw_stage_prog_data *prog_data, struct gl_program *prog, - nir_shader *shader, + const nir_shader *shader, unsigned dispatch_width, int shader_time_index); @@ -400,7 +400,6 @@ public: void *mem_ctx, const void *key, struct brw_stage_prog_data *prog_data, - struct gl_program *fp, unsigned promoted_constants, bool runtime_check_aads_emit, const char *stage_abbrev); @@ -499,8 +498,6 @@ private: const void * const key; struct brw_stage_prog_data * const prog_data; - const struct gl_program *prog; - unsigned dispatch_width; /**< 8 or 16 */ exec_list discard_halt_patches; diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index 277b6cc3a60..a13d001291c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -45,7 +45,7 @@ #include "brw_wm.h" #include "glsl/ir.h" #include "glsl/ir_expression_flattening.h" -#include "glsl/glsl_types.h" +#include "glsl/nir/glsl_types.h" class ir_channel_expressions_visitor : public ir_hierarchical_visitor { public: diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 230b0caec47..5589716239a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -275,17 +275,6 @@ is_logic_op(enum opcode 
opcode) opcode == BRW_OPCODE_NOT); } -static bool -can_change_source_types(fs_inst *inst) -{ - return !inst->src[0].abs && !inst->src[0].negate && - inst->dst.type == inst->src[0].type && - (inst->opcode == BRW_OPCODE_MOV || - (inst->opcode == BRW_OPCODE_SEL && - inst->predicate != BRW_PREDICATE_NONE && - !inst->src[1].abs && !inst->src[1].negate)); -} - bool fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) { @@ -368,7 +357,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) if (has_source_modifiers && entry->dst.type != inst->src[arg].type && - !can_change_source_types(inst)) + !inst->can_change_types()) return false; if (devinfo->gen >= 8 && (entry->src.negate || entry->src.abs) && @@ -438,7 +427,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) * type. If we got here, then we can just change the source and * destination types of the instruction and keep going. */ - assert(can_change_source_types(inst)); + assert(inst->can_change_types()); for (int i = 0; i < inst->sources; i++) { inst->src[i].type = entry->dst.type; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 6f8b75e339f..13c495cd395 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -131,7 +131,6 @@ fs_generator::fs_generator(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const void *key, struct brw_stage_prog_data *prog_data, - struct gl_program *prog, unsigned promoted_constants, bool runtime_check_aads_emit, const char *stage_abbrev) @@ -139,7 +138,7 @@ fs_generator::fs_generator(const struct brw_compiler *compiler, void *log_data, : compiler(compiler), log_data(log_data), devinfo(compiler->devinfo), key(key), prog_data(prog_data), - prog(prog), promoted_constants(promoted_constants), + promoted_constants(promoted_constants), runtime_check_aads_emit(runtime_check_aads_emit), 
debug_flag(false), stage_abbrev(stage_abbrev), mem_ctx(mem_ctx) { @@ -1377,15 +1376,14 @@ fs_generator::generate_pixel_interpolator_query(fs_inst *inst, struct brw_reg msg_data, unsigned msg_type) { - assert(msg_data.file == BRW_IMMEDIATE_VALUE && - msg_data.type == BRW_REGISTER_TYPE_UD); + assert(msg_data.type == BRW_REGISTER_TYPE_UD); brw_pixel_interpolator_query(p, retype(dst, BRW_REGISTER_TYPE_UW), src, inst->pi_noperspective, msg_type, - msg_data.dw1.ud, + msg_data, inst->mlen, inst->regs_written); } @@ -2188,7 +2186,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) 100.0f * (before_size - after_size) / before_size); dump_assembly(p->store, annotation.ann_count, annotation.ann, - p->devinfo, prog); + p->devinfo); ralloc_free(annotation.ann); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp index 19aec92fad1..ce066a9778e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp @@ -259,16 +259,15 @@ fs_live_variables::compute_start_end() struct block_data *bd = &block_data[block->num]; for (int i = 0; i < num_vars; i++) { - if (BITSET_TEST(bd->livein, i)) { - start[i] = MIN2(start[i], block->start_ip); - end[i] = MAX2(end[i], block->start_ip); - } - - if (BITSET_TEST(bd->liveout, i)) { - start[i] = MIN2(start[i], block->end_ip); - end[i] = MAX2(end[i], block->end_ip); - } + if (BITSET_TEST(bd->livein, i)) { + start[i] = MIN2(start[i], block->start_ip); + end[i] = MAX2(end[i], block->start_ip); + } + if (BITSET_TEST(bd->liveout, i)) { + start[i] = MIN2(start[i], block->end_ip); + end[i] = MAX2(end[i], block->end_ip); + } } } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 45c3f4ef3b4..feedbfbb2e3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -56,61 +56,25 @@ fs_visitor::emit_nir_code() void 
fs_visitor::nir_setup_inputs() { + if (stage != MESA_SHADER_FRAGMENT) + return; + nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_inputs); nir_foreach_variable(var, &nir->inputs) { - enum brw_reg_type type = brw_type_for_base_type(var->type); fs_reg input = offset(nir_inputs, bld, var->data.driver_location); fs_reg reg; - switch (stage) { - case MESA_SHADER_VERTEX: { - /* Our ATTR file is indexed by VERT_ATTRIB_*, which is the value - * stored in nir_variable::location. - * - * However, NIR's load_input intrinsics use a different index - an - * offset into a single contiguous array containing all inputs. - * This index corresponds to the nir_variable::driver_location field. - * - * So, we need to copy from fs_reg(ATTR, var->location) to - * offset(nir_inputs, var->data.driver_location). - */ - const glsl_type *const t = var->type->without_array(); - const unsigned components = t->components(); - const unsigned cols = t->matrix_columns; - const unsigned elts = t->vector_elements; - unsigned array_length = var->type->is_array() ? 
var->type->length : 1; - for (unsigned i = 0; i < array_length; i++) { - for (unsigned j = 0; j < cols; j++) { - for (unsigned k = 0; k < elts; k++) { - bld.MOV(offset(retype(input, type), bld, - components * i + elts * j + k), - offset(fs_reg(ATTR, var->data.location + i, type), - bld, 4 * j + k)); - } - } - } - break; - } - case MESA_SHADER_GEOMETRY: - case MESA_SHADER_COMPUTE: - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_TESS_EVAL: - unreachable("fs_visitor not used for these stages yet."); - break; - case MESA_SHADER_FRAGMENT: - if (var->data.location == VARYING_SLOT_POS) { - reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer, - var->data.origin_upper_left); - emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(), - input, reg), 0xF); - } else { - emit_general_interpolation(input, var->name, var->type, - (glsl_interp_qualifier) var->data.interpolation, - var->data.location, var->data.centroid, - var->data.sample); - } - break; + if (var->data.location == VARYING_SLOT_POS) { + reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer, + var->data.origin_upper_left); + emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(), + input, reg), 0xF); + } else { + emit_general_interpolation(input, var->name, var->type, + (glsl_interp_qualifier) var->data.interpolation, + var->data.location, var->data.centroid, + var->data.sample); } } } @@ -125,9 +89,7 @@ fs_visitor::nir_setup_outputs() nir_foreach_variable(var, &nir->outputs) { fs_reg reg = offset(nir_outputs, bld, var->data.driver_location); - int vector_elements = - var->type->is_array() ? 
var->type->fields.array->vector_elements - : var->type->vector_elements; + int vector_elements = var->type->without_array()->vector_elements; switch (stage) { case MESA_SHADER_VERTEX: @@ -1180,6 +1142,36 @@ get_image_atomic_op(nir_intrinsic_op op, const glsl_type *type) } } +static fs_inst * +emit_pixel_interpolater_send(const fs_builder &bld, + enum opcode opcode, + const fs_reg &dst, + const fs_reg &src, + const fs_reg &desc, + glsl_interp_qualifier interpolation) +{ + fs_inst *inst; + fs_reg payload; + int mlen; + + if (src.file == BAD_FILE) { + /* Dummy payload */ + payload = bld.vgrf(BRW_REGISTER_TYPE_F, 1); + mlen = 1; + } else { + payload = src; + mlen = 2 * bld.dispatch_width() / 8; + } + + inst = bld.emit(opcode, dst, payload, desc); + inst->mlen = mlen; + /* 2 floats per slot returned */ + inst->regs_written = 2 * bld.dispatch_width() / 8; + inst->pi_noperspective = interpolation == INTERP_QUALIFIER_NOPERSPECTIVE; + + return inst; +} + void fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) { @@ -1440,7 +1432,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr */ brw_mark_surface_used(prog_data, stage_prog_data->binding_table.ubo_start + - nir->info.num_ssbos - 1); + nir->info.num_ubos - 1); } if (has_indirect) { @@ -1488,21 +1480,21 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg surf_index; if (const_uniform_block) { - unsigned index = stage_prog_data->binding_table.ubo_start + + unsigned index = stage_prog_data->binding_table.ssbo_start + const_uniform_block->u[0]; surf_index = fs_reg(index); brw_mark_surface_used(prog_data, index); } else { surf_index = vgrf(glsl_type::uint_type); bld.ADD(surf_index, get_nir_src(instr->src[0]), - fs_reg(stage_prog_data->binding_table.ubo_start)); + fs_reg(stage_prog_data->binding_table.ssbo_start)); surf_index = bld.emit_uniformize(surf_index); /* Assume this may touch any UBO. 
It would be nice to provide * a tighter bound, but the array information is already lowered away. */ brw_mark_surface_used(prog_data, - stage_prog_data->binding_table.ubo_start + + stage_prog_data->binding_table.ssbo_start + nir->info.num_ssbos - 1); } @@ -1545,8 +1537,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_load_input: { unsigned index = 0; for (unsigned j = 0; j < instr->num_components; j++) { - fs_reg src = offset(retype(nir_inputs, dest.type), bld, - instr->const_index[0] + index); + fs_reg src; + if (stage == MESA_SHADER_VERTEX) { + src = offset(fs_reg(ATTR, instr->const_index[0], dest.type), bld, index); + } else { + src = offset(retype(nir_inputs, dest.type), bld, + instr->const_index[0] + index); + } if (has_indirect) src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); index++; @@ -1583,28 +1580,81 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr ((struct brw_wm_prog_data *) prog_data)->pulls_bary = true; fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2); - - /* For most messages, we need one reg of ignored data; the hardware - * requires mlen==1 even when there is no payload. in the per-slot - * offset case, we'll replace this with the proper source data. 
- */ - fs_reg src = vgrf(glsl_type::float_type); - int mlen = 1; /* one reg unless overriden */ - fs_inst *inst; + const glsl_interp_qualifier interpolation = + (glsl_interp_qualifier) instr->variables[0]->var->data.interpolation; switch (instr->intrinsic) { case nir_intrinsic_interp_var_at_centroid: - inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, - dst_xy, src, fs_reg(0u)); + emit_pixel_interpolater_send(bld, + FS_OPCODE_INTERPOLATE_AT_CENTROID, + dst_xy, + fs_reg(), /* src */ + fs_reg(0u), + interpolation); break; case nir_intrinsic_interp_var_at_sample: { - /* XXX: We should probably handle non-constant sample id's */ nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]); - assert(const_sample); - unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0; - inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, - fs_reg(msg_data)); + + if (const_sample) { + unsigned msg_data = const_sample->i[0] << 4; + + emit_pixel_interpolater_send(bld, + FS_OPCODE_INTERPOLATE_AT_SAMPLE, + dst_xy, + fs_reg(), /* src */ + fs_reg(msg_data), + interpolation); + } else { + const fs_reg sample_src = retype(get_nir_src(instr->src[0]), + BRW_REGISTER_TYPE_UD); + + if (nir_src_is_dynamically_uniform(instr->src[0])) { + const fs_reg sample_id = bld.emit_uniformize(sample_src); + const fs_reg msg_data = vgrf(glsl_type::uint_type); + bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u)); + emit_pixel_interpolater_send(bld, + FS_OPCODE_INTERPOLATE_AT_SAMPLE, + dst_xy, + fs_reg(), /* src */ + msg_data, + interpolation); + } else { + /* Make a loop that sends a message to the pixel interpolater + * for the sample number in each live channel. If there are + * multiple channels with the same sample number then these + * will be handled simultaneously with a single interation of + * the loop. 
+ */ + bld.emit(BRW_OPCODE_DO); + + /* Get the next live sample number into sample_id_reg */ + const fs_reg sample_id = bld.emit_uniformize(sample_src); + + /* Set the flag register so that we can perform the send + * message on all channels that have the same sample number + */ + bld.CMP(bld.null_reg_ud(), + sample_src, sample_id, + BRW_CONDITIONAL_EQ); + const fs_reg msg_data = vgrf(glsl_type::uint_type); + bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u)); + fs_inst *inst = + emit_pixel_interpolater_send(bld, + FS_OPCODE_INTERPOLATE_AT_SAMPLE, + dst_xy, + fs_reg(), /* src */ + msg_data, + interpolation); + set_predicate(BRW_PREDICATE_NORMAL, inst); + + /* Continue the loop if there are any live channels left */ + set_predicate_inv(BRW_PREDICATE_NORMAL, + true, /* inverse */ + bld.emit(BRW_OPCODE_WHILE)); + } + } + break; } @@ -1615,10 +1665,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf; unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf; - inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src, - fs_reg(off_x | (off_y << 4))); + emit_pixel_interpolater_send(bld, + FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, + dst_xy, + fs_reg(), /* src */ + fs_reg(off_x | (off_y << 4)), + interpolation); } else { - src = vgrf(glsl_type::ivec2_type); + fs_reg src = vgrf(glsl_type::ivec2_type); fs_reg offset_src = retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_F); for (int i = 0; i < 2; i++) { @@ -1646,9 +1700,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr bld.SEL(offset(src, bld, i), itemp, fs_reg(7))); } - mlen = 2 * dispatch_width / 8; - inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src, - fs_reg(0u)); + const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET; + emit_pixel_interpolater_send(bld, + opcode, + dst_xy, + src, + fs_reg(0u), + interpolation); } break; } 
@@ -1657,12 +1715,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr unreachable("Invalid intrinsic"); } - inst->mlen = mlen; - /* 2 floats per slot returned */ - inst->regs_written = 2 * dispatch_width / 8; - inst->pi_noperspective = instr->variables[0]->var->data.interpolation == - INTERP_QUALIFIER_NOPERSPECTIVE; - for (unsigned j = 0; j < instr->num_components; j++) { fs_reg src = interp_reg(instr->variables[0]->var->data.location, j); src.type = dest.type; @@ -1684,18 +1736,18 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[1]); if (const_uniform_block) { - unsigned index = stage_prog_data->binding_table.ubo_start + + unsigned index = stage_prog_data->binding_table.ssbo_start + const_uniform_block->u[0]; surf_index = fs_reg(index); brw_mark_surface_used(prog_data, index); } else { surf_index = vgrf(glsl_type::uint_type); bld.ADD(surf_index, get_nir_src(instr->src[1]), - fs_reg(stage_prog_data->binding_table.ubo_start)); + fs_reg(stage_prog_data->binding_table.ssbo_start)); surf_index = bld.emit_uniformize(surf_index); brw_mark_surface_used(prog_data, - stage_prog_data->binding_table.ubo_start + + stage_prog_data->binding_table.ssbo_start + nir->info.num_ssbos - 1); } @@ -1780,17 +1832,17 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_ssbo_atomic_add: nir_emit_ssbo_atomic(bld, BRW_AOP_ADD, instr); break; - case nir_intrinsic_ssbo_atomic_min: - if (dest.type == BRW_REGISTER_TYPE_D) - nir_emit_ssbo_atomic(bld, BRW_AOP_IMIN, instr); - else - nir_emit_ssbo_atomic(bld, BRW_AOP_UMIN, instr); + case nir_intrinsic_ssbo_atomic_imin: + nir_emit_ssbo_atomic(bld, BRW_AOP_IMIN, instr); break; - case nir_intrinsic_ssbo_atomic_max: - if (dest.type == BRW_REGISTER_TYPE_D) - nir_emit_ssbo_atomic(bld, BRW_AOP_IMAX, instr); - else - nir_emit_ssbo_atomic(bld, BRW_AOP_UMAX, instr); + 
case nir_intrinsic_ssbo_atomic_umin: + nir_emit_ssbo_atomic(bld, BRW_AOP_UMIN, instr); + break; + case nir_intrinsic_ssbo_atomic_imax: + nir_emit_ssbo_atomic(bld, BRW_AOP_IMAX, instr); + break; + case nir_intrinsic_ssbo_atomic_umax: + nir_emit_ssbo_atomic(bld, BRW_AOP_UMAX, instr); break; case nir_intrinsic_ssbo_atomic_and: nir_emit_ssbo_atomic(bld, BRW_AOP_AND, instr); @@ -1810,7 +1862,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_get_buffer_size: { nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]); - unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0; + unsigned ssbo_index = const_uniform_block ? const_uniform_block->u[0] : 0; int reg_width = dispatch_width / 8; /* Set LOD = 0 */ @@ -1821,7 +1873,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr BRW_REGISTER_TYPE_UD); bld.LOAD_PAYLOAD(src_payload, &source, 1, 0); - fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start + ubo_index); + fs_reg surf_index = fs_reg(prog_data->binding_table.ssbo_start + ssbo_index); fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, dest, src_payload, surf_index); inst->header_size = 0; @@ -1874,20 +1926,20 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, fs_reg surface; nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]); if (const_surface) { - unsigned surf_index = stage_prog_data->binding_table.ubo_start + + unsigned surf_index = stage_prog_data->binding_table.ssbo_start + const_surface->u[0]; surface = fs_reg(surf_index); brw_mark_surface_used(prog_data, surf_index); } else { surface = vgrf(glsl_type::uint_type); bld.ADD(surface, get_nir_src(instr->src[0]), - fs_reg(stage_prog_data->binding_table.ubo_start)); + fs_reg(stage_prog_data->binding_table.ssbo_start)); - /* Assume this may touch any UBO. This is the same we do for other + /* Assume this may touch any SSBO. 
This is the same we do for other * UBO/SSBO accesses with non-constant surface. */ brw_mark_surface_used(prog_data, - stage_prog_data->binding_table.ubo_start + + stage_prog_data->binding_table.ssbo_start + nir->info.num_ssbos - 1); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index c3a037be4b1..36388fad98d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -27,7 +27,7 @@ #include "brw_fs.h" #include "brw_cfg.h" -#include "glsl/glsl_types.h" +#include "glsl/nir/glsl_types.h" #include "glsl/ir_optimization.h" using namespace brw; diff --git a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp index e406c2899e8..8792a8c7b1d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp @@ -52,11 +52,12 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block) ip--; if (inst->opcode != BRW_OPCODE_MOV || + !inst->saturate || inst->dst.file != GRF || + inst->dst.type != inst->src[0].type || inst->src[0].file != GRF || inst->src[0].abs || - inst->src[0].negate || - !inst->saturate) + inst->src[0].negate) continue; int src_var = v->live_intervals->var_from_reg(inst->src[0]); @@ -65,7 +66,9 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block) bool interfered = false; foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst, block) { if (scan_inst->overwrites_reg(inst->src[0])) { - if (scan_inst->is_partial_write()) + if (scan_inst->is_partial_write() || + (scan_inst->dst.type != inst->dst.type && + !scan_inst->can_change_types())) break; if (scan_inst->saturate) { @@ -73,6 +76,12 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block) progress = true; } else if (src_end_ip <= ip || inst->dst.equals(inst->src[0])) { if (scan_inst->can_do_saturate()) { + 
if (scan_inst->dst.type != inst->dst.type) { + scan_inst->dst.type = inst->dst.type; + for (int i = 0; i < scan_inst->sources; i++) { + scan_inst->src[i].type = inst->dst.type; + } + } scan_inst->saturate = true; inst->saturate = false; progress = true; diff --git a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp index d0e04f3bf47..814c551f1be 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp @@ -32,7 +32,7 @@ #define fsv_assert(cond) \ if (!(cond)) { \ - fprintf(stderr, "ASSERT: FS validation failed!\n"); \ + fprintf(stderr, "ASSERT: Scalar %s validation failed!\n", stage_abbrev); \ dump_instruction(inst, stderr); \ fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, #cond); \ abort(); \ diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp index 6000e35b9b9..cab5af318a2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp @@ -42,7 +42,7 @@ #include "glsl/ir.h" #include "glsl/ir_visitor.h" #include "glsl/ir_rvalue_visitor.h" -#include "glsl/glsl_types.h" +#include "glsl/nir/glsl_types.h" #include "util/hash_table.h" static bool debug = false; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index df1a7ed9b59..f825fed4daf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -43,7 +43,7 @@ #include "brw_vec4.h" #include "brw_fs.h" #include "main/uniforms.h" -#include "glsl/glsl_types.h" +#include "glsl/nir/glsl_types.h" #include "glsl/ir_optimization.h" #include "program/sampler.h" @@ -53,7 +53,8 @@ fs_reg * fs_visitor::emit_vs_system_value(int location) { fs_reg *reg = new(this->mem_ctx) - fs_reg(ATTR, VERT_ATTRIB_MAX, BRW_REGISTER_TYPE_D); + fs_reg(ATTR, 4 * _mesa_bitcount_64(nir->info.inputs_read), + 
BRW_REGISTER_TYPE_D); brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data; switch (location) { @@ -903,12 +904,9 @@ fs_visitor::emit_urb_writes() urb_offset = 0; flush = false; for (slot = 0; slot < vue_map->num_slots; slot++) { - fs_reg reg, src, zero; - int varying = vue_map->slot_to_varying[slot]; switch (varying) { - case VARYING_SLOT_PSIZ: - + case VARYING_SLOT_PSIZ: { /* The point size varying slot is the vue header and is always in the * vue map. But often none of the special varyings that live there * are written and in that case we can skip writing to the vue @@ -920,7 +918,7 @@ fs_visitor::emit_urb_writes() break; } - zero = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); + fs_reg zero(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); bld.MOV(zero, fs_reg(0u)); sources[length++] = zero; @@ -939,7 +937,7 @@ fs_visitor::emit_urb_writes() else sources[length++] = zero; break; - + } case BRW_VARYING_SLOT_NDC: case VARYING_SLOT_EDGE: unreachable("unexpected scalar vs output"); @@ -972,8 +970,8 @@ fs_visitor::emit_urb_writes() * temp register and use that for the payload. 
*/ for (int i = 0; i < 4; i++) { - reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type); - src = offset(this->outputs[varying], bld, i); + fs_reg reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type); + fs_reg src = offset(this->outputs[varying], bld, i); set_saturate(true, bld.MOV(reg, src)); sources[length++] = reg; } @@ -1069,7 +1067,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, const void *key, struct brw_stage_prog_data *prog_data, struct gl_program *prog, - nir_shader *shader, + const nir_shader *shader, unsigned dispatch_width, int shader_time_index) : backend_shader(compiler, log_data, mem_ctx, shader, prog_data), diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index e0165fb4a23..10a7f28fdab 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -57,6 +57,7 @@ brw_codegen_gs_prog(struct brw_context *brw, struct brw_geometry_program *gp, struct brw_gs_prog_key *key) { + struct gl_shader *shader = prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; struct brw_stage_state *stage_state = &brw->gs.base; struct brw_gs_compile c; memset(&c, 0, sizeof(c)); @@ -300,8 +301,11 @@ brw_codegen_gs_prog(struct brw_context *brw, void *mem_ctx = ralloc_context(NULL); unsigned program_size; + char *error_str; const unsigned *program = - brw_gs_emit(brw, prog, &c, mem_ctx, st_index, &program_size); + brw_compile_gs(brw->intelScreen->compiler, brw, &c, + shader->Program->nir, prog, + mem_ctx, st_index, &program_size, &error_str); if (program == NULL) { ralloc_free(mem_ctx); return false; diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c index 0bb307432d0..00125c0f405 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c @@ -129,7 +129,7 @@ brw_upload_gs_image_surfaces(struct brw_context *brw) 
ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]; if (prog) { - /* BRW_NEW_GS_PROG_DATA, BRW_NEW_IMAGE_UNITS */ + /* BRW_NEW_GS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */ brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY], &brw->gs.base, &brw->gs.prog_data->base.base); } @@ -137,6 +137,7 @@ brw_upload_gs_image_surfaces(struct brw_context *brw) const struct brw_tracked_state brw_gs_image_surfaces = { .dirty = { + .mesa = _NEW_TEXTURE, .brw = BRW_NEW_BATCH | BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_GS_PROG_DATA | diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index 97c6f8b2500..7726e4b78a0 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -204,6 +204,7 @@ public: unsigned components_read(unsigned i) const; int regs_read(int arg) const; bool can_do_source_mods(const struct brw_device_info *devinfo); + bool can_change_types() const; bool has_side_effects() const; bool reads_flag() const; diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h index 96dd633e117..1b57b65db27 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h @@ -179,6 +179,7 @@ public: int swizzle, int swizzle_mask); void reswizzle(int dst_writemask, int swizzle); bool can_do_source_mods(const struct brw_device_info *devinfo); + bool can_change_types() const; bool reads_flag() { diff --git a/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp b/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp index 8c59b9e415b..4219d471def 100644 --- a/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp +++ b/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp @@ -31,7 +31,7 @@ * \author Chris Forbes <[email protected]> */ -#include "glsl/glsl_types.h" +#include "glsl/nir/glsl_types.h" #include "glsl/ir.h" #include "glsl/ir_builder.h" diff --git 
a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index eb201736c6e..fbde3f04204 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -451,6 +451,11 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_NO_MCS) clear_type = REP_CLEAR; + if (brw->gen >= 9 && clear_type == FAST_CLEAR) { + perf_debug("fast MCS clears are disabled on gen9"); + clear_type = REP_CLEAR; + } + /* We can't do scissored fast clears because of the restrictions on the * fast clear rectangle size. */ diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 0a9c09f1075..dc497770914 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -27,30 +27,112 @@ #include "glsl/nir/glsl_to_nir.h" #include "program/prog_to_nir.h" +static bool +remap_vs_attrs(nir_block *block, void *closure) +{ + GLbitfield64 inputs_read = *((GLbitfield64 *) closure); + + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + /* We set EmitNoIndirect for VS inputs, so there are no indirects. */ + assert(intrin->intrinsic != nir_intrinsic_load_input_indirect); + + if (intrin->intrinsic == nir_intrinsic_load_input) { + /* Attributes come in a contiguous block, ordered by their + * gl_vert_attrib value. That means we can compute the slot + * number for an attribute by masking out the enabled attributes + * before it and counting the bits. 
+ */ + int attr = intrin->const_index[0]; + int slot = _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr)); + intrin->const_index[0] = 4 * slot; + } + } + return true; +} + static void brw_nir_lower_inputs(nir_shader *nir, bool is_scalar) { switch (nir->stage) { + case MESA_SHADER_VERTEX: + /* For now, leave the vec4 backend doing the old method. */ + if (!is_scalar) { + nir_assign_var_locations(&nir->inputs, &nir->num_inputs, + type_size_vec4); + break; + } + + /* Start with the location of the variable's base. */ + foreach_list_typed(nir_variable, var, node, &nir->inputs) { + var->data.driver_location = var->data.location; + } + + /* Now use nir_lower_io to walk dereference chains. Attribute arrays + * are loaded as one vec4 per element (or matrix column), so we use + * type_size_vec4 here. + */ + nir_lower_io(nir, nir_var_shader_in, type_size_vec4); + + /* Finally, translate VERT_ATTRIB_* values into the actual registers. + * + * Note that we can use nir->info.inputs_read instead of key->inputs_read + * since the two are identical aside from Gen4-5 edge flag differences. + */ + GLbitfield64 inputs_read = nir->info.inputs_read; + nir_foreach_overload(nir, overload) { + if (overload->impl) { + nir_foreach_block(overload->impl, remap_vs_attrs, &inputs_read); + } + } + break; case MESA_SHADER_GEOMETRY: foreach_list_typed(nir_variable, var, node, &nir->inputs) { var->data.driver_location = var->data.location; } break; - default: + case MESA_SHADER_FRAGMENT: + assert(is_scalar); nir_assign_var_locations(&nir->inputs, &nir->num_inputs, - is_scalar ? type_size_scalar : type_size_vec4); + type_size_scalar); + break; + case MESA_SHADER_COMPUTE: + /* Compute shaders have no inputs. 
*/ + assert(exec_list_is_empty(&nir->inputs)); break; + default: + unreachable("unsupported shader stage"); } } static void brw_nir_lower_outputs(nir_shader *nir, bool is_scalar) { - if (is_scalar) { - nir_assign_var_locations(&nir->outputs, &nir->num_outputs, type_size_scalar); - } else { - nir_foreach_variable(var, &nir->outputs) - var->data.driver_location = var->data.location; + switch (nir->stage) { + case MESA_SHADER_VERTEX: + case MESA_SHADER_GEOMETRY: + if (is_scalar) { + nir_assign_var_locations(&nir->outputs, &nir->num_outputs, + type_size_scalar); + } else { + nir_foreach_variable(var, &nir->outputs) + var->data.driver_location = var->data.location; + } + break; + case MESA_SHADER_FRAGMENT: + nir_assign_var_locations(&nir->outputs, &nir->num_outputs, + type_size_scalar); + break; + case MESA_SHADER_COMPUTE: + /* Compute shaders have no outputs. */ + assert(exec_list_is_empty(&nir->outputs)); + break; + default: + unreachable("unsupported shader stage"); } } diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index dbd0e50228b..22b0227756e 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -69,8 +69,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx, if (prog) { prog->id = get_new_program_id(brw->intelScreen); - return _mesa_init_vertex_program( ctx, &prog->program, - target, id ); + return _mesa_init_gl_program(&prog->program.Base, target, id); } else return NULL; @@ -81,8 +80,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx, if (prog) { prog->id = get_new_program_id(brw->intelScreen); - return _mesa_init_fragment_program( ctx, &prog->program, - target, id ); + return _mesa_init_gl_program(&prog->program.Base, target, id); } else return NULL; @@ -93,7 +91,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx, if (prog) { prog->id = get_new_program_id(brw->intelScreen); - return _mesa_init_geometry_program(ctx, 
&prog->program, target, id); + return _mesa_init_gl_program(&prog->program, target, id); } else { return NULL; } @@ -104,7 +102,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx, if (prog) { prog->id = get_new_program_id(brw->intelScreen); - return _mesa_init_compute_program(ctx, &prog->program, target, id); + return _mesa_init_gl_program(&prog->program.Base, target, id); } else { return NULL; } diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h index cf0522a8b10..f8cf2b062c8 100644 --- a/src/mesa/drivers/dri/i965/brw_program.h +++ b/src/mesa/drivers/dri/i965/brw_program.h @@ -24,129 +24,7 @@ #ifndef BRW_PROGRAM_H #define BRW_PROGRAM_H -/** - * Program key structures. - * - * When drawing, we look for the currently bound shaders in the program - * cache. This is essentially a hash table lookup, and these are the keys. - * - * Sometimes OpenGL features specified as state need to be simulated via - * shader code, due to a mismatch between the API and the hardware. This - * is often referred to as "non-orthagonal state" or "NOS". We store NOS - * in the program key so it's considered when searching for a program. If - * we haven't seen a particular combination before, we have to recompile a - * new specialized version. - * - * Shader compilation should not look up state in gl_context directly, but - * instead use the copy in the program key. This guarantees recompiles will - * happen correctly. - * - * @{ - */ - -enum PACKED gen6_gather_sampler_wa { - WA_SIGN = 1, /* whether we need to sign extend */ - WA_8BIT = 2, /* if we have an 8bit format needing wa */ - WA_16BIT = 4, /* if we have a 16bit format needing wa */ -}; - -/** - * Sampler information needed by VS, WM, and GS program cache keys. - */ -struct brw_sampler_prog_key_data { - /** - * EXT_texture_swizzle and DEPTH_TEXTURE_MODE swizzles. 
- */ - uint16_t swizzles[MAX_SAMPLERS]; - - uint32_t gl_clamp_mask[3]; - - /** - * For RG32F, gather4's channel select is broken. - */ - uint32_t gather_channel_quirk_mask; - - /** - * Whether this sampler uses the compressed multisample surface layout. - */ - uint32_t compressed_multisample_layout_mask; - - /** - * For Sandybridge, which shader w/a we need for gather quirks. - */ - enum gen6_gather_sampler_wa gen6_gather_wa[MAX_SAMPLERS]; -}; - - -/** The program key for Vertex Shaders. */ -struct brw_vs_prog_key { - unsigned program_string_id; - - /* - * Per-attribute workaround flags - */ - uint8_t gl_attrib_wa_flags[VERT_ATTRIB_MAX]; - - bool copy_edgeflag:1; - - bool clamp_vertex_color:1; - - /** - * How many user clipping planes are being uploaded to the vertex shader as - * push constants. - * - * These are used for lowering legacy gl_ClipVertex/gl_Position clipping to - * clip distances. - */ - unsigned nr_userclip_plane_consts:4; - - /** - * For pre-Gen6 hardware, a bitfield indicating which texture coordinates - * are going to be replaced with point coordinates (as a consequence of a - * call to glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)). Because - * our SF thread requires exact matching between VS outputs and FS inputs, - * these texture coordinates will need to be unconditionally included in - * the VUE, even if they aren't written by the vertex shader. - */ - uint8_t point_coord_replace; - - struct brw_sampler_prog_key_data tex; -}; - -/** The program key for Geometry Shaders. */ -struct brw_gs_prog_key -{ - unsigned program_string_id; - - struct brw_sampler_prog_key_data tex; -}; - -/** The program key for Fragment/Pixel Shaders. 
*/ -struct brw_wm_prog_key { - uint8_t iz_lookup; - bool stats_wm:1; - bool flat_shade:1; - bool persample_shading:1; - bool persample_2x:1; - unsigned nr_color_regions:5; - bool replicate_alpha:1; - bool render_to_fbo:1; - bool clamp_fragment_color:1; - bool compute_pos_offset:1; - bool compute_sample_id:1; - unsigned line_aa:2; - bool high_quality_derivatives:1; - - uint16_t drawable_height; - uint64_t input_slots_valid; - unsigned program_string_id; - GLenum alpha_test_func; /* < For Gen4/5 MRT alpha test */ - float alpha_test_ref; - - struct brw_sampler_prog_key_data tex; -}; - -/** @} */ +#include "brw_compiler.h" #ifdef __cplusplus extern "C" { diff --git a/src/mesa/drivers/dri/i965/brw_sampler_state.c b/src/mesa/drivers/dri/i965/brw_sampler_state.c index c2db5f69560..6d73444dad0 100644 --- a/src/mesa/drivers/dri/i965/brw_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_sampler_state.c @@ -44,6 +44,7 @@ #include "main/macros.h" #include "main/samplerobj.h" +#include "util/half_float.h" /** * Emit a 3DSTATE_SAMPLER_STATE_POINTERS_{VS,HS,GS,DS,PS} packet. 
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 4e43e5ccdbd..b710c60148c 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -29,7 +29,7 @@ #include "brw_vec4.h" #include "brw_cfg.h" #include "brw_shader.h" -#include "glsl/glsl_types.h" +#include "glsl/nir/glsl_types.h" #include "glsl/ir_optimization.h" using namespace brw; diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 3a58a58a00b..6be2a6e5b55 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -660,7 +660,7 @@ brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg) backend_shader::backend_shader(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, - nir_shader *shader, + const nir_shader *shader, struct brw_stage_prog_data *stage_prog_data) : compiler(compiler), log_data(log_data), @@ -1131,11 +1131,16 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage, next_binding_table_offset += num_textures; if (shader) { - assert(shader->NumUniformBlocks <= BRW_MAX_COMBINED_UBO_SSBO); + assert(shader->NumUniformBlocks <= BRW_MAX_UBO); stage_prog_data->binding_table.ubo_start = next_binding_table_offset; next_binding_table_offset += shader->NumUniformBlocks; + + assert(shader->NumShaderStorageBlocks <= BRW_MAX_SSBO); + stage_prog_data->binding_table.ssbo_start = next_binding_table_offset; + next_binding_table_offset += shader->NumShaderStorageBlocks; } else { stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0; + stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0; } if (INTEL_DEBUG & DEBUG_SHADER_TIME) { diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index ad2de5eae2d..b33b08f40d7 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ 
b/src/mesa/drivers/dri/i965/brw_shader.h @@ -38,64 +38,6 @@ #define MAX_SAMPLER_MESSAGE_SIZE 11 #define MAX_VGRF_SIZE 16 -struct brw_compiler { - const struct brw_device_info *devinfo; - - struct { - struct ra_regs *regs; - - /** - * Array of the ra classes for the unaligned contiguous register - * block sizes used. - */ - int *classes; - - /** - * Mapping for register-allocated objects in *regs to the first - * GRF for that object. - */ - uint8_t *ra_reg_to_grf; - } vec4_reg_set; - - struct { - struct ra_regs *regs; - - /** - * Array of the ra classes for the unaligned contiguous register - * block sizes used, indexed by register size. - */ - int classes[16]; - - /** - * Mapping from classes to ra_reg ranges. Each of the per-size - * classes corresponds to a range of ra_reg nodes. This array stores - * those ranges in the form of first ra_reg in each class and the - * total number of ra_reg elements in the last array element. This - * way the range of the i'th class is given by: - * [ class_to_ra_reg_range[i], class_to_ra_reg_range[i+1] ) - */ - int class_to_ra_reg_range[17]; - - /** - * Mapping for register-allocated objects in *regs to the first - * GRF for that object. - */ - uint8_t *ra_reg_to_grf; - - /** - * ra class for the aligned pairs we use for PLN, which doesn't - * appear in *classes. - */ - int aligned_pairs_class; - } fs_reg_sets[2]; - - void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3); - void (*shader_perf_log)(void *, const char *str, ...) 
PRINTFLIKE(2, 3); - - bool scalar_vs; - struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES]; -}; - enum PACKED register_file { BAD_FILE, GRF, @@ -225,7 +167,7 @@ protected: backend_shader(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, - nir_shader *shader, + const nir_shader *shader, struct brw_stage_prog_data *stage_prog_data); public: @@ -234,7 +176,7 @@ public: void *log_data; /* Passed to compiler->*_log functions */ const struct brw_device_info * const devinfo; - nir_shader *nir; + const nir_shader *nir; struct brw_stage_prog_data * const stage_prog_data; /** ralloc context for temporary data used during compile */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index e966b96a5ca..befc92445d3 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -280,6 +280,18 @@ vec4_instruction::can_do_source_mods(const struct brw_device_info *devinfo) return true; } +bool +vec4_instruction::can_change_types() const +{ + return dst.type == src[0].type && + !src[0].abs && !src[0].negate && !saturate && + (opcode == BRW_OPCODE_MOV || + (opcode == BRW_OPCODE_SEL && + dst.type == src[1].type && + predicate != BRW_PREDICATE_NONE && + !src[1].abs && !src[1].negate)); +} + /** * Returns how many MRFs an opcode will write over. * @@ -1632,28 +1644,11 @@ vec4_vs_visitor::setup_attributes(int payload_reg) */ if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) { attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes; - nr_attributes++; } lower_attributes_to_hw_regs(attribute_map, false /* interleaved */); - /* The BSpec says we always have to read at least one thing from - * the VF, and it appears that the hardware wedges otherwise. 
- */ - if (nr_attributes == 0) - nr_attributes = 1; - - prog_data->urb_read_length = (nr_attributes + 1) / 2; - - unsigned vue_entries = - MAX2(nr_attributes, prog_data->vue_map.num_slots); - - if (devinfo->gen == 6) - prog_data->urb_entry_size = ALIGN(vue_entries, 8) / 8; - else - prog_data->urb_entry_size = ALIGN(vue_entries, 4) / 4; - - return payload_reg + nr_attributes; + return payload_reg + vs_prog_data->nr_attributes; } int @@ -1937,51 +1932,76 @@ extern "C" { * Returns the final assembly and the program's size. */ const unsigned * -brw_vs_emit(struct brw_context *brw, - void *mem_ctx, - const struct brw_vs_prog_key *key, - struct brw_vs_prog_data *prog_data, - struct gl_vertex_program *vp, - struct gl_shader_program *prog, - int shader_time_index, - unsigned *final_assembly_size) +brw_compile_vs(const struct brw_compiler *compiler, void *log_data, + void *mem_ctx, + const struct brw_vs_prog_key *key, + struct brw_vs_prog_data *prog_data, + const nir_shader *shader, + gl_clip_plane *clip_planes, + bool use_legacy_snorm_formula, + int shader_time_index, + unsigned *final_assembly_size, + char **error_str) { const unsigned *assembly = NULL; - if (brw->intelScreen->compiler->scalar_vs) { + unsigned nr_attributes = _mesa_bitcount_64(prog_data->inputs_read); + + /* gl_VertexID and gl_InstanceID are system values, but arrive via an + * incoming vertex attribute. So, add an extra slot. + */ + if (shader->info.system_values_read & + (BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) | + BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) { + nr_attributes++; + } + + /* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB Entry + * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. Empirically, in + * vec4 mode, the hardware appears to wedge unless we read something. 
+ */ + if (compiler->scalar_vs) + prog_data->base.urb_read_length = DIV_ROUND_UP(nr_attributes, 2); + else + prog_data->base.urb_read_length = DIV_ROUND_UP(MAX2(nr_attributes, 1), 2); + + prog_data->nr_attributes = nr_attributes; + + /* Since vertex shaders reuse the same VUE entry for inputs and outputs + * (overwriting the original contents), we need to make sure the size is + * the larger of the two. + */ + const unsigned vue_entries = + MAX2(nr_attributes, (unsigned)prog_data->base.vue_map.num_slots); + + if (compiler->devinfo->gen == 6) + prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 8); + else + prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 4); + + if (compiler->scalar_vs) { prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; - fs_visitor v(brw->intelScreen->compiler, brw, - mem_ctx, key, &prog_data->base.base, + fs_visitor v(compiler, log_data, mem_ctx, key, &prog_data->base.base, NULL, /* prog; Only used for TEXTURE_RECTANGLE on gen < 8 */ - vp->Base.nir, 8, shader_time_index); - if (!v.run_vs(brw_select_clip_planes(&brw->ctx))) { - if (prog) { - prog->LinkStatus = false; - ralloc_strcat(&prog->InfoLog, v.fail_msg); - } - - _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", - v.fail_msg); + shader, 8, shader_time_index); + if (!v.run_vs(clip_planes)) { + if (error_str) + *error_str = ralloc_strdup(mem_ctx, v.fail_msg); return NULL; } - fs_generator g(brw->intelScreen->compiler, brw, - mem_ctx, (void *) key, &prog_data->base.base, - &vp->Base, v.promoted_constants, + fs_generator g(compiler, log_data, mem_ctx, (void *) key, + &prog_data->base.base, v.promoted_constants, v.runtime_check_aads_emit, "VS"); if (INTEL_DEBUG & DEBUG_VS) { - char *name; - if (prog) { - name = ralloc_asprintf(mem_ctx, "%s vertex shader %d", - prog->Label ? 
prog->Label : "unnamed", - prog->Name); - } else { - name = ralloc_asprintf(mem_ctx, "vertex program %d", - vp->Base.Id); - } - g.enable_debug(name); + const char *debug_name = + ralloc_asprintf(mem_ctx, "%s vertex shader %s", + shader->info.label ? shader->info.label : "unnamed", + shader->info.name); + + g.enable_debug(debug_name); } g.generate_code(v.cfg, 8); assembly = g.get_assembly(final_assembly_size); @@ -1990,26 +2010,19 @@ brw_vs_emit(struct brw_context *brw, if (!assembly) { prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; - vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data, - vp->Base.nir, brw_select_clip_planes(&brw->ctx), - mem_ctx, shader_time_index, - !_mesa_is_gles3(&brw->ctx)); + vec4_vs_visitor v(compiler, log_data, key, prog_data, + shader, clip_planes, mem_ctx, + shader_time_index, use_legacy_snorm_formula); if (!v.run()) { - if (prog) { - prog->LinkStatus = false; - ralloc_strcat(&prog->InfoLog, v.fail_msg); - } - - _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", - v.fail_msg); + if (error_str) + *error_str = ralloc_strdup(mem_ctx, v.fail_msg); return NULL; } - vec4_generator g(brw->intelScreen->compiler, brw, - prog, &vp->Base, &prog_data->base, + vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx, INTEL_DEBUG & DEBUG_VS, "vertex", "VS"); - assembly = g.generate_assembly(v.cfg, final_assembly_size); + assembly = g.generate_assembly(v.cfg, final_assembly_size, shader); } return assembly; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 5e3500c0c9a..d861b2e85df 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -72,7 +72,7 @@ public: void *log_data, const struct brw_sampler_prog_key_data *key, struct brw_vue_prog_data *prog_data, - nir_shader *shader, + const nir_shader *shader, void *mem_ctx, bool no_spills, int shader_time_index); @@ -391,8 +391,6 @@ class vec4_generator { public: 
vec4_generator(const struct brw_compiler *compiler, void *log_data, - struct gl_shader_program *shader_prog, - struct gl_program *prog, struct brw_vue_prog_data *prog_data, void *mem_ctx, bool debug_flag, @@ -400,10 +398,11 @@ public: const char *stage_abbrev); ~vec4_generator(); - const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size); + const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size, + const nir_shader *nir); private: - void generate_code(const cfg_t *cfg); + void generate_code(const cfg_t *cfg, const nir_shader *nir); void generate_math1_gen4(vec4_instruction *inst, struct brw_reg dst, @@ -485,9 +484,6 @@ private: struct brw_codegen *p; - struct gl_shader_program *shader_prog; - const struct gl_program *prog; - struct brw_vue_prog_data *prog_data; void *mem_ctx; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp index 610caef7dce..db99ecba35a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp @@ -256,18 +256,6 @@ try_constant_propagate(const struct brw_device_info *devinfo, } static bool -can_change_source_types(vec4_instruction *inst) -{ - return inst->dst.type == inst->src[0].type && - !inst->src[0].abs && !inst->src[0].negate && !inst->saturate && - (inst->opcode == BRW_OPCODE_MOV || - (inst->opcode == BRW_OPCODE_SEL && - inst->dst.type == inst->src[1].type && - inst->predicate != BRW_PREDICATE_NONE && - !inst->src[1].abs && !inst->src[1].negate)); -} - -static bool try_copy_propagate(const struct brw_device_info *devinfo, vec4_instruction *inst, int arg, struct copy_entry *entry) @@ -325,7 +313,7 @@ try_copy_propagate(const struct brw_device_info *devinfo, if (has_source_modifiers && value.type != inst->src[arg].type && - !can_change_source_types(inst)) + !inst->can_change_types()) return false; if (has_source_modifiers && @@ -394,7 +382,7 @@ 
try_copy_propagate(const struct brw_device_info *devinfo, value.swizzle = composed_swizzle; if (has_source_modifiers && value.type != inst->src[arg].type) { - assert(can_change_source_types(inst)); + assert(inst->can_change_types()); for (int i = 0; i < 3; i++) { inst->src[i].type = value.type; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index dcacc900540..a84f6c47471 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -21,6 +21,7 @@ */ #include <ctype.h> +#include "glsl/glsl_parser_extras.h" #include "brw_vec4.h" #include "brw_cfg.h" @@ -137,15 +138,13 @@ vec4_instruction::get_src(const struct brw_vue_prog_data *prog_data, int i) vec4_generator::vec4_generator(const struct brw_compiler *compiler, void *log_data, - struct gl_shader_program *shader_prog, - struct gl_program *prog, struct brw_vue_prog_data *prog_data, void *mem_ctx, bool debug_flag, const char *stage_name, const char *stage_abbrev) : compiler(compiler), log_data(log_data), devinfo(compiler->devinfo), - shader_prog(shader_prog), prog(prog), prog_data(prog_data), + prog_data(prog_data), mem_ctx(mem_ctx), stage_name(stage_name), stage_abbrev(stage_abbrev), debug_flag(debug_flag) { @@ -1142,7 +1141,7 @@ vec4_generator::generate_set_simd4x2_header_gen9(vec4_instruction *inst, } void -vec4_generator::generate_code(const cfg_t *cfg) +vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir) { struct annotation_info annotation; memset(&annotation, 0, sizeof(annotation)); @@ -1648,14 +1647,10 @@ vec4_generator::generate_code(const cfg_t *cfg) int after_size = p->next_insn_offset; if (unlikely(debug_flag)) { - if (shader_prog) { - fprintf(stderr, "Native code for %s %s shader %d:\n", - shader_prog->Label ? 
shader_prog->Label : "unnamed", - stage_name, shader_prog->Name); - } else { - fprintf(stderr, "Native code for %s program %d:\n", stage_name, - prog->Id); - } + fprintf(stderr, "Native code for %s %s shader %s:\n", + nir->info.label ? nir->info.label : "unnamed", + _mesa_shader_stage_to_string(nir->stage), nir->info.name); + fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. Compacted %d to %d" " bytes (%.0f%%)\n", stage_abbrev, @@ -1663,7 +1658,7 @@ vec4_generator::generate_code(const cfg_t *cfg) 100.0f * (before_size - after_size) / before_size); dump_assembly(p->store, annotation.ann_count, annotation.ann, - p->devinfo, prog); + p->devinfo); ralloc_free(annotation.ann); } @@ -1676,10 +1671,11 @@ vec4_generator::generate_code(const cfg_t *cfg) const unsigned * vec4_generator::generate_assembly(const cfg_t *cfg, - unsigned *assembly_size) + unsigned *assembly_size, + const nir_shader *nir) { brw_set_default_access_mode(p, BRW_ALIGN_16); - generate_code(cfg); + generate_code(cfg, nir); return brw_get_program(p, assembly_size); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 4ce471e0669..a715cf5a6cb 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -30,14 +30,12 @@ #include "brw_vec4_gs_visitor.h" #include "gen6_gs_visitor.h" -const unsigned MAX_GS_INPUT_VERTICES = 6; - namespace brw { vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler, void *log_data, struct brw_gs_compile *c, - nir_shader *shader, + const nir_shader *shader, void *mem_ctx, bool no_spills, int shader_time_index) @@ -598,32 +596,17 @@ vec4_gs_visitor::gs_end_primitive() emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask)); } -static const unsigned * -generate_assembly(struct brw_context *brw, - struct gl_shader_program *shader_prog, - struct gl_program *prog, - struct brw_vue_prog_data *prog_data, - void 
*mem_ctx, - const cfg_t *cfg, - unsigned *final_assembly_size) -{ - vec4_generator g(brw->intelScreen->compiler, brw, - shader_prog, prog, prog_data, mem_ctx, - INTEL_DEBUG & DEBUG_GS, "geometry", "GS"); - return g.generate_assembly(cfg, final_assembly_size); -} - extern "C" const unsigned * -brw_gs_emit(struct brw_context *brw, - struct gl_shader_program *prog, - struct brw_gs_compile *c, - void *mem_ctx, - int shader_time_index, - unsigned *final_assembly_size) +brw_compile_gs(const struct brw_compiler *compiler, void *log_data, + struct brw_gs_compile *c, + const nir_shader *shader, + struct gl_shader_program *shader_prog, + void *mem_ctx, + int shader_time_index, + unsigned *final_assembly_size, + char **error_str) { - struct gl_shader *shader = prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; - - if (brw->gen >= 7) { + if (compiler->devinfo->gen >= 7) { /* Compile the geometry shader in DUAL_OBJECT dispatch mode, if we can do * so without spilling. If the GS invocations count > 1, then we can't use * dual object mode. @@ -632,13 +615,12 @@ brw_gs_emit(struct brw_context *brw, likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) { c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; - vec4_gs_visitor v(brw->intelScreen->compiler, brw, - c, shader->Program->nir, + vec4_gs_visitor v(compiler, log_data, c, shader, mem_ctx, true /* no_spills */, shader_time_index); if (v.run()) { - return generate_assembly(brw, prog, &c->gp->program.Base, - &c->prog_data.base, mem_ctx, v.cfg, - final_assembly_size); + vec4_generator g(compiler, log_data, &c->prog_data.base, mem_ctx, + INTEL_DEBUG & DEBUG_GS, "geometry", "GS"); + return g.generate_assembly(v.cfg, final_assembly_size, shader); } } } @@ -666,7 +648,7 @@ brw_gs_emit(struct brw_context *brw, * mode is more performant when invocations > 1. Gen6 only supports * SINGLE mode. 
*/ - if (c->prog_data.invocations <= 1 || brw->gen < 7) + if (c->prog_data.invocations <= 1 || compiler->devinfo->gen < 7) c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X1_SINGLE; else c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE; @@ -674,24 +656,22 @@ brw_gs_emit(struct brw_context *brw, vec4_gs_visitor *gs = NULL; const unsigned *ret = NULL; - if (brw->gen >= 7) - gs = new vec4_gs_visitor(brw->intelScreen->compiler, brw, - c, shader->Program->nir, + if (compiler->devinfo->gen >= 7) + gs = new vec4_gs_visitor(compiler, log_data, c, shader, mem_ctx, false /* no_spills */, shader_time_index); else - gs = new gen6_gs_visitor(brw->intelScreen->compiler, brw, - c, prog, shader->Program->nir, + gs = new gen6_gs_visitor(compiler, log_data, c, shader_prog, shader, mem_ctx, false /* no_spills */, shader_time_index); if (!gs->run()) { - prog->LinkStatus = false; - ralloc_strcat(&prog->InfoLog, gs->fail_msg); + if (error_str) + *error_str = ralloc_strdup(mem_ctx, gs->fail_msg); } else { - ret = generate_assembly(brw, prog, &c->gp->program.Base, - &c->prog_data.base, mem_ctx, gs->cfg, - final_assembly_size); + vec4_generator g(compiler, log_data, &c->prog_data.base, mem_ctx, + INTEL_DEBUG & DEBUG_GS, "geometry", "GS"); + ret = g.generate_assembly(gs->cfg, final_assembly_size, shader); } delete gs; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h index 3ff195c3e68..c52552768c8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h @@ -32,36 +32,6 @@ #include "brw_vec4.h" -/** - * Scratch data used when compiling a GLSL geometry shader. 
- */ -struct brw_gs_compile -{ - struct brw_gs_prog_key key; - struct brw_gs_prog_data prog_data; - struct brw_vue_map input_vue_map; - - struct brw_geometry_program *gp; - - unsigned control_data_bits_per_vertex; - unsigned control_data_header_size_bits; -}; - -#ifdef __cplusplus -extern "C" { -#endif - -const unsigned *brw_gs_emit(struct brw_context *brw, - struct gl_shader_program *prog, - struct brw_gs_compile *c, - void *mem_ctx, - int shader_time_index, - unsigned *final_assembly_size); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - #ifdef __cplusplus namespace brw { @@ -71,7 +41,7 @@ public: vec4_gs_visitor(const struct brw_compiler *compiler, void *log_data, struct brw_gs_compile *c, - nir_shader *shader, + const nir_shader *shader, void *mem_ctx, bool no_spills, int shader_time_index); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp index cc688ef8083..678237901f2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp @@ -291,15 +291,15 @@ vec4_visitor::calculate_live_intervals() struct block_data *bd = &live_intervals->block_data[block->num]; for (int i = 0; i < live_intervals->num_vars; i++) { - if (BITSET_TEST(bd->livein, i)) { - start[i] = MIN2(start[i], block->start_ip); - end[i] = MAX2(end[i], block->start_ip); - } + if (BITSET_TEST(bd->livein, i)) { + start[i] = MIN2(start[i], block->start_ip); + end[i] = MAX2(end[i], block->start_ip); + } - if (BITSET_TEST(bd->liveout, i)) { - start[i] = MIN2(start[i], block->end_ip); - end[i] = MAX2(end[i], block->end_ip); - } + if (BITSET_TEST(bd->liveout, i)) { + start[i] = MIN2(start[i], block->end_ip); + end[i] = MAX2(end[i], block->end_ip); + } } } } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 41bd80df377..ea1e3e7bbcf 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -423,10 +423,10 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case nir_intrinsic_get_buffer_size: { nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]); - unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0; + unsigned ssbo_index = const_uniform_block ? const_uniform_block->u[0] : 0; - src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start + - ubo_index); + src_reg surf_index = src_reg(prog_data->base.binding_table.ssbo_start + + ssbo_index); dst_reg result_dst = get_nir_dest(instr->dest); vec4_instruction *inst = new(mem_ctx) vec4_instruction(VS_OPCODE_GET_BUFFER_SIZE, result_dst); @@ -456,18 +456,18 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[1]); if (const_uniform_block) { - unsigned index = prog_data->base.binding_table.ubo_start + + unsigned index = prog_data->base.binding_table.ssbo_start + const_uniform_block->u[0]; surf_index = src_reg(index); brw_mark_surface_used(&prog_data->base, index); } else { surf_index = src_reg(this, glsl_type::uint_type); emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[1], 1), - src_reg(prog_data->base.binding_table.ubo_start))); + src_reg(prog_data->base.binding_table.ssbo_start))); surf_index = emit_uniformize(surf_index); brw_mark_surface_used(&prog_data->base, - prog_data->base.binding_table.ubo_start + + prog_data->base.binding_table.ssbo_start + nir->info.num_ssbos - 1); } @@ -599,7 +599,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src_reg surf_index; if (const_uniform_block) { - unsigned index = prog_data->base.binding_table.ubo_start + + unsigned index = prog_data->base.binding_table.ssbo_start + const_uniform_block->u[0]; surf_index = src_reg(index); @@ -607,14 +607,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) } else { surf_index = src_reg(this, 
glsl_type::uint_type); emit(ADD(dst_reg(surf_index), get_nir_src(instr->src[0], 1), - src_reg(prog_data->base.binding_table.ubo_start))); + src_reg(prog_data->base.binding_table.ssbo_start))); surf_index = emit_uniformize(surf_index); /* Assume this may touch any UBO. It would be nice to provide * a tighter bound, but the array information is already lowered away. */ brw_mark_surface_used(&prog_data->base, - prog_data->base.binding_table.ubo_start + + prog_data->base.binding_table.ssbo_start + nir->info.num_ssbos - 1); } @@ -645,17 +645,17 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case nir_intrinsic_ssbo_atomic_add: nir_emit_ssbo_atomic(BRW_AOP_ADD, instr); break; - case nir_intrinsic_ssbo_atomic_min: - if (dest.type == BRW_REGISTER_TYPE_D) - nir_emit_ssbo_atomic(BRW_AOP_IMIN, instr); - else - nir_emit_ssbo_atomic(BRW_AOP_UMIN, instr); + case nir_intrinsic_ssbo_atomic_imin: + nir_emit_ssbo_atomic(BRW_AOP_IMIN, instr); + break; + case nir_intrinsic_ssbo_atomic_umin: + nir_emit_ssbo_atomic(BRW_AOP_UMIN, instr); break; - case nir_intrinsic_ssbo_atomic_max: - if (dest.type == BRW_REGISTER_TYPE_D) - nir_emit_ssbo_atomic(BRW_AOP_IMAX, instr); - else - nir_emit_ssbo_atomic(BRW_AOP_UMAX, instr); + case nir_intrinsic_ssbo_atomic_imax: + nir_emit_ssbo_atomic(BRW_AOP_IMAX, instr); + break; + case nir_intrinsic_ssbo_atomic_umax: + nir_emit_ssbo_atomic(BRW_AOP_UMAX, instr); break; case nir_intrinsic_ssbo_atomic_and: nir_emit_ssbo_atomic(BRW_AOP_AND, instr); @@ -765,7 +765,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) */ brw_mark_surface_used(&prog_data->base, prog_data->base.binding_table.ubo_start + - nir->info.num_ssbos - 1); + nir->info.num_ubos - 1); } unsigned const_offset = instr->const_index[0]; @@ -821,20 +821,20 @@ vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr) src_reg surface; nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]); if (const_surface) { - unsigned surf_index = 
prog_data->base.binding_table.ubo_start + + unsigned surf_index = prog_data->base.binding_table.ssbo_start + const_surface->u[0]; surface = src_reg(surf_index); brw_mark_surface_used(&prog_data->base, surf_index); } else { surface = src_reg(this, glsl_type::uint_type); emit(ADD(dst_reg(surface), get_nir_src(instr->src[0]), - src_reg(prog_data->base.binding_table.ubo_start))); + src_reg(prog_data->base.binding_table.ssbo_start))); /* Assume this may touch any UBO. This is the same we do for other * UBO/SSBO accesses with non-constant surface. */ brw_mark_surface_used(&prog_data->base, - prog_data->base.binding_table.ubo_start + + prog_data->base.binding_table.ssbo_start + nir->info.num_ssbos - 1); } @@ -1237,14 +1237,8 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_b2i: - emit(AND(dst, op[0], src_reg(1))); - break; - case nir_op_b2f: - op[0].type = BRW_REGISTER_TYPE_D; - dst.type = BRW_REGISTER_TYPE_D; - emit(AND(dst, op[0], src_reg(0x3f800000u))); - dst.type = BRW_REGISTER_TYPE_F; + emit(MOV(dst, negate(op[0]))); break; case nir_op_f2b: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 98ea9be6ee4..5be9c6a6b2d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1815,7 +1815,7 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, void *log_data, const struct brw_sampler_prog_key_data *key_tex, struct brw_vue_prog_data *prog_data, - nir_shader *shader, + const nir_shader *shader, void *mem_ctx, bool no_spills, int shader_time_index) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp index b6e1971c2ee..485a80ee2fc 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp @@ -301,7 +301,7 @@ vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler, void *log_data, const 
struct brw_vs_prog_key *key, struct brw_vs_prog_data *vs_prog_data, - nir_shader *shader, + const nir_shader *shader, gl_clip_plane *clip_planes, void *mem_ctx, int shader_time_index, diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 38de98fab86..ba680a98f7e 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -31,6 +31,7 @@ #include "main/compiler.h" +#include "main/context.h" #include "brw_context.h" #include "brw_vs.h" #include "brw_util.h" @@ -57,18 +58,6 @@ brw_codegen_vs_prog(struct brw_context *brw, bool start_busy = false; double start_time = 0; - if (!vp->program.Base.nir) { - /* Normally we generate NIR in LinkShader() or - * ProgramStringNotify(), but Mesa's fixed-function vertex program - * handling doesn't notify the driver at all. Just do it here, at - * the last minute, even though it's lame. - */ - assert(vp->program.Base.Id == 0 && prog == NULL); - vp->program.Base.nir = - brw_create_nir(brw, NULL, &vp->program.Base, MESA_SHADER_VERTEX, - brw->intelScreen->compiler->scalar_vs); - } - if (prog) vs = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; @@ -171,7 +160,7 @@ brw_codegen_vs_prog(struct brw_context *brw, } if (unlikely(INTEL_DEBUG & DEBUG_VS)) - brw_dump_ir("vertex", prog, &vs->base, &vp->program.Base); + brw_dump_ir("vertex", prog, vs ? &vs->base : NULL, &vp->program.Base); int st_index = -1; if (INTEL_DEBUG & DEBUG_SHADER_TIME) @@ -179,9 +168,20 @@ brw_codegen_vs_prog(struct brw_context *brw, /* Emit GEN4 code. 
*/ - program = brw_vs_emit(brw, mem_ctx, key, &prog_data, - &vp->program, prog, st_index, &program_size); + char *error_str; + program = brw_compile_vs(brw->intelScreen->compiler, brw, mem_ctx, key, + &prog_data, vp->program.Base.nir, + brw_select_clip_planes(&brw->ctx), + !_mesa_is_gles3(&brw->ctx), + st_index, &program_size, &error_str); if (program == NULL) { + if (prog) { + prog->LinkStatus = false; + ralloc_strcat(&prog->InfoLog, error_str); + } + + _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", error_str); + ralloc_free(mem_ctx); return false; } diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index f1242f61b33..bcb5e7b0b2a 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -54,14 +54,6 @@ extern "C" { #endif -const unsigned *brw_vs_emit(struct brw_context *brw, - void *mem_ctx, - const struct brw_vs_prog_key *key, - struct brw_vs_prog_data *prog_data, - struct gl_vertex_program *vp, - struct gl_shader_program *shader_prog, - int shader_time_index, - unsigned *program_size); void brw_vs_debug_recompile(struct brw_context *brw, struct gl_shader_program *prog, const struct brw_vs_prog_key *key); @@ -88,7 +80,7 @@ public: void *log_data, const struct brw_vs_prog_key *key, struct brw_vs_prog_data *vs_prog_data, - nir_shader *shader, + const nir_shader *shader, gl_clip_plane *clip_planes, void *mem_ctx, int shader_time_index, diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index 9bb48eb2e27..f65258a52a5 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -201,7 +201,7 @@ brw_upload_vs_image_surfaces(struct brw_context *brw) ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; if (prog) { - /* BRW_NEW_VS_PROG_DATA, BRW_NEW_IMAGE_UNITS */ + /* BRW_NEW_VS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */ brw_upload_image_surfaces(brw, 
prog->_LinkedShaders[MESA_SHADER_VERTEX], &brw->vs.base, &brw->vs.prog_data->base.base); } @@ -209,6 +209,7 @@ brw_upload_vs_image_surfaces(struct brw_context *brw) const struct brw_tracked_state brw_vs_image_surfaces = { .dirty = { + .mesa = _NEW_TEXTURE, .brw = BRW_NEW_BATCH | BRW_NEW_IMAGE_UNITS | BRW_NEW_VERTEX_PROGRAM | diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 4d5e7f67bd6..5c49db9e63e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -39,89 +39,6 @@ #include "util/ralloc.h" -/** - * Return a bitfield where bit n is set if barycentric interpolation mode n - * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader. - */ -static unsigned -brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo, - bool shade_model_flat, - bool persample_shading, - nir_shader *shader) -{ - unsigned barycentric_interp_modes = 0; - - nir_foreach_variable(var, &shader->inputs) { - enum glsl_interp_qualifier interp_qualifier = var->data.interpolation; - bool is_centroid = var->data.centroid && !persample_shading; - bool is_sample = var->data.sample || persample_shading; - bool is_gl_Color = (var->data.location == VARYING_SLOT_COL0) || - (var->data.location == VARYING_SLOT_COL1); - - /* Ignore WPOS and FACE, because they don't require interpolation. */ - if (var->data.location == VARYING_SLOT_POS || - var->data.location == VARYING_SLOT_FACE) - continue; - - /* Determine the set (or sets) of barycentric coordinates needed to - * interpolate this variable. Note that when - * brw->needs_unlit_centroid_workaround is set, centroid interpolation - * uses PIXEL interpolation for unlit pixels and CENTROID interpolation - * for lit pixels, so we need both sets of barycentric coordinates. 
- */ - if (interp_qualifier == INTERP_QUALIFIER_NOPERSPECTIVE) { - if (is_centroid) { - barycentric_interp_modes |= - 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC; - } else if (is_sample) { - barycentric_interp_modes |= - 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC; - } - if ((!is_centroid && !is_sample) || - devinfo->needs_unlit_centroid_workaround) { - barycentric_interp_modes |= - 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC; - } - } else if (interp_qualifier == INTERP_QUALIFIER_SMOOTH || - (!(shade_model_flat && is_gl_Color) && - interp_qualifier == INTERP_QUALIFIER_NONE)) { - if (is_centroid) { - barycentric_interp_modes |= - 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC; - } else if (is_sample) { - barycentric_interp_modes |= - 1 << BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC; - } - if ((!is_centroid && !is_sample) || - devinfo->needs_unlit_centroid_workaround) { - barycentric_interp_modes |= - 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC; - } - } - } - - return barycentric_interp_modes; -} - -static uint8_t -computed_depth_mode(struct gl_fragment_program *fp) -{ - if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { - switch (fp->FragDepthLayout) { - case FRAG_DEPTH_LAYOUT_NONE: - case FRAG_DEPTH_LAYOUT_ANY: - return BRW_PSCDEPTH_ON; - case FRAG_DEPTH_LAYOUT_GREATER: - return BRW_PSCDEPTH_ON_GE; - case FRAG_DEPTH_LAYOUT_LESS: - return BRW_PSCDEPTH_ON_LE; - case FRAG_DEPTH_LAYOUT_UNCHANGED: - return BRW_PSCDEPTH_OFF; - } - } - return BRW_PSCDEPTH_OFF; -} - static void assign_fs_binding_table_offsets(const struct brw_device_info *devinfo, const struct gl_shader_program *shader_prog, @@ -166,15 +83,6 @@ brw_codegen_wm_prog(struct brw_context *brw, fs = (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; memset(&prog_data, 0, sizeof(prog_data)); - /* key->alpha_test_func means simulating alpha testing via discards, - * so the shader definitely kills pixels. 
- */ - prog_data.uses_kill = fp->program.UsesKill || key->alpha_test_func; - prog_data.uses_omask = - fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); - prog_data.computed_depth_mode = computed_depth_mode(&fp->program); - - prog_data.early_fragment_tests = fs && fs->base.EarlyFragmentTests; /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */ if (!prog) @@ -209,12 +117,6 @@ brw_codegen_wm_prog(struct brw_context *brw, &prog_data.base); } - prog_data.barycentric_interp_modes = - brw_compute_barycentric_interp_modes(brw->intelScreen->devinfo, - key->flat_shade, - key->persample_shading, - fp->program.Base.nir); - if (unlikely(brw->perf_debug)) { start_busy = (brw->batch.last_bo && drm_intel_bo_busy(brw->batch.last_bo)); @@ -222,7 +124,7 @@ brw_codegen_wm_prog(struct brw_context *brw, } if (unlikely(INTEL_DEBUG & DEBUG_WM)) - brw_dump_ir("fragment", prog, &fs->base, &fp->program.Base); + brw_dump_ir("fragment", prog, fs ? &fs->base : NULL, &fp->program.Base); int st_index8 = -1, st_index16 = -1; if (INTEL_DEBUG & DEBUG_SHADER_TIME) { @@ -230,9 +132,19 @@ brw_codegen_wm_prog(struct brw_context *brw, st_index16 = brw_get_shader_time_index(brw, prog, &fp->program.Base, ST_FS16); } - program = brw_wm_fs_emit(brw, mem_ctx, key, &prog_data, - &fp->program, prog, st_index8, st_index16, &program_size); + char *error_str = NULL; + program = brw_compile_fs(brw->intelScreen->compiler, brw, mem_ctx, + key, &prog_data, fp->program.Base.nir, + &fp->program.Base, st_index8, st_index16, + brw->use_rep_send, &program_size, &error_str); if (program == NULL) { + if (prog) { + prog->LinkStatus = false; + ralloc_strcat(&prog->InfoLog, error_str); + } + + _mesa_problem(NULL, "Failed to compile fragment shader: %s\n", error_str); + ralloc_free(mem_ctx); return false; } diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 6ee22b2f907..53a642ee8bb 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ 
b/src/mesa/drivers/dri/i965/brw_wm.h @@ -61,21 +61,6 @@ extern "C" { #endif -/** - * Compile a fragment shader. - * - * Returns the final assembly and the program's size. - */ -const unsigned *brw_wm_fs_emit(struct brw_context *brw, - void *mem_ctx, - const struct brw_wm_prog_key *key, - struct brw_wm_prog_data *prog_data, - struct gl_fragment_program *fp, - struct gl_shader_program *prog, - int shader_time_index8, - int shader_time_index16, - unsigned *final_assembly_size); - GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); struct gl_shader *brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type); diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index c671e23827e..6ebe6481c32 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -34,6 +34,7 @@ #include "main/blend.h" #include "main/mtypes.h" #include "main/samplerobj.h" +#include "main/shaderimage.h" #include "program/prog_parameter.h" #include "main/framebuffer.h" @@ -925,54 +926,53 @@ brw_upload_ubo_surfaces(struct brw_context *brw, if (!shader) return; - uint32_t *surf_offsets = + uint32_t *ubo_surf_offsets = &stage_state->surf_offset[prog_data->binding_table.ubo_start]; for (int i = 0; i < shader->NumUniformBlocks; i++) { - struct intel_buffer_object *intel_bo; + struct gl_uniform_buffer_binding *binding = + &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding]; - /* Because behavior for referencing outside of the binding's size in the - * glBindBufferRange case is undefined, we can just bind the whole buffer - * glBindBufferBase wants and be a correct implementation. 
- */ - if (!shader->UniformBlocks[i].IsShaderStorage) { - struct gl_uniform_buffer_binding *binding; - binding = - &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding]; - if (binding->BufferObject == ctx->Shared->NullBufferObj) { - brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &surf_offsets[i]); - } else { - intel_bo = intel_buffer_object(binding->BufferObject); - drm_intel_bo *bo = - intel_bufferobj_buffer(brw, intel_bo, - binding->Offset, - binding->BufferObject->Size - binding->Offset); - brw_create_constant_surface(brw, bo, binding->Offset, - binding->BufferObject->Size - binding->Offset, - &surf_offsets[i], - dword_pitch); - } + if (binding->BufferObject == ctx->Shared->NullBufferObj) { + brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]); } else { - struct gl_shader_storage_buffer_binding *binding; - binding = - &ctx->ShaderStorageBufferBindings[shader->UniformBlocks[i].Binding]; - if (binding->BufferObject == ctx->Shared->NullBufferObj) { - brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &surf_offsets[i]); - } else { - intel_bo = intel_buffer_object(binding->BufferObject); - drm_intel_bo *bo = - intel_bufferobj_buffer(brw, intel_bo, - binding->Offset, - binding->BufferObject->Size - binding->Offset); - brw_create_buffer_surface(brw, bo, binding->Offset, - binding->BufferObject->Size - binding->Offset, - &surf_offsets[i], - dword_pitch); - } + struct intel_buffer_object *intel_bo = + intel_buffer_object(binding->BufferObject); + drm_intel_bo *bo = + intel_bufferobj_buffer(brw, intel_bo, + binding->Offset, + binding->BufferObject->Size - binding->Offset); + brw_create_constant_surface(brw, bo, binding->Offset, + binding->BufferObject->Size - binding->Offset, + &ubo_surf_offsets[i], + dword_pitch); + } + } + + uint32_t *ssbo_surf_offsets = + &stage_state->surf_offset[prog_data->binding_table.ssbo_start]; + + for (int i = 0; i < shader->NumShaderStorageBlocks; i++) { + struct gl_shader_storage_buffer_binding *binding = + 
&ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding]; + + if (binding->BufferObject == ctx->Shared->NullBufferObj) { + brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]); + } else { + struct intel_buffer_object *intel_bo = + intel_buffer_object(binding->BufferObject); + drm_intel_bo *bo = + intel_bufferobj_buffer(brw, intel_bo, + binding->Offset, + binding->BufferObject->Size - binding->Offset); + brw_create_buffer_surface(brw, bo, binding->Offset, + binding->BufferObject->Size - binding->Offset, + &ssbo_surf_offsets[i], + dword_pitch); } } - if (shader->NumUniformBlocks) + if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks) brw->ctx.NewDriverState |= BRW_NEW_SURFACES; } @@ -1112,7 +1112,7 @@ brw_upload_cs_image_surfaces(struct brw_context *brw) ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; if (prog) { - /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS */ + /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */ brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE], &brw->cs.base, &brw->cs.prog_data->base); } @@ -1120,7 +1120,7 @@ brw_upload_cs_image_surfaces(struct brw_context *brw) const struct brw_tracked_state brw_cs_image_surfaces = { .dirty = { - .mesa = _NEW_PROGRAM, + .mesa = _NEW_TEXTURE | _NEW_PROGRAM, .brw = BRW_NEW_BATCH | BRW_NEW_CS_PROG_DATA | BRW_NEW_IMAGE_UNITS @@ -1253,7 +1253,7 @@ update_image_surface(struct brw_context *brw, uint32_t *surf_offset, struct brw_image_param *param) { - if (u->_Valid) { + if (_mesa_is_image_unit_valid(&brw->ctx, u)) { struct gl_texture_object *obj = u->TexObj; const unsigned format = get_image_format(brw, u->_ActualFormat, access); @@ -1338,7 +1338,7 @@ brw_upload_wm_image_surfaces(struct brw_context *brw) struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram; if (prog) { - /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS */ + /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */ brw_upload_image_surfaces(brw, 
prog->_LinkedShaders[MESA_SHADER_FRAGMENT], &brw->wm.base, &brw->wm.prog_data->base); } @@ -1346,6 +1346,7 @@ brw_upload_wm_image_surfaces(struct brw_context *brw) const struct brw_tracked_state brw_wm_image_surfaces = { .dirty = { + .mesa = _NEW_TEXTURE, .brw = BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp index 59a76559103..671a535a5bd 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp @@ -31,8 +31,6 @@ #include "gen6_gs_visitor.h" -const unsigned MAX_GS_INPUT_VERTICES = 6; - namespace brw { void diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h index e75d6aa10b8..d02c67d8a74 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h @@ -39,7 +39,7 @@ public: void *log_data, struct brw_gs_compile *c, struct gl_shader_program *prog, - nir_shader *shader, + const nir_shader *shader, void *mem_ctx, bool no_spills, int shader_time_index) : diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c index 497ecec8e45..8d6d3fe1d34 100644 --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c @@ -59,9 +59,7 @@ upload_gs_state(struct brw_context *brw) OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_GS_SAMPLER_COUNT_SHIFT) | ((brw->gs.prog_data->base.base.binding_table.size_bytes / 4) << - GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) | - (brw->is_haswell && prog_data->base.nr_image_params ? 
- HSW_GS_UAV_ACCESS_ENABLE : 0)); + GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); if (brw->gs.prog_data->base.base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index b7e48585482..a18dc697651 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -126,9 +126,7 @@ upload_vs_state(struct brw_context *brw) ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT) | ((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) << - GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) | - (brw->is_haswell && prog_data->base.nr_image_params ? - HSW_VS_UAV_ACCESS_ENABLE : 0)); + GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); if (prog_data->base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index fd6dab5be8b..06d5e65786b 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -113,7 +113,14 @@ upload_wm_state(struct brw_context *brw) else if (prog_data->base.nr_image_params) dw1 |= GEN7_WM_EARLY_DS_CONTROL_PSEXEC; - /* _NEW_BUFFERS | _NEW_COLOR */ + /* The "UAV access enable" bits are unnecessary on HSW because they only + * seem to have an effect on the HW-assisted coherency mechanism which we + * don't need, and the rasterization-related UAV_ONLY flag and the + * DISPATCH_ENABLE bit can be set independently from it. + * C.f. gen8_upload_ps_extra(). 
+ * + * BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS | _NEW_COLOR + */ if (brw->is_haswell && !(brw_color_buffer_write_enabled(brw) || writes_depth) && prog_data->base.nr_image_params) @@ -221,9 +228,6 @@ gen7_upload_ps_state(struct brw_context *brw, _mesa_get_min_invocations_per_fragment(ctx, fp, false); assert(min_inv_per_frag >= 1); - if (brw->is_haswell && prog_data->base.nr_image_params) - dw4 |= HSW_PS_UAV_ACCESS_ENABLE; - if (prog_data->prog_offset_16 || prog_data->no_8) { dw4 |= GEN7_PS_16_DISPATCH_ENABLE; if (!prog_data->no_8 && min_inv_per_frag == 1) { diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c index 4195f4cf4a7..d766ca7bebf 100644 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c @@ -52,9 +52,7 @@ gen8_upload_gs_state(struct brw_context *brw) ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_GS_SAMPLER_COUNT_SHIFT) | ((prog_data->base.binding_table.size_bytes / 4) << - GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) | - (prog_data->base.nr_image_params ? 
- HSW_GS_UAV_ACCESS_ENABLE : 0)); + GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); if (brw->gs.prog_data->base.base.total_scratch) { OUT_RELOC64(stage_state->scratch_bo, diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index a686fed704f..8f0507413a7 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -25,6 +25,7 @@ #include "program/program.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_wm.h" #include "intel_batchbuffer.h" void @@ -65,8 +66,33 @@ gen8_upload_ps_extra(struct brw_context *brw, if (brw->gen >= 9 && prog_data->pulls_bary) dw1 |= GEN9_PSX_SHADER_PULLS_BARY; - if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx) || - prog_data->base.nr_image_params) + /* The stricter cross-primitive coherency guarantees that the hardware + * gives us with the "Accesses UAV" bit set for at least one shader stage + * and the "UAV coherency required" bit set on the 3DPRIMITIVE command are + * redundant within the current image, atomic counter and SSBO GL APIs, + * which all have very loose ordering and coherency requirements and + * generally rely on the application to insert explicit barriers when a + * shader invocation is expected to see the memory writes performed by the + * invocations of some previous primitive. Regardless of the value of "UAV + * coherency required", the "Accesses UAV" bits will implicitly cause an in + * most cases useless DC flush when the lowermost stage with the bit set + * finishes execution. + * + * It would be nice to disable it, but in some cases we can't because on + * Gen8+ it also has an influence on rasterization via the PS UAV-only + * signal (which could be set independently from the coherency mechanism in + * the 3DSTATE_WM command on Gen7), and because in some cases it will + * determine whether the hardware skips execution of the fragment shader or + * not via the ThreadDispatchEnable signal. 
However if we know that + * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and + * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any + * difference so we may just disable it here. + * + * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | _NEW_COLOR + */ + if ((_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx) || + prog_data->base.nr_image_params) && + !brw_color_buffer_write_enabled(brw)) dw1 |= GEN8_PSX_SHADER_HAS_UAV; BEGIN_BATCH(2); @@ -91,7 +117,7 @@ upload_ps_extra(struct brw_context *brw) const struct brw_tracked_state gen8_ps_extra = { .dirty = { - .mesa = 0, + .mesa = _NEW_BUFFERS | _NEW_COLOR, .brw = BRW_NEW_CONTEXT | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index e1e7704655d..18b86652fd2 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -221,8 +221,8 @@ gen8_emit_texture_surface_state(struct brw_context *brw, * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN * 16 must be used." */ - assert(brw->gen < 9 || mt->halign == 16); - assert(brw->gen < 8 || mt->num_samples > 1 || mt->halign == 16); + if (brw->gen >= 9 || mt->num_samples == 1) + assert(mt->halign == 16); } const uint32_t surf_type = translate_tex_target(target); @@ -470,8 +470,8 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN * 16 must be used." 
*/ - assert(brw->gen < 9 || mt->halign == 16); - assert(brw->gen < 8 || mt->num_samples > 1 || mt->halign == 16); + if (brw->gen >= 9 || mt->num_samples == 1) + assert(mt->halign == 16); } uint32_t *surf = allocate_surface_state(brw, &offset, surf_index); diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c index 8b5048bee7e..28f5adddf14 100644 --- a/src/mesa/drivers/dri/i965/gen8_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c @@ -53,9 +53,7 @@ upload_vs_state(struct brw_context *brw) ((ALIGN(stage_state->sampler_count, 4) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT) | ((prog_data->base.binding_table.size_bytes / 4) << - GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) | - (prog_data->base.nr_image_params ? - HSW_VS_UAV_ACCESS_ENABLE : 0)); + GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); if (prog_data->base.total_scratch) { OUT_RELOC64(stage_state->scratch_bo, diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.c b/src/mesa/drivers/dri/i965/intel_asm_annotation.c index bb8bb8d38c9..b3d6324a5fe 100644 --- a/src/mesa/drivers/dri/i965/intel_asm_annotation.c +++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.c @@ -33,8 +33,7 @@ void dump_assembly(void *assembly, int num_annotations, struct annotation *annotation, - const struct brw_device_info *devinfo, - const struct gl_program *prog) + const struct brw_device_info *devinfo) { const char *last_annotation_string = NULL; const void *last_annotation_ir = NULL; @@ -57,19 +56,7 @@ dump_assembly(void *assembly, int num_annotations, struct annotation *annotation last_annotation_ir = annotation[i].ir; if (last_annotation_ir) { fprintf(stderr, " "); - if (prog->nir) - nir_print_instr(annotation[i].ir, stderr); - else if (!prog->Instructions) - fprint_ir(stderr, annotation[i].ir); - else { - const struct prog_instruction *pi = - (const struct prog_instruction *)annotation[i].ir; - fprintf(stderr, "%d: ", - (int)(pi - prog->Instructions)); - _mesa_fprint_instruction_opt(stderr, - 
pi, - 0, PROG_PRINT_DEBUG, NULL); - } + nir_print_instr(annotation[i].ir, stderr); fprintf(stderr, "\n"); } } diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.h b/src/mesa/drivers/dri/i965/intel_asm_annotation.h index d9c69bc41b0..6c72326f058 100644 --- a/src/mesa/drivers/dri/i965/intel_asm_annotation.h +++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.h @@ -60,8 +60,7 @@ struct annotation_info { void dump_assembly(void *assembly, int num_annotations, struct annotation *annotation, - const struct brw_device_info *devinfo, - const struct gl_program *prog); + const struct brw_device_info *devinfo); void annotate(const struct brw_device_info *devinfo, diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index a169c41790e..b6e35205727 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -201,6 +201,14 @@ intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw, if (brw->gen < 7) return false; + if (brw->gen >= 9) { + /* FINISHME: Enable singlesample fast MCS clears on SKL after all GPU + * FINISHME: hangs are resolved. 
+ */ + perf_debug("singlesample fast MCS clears disabled on gen9"); + return false; + } + if (mt->disable_aux_buffers) return false; diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp index 8adb626d420..5f80f90a91d 100644 --- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp @@ -66,7 +66,7 @@ void cmod_propagation_test::SetUp() v = new cmod_propagation_fs_visitor(compiler, prog_data, shader); - _mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0); + _mesa_init_gl_program(&fp->program.Base, GL_FRAGMENT_SHADER, 0); devinfo->gen = 4; } diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp index f77b18e7db8..32e8b8f8867 100644 --- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp @@ -66,7 +66,7 @@ void saturate_propagation_test::SetUp() v = new saturate_propagation_fs_visitor(compiler, prog_data, shader); - _mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0); + _mesa_init_gl_program(&fp->program.Base, GL_FRAGMENT_SHADER, 0); devinfo->gen = 4; } diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp index 40253961a65..e80b71b558d 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp @@ -98,7 +98,7 @@ void copy_propagation_test::SetUp() v = new copy_propagation_vec4_visitor(compiler, shader); - _mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0); + _mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0); devinfo->gen = 4; } diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp index 
76028d36311..2f824617454 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp @@ -101,7 +101,7 @@ void register_coalesce_test::SetUp() v = new register_coalesce_vec4_visitor(compiler, shader); - _mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0); + _mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0); devinfo->gen = 4; } diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c index d43eaf977fc..628c5708090 100644 --- a/src/mesa/drivers/dri/r200/r200_vertprog.c +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -1200,18 +1200,19 @@ r200BindProgram(struct gl_context *ctx, GLenum target, struct gl_program *prog) static struct gl_program * r200NewProgram(struct gl_context *ctx, GLenum target, GLuint id) { - struct r200_vertex_program *vp; - switch(target){ - case GL_VERTEX_PROGRAM_ARB: - vp = CALLOC_STRUCT(r200_vertex_program); - return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id); - case GL_FRAGMENT_PROGRAM_ARB: - return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id ); + case GL_VERTEX_PROGRAM_ARB: { + struct r200_vertex_program *vp = CALLOC_STRUCT(r200_vertex_program); + return _mesa_init_gl_program(&vp->mesa_program.Base, target, id); + } + case GL_FRAGMENT_PROGRAM_ARB: { + struct gl_fragment_program *prog = CALLOC_STRUCT(gl_fragment_program); + return _mesa_init_gl_program(&prog->Base, target, id); + } default: _mesa_problem(ctx, "Bad target in r200NewProgram"); + return NULL; } - return NULL; } diff --git a/src/mesa/drivers/x11/SConscript b/src/mesa/drivers/x11/SConscript index d29f9874f44..cd5cccda0d1 100644 --- a/src/mesa/drivers/x11/SConscript +++ b/src/mesa/drivers/x11/SConscript @@ -4,6 +4,8 @@ env = env.Clone() env.Append(CPPPATH = [ '#/src', + '#/src/glsl', + '#/src/glsl/nir', '#/src/mapi', '#/src/mesa', '#/src/mesa/main', diff --git a/src/mesa/main/blend.c 
b/src/mesa/main/blend.c index dee5e29d5b8..20aa4980935 100644 --- a/src/mesa/main/blend.c +++ b/src/mesa/main/blend.c @@ -190,6 +190,19 @@ update_uses_dual_src(struct gl_context *ctx, int buf) blend_factor_is_dual_src(ctx->Color.Blend[buf].DstA)); } + +/** + * Return the number of per-buffer blend states to update in + * glBlendFunc, glBlendFuncSeparate, glBlendEquation, etc. + */ +static inline unsigned +num_buffers(const struct gl_context *ctx) +{ + return ctx->Extensions.ARB_draw_buffers_blend + ? ctx->Const.MaxDrawBuffers : 1; +} + + /** * Set the separate blend source/dest factors for all draw buffers. * @@ -202,9 +215,10 @@ void GLAPIENTRY _mesa_BlendFuncSeparate( GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorA, GLenum dfactorA ) { - GLuint buf, numBuffers; - GLboolean changed; GET_CURRENT_CONTEXT(ctx); + const unsigned numBuffers = num_buffers(ctx); + unsigned buf; + bool changed = false; if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glBlendFuncSeparate %s %s %s %s\n", @@ -213,28 +227,38 @@ _mesa_BlendFuncSeparate( GLenum sfactorRGB, GLenum dfactorRGB, _mesa_enum_to_string(sfactorA), _mesa_enum_to_string(dfactorA)); - if (!validate_blend_factors(ctx, "glBlendFuncSeparate", - sfactorRGB, dfactorRGB, - sfactorA, dfactorA)) { - return; + /* Check if we're really changing any state. If not, return early. */ + if (ctx->Color._BlendFuncPerBuffer) { + /* Check all per-buffer states */ + for (buf = 0; buf < numBuffers; buf++) { + if (ctx->Color.Blend[buf].SrcRGB != sfactorRGB || + ctx->Color.Blend[buf].DstRGB != dfactorRGB || + ctx->Color.Blend[buf].SrcA != sfactorA || + ctx->Color.Blend[buf].DstA != dfactorA) { + changed = true; + break; + } + } } - - numBuffers = ctx->Extensions.ARB_draw_buffers_blend - ? 
ctx->Const.MaxDrawBuffers : 1; - - changed = GL_FALSE; - for (buf = 0; buf < numBuffers; buf++) { - if (ctx->Color.Blend[buf].SrcRGB != sfactorRGB || - ctx->Color.Blend[buf].DstRGB != dfactorRGB || - ctx->Color.Blend[buf].SrcA != sfactorA || - ctx->Color.Blend[buf].DstA != dfactorA) { - changed = GL_TRUE; - break; + else { + /* only need to check 0th per-buffer state */ + if (ctx->Color.Blend[0].SrcRGB != sfactorRGB || + ctx->Color.Blend[0].DstRGB != dfactorRGB || + ctx->Color.Blend[0].SrcA != sfactorA || + ctx->Color.Blend[0].DstA != dfactorA) { + changed = true; } } + if (!changed) return; + if (!validate_blend_factors(ctx, "glBlendFuncSeparate", + sfactorRGB, dfactorRGB, + sfactorA, dfactorA)) { + return; + } + FLUSH_VERTICES(ctx, _NEW_COLOR); for (buf = 0; buf < numBuffers; buf++) { @@ -242,8 +266,13 @@ _mesa_BlendFuncSeparate( GLenum sfactorRGB, GLenum dfactorRGB, ctx->Color.Blend[buf].DstRGB = dfactorRGB; ctx->Color.Blend[buf].SrcA = sfactorA; ctx->Color.Blend[buf].DstA = dfactorA; - update_uses_dual_src(ctx, buf); } + + update_uses_dual_src(ctx, 0); + for (buf = 1; buf < numBuffers; buf++) { + ctx->Color.Blend[buf]._UsesDualSrc = ctx->Color.Blend[0]._UsesDualSrc; + } + ctx->Color._BlendFuncPerBuffer = GL_FALSE; if (ctx->Driver.BlendFuncSeparate) { @@ -283,18 +312,18 @@ _mesa_BlendFuncSeparateiARB(GLuint buf, GLenum sfactorRGB, GLenum dfactorRGB, return; } - if (!validate_blend_factors(ctx, "glBlendFuncSeparatei", - sfactorRGB, dfactorRGB, - sfactorA, dfactorA)) { - return; - } - if (ctx->Color.Blend[buf].SrcRGB == sfactorRGB && ctx->Color.Blend[buf].DstRGB == dfactorRGB && ctx->Color.Blend[buf].SrcA == sfactorA && ctx->Color.Blend[buf].DstA == dfactorA) return; /* no change */ + if (!validate_blend_factors(ctx, "glBlendFuncSeparatei", + sfactorRGB, dfactorRGB, + sfactorA, dfactorA)) { + return; + } + FLUSH_VERTICES(ctx, _NEW_COLOR); ctx->Color.Blend[buf].SrcRGB = sfactorRGB; @@ -331,34 +360,43 @@ legal_blend_equation(const struct gl_context *ctx, GLenum 
mode) void GLAPIENTRY _mesa_BlendEquation( GLenum mode ) { - GLuint buf, numBuffers; - GLboolean changed; GET_CURRENT_CONTEXT(ctx); + const unsigned numBuffers = num_buffers(ctx); + unsigned buf; + bool changed = false; if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glBlendEquation(%s)\n", _mesa_enum_to_string(mode)); - if (!legal_blend_equation(ctx, mode)) { - _mesa_error(ctx, GL_INVALID_ENUM, "glBlendEquation"); - return; + if (ctx->Color._BlendEquationPerBuffer) { + /* Check all per-buffer states */ + for (buf = 0; buf < numBuffers; buf++) { + if (ctx->Color.Blend[buf].EquationRGB != mode || + ctx->Color.Blend[buf].EquationA != mode) { + changed = true; + break; + } + } } - - numBuffers = ctx->Extensions.ARB_draw_buffers_blend - ? ctx->Const.MaxDrawBuffers : 1; - - changed = GL_FALSE; - for (buf = 0; buf < numBuffers; buf++) { - if (ctx->Color.Blend[buf].EquationRGB != mode || - ctx->Color.Blend[buf].EquationA != mode) { - changed = GL_TRUE; - break; + else { + /* only need to check 0th per-buffer state */ + if (ctx->Color.Blend[0].EquationRGB != mode || + ctx->Color.Blend[0].EquationA != mode) { + changed = true; } } + if (!changed) return; + if (!legal_blend_equation(ctx, mode)) { + _mesa_error(ctx, GL_INVALID_ENUM, "glBlendEquation"); + return; + } + FLUSH_VERTICES(ctx, _NEW_COLOR); + for (buf = 0; buf < numBuffers; buf++) { ctx->Color.Blend[buf].EquationRGB = mode; ctx->Color.Blend[buf].EquationA = mode; @@ -383,7 +421,7 @@ _mesa_BlendEquationiARB(GLuint buf, GLenum mode) buf, _mesa_enum_to_string(mode)); if (buf >= ctx->Const.MaxDrawBuffers) { - _mesa_error(ctx, GL_INVALID_VALUE, "glBlendFuncSeparatei(buffer=%u)", + _mesa_error(ctx, GL_INVALID_VALUE, "glBlendEquationi(buffer=%u)", buf); return; } @@ -407,15 +445,37 @@ _mesa_BlendEquationiARB(GLuint buf, GLenum mode) void GLAPIENTRY _mesa_BlendEquationSeparate( GLenum modeRGB, GLenum modeA ) { - GLuint buf, numBuffers; - GLboolean changed; GET_CURRENT_CONTEXT(ctx); + const unsigned numBuffers = 
num_buffers(ctx); + unsigned buf; + bool changed = false; if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glBlendEquationSeparateEXT(%s %s)\n", _mesa_enum_to_string(modeRGB), _mesa_enum_to_string(modeA)); + if (ctx->Color._BlendEquationPerBuffer) { + /* Check all per-buffer states */ + for (buf = 0; buf < numBuffers; buf++) { + if (ctx->Color.Blend[buf].EquationRGB != modeRGB || + ctx->Color.Blend[buf].EquationA != modeA) { + changed = true; + break; + } + } + } + else { + /* only need to check 0th per-buffer state */ + if (ctx->Color.Blend[0].EquationRGB != modeRGB || + ctx->Color.Blend[0].EquationA != modeA) { + changed = true; + } + } + + if (!changed) + return; + if ( (modeRGB != modeA) && !ctx->Extensions.EXT_blend_equation_separate ) { _mesa_error(ctx, GL_INVALID_OPERATION, "glBlendEquationSeparateEXT not supported by driver"); @@ -432,21 +492,8 @@ _mesa_BlendEquationSeparate( GLenum modeRGB, GLenum modeA ) return; } - numBuffers = ctx->Extensions.ARB_draw_buffers_blend - ? ctx->Const.MaxDrawBuffers : 1; - - changed = GL_FALSE; - for (buf = 0; buf < numBuffers; buf++) { - if (ctx->Color.Blend[buf].EquationRGB != modeRGB || - ctx->Color.Blend[buf].EquationA != modeA) { - changed = GL_TRUE; - break; - } - } - if (!changed) - return; - FLUSH_VERTICES(ctx, _NEW_COLOR); + for (buf = 0; buf < numBuffers; buf++) { ctx->Color.Blend[buf].EquationRGB = modeRGB; ctx->Color.Blend[buf].EquationA = modeA; diff --git a/src/mesa/main/es1_conversion.c b/src/mesa/main/es1_conversion.c index b254a6ef1c7..1dfe8278e71 100644 --- a/src/mesa/main/es1_conversion.c +++ b/src/mesa/main/es1_conversion.c @@ -1,3 +1,4 @@ + #include <stdbool.h> #include "api_loopback.h" @@ -326,7 +327,24 @@ _mesa_GetTexEnvxv(GLenum target, GLenum pname, GLfixed *params) } break; case GL_TEXTURE_ENV: - if (pname != GL_TEXTURE_ENV_COLOR && pname != GL_RGB_SCALE && pname != GL_ALPHA_SCALE && pname != GL_TEXTURE_ENV_MODE && pname != GL_COMBINE_RGB && pname != GL_COMBINE_ALPHA && pname != GL_SRC0_RGB && 
pname != GL_SRC1_RGB && pname != GL_SRC2_RGB && pname != GL_SRC0_ALPHA && pname != GL_SRC1_ALPHA && pname != GL_SRC2_ALPHA && pname != GL_OPERAND0_RGB && pname != GL_OPERAND1_RGB && pname != GL_OPERAND2_RGB && pname != GL_OPERAND0_ALPHA && pname != GL_OPERAND1_ALPHA && pname != GL_OPERAND2_ALPHA) { + if (pname != GL_TEXTURE_ENV_COLOR && + pname != GL_RGB_SCALE && + pname != GL_ALPHA_SCALE && + pname != GL_TEXTURE_ENV_MODE && + pname != GL_COMBINE_RGB && + pname != GL_COMBINE_ALPHA && + pname != GL_SRC0_RGB && + pname != GL_SRC1_RGB && + pname != GL_SRC2_RGB && + pname != GL_SRC0_ALPHA && + pname != GL_SRC1_ALPHA && + pname != GL_SRC2_ALPHA && + pname != GL_OPERAND0_RGB && + pname != GL_OPERAND1_RGB && + pname != GL_OPERAND2_RGB && + pname != GL_OPERAND0_ALPHA && + pname != GL_OPERAND1_ALPHA && + pname != GL_OPERAND2_ALPHA) { _mesa_error(_mesa_get_current_context(), GL_INVALID_ENUM, "glGetTexEnvxv(target=0x%x)", target); return; diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp index e4e2a18c1da..e63d0f1ec55 100644 --- a/src/mesa/main/ff_fragment_shader.cpp +++ b/src/mesa/main/ff_fragment_shader.cpp @@ -40,7 +40,7 @@ #include "glsl/ir_optimization.h" #include "glsl/glsl_parser_extras.h" #include "glsl/glsl_symbol_table.h" -#include "glsl/glsl_types.h" +#include "glsl/nir/glsl_types.h" #include "program/ir_to_mesa.h" #include "program/program.h" #include "program/programopt.h" @@ -975,13 +975,11 @@ static void load_texture( texenv_fragment_program *p, GLuint unit ) ir_var_uniform); p->top_instructions->push_head(sampler); - /* Set the texture unit for this sampler. The linker will pick this value - * up and do-the-right-thing. - * - * NOTE: The cast to int is important. Without it, the constant will have - * type uint, and things later on may get confused. + /* Set the texture unit for this sampler in the same way that + * layout(binding=X) would. 
*/ - sampler->constant_value = new(p->mem_ctx) ir_constant(int(unit)); + sampler->data.explicit_binding = true; + sampler->data.binding = unit; deref = new(p->mem_ctx) ir_dereference_variable(sampler); tex->set_sampler(deref, glsl_type::vec4_type); diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c index a6183b47e2e..34cc9218add 100644 --- a/src/mesa/main/ffvertex_prog.c +++ b/src/mesa/main/ffvertex_prog.c @@ -1690,11 +1690,10 @@ _mesa_get_fixed_func_vertex_program(struct gl_context *ctx) ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS, ctx->Const.Program[MESA_SHADER_VERTEX].MaxTemps ); -#if 0 if (ctx->Driver.ProgramStringNotify) ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB, &prog->Base ); -#endif + _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache, &key, sizeof(key), &prog->Base); } diff --git a/src/mesa/main/format_utils.h b/src/mesa/main/format_utils.h index 618f43d0aaa..378997b38b2 100644 --- a/src/mesa/main/format_utils.h +++ b/src/mesa/main/format_utils.h @@ -34,6 +34,7 @@ #include "imports.h" #include "macros.h" #include "util/rounding.h" +#include "util/half_float.h" extern const mesa_array_format RGBA32_FLOAT; extern const mesa_array_format RGBA8_UBYTE; diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index 350e6752c8b..230ebbc67f4 100644 --- a/src/mesa/main/imports.c +++ b/src/mesa/main/imports.c @@ -307,154 +307,6 @@ _mesa_bitcount_64(uint64_t n) } #endif - -/** - * Convert a 4-byte float to a 2-byte half float. - * - * Not all float32 values can be represented exactly as a float16 value. We - * round such intermediate float32 values to the nearest float16. When the - * float32 lies exactly between to float16 values, we round to the one with - * an even mantissa. - * - * This rounding behavior has several benefits: - * - It has no sign bias. - * - * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's - * GPU ISA. 
- * - * - By reproducing the behavior of the GPU (at least on Intel hardware), - * compile-time evaluation of constant packHalf2x16 GLSL expressions will - * result in the same value as if the expression were executed on the GPU. - */ -GLhalfARB -_mesa_float_to_half(float val) -{ - const fi_type fi = {val}; - const int flt_m = fi.i & 0x7fffff; - const int flt_e = (fi.i >> 23) & 0xff; - const int flt_s = (fi.i >> 31) & 0x1; - int s, e, m = 0; - GLhalfARB result; - - /* sign bit */ - s = flt_s; - - /* handle special cases */ - if ((flt_e == 0) && (flt_m == 0)) { - /* zero */ - /* m = 0; - already set */ - e = 0; - } - else if ((flt_e == 0) && (flt_m != 0)) { - /* denorm -- denorm float maps to 0 half */ - /* m = 0; - already set */ - e = 0; - } - else if ((flt_e == 0xff) && (flt_m == 0)) { - /* infinity */ - /* m = 0; - already set */ - e = 31; - } - else if ((flt_e == 0xff) && (flt_m != 0)) { - /* NaN */ - m = 1; - e = 31; - } - else { - /* regular number */ - const int new_exp = flt_e - 127; - if (new_exp < -14) { - /* The float32 lies in the range (0.0, min_normal16) and is rounded - * to a nearby float16 value. The result will be either zero, subnormal, - * or normal. - */ - e = 0; - m = _mesa_lroundevenf((1 << 24) * fabsf(fi.f)); - } - else if (new_exp > 15) { - /* map this value to infinity */ - /* m = 0; - already set */ - e = 31; - } - else { - /* The float32 lies in the range - * [min_normal16, max_normal16 + max_step16) - * and is rounded to a nearby float16 value. The result will be - * either normal or infinite. - */ - e = new_exp + 15; - m = _mesa_lroundevenf(flt_m / (float) (1 << 13)); - } - } - - assert(0 <= m && m <= 1024); - if (m == 1024) { - /* The float32 was rounded upwards into the range of the next exponent, - * so bump the exponent. This correctly handles the case where f32 - * should be rounded up to float16 infinity. 
- */ - ++e; - m = 0; - } - - result = (s << 15) | (e << 10) | m; - return result; -} - - -/** - * Convert a 2-byte half float to a 4-byte float. - * Based on code from: - * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html - */ -float -_mesa_half_to_float(GLhalfARB val) -{ - /* XXX could also use a 64K-entry lookup table */ - const int m = val & 0x3ff; - const int e = (val >> 10) & 0x1f; - const int s = (val >> 15) & 0x1; - int flt_m, flt_e, flt_s; - fi_type fi; - float result; - - /* sign bit */ - flt_s = s; - - /* handle special cases */ - if ((e == 0) && (m == 0)) { - /* zero */ - flt_m = 0; - flt_e = 0; - } - else if ((e == 0) && (m != 0)) { - /* denorm -- denorm half will fit in non-denorm single */ - const float half_denorm = 1.0f / 16384.0f; /* 2^-14 */ - float mantissa = ((float) (m)) / 1024.0f; - float sign = s ? -1.0f : 1.0f; - return sign * mantissa * half_denorm; - } - else if ((e == 31) && (m == 0)) { - /* infinity */ - flt_e = 0xff; - flt_m = 0; - } - else if ((e == 31) && (m != 0)) { - /* NaN */ - flt_e = 0xff; - flt_m = 1; - } - else { - /* regular */ - flt_e = e + 112; - flt_m = m << 13; - } - - fi.i = (flt_s << 31) | (flt_e << 23) | flt_m; - result = fi.f; - return result; -} - /*@}*/ diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h index 90247587be3..042147fd8bb 100644 --- a/src/mesa/main/imports.h +++ b/src/mesa/main/imports.h @@ -396,13 +396,6 @@ _mesa_flsll(uint64_t n) #endif } - -extern GLhalfARB -_mesa_float_to_half(float f); - -extern float -_mesa_half_to_float(GLhalfARB h); - static inline bool _mesa_half_is_negative(GLhalfARB h) { diff --git a/src/mesa/main/matrix.c b/src/mesa/main/matrix.c index 2b8016a4a72..5ff5ac5bfe1 100644 --- a/src/mesa/main/matrix.c +++ b/src/mesa/main/matrix.c @@ -151,7 +151,6 @@ _mesa_MatrixMode( GLenum mode ) if (ctx->Transform.MatrixMode == mode && mode != GL_TEXTURE) return; - FLUSH_VERTICES(ctx, _NEW_TRANSFORM); switch (mode) { case GL_MODELVIEW: diff --git 
a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c index ab16c2854a8..50469956c6e 100644 --- a/src/mesa/main/mipmap.c +++ b/src/mesa/main/mipmap.c @@ -37,6 +37,7 @@ #include "texstore.h" #include "image.h" #include "macros.h" +#include "util/half_float.h" #include "../../gallium/auxiliary/util/u_format_rgb9e5.h" #include "../../gallium/auxiliary/util/u_format_r11g11b10f.h" diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index cbfb15522f0..e57b98a412d 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -42,7 +42,7 @@ #include "main/config.h" #include "glapi/glapi.h" #include "math/m_matrix.h" /* GLmatrix */ -#include "glsl/shader_enums.h" +#include "glsl/nir/shader_enums.h" #include "main/formats.h" /* MESA_FORMAT_COUNT */ @@ -94,11 +94,6 @@ struct vbo_context; #define PRIM_OUTSIDE_BEGIN_END (PRIM_MAX + 1) #define PRIM_UNKNOWN (PRIM_MAX + 2) -#define VARYING_SLOT_MAX (VARYING_SLOT_VAR0 + MAX_VARYING) -#define VARYING_SLOT_PATCH0 (VARYING_SLOT_MAX) -#define VARYING_SLOT_TESS_MAX (VARYING_SLOT_PATCH0 + MAX_VARYING) -#define FRAG_RESULT_MAX (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS) - /** * Determine if the given gl_varying_slot appears in the fragment shader. */ @@ -487,26 +482,24 @@ struct gl_colorbuffer_attrib struct gl_current_attrib { /** - * \name Current vertex attributes. + * \name Current vertex attributes (color, texcoords, etc). * \note Values are valid only after FLUSH_VERTICES has been called. * \note Index and Edgeflag current values are stored as floats in the * SIX and SEVEN attribute slots. + * \note We need double storage for 64-bit vertex attributes */ - /* we need double storage for this for vertex attrib 64bit */ - GLfloat Attrib[VERT_ATTRIB_MAX][4*2]; /**< Position, color, texcoords, etc */ + GLfloat Attrib[VERT_ATTRIB_MAX][4*2]; /** - * \name Current raster position attributes (always valid). - * \note This set of attributes is very similar to the SWvertex struct. 
+ * \name Current raster position attributes (always up to date after a + * glRasterPos call). */ - /*@{*/ GLfloat RasterPos[4]; GLfloat RasterDistance; GLfloat RasterColor[4]; GLfloat RasterSecondaryColor[4]; GLfloat RasterTexCoords[MAX_TEXTURE_COORD_UNITS][4]; GLboolean RasterPosValid; - /*@}*/ }; @@ -1866,24 +1859,6 @@ typedef enum /** - * \brief Layout qualifiers for gl_FragDepth. - * - * Extension AMD_conservative_depth allows gl_FragDepth to be redeclared with - * a layout qualifier. - * - * \see enum ir_depth_layout - */ -enum gl_frag_depth_layout -{ - FRAG_DEPTH_LAYOUT_NONE, /**< No layout is specified. */ - FRAG_DEPTH_LAYOUT_ANY, - FRAG_DEPTH_LAYOUT_GREATER, - FRAG_DEPTH_LAYOUT_LESS, - FRAG_DEPTH_LAYOUT_UNCHANGED -}; - - -/** * Base class for any kind of program object */ struct gl_program @@ -2286,12 +2261,34 @@ struct gl_shader unsigned num_combined_uniform_components; /** - * This shader's uniform block information. + * This shader's uniform/ssbo block information. * * These fields are only set post-linking. + * + * BufferInterfaceBlocks is a list containing both UBOs and SSBOs. This is + * useful during the linking process so that we don't have to handle SSBOs + * specifically. + * + * UniformBlocks is a list of UBOs. This is useful for backends that need + * or prefer to see separate index spaces for UBOS and SSBOs like the GL + * API specifies. + * + * ShaderStorageBlocks is a list of SSBOs. This is useful for backends that + * need or prefer to see separate index spaces for UBOS and SSBOs like the + * GL API specifies. + * + * UniformBlocks and ShaderStorageBlocks only have pointers into + * BufferInterfaceBlocks so the actual resource information is not + * duplicated. 
*/ + unsigned NumBufferInterfaceBlocks; + struct gl_uniform_block *BufferInterfaceBlocks; + unsigned NumUniformBlocks; - struct gl_uniform_block *UniformBlocks; + struct gl_uniform_block **UniformBlocks; + + unsigned NumShaderStorageBlocks; + struct gl_uniform_block **ShaderStorageBlocks; struct exec_list *ir; struct exec_list *packed_varyings; @@ -2694,8 +2691,33 @@ struct gl_shader_program */ unsigned LastClipDistanceArraySize; + /** + * This shader's uniform/ssbo block information. + * + * BufferInterfaceBlocks is a list containing both UBOs and SSBOs. This is + * useful during the linking process so that we don't have to handle SSBOs + * specifically. + * + * UniformBlocks is a list of UBOs. This is useful for backends that need + * or prefer to see separate index spaces for UBOS and SSBOs like the GL + * API specifies. + * + * ShaderStorageBlocks is a list of SSBOs. This is useful for backends that + * need or prefer to see separate index spaces for UBOS and SSBOs like the + * GL API specifies. + * + * UniformBlocks and ShaderStorageBlocks only have pointers into + * BufferInterfaceBlocks so the actual resource information is not + * duplicated and are only set after linking. + */ unsigned NumBufferInterfaceBlocks; - struct gl_uniform_block *UniformBlocks; + struct gl_uniform_block *BufferInterfaceBlocks; + + unsigned NumUniformBlocks; + struct gl_uniform_block **UniformBlocks; + + unsigned NumShaderStorageBlocks; + struct gl_uniform_block **ShaderStorageBlocks; /** * Indices into the _LinkedShaders's UniformBlocks[] array for each stage @@ -4076,13 +4098,6 @@ struct gl_image_unit GLboolean Layered; /** - * GL_TRUE if the state of this image unit is valid and access from - * the shader is allowed. Otherwise loads from this unit should - * return zero and stores should have no effect. - */ - GLboolean _Valid; - - /** * Layer of the texture object bound to this unit as specified by the * application. 
*/ diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index 00e31b05c99..89faf515443 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -1073,6 +1073,21 @@ _mesa_pack_depth_span( struct gl_context *ctx, GLuint n, GLvoid *dest, } } break; + case GL_UNSIGNED_INT_24_8: + { + const GLdouble scale = (GLdouble) 0xffffff; + GLuint *dst = (GLuint *) dest; + GLuint i; + for (i = 0; i < n; i++) { + GLuint z = (GLuint) (depthSpan[i] * scale); + assert(z <= 0xffffff); + dst[i] = (z << 8); + } + if (dstPacking->SwapBytes) { + _mesa_swap4( (GLuint *) dst, n ); + } + break; + } case GL_UNSIGNED_INT: { GLuint *dst = (GLuint *) dest; diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index 6d73e3bdcf2..8182d3dcc04 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -485,8 +485,14 @@ _mesa_program_resource_array_size(struct gl_program_resource *res) case GL_COMPUTE_SUBROUTINE_UNIFORM: case GL_TESS_CONTROL_SUBROUTINE_UNIFORM: case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: - case GL_BUFFER_VARIABLE: return RESOURCE_UNI(res)->array_elements; + case GL_BUFFER_VARIABLE: + /* Unsized arrays */ + if (RESOURCE_UNI(res)->array_stride > 0 && + RESOURCE_UNI(res)->array_elements == 0) + return 1; + else + return RESOURCE_UNI(res)->array_elements; case GL_VERTEX_SUBROUTINE: case GL_GEOMETRY_SUBROUTINE: case GL_FRAGMENT_SUBROUTINE: @@ -833,193 +839,6 @@ program_resource_location(struct gl_shader_program *shProg, } } -static char* -get_top_level_name(const char *name) -{ - const char *first_dot = strchr(name, '.'); - const char *first_square_bracket = strchr(name, '['); - int name_size = 0; - /* From ARB_program_interface_query spec: - * - * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer identifying the - * number of active array elements of the top-level shader storage block - * member containing to the active variable is written to <params>. 
If the - * top-level block member is not declared as an array, the value one is - * written to <params>. If the top-level block member is an array with no - * declared size, the value zero is written to <params>. - */ - - /* The buffer variable is on top level.*/ - if (!first_square_bracket && !first_dot) - name_size = strlen(name); - else if ((!first_square_bracket || - (first_dot && first_dot < first_square_bracket))) - name_size = first_dot - name; - else - name_size = first_square_bracket - name; - - return strndup(name, name_size); -} - -static char* -get_var_name(const char *name) -{ - const char *first_dot = strchr(name, '.'); - - if (!first_dot) - return strdup(name); - - return strndup(first_dot+1, strlen(first_dot) - 1); -} - -static GLint -program_resource_top_level_array_size(struct gl_shader_program *shProg, - struct gl_program_resource *res, - const char *name) -{ - int block_index = RESOURCE_UNI(res)->block_index; - int array_size = -1; - char *var_name = get_top_level_name(name); - char *interface_name = - get_top_level_name(shProg->UniformBlocks[block_index].Name); - - if (strcmp(var_name, interface_name) == 0) { - /* Deal with instanced array of SSBOs */ - char *temp_name = get_var_name(name); - free(var_name); - var_name = get_top_level_name(temp_name); - free(temp_name); - } - - for (unsigned i = 0; i < shProg->NumShaders; i++) { - if (shProg->Shaders[i] == NULL) - continue; - - const gl_shader *stage = shProg->Shaders[i]; - foreach_in_list(ir_instruction, node, stage->ir) { - ir_variable *var = node->as_variable(); - if (!var || !var->get_interface_type() || - var->data.mode != ir_var_shader_storage) - continue; - - const glsl_type *interface = var->get_interface_type(); - - if (strcmp(interface_name, interface->name) != 0) - continue; - - for (unsigned i = 0; i < interface->length; i++) { - const glsl_struct_field *field = &interface->fields.structure[i]; - if (strcmp(field->name, var_name) != 0) - continue; - /* From 
GL_ARB_program_interface_query spec: - * - * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer - * identifying the number of active array elements of the top-level - * shader storage block member containing to the active variable is - * written to <params>. If the top-level block member is not - * declared as an array, the value one is written to <params>. If - * the top-level block member is an array with no declared size, - * the value zero is written to <params>. - */ - if (field->type->is_unsized_array()) - array_size = 0; - else if (field->type->is_array()) - array_size = field->type->length; - else - array_size = 1; - goto found_top_level_array_size; - } - } - } -found_top_level_array_size: - free(interface_name); - free(var_name); - return array_size; -} - -static GLint -program_resource_top_level_array_stride(struct gl_shader_program *shProg, - struct gl_program_resource *res, - const char *name) -{ - int block_index = RESOURCE_UNI(res)->block_index; - int array_stride = -1; - char *var_name = get_top_level_name(name); - char *interface_name = - get_top_level_name(shProg->UniformBlocks[block_index].Name); - - if (strcmp(var_name, interface_name) == 0) { - /* Deal with instanced array of SSBOs */ - char *temp_name = get_var_name(name); - free(var_name); - var_name = get_top_level_name(temp_name); - free(temp_name); - } - - for (unsigned i = 0; i < shProg->NumShaders; i++) { - if (shProg->Shaders[i] == NULL) - continue; - - const gl_shader *stage = shProg->Shaders[i]; - foreach_in_list(ir_instruction, node, stage->ir) { - ir_variable *var = node->as_variable(); - if (!var || !var->get_interface_type() || - var->data.mode != ir_var_shader_storage) - continue; - - const glsl_type *interface = var->get_interface_type(); - - if (strcmp(interface_name, interface->name) != 0) { - continue; - } - - for (unsigned i = 0; i < interface->length; i++) { - const glsl_struct_field *field = &interface->fields.structure[i]; - if (strcmp(field->name, var_name) != 0) - 
continue; - /* From GL_ARB_program_interface_query: - * - * "For the property TOP_LEVEL_ARRAY_STRIDE, a single integer - * identifying the stride between array elements of the top-level - * shader storage block member containing the active variable is - * written to <params>. For top-level block members declared as - * arrays, the value written is the difference, in basic machine - * units, between the offsets of the active variable for - * consecutive elements in the top-level array. For top-level - * block members not declared as an array, zero is written to - * <params>." - */ - if (field->type->is_array()) { - const enum glsl_matrix_layout matrix_layout = - glsl_matrix_layout(field->matrix_layout); - bool row_major = matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR; - const glsl_type *array_type = field->type->fields.array; - - if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) { - if (array_type->is_record() || array_type->is_array()) { - array_stride = array_type->std140_size(row_major); - array_stride = glsl_align(array_stride, 16); - } else { - unsigned element_base_align = 0; - element_base_align = array_type->std140_base_alignment(row_major); - array_stride = MAX2(element_base_align, 16); - } - } else { - array_stride = array_type->std430_array_stride(row_major); - } - } else { - array_stride = 0; - } - goto found_top_level_array_size; - } - } - } -found_top_level_array_size: - free(interface_name); - free(var_name); - return array_stride; -} - /** * Function implements following location queries: * glGetUniformLocation @@ -1133,7 +952,8 @@ get_buffer_property(struct gl_shader_program *shProg, (*val)++; } return 1; - case GL_ACTIVE_VARIABLES: + case GL_ACTIVE_VARIABLES: { + unsigned num_values = 0; for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) { const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName; struct gl_program_resource *uni = @@ -1143,8 +963,10 @@ get_buffer_property(struct gl_shader_program *shProg, continue; 
*val++ = _mesa_program_resource_index(shProg, uni); + num_values++; } - return RESOURCE_UBO(res)->NumUniforms; + return num_values; + } } } else if (res->Type == GL_SHADER_STORAGE_BLOCK) { switch (prop) { @@ -1166,7 +988,8 @@ get_buffer_property(struct gl_shader_program *shProg, (*val)++; } return 1; - case GL_ACTIVE_VARIABLES: + case GL_ACTIVE_VARIABLES: { + unsigned num_values = 0; for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) { const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName; struct gl_program_resource *uni = @@ -1176,8 +999,10 @@ get_buffer_property(struct gl_shader_program *shProg, continue; *val++ = _mesa_program_resource_index(shProg, uni); + num_values++; } - return RESOURCE_UBO(res)->NumUniforms; + return num_values; + } } } else if (res->Type == GL_ATOMIC_COUNTER_BUFFER) { switch (prop) { @@ -1251,8 +1076,15 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg, switch (res->Type) { case GL_UNIFORM: case GL_BUFFER_VARIABLE: + /* Test if a buffer variable is an array or an unsized array. + * Unsized arrays return zero as array size. 
+ */ + if (RESOURCE_UNI(res)->is_shader_storage && + RESOURCE_UNI(res)->array_stride > 0) + *val = RESOURCE_UNI(res)->array_elements; + else *val = MAX2(RESOURCE_UNI(res)->array_elements, 1); - return 1; + return 1; case GL_PROGRAM_INPUT: case GL_PROGRAM_OUTPUT: *val = MAX2(_mesa_program_resource_array_size(res), 1); @@ -1374,14 +1206,12 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg, case GL_TOP_LEVEL_ARRAY_SIZE: VALIDATE_TYPE(GL_BUFFER_VARIABLE); - *val = program_resource_top_level_array_size(shProg, res, - _mesa_program_resource_name(res)); + *val = RESOURCE_UNI(res)->top_level_array_size; return 1; case GL_TOP_LEVEL_ARRAY_STRIDE: VALIDATE_TYPE(GL_BUFFER_VARIABLE); - *val = program_resource_top_level_array_stride(shProg, res, - _mesa_program_resource_name(res)); + *val = RESOURCE_UNI(res)->top_level_array_stride; return 1; /* GL_ARB_tessellation_shader */ diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 9dd1054c8ee..18e463d4ccc 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -713,10 +713,10 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, if (!has_ubo) break; - for (i = 0; i < shProg->NumBufferInterfaceBlocks; i++) { + for (i = 0; i < shProg->NumUniformBlocks; i++) { /* Add one for the terminating NUL character. */ - const GLint len = strlen(shProg->UniformBlocks[i].Name) + 1; + const GLint len = strlen(shProg->UniformBlocks[i]->Name) + 1; if (len > max_len) max_len = len; @@ -729,11 +729,7 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, if (!has_ubo) break; - *params = 0; - for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) { - if (!shProg->UniformBlocks[i].IsShaderStorage) - (*params)++; - } + *params = shProg->NumUniformBlocks; return; case GL_PROGRAM_BINARY_RETRIEVABLE_HINT: /* This enum isn't part of the OES extension for OpenGL ES 2.0. 
It is diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c index bd4b7c7be3b..c4ebf4201fb 100644 --- a/src/mesa/main/shaderimage.c +++ b/src/mesa/main/shaderimage.c @@ -415,8 +415,8 @@ _mesa_init_image_units(struct gl_context *ctx) ctx->ImageUnits[i] = _mesa_default_image_unit(ctx); } -static GLboolean -validate_image_unit(struct gl_context *ctx, struct gl_image_unit *u) +GLboolean +_mesa_is_image_unit_valid(struct gl_context *ctx, struct gl_image_unit *u) { struct gl_texture_object *t = u->TexObj; mesa_format tex_format; @@ -424,7 +424,8 @@ validate_image_unit(struct gl_context *ctx, struct gl_image_unit *u) if (!t) return GL_FALSE; - _mesa_test_texobj_completeness(ctx, t); + if (!t->_BaseComplete && !t->_MipmapComplete) + _mesa_test_texobj_completeness(ctx, t); if (u->Level < t->BaseLevel || u->Level > t->_MaxLevel || @@ -473,17 +474,6 @@ validate_image_unit(struct gl_context *ctx, struct gl_image_unit *u) return GL_TRUE; } -void -_mesa_validate_image_units(struct gl_context *ctx) -{ - unsigned i; - - for (i = 0; i < ctx->Const.MaxImageUnits; ++i) { - struct gl_image_unit *u = &ctx->ImageUnits[i]; - u->_Valid = validate_image_unit(ctx, u); - } -} - static GLboolean validate_bind_image_texture(struct gl_context *ctx, GLuint unit, GLuint texture, GLint level, GLboolean layered, @@ -567,7 +557,6 @@ _mesa_BindImageTexture(GLuint unit, GLuint texture, GLint level, u->Access = access; u->Format = format; u->_ActualFormat = _mesa_get_shader_image_format(format); - u->_Valid = validate_image_unit(ctx, u); if (u->TexObj && _mesa_tex_target_is_layered(u->TexObj->Target)) { u->Layered = layered; @@ -703,7 +692,6 @@ _mesa_BindImageTextures(GLuint first, GLsizei count, const GLuint *textures) u->Access = GL_READ_WRITE; u->Format = tex_format; u->_ActualFormat = _mesa_get_shader_image_format(tex_format); - u->_Valid = validate_image_unit(ctx, u); } else { /* Unbind the texture from the unit */ _mesa_reference_texobj(&u->TexObj, NULL); @@ -713,7 +701,6 @@ 
_mesa_BindImageTextures(GLuint first, GLsizei count, const GLuint *textures) u->Access = GL_READ_ONLY; u->Format = GL_R8; u->_ActualFormat = MESA_FORMAT_R_UNORM8; - u->_Valid = GL_FALSE; } } diff --git a/src/mesa/main/shaderimage.h b/src/mesa/main/shaderimage.h index bbe088a2459..94ee814a716 100644 --- a/src/mesa/main/shaderimage.h +++ b/src/mesa/main/shaderimage.h @@ -55,13 +55,15 @@ void _mesa_init_image_units(struct gl_context *ctx); /** - * Recalculate the \c _Valid flag of a context's shader image units. + * Return GL_TRUE if the state of the image unit passed as argument is valid + * and access from the shader is allowed. Otherwise loads from this unit + * should return zero and stores should have no effect. * - * To be called when the state of any texture bound to an image unit - * changes. + * The result depends on context state other than the passed image unit, part + * of the _NEW_TEXTURE set. */ -void -_mesa_validate_image_units(struct gl_context *ctx); +GLboolean +_mesa_is_image_unit_valid(struct gl_context *ctx, struct gl_image_unit *u); void GLAPIENTRY _mesa_BindImageTexture(GLuint unit, GLuint texture, GLint level, diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c index 4e85fda24b4..ffc71931fec 100644 --- a/src/mesa/main/shaderobj.c +++ b/src/mesa/main/shaderobj.c @@ -290,8 +290,8 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg) ralloc_free(shProg->InfoLog); shProg->InfoLog = ralloc_strdup(shProg, ""); - ralloc_free(shProg->UniformBlocks); - shProg->UniformBlocks = NULL; + ralloc_free(shProg->BufferInterfaceBlocks); + shProg->BufferInterfaceBlocks = NULL; shProg->NumBufferInterfaceBlocks = 0; for (i = 0; i < MESA_SHADER_STAGES; i++) { ralloc_free(shProg->UniformBlockStageIndex[i]); diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c index 1acaf59f432..c37b31d1753 100644 --- a/src/mesa/main/shared.c +++ b/src/mesa/main/shared.c @@ -107,6 +107,11 @@ _mesa_alloc_shared_state(struct gl_context *ctx) }; 
STATIC_ASSERT(ARRAY_SIZE(targets) == NUM_TEXTURE_TARGETS); shared->DefaultTex[i] = ctx->Driver.NewTextureObject(ctx, 0, targets[i]); + /* Need to explicitly set/overwrite the TargetIndex field here since + * the call to _mesa_tex_target_to_index() in NewTextureObject() may + * fail if the texture target is not supported. + */ + shared->DefaultTex[i]->TargetIndex = i; } /* sanity check */ diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index d3b1c72b08d..4043c4f2057 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -391,8 +391,12 @@ _mesa_update_state_locked( struct gl_context *ctx ) GLbitfield new_state = ctx->NewState; GLbitfield prog_flags = _NEW_PROGRAM; GLbitfield new_prog_state = 0x0; + const GLbitfield computed_states = ~(_NEW_CURRENT_ATTRIB | _NEW_LINE); - if (new_state == _NEW_CURRENT_ATTRIB) + /* we can skip a bunch of state validation checks if the dirty + * state matches one or more bits in 'computed_states'. + */ + if ((new_state & computed_states) == 0) goto out; if (MESA_VERBOSE & VERBOSE_STATE) diff --git a/src/mesa/main/texcompress_bptc.c b/src/mesa/main/texcompress_bptc.c index f0f6553a01b..26e59158007 100644 --- a/src/mesa/main/texcompress_bptc.c +++ b/src/mesa/main/texcompress_bptc.c @@ -30,6 +30,7 @@ #include "texcompress.h" #include "texcompress_bptc.h" #include "util/format_srgb.h" +#include "util/half_float.h" #include "texstore.h" #include "macros.h" #include "image.h" diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index 173e43c817c..547055ecf39 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -286,6 +286,12 @@ _mesa_initialize_texture_object( struct gl_context *ctx, obj->RefCount = 1; obj->Name = name; obj->Target = target; + if (target != 0) { + obj->TargetIndex = _mesa_tex_target_to_index(ctx, target); + } + else { + obj->TargetIndex = NUM_TEXTURE_TARGETS; /* invalid/error value */ + } obj->Priority = 1.0F; obj->BaseLevel = 0; obj->MaxLevel = 1000; @@ -340,6 +346,10 @@ 
finish_texture_init(struct gl_context *ctx, GLenum target, GLenum filter = GL_LINEAR; assert(obj->Target == 0); + obj->Target = target; + obj->TargetIndex = _mesa_tex_target_to_index(ctx, target); + assert(obj->TargetIndex < NUM_TEXTURE_TARGETS); + switch (target) { case GL_TEXTURE_2D_MULTISAMPLE: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: @@ -1185,46 +1195,26 @@ invalidate_tex_image_error_check(struct gl_context *ctx, GLuint texture, return t; } -/** - * Wrapper for the driver function. Need this because _mesa_new_texture_object - * permits a target of 0 and does not initialize targetIndex. - */ -struct gl_texture_object * -_mesa_create_nameless_texture(struct gl_context *ctx, GLenum target) -{ - struct gl_texture_object *texObj = NULL; - GLint targetIndex; - - if (target == 0) - return texObj; - - texObj = ctx->Driver.NewTextureObject(ctx, 0, target); - targetIndex = _mesa_tex_target_to_index(ctx, texObj->Target); - assert(targetIndex < NUM_TEXTURE_TARGETS); - texObj->TargetIndex = targetIndex; - - return texObj; -} /** * Helper function for glCreateTextures and glGenTextures. Need this because * glCreateTextures should throw errors if target = 0. This is not exposed to * the rest of Mesa to encourage Mesa internals to use nameless textures, * which do not require expensive hash lookups. + * \param target either 0 or a a valid / error-checked texture target enum */ static void create_textures(struct gl_context *ctx, GLenum target, - GLsizei n, GLuint *textures, bool dsa) + GLsizei n, GLuint *textures, const char *caller) { GLuint first; GLint i; - const char *func = dsa ? 
"Create" : "Gen"; if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) - _mesa_debug(ctx, "gl%sTextures %d\n", func, n); + _mesa_debug(ctx, "%s %d\n", caller, n); if (n < 0) { - _mesa_error( ctx, GL_INVALID_VALUE, "gl%sTextures(n < 0)", func ); + _mesa_error(ctx, GL_INVALID_VALUE, "%s(n < 0)", caller); return; } @@ -1241,28 +1231,14 @@ create_textures(struct gl_context *ctx, GLenum target, /* Allocate new, empty texture objects */ for (i = 0; i < n; i++) { struct gl_texture_object *texObj; - GLint targetIndex; GLuint name = first + i; texObj = ctx->Driver.NewTextureObject(ctx, name, target); if (!texObj) { mtx_unlock(&ctx->Shared->Mutex); - _mesa_error(ctx, GL_OUT_OF_MEMORY, "gl%sTextures", func); + _mesa_error(ctx, GL_OUT_OF_MEMORY, "gl%sTextures", caller); return; } - /* Initialize the target index if target is non-zero. */ - if (target != 0) { - targetIndex = _mesa_tex_target_to_index(ctx, texObj->Target); - if (targetIndex < 0) { /* Bad Target */ - mtx_unlock(&ctx->Shared->Mutex); - _mesa_error(ctx, GL_INVALID_ENUM, "gl%sTextures(target = %s)", - func, _mesa_enum_to_string(texObj->Target)); - return; - } - assert(targetIndex < NUM_TEXTURE_TARGETS); - texObj->TargetIndex = targetIndex; - } - /* insert into hash table */ _mesa_HashInsert(ctx->Shared->TexObjects, texObj->Name, texObj); @@ -1296,7 +1272,7 @@ void GLAPIENTRY _mesa_GenTextures(GLsizei n, GLuint *textures) { GET_CURRENT_CONTEXT(ctx); - create_textures(ctx, 0, n, textures, false); + create_textures(ctx, 0, n, textures, "glGenTextures"); } /** @@ -1329,7 +1305,7 @@ _mesa_CreateTextures(GLenum target, GLsizei n, GLuint *textures) return; } - create_textures(ctx, target, n, textures, true); + create_textures(ctx, target, n, textures, "glCreateTextures"); } /** @@ -1383,8 +1359,12 @@ unbind_texobj_from_texunits(struct gl_context *ctx, const gl_texture_index index = texObj->TargetIndex; GLuint u; - if (texObj->Target == 0) + if (texObj->Target == 0) { + /* texture was never bound */ return; + } + + 
assert(index < NUM_TEXTURE_TARGETS); for (u = 0; u < ctx->Texture.NumCurrentTexUsed; u++) { struct gl_texture_unit *unit = &ctx->Texture.Unit[u]; @@ -1752,10 +1732,11 @@ _mesa_BindTexture( GLenum target, GLuint texName ) _mesa_HashInsert(ctx->Shared->TexObjects, texName, newTexObj); mtx_unlock(&ctx->Shared->Mutex); } - newTexObj->Target = target; - newTexObj->TargetIndex = targetIndex; } + assert(newTexObj->Target == target); + assert(newTexObj->TargetIndex == targetIndex); + bind_texture(ctx, ctx->Texture.CurrentUnit, newTexObj); } @@ -1778,19 +1759,12 @@ _mesa_BindTextureUnit(GLuint unit, GLuint texture) { GET_CURRENT_CONTEXT(ctx); struct gl_texture_object *texObj; - struct gl_texture_unit *texUnit; if (unit >= _mesa_max_tex_unit(ctx)) { _mesa_error(ctx, GL_INVALID_VALUE, "glBindTextureUnit(unit=%u)", unit); return; } - texUnit = _mesa_get_tex_unit(ctx, unit); - assert(texUnit); - if (!texUnit) { - return; - } - if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) _mesa_debug(ctx, "glBindTextureUnit %s %d\n", _mesa_enum_to_string(GL_TEXTURE0+unit), (GLint) texture); @@ -1812,7 +1786,7 @@ _mesa_BindTextureUnit(GLuint unit, GLuint texture) /* Error checking */ if (!texObj) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBindTextureUnit(non-gen name)"); + "glBindTextureUnit(non-gen name)"); return; } if (texObj->Target == 0) { diff --git a/src/mesa/main/texobj.h b/src/mesa/main/texobj.h index 690878c85fc..8421337de4d 100644 --- a/src/mesa/main/texobj.h +++ b/src/mesa/main/texobj.h @@ -202,9 +202,6 @@ _mesa_unlock_context_textures( struct gl_context *ctx ); extern void _mesa_lock_context_textures( struct gl_context *ctx ); -extern struct gl_texture_object * -_mesa_create_nameless_texture(struct gl_context *ctx, GLenum target); - extern void _mesa_delete_nameless_texture(struct gl_context *ctx, struct gl_texture_object *texObj); diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c index 9b5928c4306..cb147fac476 100644 --- a/src/mesa/main/texstate.c +++ 
b/src/mesa/main/texstate.c @@ -34,7 +34,6 @@ #include "context.h" #include "enums.h" #include "macros.h" -#include "shaderimage.h" #include "texobj.h" #include "teximage.h" #include "texstate.h" @@ -741,8 +740,6 @@ update_texture_state( struct gl_context *ctx ) if (!prog[MESA_SHADER_FRAGMENT] || !prog[MESA_SHADER_VERTEX]) update_texgen(ctx); - - _mesa_validate_image_units(ctx); } diff --git a/src/mesa/main/textureview.c b/src/mesa/main/textureview.c index 5a3282a40c1..04b7d73da5c 100644 --- a/src/mesa/main/textureview.c +++ b/src/mesa/main/textureview.c @@ -681,6 +681,8 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, texObj->Immutable = GL_TRUE; texObj->ImmutableLevels = origTexObj->ImmutableLevels; texObj->Target = target; + texObj->TargetIndex = _mesa_tex_target_to_index(ctx, target); + assert(texObj->TargetIndex < NUM_TEXTURE_TARGETS); if (ctx->Driver.TextureView != NULL && !ctx->Driver.TextureView(ctx, texObj, origTexObj)) { diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp index d48729778ae..083087d6baa 100644 --- a/src/mesa/main/uniform_query.cpp +++ b/src/mesa/main/uniform_query.cpp @@ -318,19 +318,12 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location, return; } - if ((uni->type->base_type == GLSL_TYPE_DOUBLE && - returnType != GLSL_TYPE_DOUBLE) || - (uni->type->base_type != GLSL_TYPE_DOUBLE && - returnType == GLSL_TYPE_DOUBLE)) { - _mesa_error( ctx, GL_INVALID_OPERATION, - "glGetnUniform*vARB(incompatible uniform types)"); - return; - } { unsigned elements = (uni->type->is_sampler()) ? 1 : uni->type->components(); const int dmul = uni->type->base_type == GLSL_TYPE_DOUBLE ? 2 : 1; + const int rmul = returnType == GLSL_TYPE_DOUBLE ? 2 : 1; /* Calculate the source base address *BEFORE* modifying elements to * account for the size of the user's buffer. 
@@ -342,7 +335,7 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location, returnType == GLSL_TYPE_UINT || returnType == GLSL_TYPE_DOUBLE); /* doubles have a different size than the other 3 types */ - unsigned bytes = sizeof(src[0]) * elements * dmul; + unsigned bytes = sizeof(src[0]) * elements * rmul; if (bufSize < 0 || bytes > (unsigned) bufSize) { _mesa_error( ctx, GL_INVALID_OPERATION, "glGetnUniform*vARB(out of bounds: bufSize is %d," @@ -366,32 +359,57 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location, } else { union gl_constant_value *const dst = (union gl_constant_value *) paramsOut; - /* This code could be optimized by putting the loop inside the switch * statements. However, this is not expected to be * performance-critical code. */ for (unsigned i = 0; i < elements; i++) { + int sidx = i * dmul; + int didx = i * rmul; + switch (returnType) { case GLSL_TYPE_FLOAT: switch (uni->type->base_type) { case GLSL_TYPE_UINT: - dst[i].f = (float) src[i].u; + dst[didx].f = (float) src[sidx].u; break; case GLSL_TYPE_INT: case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: - dst[i].f = (float) src[i].i; + dst[didx].f = (float) src[sidx].i; break; case GLSL_TYPE_BOOL: - dst[i].f = src[i].i ? 1.0f : 0.0f; + dst[didx].f = src[sidx].i ? 1.0f : 0.0f; + break; + case GLSL_TYPE_DOUBLE: + dst[didx].f = *(double *)&src[sidx].f; + break; + default: + assert(!"Should not get here."); + break; + } + break; + case GLSL_TYPE_DOUBLE: + switch (uni->type->base_type) { + case GLSL_TYPE_UINT: + *(double *)&dst[didx].f = (double) src[sidx].u; + break; + case GLSL_TYPE_INT: + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + *(double *)&dst[didx].f = (double) src[sidx].i; + break; + case GLSL_TYPE_BOOL: + *(double *)&dst[didx].f = src[sidx].i ? 
1.0f : 0.0f; + break; + case GLSL_TYPE_FLOAT: + *(double *)&dst[didx].f = (double) src[sidx].f; break; default: assert(!"Should not get here."); break; } break; - case GLSL_TYPE_INT: case GLSL_TYPE_UINT: switch (uni->type->base_type) { @@ -413,10 +431,13 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location, * a floating-point value is rounded to the * nearest integer..." */ - dst[i].i = IROUND(src[i].f); + dst[didx].i = IROUND(src[sidx].f); break; case GLSL_TYPE_BOOL: - dst[i].i = src[i].i ? 1 : 0; + dst[didx].i = src[sidx].i ? 1 : 0; + break; + case GLSL_TYPE_DOUBLE: + dst[didx].i = *(double *)&src[sidx].f; break; default: assert(!"Should not get here."); diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c index 04cc81f9809..bc235380d97 100644 --- a/src/mesa/main/uniforms.c +++ b/src/mesa/main/uniforms.c @@ -1016,21 +1016,21 @@ _mesa_UniformBlockBinding(GLuint program, return; } - if (shProg->UniformBlocks[uniformBlockIndex].Binding != + if (shProg->BufferInterfaceBlocks[uniformBlockIndex].Binding != uniformBlockBinding) { int i; FLUSH_VERTICES(ctx, 0); ctx->NewDriverState |= ctx->DriverFlags.NewUniformBuffer; - shProg->UniformBlocks[uniformBlockIndex].Binding = uniformBlockBinding; + shProg->BufferInterfaceBlocks[uniformBlockIndex].Binding = uniformBlockBinding; for (i = 0; i < MESA_SHADER_STAGES; i++) { int stage_index = shProg->UniformBlockStageIndex[i][uniformBlockIndex]; if (stage_index != -1) { struct gl_shader *sh = shProg->_LinkedShaders[i]; - sh->UniformBlocks[stage_index].Binding = uniformBlockBinding; + sh->BufferInterfaceBlocks[stage_index].Binding = uniformBlockBinding; } } } @@ -1069,21 +1069,21 @@ _mesa_ShaderStorageBlockBinding(GLuint program, return; } - if (shProg->UniformBlocks[shaderStorageBlockIndex].Binding != + if (shProg->BufferInterfaceBlocks[shaderStorageBlockIndex].Binding != shaderStorageBlockBinding) { int i; FLUSH_VERTICES(ctx, 0); ctx->NewDriverState |= ctx->DriverFlags.NewShaderStorageBuffer; - 
shProg->UniformBlocks[shaderStorageBlockIndex].Binding = shaderStorageBlockBinding; + shProg->BufferInterfaceBlocks[shaderStorageBlockIndex].Binding = shaderStorageBlockBinding; for (i = 0; i < MESA_SHADER_STAGES; i++) { int stage_index = shProg->UniformBlockStageIndex[i][shaderStorageBlockIndex]; if (stage_index != -1) { struct gl_shader *sh = shProg->_LinkedShaders[i]; - sh->UniformBlocks[stage_index].Binding = shaderStorageBlockBinding; + sh->BufferInterfaceBlocks[stage_index].Binding = shaderStorageBlockBinding; } } } diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h index bec035cdc97..2f88b65043d 100644 --- a/src/mesa/main/uniforms.h +++ b/src/mesa/main/uniforms.h @@ -27,7 +27,7 @@ #define UNIFORMS_H #include "main/glheader.h" -#include "glsl/glsl_types.h" +#include "glsl/nir/glsl_types.h" #include "glsl/ir_uniform.h" #include "program/prog_parameter.h" diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c index 498b2f867d0..5635a643200 100644 --- a/src/mesa/main/version.c +++ b/src/mesa/main/version.c @@ -24,6 +24,7 @@ #include <stdio.h> +#include "context.h" #include "imports.h" #include "mtypes.h" #include "version.h" @@ -181,7 +182,23 @@ _mesa_override_gl_version(struct gl_context *ctx) { if (_mesa_override_gl_version_contextless(&ctx->Const, &ctx->API, &ctx->Version)) { - create_version_string(ctx, ""); + /* We need to include API in version string for OpenGL ES, otherwise + * application can not detect GLES via glGetString(GL_VERSION) query. + * + * From OpenGL ES 3.2 spec, Page 436: + * + * "The VERSION string is laid out as follows: + * + * OpenGL ES N.M vendor-specific information" + * + * From OpenGL 4.5 spec, Page 538: + * + * "The VERSION and SHADING_LANGUAGE_VERSION strings are laid out as + * follows: + * + * <version number><space><vendor-specific information>" + */ + create_version_string(ctx, _mesa_is_gles(ctx) ? 
"OpenGL ES " : ""); } } diff --git a/src/mesa/program/Android.mk b/src/mesa/program/Android.mk index ccb0fa5f32b..cc67f8aeadd 100644 --- a/src/mesa/program/Android.mk +++ b/src/mesa/program/Android.mk @@ -75,6 +75,7 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/mesa \ $(MESA_TOP)/src/glsl \ + $(MESA_TOP)/src/glsl/nir \ $(MESA_TOP)/src/gallium/auxiliary \ $(MESA_TOP)/src/gallium/include diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 0214b8e684c..1099d79d834 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -42,7 +42,7 @@ #include "glsl/ir_optimization.h" #include "glsl/ir_uniform.h" #include "glsl/glsl_parser_extras.h" -#include "glsl/glsl_types.h" +#include "glsl/nir/glsl_types.h" #include "glsl/linker.h" #include "glsl/program.h" #include "program/hash_table.h" diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index fc00534028f..539e3c05312 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -923,7 +923,7 @@ ptn_add_output_stores(struct ptn_compile *c) { nir_builder *b = &c->build; - foreach_list_typed(nir_variable, var, node, &b->shader->outputs) { + nir_foreach_variable(var, &b->shader->outputs) { nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); store->num_components = glsl_get_vector_elements(var->type); @@ -958,11 +958,10 @@ setup_registers_and_variables(struct ptn_compile *c) for (int i = 0; i < num_inputs; i++) { if (!(c->prog->InputsRead & BITFIELD64_BIT(i))) continue; - nir_variable *var = rzalloc(shader, nir_variable); - var->type = glsl_vec4_type(); - var->data.read_only = true; - var->data.mode = nir_var_shader_in; - var->name = ralloc_asprintf(var, "in_%d", i); + + nir_variable *var = + nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(), + ralloc_asprintf(shader, "in_%d", i)); var->data.location = i; var->data.index = 0; @@ -992,12 +991,9 @@ 
setup_registers_and_variables(struct ptn_compile *c) nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, nir_imm_float(b, 0.0), nir_imm_float(b, 0.0), nir_imm_float(b, 1.0)); - nir_variable *fullvar = rzalloc(shader, nir_variable); - fullvar->type = glsl_vec4_type(); - fullvar->data.mode = nir_var_local; - fullvar->name = "fogcoord_tmp"; - exec_list_push_tail(&b->impl->locals, &fullvar->node); - + nir_variable *fullvar = + nir_local_variable_create(b->impl, glsl_vec4_type(), + "fogcoord_tmp"); nir_intrinsic_instr *store = nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); store->num_components = 4; @@ -1005,17 +1001,15 @@ setup_registers_and_variables(struct ptn_compile *c) store->src[0] = nir_src_for_ssa(f001); nir_builder_instr_insert(b, &store->instr); - /* Insert the real input into the list so the driver has real - * inputs, but set c->input_vars[i] to the temporary so we use + /* We inserted the real input into the list so the driver has real + * inputs, but we set c->input_vars[i] to the temporary so we use * the splatted value. */ - exec_list_push_tail(&shader->inputs, &var->node); c->input_vars[i] = fullvar; continue; } } - exec_list_push_tail(&shader->inputs, &var->node); c->input_vars[i] = var; } @@ -1135,6 +1129,12 @@ prog_to_nir(const struct gl_program *prog, s->info.uses_clip_distance_out = false; s->info.separate_shader = false; + if (stage == MESA_SHADER_FRAGMENT) { + struct gl_fragment_program *fp = (struct gl_fragment_program *)prog; + + s->info.fs.uses_discard = fp->UsesKill; + } + fail: if (c->error) { ralloc_free(s); diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index e94c1021258..0e78e6ab25d 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -173,57 +173,15 @@ _mesa_set_program_error(struct gl_context *ctx, GLint pos, const char *string) /** - * Find the line number and column for 'pos' within 'string'. - * Return a copy of the line which contains 'pos'. Free the line with - * free(). 
- * \param string the program string - * \param pos the position within the string - * \param line returns the line number corresponding to 'pos'. - * \param col returns the column number corresponding to 'pos'. - * \return copy of the line containing 'pos'. - */ -const GLubyte * -_mesa_find_line_column(const GLubyte *string, const GLubyte *pos, - GLint *line, GLint *col) -{ - const GLubyte *lineStart = string; - const GLubyte *p = string; - GLubyte *s; - int len; - - *line = 1; - - while (p != pos) { - if (*p == (GLubyte) '\n') { - (*line)++; - lineStart = p + 1; - } - p++; - } - - *col = (pos - lineStart) + 1; - - /* return copy of this line */ - while (*p != 0 && *p != '\n') - p++; - len = p - lineStart; - s = malloc(len + 1); - memcpy(s, lineStart, len); - s[len] = 0; - - return s; -} - - -/** * Initialize a new gl_program object. */ -static void -init_program_struct(struct gl_program *prog, GLenum target, GLuint id) +struct gl_program * +_mesa_init_gl_program(struct gl_program *prog, GLenum target, GLuint id) { GLuint i; - assert(prog); + if (!prog) + return NULL; memset(prog, 0, sizeof(*prog)); mtx_init(&prog->Mutex, mtx_plain); @@ -235,102 +193,8 @@ init_program_struct(struct gl_program *prog, GLenum target, GLuint id) /* default mapping from samplers to texture units */ for (i = 0; i < MAX_SAMPLERS; i++) prog->SamplerUnits[i] = i; -} - - -/** - * Initialize a new fragment program object. - */ -struct gl_program * -_mesa_init_fragment_program(struct gl_context *ctx, - struct gl_fragment_program *prog, - GLenum target, GLuint id) -{ - if (prog) { - init_program_struct(&prog->Base, target, id); - return &prog->Base; - } - return NULL; -} - - -/** - * Initialize a new vertex program object. 
- */ -struct gl_program * -_mesa_init_vertex_program(struct gl_context *ctx, - struct gl_vertex_program *prog, - GLenum target, GLuint id) -{ - if (prog) { - init_program_struct(&prog->Base, target, id); - return &prog->Base; - } - return NULL; -} - - -/** - * Initialize a new compute program object. - */ -struct gl_program * -_mesa_init_compute_program(struct gl_context *ctx, - struct gl_compute_program *prog, - GLenum target, GLuint id) -{ - if (prog) { - init_program_struct(&prog->Base, target, id); - return &prog->Base; - } - return NULL; -} - - -/** - * Initialize a new tessellation control program object. - */ -struct gl_program * -_mesa_init_tess_ctrl_program(struct gl_context *ctx, - struct gl_tess_ctrl_program *prog, - GLenum target, GLuint id) -{ - if (prog) { - init_program_struct(&prog->Base, target, id); - return &prog->Base; - } - return NULL; -} - -/** - * Initialize a new tessellation evaluation program object. - */ -struct gl_program * -_mesa_init_tess_eval_program(struct gl_context *ctx, - struct gl_tess_eval_program *prog, - GLenum target, GLuint id) -{ - if (prog) { - init_program_struct(&prog->Base, target, id); - return &prog->Base; - } - return NULL; -} - - -/** - * Initialize a new geometry program object. 
- */ -struct gl_program * -_mesa_init_geometry_program(struct gl_context *ctx, - struct gl_geometry_program *prog, - GLenum target, GLuint id) -{ - if (prog) { - init_program_struct(&prog->Base, target, id); - return &prog->Base; - } - return NULL; + return prog; } @@ -349,43 +213,36 @@ _mesa_init_geometry_program(struct gl_context *ctx, struct gl_program * _mesa_new_program(struct gl_context *ctx, GLenum target, GLuint id) { - struct gl_program *prog; switch (target) { - case GL_VERTEX_PROGRAM_ARB: /* == GL_VERTEX_PROGRAM_NV */ - prog = _mesa_init_vertex_program(ctx, CALLOC_STRUCT(gl_vertex_program), - target, id ); - break; + case GL_VERTEX_PROGRAM_ARB: { /* == GL_VERTEX_PROGRAM_NV */ + struct gl_vertex_program *prog = CALLOC_STRUCT(gl_vertex_program); + return _mesa_init_gl_program(&prog->Base, target, id); + } case GL_FRAGMENT_PROGRAM_NV: - case GL_FRAGMENT_PROGRAM_ARB: - prog =_mesa_init_fragment_program(ctx, - CALLOC_STRUCT(gl_fragment_program), - target, id ); - break; - case GL_GEOMETRY_PROGRAM_NV: - prog = _mesa_init_geometry_program(ctx, - CALLOC_STRUCT(gl_geometry_program), - target, id); - break; - case GL_TESS_CONTROL_PROGRAM_NV: - prog = _mesa_init_tess_ctrl_program(ctx, - CALLOC_STRUCT(gl_tess_ctrl_program), - target, id); - break; - case GL_TESS_EVALUATION_PROGRAM_NV: - prog = _mesa_init_tess_eval_program(ctx, - CALLOC_STRUCT(gl_tess_eval_program), - target, id); - break; - case GL_COMPUTE_PROGRAM_NV: - prog = _mesa_init_compute_program(ctx, - CALLOC_STRUCT(gl_compute_program), - target, id); - break; + case GL_FRAGMENT_PROGRAM_ARB: { + struct gl_fragment_program *prog = CALLOC_STRUCT(gl_fragment_program); + return _mesa_init_gl_program(&prog->Base, target, id); + } + case GL_GEOMETRY_PROGRAM_NV: { + struct gl_geometry_program *prog = CALLOC_STRUCT(gl_geometry_program); + return _mesa_init_gl_program(&prog->Base, target, id); + } + case GL_TESS_CONTROL_PROGRAM_NV: { + struct gl_tess_ctrl_program *prog = CALLOC_STRUCT(gl_tess_ctrl_program); + return 
_mesa_init_gl_program(&prog->Base, target, id); + } + case GL_TESS_EVALUATION_PROGRAM_NV: { + struct gl_tess_eval_program *prog = CALLOC_STRUCT(gl_tess_eval_program); + return _mesa_init_gl_program(&prog->Base, target, id); + } + case GL_COMPUTE_PROGRAM_NV: { + struct gl_compute_program *prog = CALLOC_STRUCT(gl_compute_program); + return _mesa_init_gl_program(&prog->Base, target, id); + } default: _mesa_problem(ctx, "bad target in _mesa_new_program"); - prog = NULL; + return NULL; } - return prog; } @@ -494,123 +351,6 @@ _mesa_reference_program_(struct gl_context *ctx, /** - * Return a copy of a program. - * XXX Problem here if the program object is actually OO-derivation - * made by a device driver. - */ -struct gl_program * -_mesa_clone_program(struct gl_context *ctx, const struct gl_program *prog) -{ - struct gl_program *clone; - - clone = ctx->Driver.NewProgram(ctx, prog->Target, prog->Id); - if (!clone) - return NULL; - - assert(clone->Target == prog->Target); - assert(clone->RefCount == 1); - - clone->String = (GLubyte *) strdup((char *) prog->String); - clone->Format = prog->Format; - clone->Instructions = _mesa_alloc_instructions(prog->NumInstructions); - if (!clone->Instructions) { - _mesa_reference_program(ctx, &clone, NULL); - return NULL; - } - _mesa_copy_instructions(clone->Instructions, prog->Instructions, - prog->NumInstructions); - clone->InputsRead = prog->InputsRead; - clone->OutputsWritten = prog->OutputsWritten; - clone->SamplersUsed = prog->SamplersUsed; - clone->ShadowSamplers = prog->ShadowSamplers; - memcpy(clone->TexturesUsed, prog->TexturesUsed, sizeof(prog->TexturesUsed)); - - if (prog->Parameters) - clone->Parameters = _mesa_clone_parameter_list(prog->Parameters); - if (prog->LocalParams) { - clone->LocalParams = malloc(MAX_PROGRAM_LOCAL_PARAMS * - sizeof(float[4])); - if (!clone->LocalParams) { - _mesa_reference_program(ctx, &clone, NULL); - return NULL; - } - memcpy(clone->LocalParams, prog->LocalParams, - MAX_PROGRAM_LOCAL_PARAMS * 
sizeof(float[4])); - } - clone->IndirectRegisterFiles = prog->IndirectRegisterFiles; - clone->NumInstructions = prog->NumInstructions; - clone->NumTemporaries = prog->NumTemporaries; - clone->NumParameters = prog->NumParameters; - clone->NumAttributes = prog->NumAttributes; - clone->NumAddressRegs = prog->NumAddressRegs; - clone->NumNativeInstructions = prog->NumNativeInstructions; - clone->NumNativeTemporaries = prog->NumNativeTemporaries; - clone->NumNativeParameters = prog->NumNativeParameters; - clone->NumNativeAttributes = prog->NumNativeAttributes; - clone->NumNativeAddressRegs = prog->NumNativeAddressRegs; - clone->NumAluInstructions = prog->NumAluInstructions; - clone->NumTexInstructions = prog->NumTexInstructions; - clone->NumTexIndirections = prog->NumTexIndirections; - clone->NumNativeAluInstructions = prog->NumNativeAluInstructions; - clone->NumNativeTexInstructions = prog->NumNativeTexInstructions; - clone->NumNativeTexIndirections = prog->NumNativeTexIndirections; - - switch (prog->Target) { - case GL_VERTEX_PROGRAM_ARB: - { - const struct gl_vertex_program *vp = gl_vertex_program_const(prog); - struct gl_vertex_program *vpc = gl_vertex_program(clone); - vpc->IsPositionInvariant = vp->IsPositionInvariant; - } - break; - case GL_FRAGMENT_PROGRAM_ARB: - { - const struct gl_fragment_program *fp = gl_fragment_program_const(prog); - struct gl_fragment_program *fpc = gl_fragment_program(clone); - fpc->UsesKill = fp->UsesKill; - fpc->UsesDFdy = fp->UsesDFdy; - fpc->OriginUpperLeft = fp->OriginUpperLeft; - fpc->PixelCenterInteger = fp->PixelCenterInteger; - } - break; - case GL_GEOMETRY_PROGRAM_NV: - { - const struct gl_geometry_program *gp = gl_geometry_program_const(prog); - struct gl_geometry_program *gpc = gl_geometry_program(clone); - gpc->VerticesOut = gp->VerticesOut; - gpc->InputType = gp->InputType; - gpc->Invocations = gp->Invocations; - gpc->OutputType = gp->OutputType; - gpc->UsesEndPrimitive = gp->UsesEndPrimitive; - gpc->UsesStreams = 
gp->UsesStreams; - } - break; - case GL_TESS_CONTROL_PROGRAM_NV: - { - const struct gl_tess_ctrl_program *tcp = gl_tess_ctrl_program_const(prog); - struct gl_tess_ctrl_program *tcpc = gl_tess_ctrl_program(clone); - tcpc->VerticesOut = tcp->VerticesOut; - } - break; - case GL_TESS_EVALUATION_PROGRAM_NV: - { - const struct gl_tess_eval_program *tep = gl_tess_eval_program_const(prog); - struct gl_tess_eval_program *tepc = gl_tess_eval_program(clone); - tepc->PrimitiveMode = tep->PrimitiveMode; - tepc->Spacing = tep->Spacing; - tepc->VertexOrder = tep->VertexOrder; - tepc->PointMode = tep->PointMode; - } - break; - default: - _mesa_problem(NULL, "Unexpected target in _mesa_clone_program"); - } - - return clone; -} - - -/** * Insert 'count' NOP instructions at 'start' in the given program. * Adjust branch targets accordingly. */ @@ -707,190 +447,6 @@ _mesa_delete_instructions(struct gl_program *prog, GLuint start, GLuint count) /** - * Search instructions for registers that match (oldFile, oldIndex), - * replacing them with (newFile, newIndex). - */ -static void -replace_registers(struct prog_instruction *inst, GLuint numInst, - GLuint oldFile, GLuint oldIndex, - GLuint newFile, GLuint newIndex) -{ - GLuint i, j; - for (i = 0; i < numInst; i++) { - /* src regs */ - for (j = 0; j < _mesa_num_inst_src_regs(inst[i].Opcode); j++) { - if (inst[i].SrcReg[j].File == oldFile && - inst[i].SrcReg[j].Index == oldIndex) { - inst[i].SrcReg[j].File = newFile; - inst[i].SrcReg[j].Index = newIndex; - } - } - /* dst reg */ - if (inst[i].DstReg.File == oldFile && inst[i].DstReg.Index == oldIndex) { - inst[i].DstReg.File = newFile; - inst[i].DstReg.Index = newIndex; - } - } -} - - -/** - * Search instructions for references to program parameters. When found, - * increment the parameter index by 'offset'. - * Used when combining programs. 
- */ -static void -adjust_param_indexes(struct prog_instruction *inst, GLuint numInst, - GLuint offset) -{ - GLuint i, j; - for (i = 0; i < numInst; i++) { - for (j = 0; j < _mesa_num_inst_src_regs(inst[i].Opcode); j++) { - GLuint f = inst[i].SrcReg[j].File; - if (f == PROGRAM_CONSTANT || - f == PROGRAM_UNIFORM || - f == PROGRAM_STATE_VAR) { - inst[i].SrcReg[j].Index += offset; - } - } - } -} - - -/** - * Combine two programs into one. Fix instructions so the outputs of - * the first program go to the inputs of the second program. - */ -struct gl_program * -_mesa_combine_programs(struct gl_context *ctx, - const struct gl_program *progA, - const struct gl_program *progB) -{ - struct prog_instruction *newInst; - struct gl_program *newProg; - const GLuint lenA = progA->NumInstructions - 1; /* omit END instr */ - const GLuint lenB = progB->NumInstructions; - const GLuint numParamsA = _mesa_num_parameters(progA->Parameters); - const GLuint newLength = lenA + lenB; - GLboolean usedTemps[MAX_PROGRAM_TEMPS]; - GLuint firstTemp = 0; - GLbitfield64 inputsB; - GLuint i; - - assert(progA->Target == progB->Target); - - newInst = _mesa_alloc_instructions(newLength); - if (!newInst) - return GL_FALSE; - - _mesa_copy_instructions(newInst, progA->Instructions, lenA); - _mesa_copy_instructions(newInst + lenA, progB->Instructions, lenB); - - /* adjust branch / instruction addresses for B's instructions */ - for (i = 0; i < lenB; i++) { - newInst[lenA + i].BranchTarget += lenA; - } - - newProg = ctx->Driver.NewProgram(ctx, progA->Target, 0); - newProg->Instructions = newInst; - newProg->NumInstructions = newLength; - - /* find used temp regs (we may need new temps below) */ - _mesa_find_used_registers(newProg, PROGRAM_TEMPORARY, - usedTemps, MAX_PROGRAM_TEMPS); - - if (newProg->Target == GL_FRAGMENT_PROGRAM_ARB) { - const struct gl_fragment_program *fprogA, *fprogB; - struct gl_fragment_program *newFprog; - GLbitfield64 progB_inputsRead = progB->InputsRead; - GLint progB_colorFile, 
progB_colorIndex; - - fprogA = gl_fragment_program_const(progA); - fprogB = gl_fragment_program_const(progB); - newFprog = gl_fragment_program(newProg); - - newFprog->UsesKill = fprogA->UsesKill || fprogB->UsesKill; - newFprog->UsesDFdy = fprogA->UsesDFdy || fprogB->UsesDFdy; - - /* We'll do a search and replace for instances - * of progB_colorFile/progB_colorIndex below... - */ - progB_colorFile = PROGRAM_INPUT; - progB_colorIndex = VARYING_SLOT_COL0; - - /* - * The fragment program may get color from a state var rather than - * a fragment input (vertex output) if it's constant. - * See the texenvprogram.c code. - * So, search the program's parameter list now to see if the program - * gets color from a state var instead of a conventional fragment - * input register. - */ - for (i = 0; i < progB->Parameters->NumParameters; i++) { - struct gl_program_parameter *p = &progB->Parameters->Parameters[i]; - if (p->Type == PROGRAM_STATE_VAR && - p->StateIndexes[0] == STATE_INTERNAL && - p->StateIndexes[1] == STATE_CURRENT_ATTRIB && - (int) p->StateIndexes[2] == (int) VERT_ATTRIB_COLOR0) { - progB_inputsRead |= VARYING_BIT_COL0; - progB_colorFile = PROGRAM_STATE_VAR; - progB_colorIndex = i; - break; - } - } - - /* Connect color outputs of fprogA to color inputs of fprogB, via a - * new temporary register. 
- */ - if ((progA->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR)) && - (progB_inputsRead & VARYING_BIT_COL0)) { - GLint tempReg = _mesa_find_free_register(usedTemps, MAX_PROGRAM_TEMPS, - firstTemp); - if (tempReg < 0) { - _mesa_problem(ctx, "No free temp regs found in " - "_mesa_combine_programs(), using 31"); - tempReg = 31; - } - firstTemp = tempReg + 1; - - /* replace writes to result.color[0] with tempReg */ - replace_registers(newInst, lenA, - PROGRAM_OUTPUT, FRAG_RESULT_COLOR, - PROGRAM_TEMPORARY, tempReg); - /* replace reads from the input color with tempReg */ - replace_registers(newInst + lenA, lenB, - progB_colorFile, progB_colorIndex, /* search for */ - PROGRAM_TEMPORARY, tempReg /* replace with */ ); - } - - /* compute combined program's InputsRead */ - inputsB = progB_inputsRead; - if (progA->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR)) { - inputsB &= ~(1 << VARYING_SLOT_COL0); - } - newProg->InputsRead = progA->InputsRead | inputsB; - newProg->OutputsWritten = progB->OutputsWritten; - newProg->SamplersUsed = progA->SamplersUsed | progB->SamplersUsed; - } - else { - /* vertex program */ - assert(0); /* XXX todo */ - } - - /* - * Merge parameters (uniforms, constants, etc) - */ - newProg->Parameters = _mesa_combine_parameter_lists(progA->Parameters, - progB->Parameters); - - adjust_param_indexes(newInst + lenA, lenB, numParamsA); - - - return newProg; -} - - -/** * Populate the 'used' array with flags indicating which registers (TEMPs, * INPUTs, OUTPUTs, etc, are used by the given program. * \param file type of register to scan for @@ -952,140 +508,6 @@ _mesa_find_free_register(const GLboolean used[], } - -/** - * Check if the given register index is valid (doesn't exceed implementation- - * dependent limits). 
- * \return GL_TRUE if OK, GL_FALSE if bad index - */ -GLboolean -_mesa_valid_register_index(const struct gl_context *ctx, - gl_shader_stage shaderType, - gl_register_file file, GLint index) -{ - const struct gl_program_constants *c; - - assert(0 <= shaderType && shaderType < MESA_SHADER_STAGES); - c = &ctx->Const.Program[shaderType]; - - switch (file) { - case PROGRAM_UNDEFINED: - return GL_TRUE; /* XXX or maybe false? */ - - case PROGRAM_TEMPORARY: - return index >= 0 && index < (GLint) c->MaxTemps; - - case PROGRAM_UNIFORM: - case PROGRAM_STATE_VAR: - /* aka constant buffer */ - return index >= 0 && index < (GLint) c->MaxUniformComponents / 4; - - case PROGRAM_CONSTANT: - /* constant buffer w/ possible relative negative addressing */ - return (index > (int) c->MaxUniformComponents / -4 && - index < (int) c->MaxUniformComponents / 4); - - case PROGRAM_INPUT: - if (index < 0) - return GL_FALSE; - - switch (shaderType) { - case MESA_SHADER_VERTEX: - return index < VERT_ATTRIB_GENERIC0 + (GLint) c->MaxAttribs; - case MESA_SHADER_FRAGMENT: - return index < VARYING_SLOT_VAR0 + (GLint) ctx->Const.MaxVarying; - case MESA_SHADER_GEOMETRY: - return index < VARYING_SLOT_VAR0 + (GLint) ctx->Const.MaxVarying; - default: - return GL_FALSE; - } - - case PROGRAM_OUTPUT: - if (index < 0) - return GL_FALSE; - - switch (shaderType) { - case MESA_SHADER_VERTEX: - return index < VARYING_SLOT_VAR0 + (GLint) ctx->Const.MaxVarying; - case MESA_SHADER_FRAGMENT: - return index < FRAG_RESULT_DATA0 + (GLint) ctx->Const.MaxDrawBuffers; - case MESA_SHADER_GEOMETRY: - return index < VARYING_SLOT_VAR0 + (GLint) ctx->Const.MaxVarying; - default: - return GL_FALSE; - } - - case PROGRAM_ADDRESS: - return index >= 0 && index < (GLint) c->MaxAddressRegs; - - default: - _mesa_problem(ctx, - "unexpected register file in _mesa_valid_register_index()"); - return GL_FALSE; - } -} - - - -/** - * "Post-process" a GPU program. This is intended to be used for debugging. 
- * Example actions include no-op'ing instructions or changing instruction - * behaviour. - */ -void -_mesa_postprocess_program(struct gl_context *ctx, struct gl_program *prog) -{ - static const GLfloat white[4] = { 0.5, 0.5, 0.5, 0.5 }; - GLuint i; - GLuint whiteSwizzle; - GLint whiteIndex = _mesa_add_unnamed_constant(prog->Parameters, - (gl_constant_value *) white, - 4, &whiteSwizzle); - - (void) whiteIndex; - - for (i = 0; i < prog->NumInstructions; i++) { - struct prog_instruction *inst = prog->Instructions + i; - const GLuint n = _mesa_num_inst_src_regs(inst->Opcode); - - (void) n; - - if (_mesa_is_tex_instruction(inst->Opcode)) { -#if 0 - /* replace TEX/TXP/TXB with MOV */ - inst->Opcode = OPCODE_MOV; - inst->DstReg.WriteMask = WRITEMASK_XYZW; - inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; - inst->SrcReg[0].Negate = NEGATE_NONE; -#endif - -#if 0 - /* disable shadow texture mode */ - inst->TexShadow = 0; -#endif - } - - if (inst->Opcode == OPCODE_TXP) { -#if 0 - inst->Opcode = OPCODE_MOV; - inst->DstReg.WriteMask = WRITEMASK_XYZW; - inst->SrcReg[0].File = PROGRAM_CONSTANT; - inst->SrcReg[0].Index = whiteIndex; - inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; - inst->SrcReg[0].Negate = NEGATE_NONE; -#endif -#if 0 - inst->TexShadow = 0; -#endif -#if 0 - inst->Opcode = OPCODE_TEX; - inst->TexShadow = 0; -#endif - } - - } -} - /* Gets the minimum number of shader invocations per fragment. * This function is useful to determine if we need to do per * sample shading or per fragment shading. 
diff --git a/src/mesa/program/program.h b/src/mesa/program/program.h index a894147cafd..24e05974dc3 100644 --- a/src/mesa/program/program.h +++ b/src/mesa/program/program.h @@ -63,40 +63,8 @@ _mesa_update_default_objects_program(struct gl_context *ctx); extern void _mesa_set_program_error(struct gl_context *ctx, GLint pos, const char *string); -extern const GLubyte * -_mesa_find_line_column(const GLubyte *string, const GLubyte *pos, - GLint *line, GLint *col); - - -extern struct gl_program * -_mesa_init_vertex_program(struct gl_context *ctx, - struct gl_vertex_program *prog, - GLenum target, GLuint id); - -extern struct gl_program * -_mesa_init_fragment_program(struct gl_context *ctx, - struct gl_fragment_program *prog, - GLenum target, GLuint id); - extern struct gl_program * -_mesa_init_tess_ctrl_program(struct gl_context *ctx, - struct gl_tess_ctrl_program *prog, - GLenum target, GLuint id); - -extern struct gl_program * -_mesa_init_tess_eval_program(struct gl_context *ctx, - struct gl_tess_eval_program *prog, - GLenum target, GLuint id); - -extern struct gl_program * -_mesa_init_geometry_program(struct gl_context *ctx, - struct gl_geometry_program *prog, - GLenum target, GLuint id); - -extern struct gl_program * -_mesa_init_compute_program(struct gl_context *ctx, - struct gl_compute_program *prog, - GLenum target, GLuint id); +_mesa_init_gl_program(struct gl_program *prog, GLenum target, GLuint id); extern struct gl_program * _mesa_new_program(struct gl_context *ctx, GLenum target, GLuint id); @@ -176,56 +144,12 @@ _mesa_reference_tesseprog(struct gl_context *ctx, (struct gl_program *) prog); } -extern struct gl_program * -_mesa_clone_program(struct gl_context *ctx, const struct gl_program *prog); - -static inline struct gl_vertex_program * -_mesa_clone_vertex_program(struct gl_context *ctx, - const struct gl_vertex_program *prog) -{ - return (struct gl_vertex_program *) _mesa_clone_program(ctx, &prog->Base); -} - -static inline struct gl_tess_ctrl_program * 
-_mesa_clone_tess_ctrl_program(struct gl_context *ctx, - const struct gl_tess_ctrl_program *prog) -{ - return (struct gl_tess_ctrl_program *) _mesa_clone_program(ctx, &prog->Base); -} - -static inline struct gl_tess_eval_program * -_mesa_clone_tess_eval_program(struct gl_context *ctx, - const struct gl_tess_eval_program *prog) -{ - return (struct gl_tess_eval_program *) _mesa_clone_program(ctx, &prog->Base); -} - -static inline struct gl_geometry_program * -_mesa_clone_geometry_program(struct gl_context *ctx, - const struct gl_geometry_program *prog) -{ - return (struct gl_geometry_program *) _mesa_clone_program(ctx, &prog->Base); -} - -static inline struct gl_fragment_program * -_mesa_clone_fragment_program(struct gl_context *ctx, - const struct gl_fragment_program *prog) -{ - return (struct gl_fragment_program *) _mesa_clone_program(ctx, &prog->Base); -} - - extern GLboolean _mesa_insert_instructions(struct gl_program *prog, GLuint start, GLuint count); extern GLboolean _mesa_delete_instructions(struct gl_program *prog, GLuint start, GLuint count); -extern struct gl_program * -_mesa_combine_programs(struct gl_context *ctx, - const struct gl_program *progA, - const struct gl_program *progB); - extern void _mesa_find_used_registers(const struct gl_program *prog, gl_register_file file, @@ -235,15 +159,6 @@ extern GLint _mesa_find_free_register(const GLboolean used[], GLuint maxRegs, GLuint firstReg); - -extern GLboolean -_mesa_valid_register_index(const struct gl_context *ctx, - gl_shader_stage shaderType, - gl_register_file file, GLint index); - -extern void -_mesa_postprocess_program(struct gl_context *ctx, struct gl_program *prog); - extern GLint _mesa_get_min_invocations_per_fragment(struct gl_context *ctx, const struct gl_fragment_program *prog, diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp index 1198a3c45f1..84e2504baba 100644 --- a/src/mesa/program/sampler.cpp +++ b/src/mesa/program/sampler.cpp @@ -24,7 +24,7 @@ */ #include 
"main/mtypes.h" -#include "glsl/glsl_types.h" +#include "glsl/nir/glsl_types.h" #include "glsl/ir.h" #include "glsl/ir_uniform.h" #include "glsl/ir_visitor.h" diff --git a/src/mesa/state_tracker/st_atom_clip.c b/src/mesa/state_tracker/st_atom_clip.c index 506a770499f..b820d843385 100644 --- a/src/mesa/state_tracker/st_atom_clip.c +++ b/src/mesa/state_tracker/st_atom_clip.c @@ -56,6 +56,9 @@ static void update_clip( struct st_context *st ) use_eye = TRUE; } + /* _ClipUserPlane = _NEW_TRANSFORM | _NEW_PROJECTION + * EyeUserPlane = _NEW_TRANSFORM + */ memcpy(clip.ucp, use_eye ? ctx->Transform.EyeUserPlane : ctx->Transform._ClipUserPlane, sizeof(clip.ucp)); @@ -70,7 +73,7 @@ static void update_clip( struct st_context *st ) const struct st_tracked_state st_update_clip = { "st_update_clip", /* name */ { /* dirty */ - _NEW_TRANSFORM, /* mesa */ + _NEW_TRANSFORM | _NEW_PROJECTION, /* mesa */ ST_NEW_VERTEX_PROGRAM, /* st */ }, update_clip /* update */ diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c index 6affb4d84d5..acaa85d9356 100644 --- a/src/mesa/state_tracker/st_atom_constbuf.c +++ b/src/mesa/state_tracker/st_atom_constbuf.c @@ -238,7 +238,7 @@ static void st_bind_ubos(struct st_context *st, struct gl_uniform_buffer_binding *binding; struct st_buffer_object *st_obj; - binding = &st->ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding]; + binding = &st->ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding]; st_obj = st_buffer_object(binding->BufferObject); cb.buffer = st_obj->buffer; diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index a04163cc137..f94c358afba 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -25,65 +25,17 @@ * **************************************************************************/ -/* - * Generate fragment programs to implement pixel transfer ops, such as - 
* scale/bias, colortable, convolution... - * - * Authors: +/* Authors: * Brian Paul */ -#include "main/imports.h" -#include "main/image.h" -#include "main/macros.h" -#include "program/program.h" -#include "program/prog_cache.h" -#include "program/prog_instruction.h" -#include "program/prog_parameter.h" -#include "program/prog_print.h" - #include "st_context.h" -#include "st_format.h" #include "st_texture.h" -#include "pipe/p_screen.h" -#include "pipe/p_context.h" #include "util/u_inlines.h" #include "util/u_pack_color.h" -struct state_key -{ - GLuint scaleAndBias:1; - GLuint pixelMaps:1; - -#if 0 - GLfloat Maps[3][256][4]; - int NumMaps; - GLint NumStages; - pipeline_stage Stages[STAGE_MAX]; - GLboolean StagesUsed[STAGE_MAX]; - GLfloat Scale1[4], Bias1[4]; - GLfloat Scale2[4], Bias2[4]; -#endif -}; - -static void -make_state_key(struct gl_context *ctx, struct state_key *key) -{ - memset(key, 0, sizeof(*key)); - - if (ctx->Pixel.RedBias != 0.0 || ctx->Pixel.RedScale != 1.0 || - ctx->Pixel.GreenBias != 0.0 || ctx->Pixel.GreenScale != 1.0 || - ctx->Pixel.BlueBias != 0.0 || ctx->Pixel.BlueScale != 1.0 || - ctx->Pixel.AlphaBias != 0.0 || ctx->Pixel.AlphaScale != 1.0) { - key->scaleAndBias = 1; - } - - key->pixelMaps = ctx->Pixel.MapColorFlag; -} - - /** * Update the pixelmap texture with the contents of the R/G/B/A pixel maps. */ @@ -128,74 +80,15 @@ load_color_map_texture(struct gl_context *ctx, struct pipe_resource *pt) pipe_transfer_unmap(pipe, transfer); } - - -#define MAX_INST 100 - /** - * Returns a fragment program which implements the current pixel transfer ops. + * Upload the pixel transfer color map texture. 
*/ -static struct gl_fragment_program * -get_pixel_transfer_program(struct gl_context *ctx, const struct state_key *key) +static void +update_pixel_transfer(struct st_context *st) { - struct st_context *st = st_context(ctx); - struct prog_instruction inst[MAX_INST]; - struct gl_program_parameter_list *params; - struct gl_fragment_program *fp; - GLuint ic = 0; - const GLuint colorTemp = 0; - - fp = (struct gl_fragment_program *) - ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); - if (!fp) - return NULL; - - params = _mesa_new_parameter_list(); - - /* - * Get initial pixel color from the texture. - * TEX colorTemp, fragment.texcoord[0], texture[0], 2D; - */ - _mesa_init_instructions(inst + ic, 1); - inst[ic].Opcode = OPCODE_TEX; - inst[ic].DstReg.File = PROGRAM_TEMPORARY; - inst[ic].DstReg.Index = colorTemp; - inst[ic].SrcReg[0].File = PROGRAM_INPUT; - inst[ic].SrcReg[0].Index = VARYING_SLOT_TEX0; - inst[ic].TexSrcUnit = 0; - inst[ic].TexSrcTarget = TEXTURE_2D_INDEX; - ic++; - fp->Base.InputsRead = BITFIELD64_BIT(VARYING_SLOT_TEX0); - fp->Base.OutputsWritten = BITFIELD64_BIT(FRAG_RESULT_COLOR); - fp->Base.SamplersUsed = 0x1; /* sampler 0 (bit 0) is used */ - - if (key->scaleAndBias) { - static const gl_state_index scale_state[STATE_LENGTH] = - { STATE_INTERNAL, STATE_PT_SCALE, 0, 0, 0 }; - static const gl_state_index bias_state[STATE_LENGTH] = - { STATE_INTERNAL, STATE_PT_BIAS, 0, 0, 0 }; - GLint scale_p, bias_p; - - scale_p = _mesa_add_state_reference(params, scale_state); - bias_p = _mesa_add_state_reference(params, bias_state); - - /* MAD colorTemp, colorTemp, scale, bias; */ - _mesa_init_instructions(inst + ic, 1); - inst[ic].Opcode = OPCODE_MAD; - inst[ic].DstReg.File = PROGRAM_TEMPORARY; - inst[ic].DstReg.Index = colorTemp; - inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY; - inst[ic].SrcReg[0].Index = colorTemp; - inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR; - inst[ic].SrcReg[1].Index = scale_p; - inst[ic].SrcReg[2].File = PROGRAM_STATE_VAR; - 
inst[ic].SrcReg[2].Index = bias_p; - ic++; - } - - if (key->pixelMaps) { - const GLuint temp = 1; + struct gl_context *ctx = st->ctx; + if (ctx->Pixel.MapColorFlag) { /* create the colormap/texture now if not already done */ if (!st->pixel_xfer.pixelmap_texture) { st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx); @@ -203,117 +96,11 @@ get_pixel_transfer_program(struct gl_context *ctx, const struct state_key *key) st_create_texture_sampler_view(st->pipe, st->pixel_xfer.pixelmap_texture); } - - /* with a little effort, we can do four pixel map look-ups with - * two TEX instructions: - */ - - /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */ - _mesa_init_instructions(inst + ic, 1); - inst[ic].Opcode = OPCODE_TEX; - inst[ic].DstReg.File = PROGRAM_TEMPORARY; - inst[ic].DstReg.Index = temp; - inst[ic].DstReg.WriteMask = WRITEMASK_XY; /* write R,G */ - inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY; - inst[ic].SrcReg[0].Index = colorTemp; - inst[ic].TexSrcUnit = 1; - inst[ic].TexSrcTarget = TEXTURE_2D_INDEX; - ic++; - - /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */ - _mesa_init_instructions(inst + ic, 1); - inst[ic].Opcode = OPCODE_TEX; - inst[ic].DstReg.File = PROGRAM_TEMPORARY; - inst[ic].DstReg.Index = temp; - inst[ic].DstReg.WriteMask = WRITEMASK_ZW; /* write B,A */ - inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY; - inst[ic].SrcReg[0].Index = colorTemp; - inst[ic].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, - SWIZZLE_Z, SWIZZLE_W); - inst[ic].TexSrcUnit = 1; - inst[ic].TexSrcTarget = TEXTURE_2D_INDEX; - ic++; - - /* MOV colorTemp, temp; */ - _mesa_init_instructions(inst + ic, 1); - inst[ic].Opcode = OPCODE_MOV; - inst[ic].DstReg.File = PROGRAM_TEMPORARY; - inst[ic].DstReg.Index = colorTemp; - inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY; - inst[ic].SrcReg[0].Index = temp; - ic++; - - fp->Base.SamplersUsed |= (1 << 1); /* sampler 1 is used */ - } - - /* Modify last instruction's dst reg to write to result.color */ - { - struct 
prog_instruction *last = &inst[ic - 1]; - last->DstReg.File = PROGRAM_OUTPUT; - last->DstReg.Index = FRAG_RESULT_COLOR; - } - - /* END; */ - _mesa_init_instructions(inst + ic, 1); - inst[ic].Opcode = OPCODE_END; - ic++; - - assert(ic <= MAX_INST); - - - fp->Base.Instructions = _mesa_alloc_instructions(ic); - if (!fp->Base.Instructions) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, - "generating pixel transfer program"); - _mesa_free_parameter_list(params); - return NULL; - } - - _mesa_copy_instructions(fp->Base.Instructions, inst, ic); - fp->Base.NumInstructions = ic; - fp->Base.Parameters = params; - -#if 0 - printf("========= pixel transfer prog\n"); - _mesa_print_program(&fp->Base); - _mesa_print_parameter_list(fp->Base.Parameters); -#endif - - return fp; -} - - - -/** - * Update st->pixel_xfer.program in response to new pixel-transfer state. - */ -static void -update_pixel_transfer(struct st_context *st) -{ - struct gl_context *ctx = st->ctx; - struct state_key key; - struct gl_fragment_program *fp; - - make_state_key(st->ctx, &key); - - fp = (struct gl_fragment_program *) - _mesa_search_program_cache(st->pixel_xfer.cache, &key, sizeof(key)); - if (!fp) { - fp = get_pixel_transfer_program(st->ctx, &key); - _mesa_program_cache_insert(st->ctx, st->pixel_xfer.cache, - &key, sizeof(key), &fp->Base); - } - - if (ctx->Pixel.MapColorFlag) { load_color_map_texture(ctx, st->pixel_xfer.pixelmap_texture); } - st->pixel_xfer.pixelmap_enabled = ctx->Pixel.MapColorFlag; - - st->pixel_xfer.program = (struct st_fragment_program *) fp; } - const struct st_tracked_state st_update_pixel_transfer = { "st_update_pixel_transfer", /* name */ { /* dirty */ diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 230eba8c4a5..bb6dfe85644 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -108,151 +108,6 @@ struct bitmap_cache /** - * Make fragment program for glBitmap: - * Sample the texture and kill the 
fragment if the bit is 0. - * This program will be combined with the user's fragment program. - */ -static struct st_fragment_program * -make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex) -{ - struct st_context *st = st_context(ctx); - struct st_fragment_program *stfp; - struct gl_program *p; - GLuint ic = 0; - - p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); - if (!p) - return NULL; - - p->NumInstructions = 3; - - p->Instructions = _mesa_alloc_instructions(p->NumInstructions); - if (!p->Instructions) { - ctx->Driver.DeleteProgram(ctx, p); - return NULL; - } - _mesa_init_instructions(p->Instructions, p->NumInstructions); - - /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ - p->Instructions[ic].Opcode = OPCODE_TEX; - p->Instructions[ic].DstReg.File = PROGRAM_TEMPORARY; - p->Instructions[ic].DstReg.Index = 0; - p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; - p->Instructions[ic].SrcReg[0].Index = VARYING_SLOT_TEX0; - p->Instructions[ic].TexSrcUnit = samplerIndex; - p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX; - ic++; - - /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */ - p->Instructions[ic].Opcode = OPCODE_KIL; - p->Instructions[ic].SrcReg[0].File = PROGRAM_TEMPORARY; - - if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM) - p->Instructions[ic].SrcReg[0].Swizzle = SWIZZLE_XXXX; - - p->Instructions[ic].SrcReg[0].Index = 0; - p->Instructions[ic].SrcReg[0].Negate = NEGATE_XYZW; - ic++; - - /* END; */ - p->Instructions[ic++].Opcode = OPCODE_END; - - assert(ic == p->NumInstructions); - - p->InputsRead = VARYING_BIT_TEX0; - p->OutputsWritten = 0x0; - p->SamplersUsed = (1 << samplerIndex); - - stfp = (struct st_fragment_program *) p; - stfp->Base.UsesKill = GL_TRUE; - - return stfp; -} - - -static struct gl_program * -make_bitmap_fragment_program_glsl(struct st_context *st, - struct st_fragment_program *orig, - GLuint samplerIndex) -{ - struct gl_context *ctx = st->ctx; - struct st_fragment_program *fp = 
(struct st_fragment_program *) - ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); - - if (!fp) - return NULL; - - get_bitmap_visitor(fp, orig->glsl_to_tgsi, samplerIndex); - return &fp->Base.Base; -} - - -static int -find_free_bit(uint bitfield) -{ - int i; - for (i = 0; i < 32; i++) { - if ((bitfield & (1 << i)) == 0) { - return i; - } - } - return -1; -} - - -/** - * Combine basic bitmap fragment program with the user-defined program. - * \param st current context - * \param fpIn the incoming fragment program - * \param fpOut the new fragment program which does fragment culling - * \param bitmap_sampler sampler number for the bitmap texture - */ -void -st_make_bitmap_fragment_program(struct st_context *st, - struct gl_fragment_program *fpIn, - struct gl_fragment_program **fpOut, - GLuint *bitmap_sampler) -{ - struct st_fragment_program *bitmap_prog; - struct st_fragment_program *stfpIn = (struct st_fragment_program *) fpIn; - struct gl_program *newProg; - uint sampler; - - /* - * Generate new program which is the user-defined program prefixed - * with the bitmap sampler/kill instructions. - */ - sampler = find_free_bit(fpIn->Base.SamplersUsed); - - if (stfpIn->glsl_to_tgsi) - newProg = make_bitmap_fragment_program_glsl(st, stfpIn, sampler); - else { - bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler); - - newProg = _mesa_combine_programs(st->ctx, - &bitmap_prog->Base.Base, - &fpIn->Base); - /* done with this after combining */ - st_reference_fragprog(st, &bitmap_prog, NULL); - } - -#if 0 - { - printf("Combined bitmap program:\n"); - _mesa_print_program(newProg); - printf("InputsRead: 0x%x\n", newProg->InputsRead); - printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten); - _mesa_print_parameter_list(newProg->Parameters); - } -#endif - - /* return results */ - *fpOut = (struct gl_fragment_program *) newProg; - *bitmap_sampler = sampler; -} - - -/** * Copy user-provide bitmap bits into texture buffer, expanding * bits into texels. 
* "On" bits will set texels to 0x0. diff --git a/src/mesa/state_tracker/st_cb_bitmap.h b/src/mesa/state_tracker/st_cb_bitmap.h index b4254ca1eeb..dc7e5cb5c9e 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.h +++ b/src/mesa/state_tracker/st_cb_bitmap.h @@ -31,6 +31,7 @@ #include "main/compiler.h" +#include <stdbool.h> struct dd_function_table; struct st_context; @@ -47,13 +48,11 @@ extern void st_destroy_bitmap(struct st_context *st); extern void -st_make_bitmap_fragment_program(struct st_context *st, - struct gl_fragment_program *fpIn, - struct gl_fragment_program **fpOut, - GLuint *bitmap_sampler); - -extern void st_flush_bitmap_cache(struct st_context *st); +extern const struct tgsi_token * +st_get_bitmap_shader(const struct tgsi_token *tokens, + unsigned sampler_index, + bool use_texcoord, bool swizzle_xxxx); #endif /* ST_CB_BITMAP_H */ diff --git a/src/mesa/state_tracker/st_cb_bitmap_shader.c b/src/mesa/state_tracker/st_cb_bitmap_shader.c new file mode 100644 index 00000000000..cddea36d4f6 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_bitmap_shader.c @@ -0,0 +1,174 @@ +/************************************************************************** + * + * Copyright (C) 2015 Advanced Micro Devices, Inc. + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "st_cb_bitmap.h" +#include "tgsi/tgsi_transform.h" +#include "tgsi/tgsi_scan.h" +#include "tgsi/tgsi_dump.h" +#include "util/u_debug.h" + +struct tgsi_bitmap_transform { + struct tgsi_transform_context base; + struct tgsi_shader_info info; + unsigned sampler_index; + bool use_texcoord; + bool swizzle_xxxx; + bool first_instruction_emitted; +}; + +static inline struct tgsi_bitmap_transform * +tgsi_bitmap_transform(struct tgsi_transform_context *tctx) +{ + return (struct tgsi_bitmap_transform *)tctx; +} + +static void +transform_instr(struct tgsi_transform_context *tctx, + struct tgsi_full_instruction *current_inst) +{ + struct tgsi_bitmap_transform *ctx = tgsi_bitmap_transform(tctx); + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + unsigned i, semantic; + int texcoord_index = -1; + + if (ctx->first_instruction_emitted) { + tctx->emit_instruction(tctx, current_inst); + return; + } + + ctx->first_instruction_emitted = true; + + /* Add TEMP[0] if it's missing. */ + if (ctx->info.file_max[TGSI_FILE_TEMPORARY] == -1) { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + tctx->emit_declaration(tctx, &decl); + } + + /* Add TEXCOORD[0] if it's missing. */ + semantic = ctx->use_texcoord ? 
TGSI_SEMANTIC_TEXCOORD : + TGSI_SEMANTIC_GENERIC; + for (i = 0; i < ctx->info.num_inputs; i++) { + if (ctx->info.input_semantic_name[i] == semantic && + ctx->info.input_semantic_index[i] == 0) { + texcoord_index = i; + break; + } + } + + if (texcoord_index == -1) { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.Name = semantic; + decl.Declaration.Interpolate = 1; + decl.Interp.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.Range.First = decl.Range.Last = ctx->info.num_inputs; + texcoord_index = ctx->info.num_inputs; + tctx->emit_declaration(tctx, &decl); + } + + /* Declare the sampler. */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.Range.First = decl.Range.Last = ctx->sampler_index; + tctx->emit_declaration(tctx, &decl); + + /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.Texture = 1; + inst.Texture.Texture = TGSI_TEXTURE_2D; + + inst.Instruction.NumDstRegs = 1; + inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + inst.Dst[0].Register.Index = 0; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; + + inst.Instruction.NumSrcRegs = 2; + inst.Src[0].Register.File = TGSI_FILE_INPUT; + inst.Src[0].Register.Index = texcoord_index; + inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y; + inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Z; + inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_W; + inst.Src[1].Register.File = TGSI_FILE_SAMPLER; + inst.Src[1].Register.Index = ctx->sampler_index; + + tctx->emit_instruction(tctx, &inst); + + /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_KILL_IF; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 1; + + inst.Src[0].Register.File = 
TGSI_FILE_TEMPORARY; + inst.Src[0].Register.Index = 0; + inst.Src[0].Register.Negate = 1; + inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; + if (ctx->swizzle_xxxx) { + inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; + inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X; + } else { + inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y; + inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Z; + inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_W; + } + tctx->emit_instruction(tctx, &inst); + + /* And emit the instruction we got. */ + tctx->emit_instruction(tctx, current_inst); +} + +const struct tgsi_token * +st_get_bitmap_shader(const struct tgsi_token *tokens, + unsigned sampler_index, + bool use_texcoord, bool swizzle_xxxx) +{ + struct tgsi_bitmap_transform ctx; + struct tgsi_token *newtoks; + int newlen; + + memset(&ctx, 0, sizeof(ctx)); + ctx.base.transform_instruction = transform_instr; + ctx.sampler_index = sampler_index; + ctx.use_texcoord = use_texcoord; + ctx.swizzle_xxxx = swizzle_xxxx; + tgsi_scan_shader(tokens, &ctx.info); + + newlen = tgsi_num_tokens(tokens) + 20; + newtoks = tgsi_alloc_tokens(newlen); + if (!newtoks) + return NULL; + + tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base); + return newtoks; +} diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 152160e1dd2..7e8633edc1a 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -72,217 +72,74 @@ /** - * Check if the given program is: - * 0: MOVE result.color, fragment.color; - * 1: END; - */ -static GLboolean -is_passthrough_program(const struct gl_fragment_program *prog) -{ - if (prog->Base.NumInstructions == 2) { - const struct prog_instruction *inst = prog->Base.Instructions; - if (inst[0].Opcode == OPCODE_MOV && - inst[1].Opcode == OPCODE_END && - inst[0].DstReg.File == PROGRAM_OUTPUT && - inst[0].DstReg.Index == FRAG_RESULT_COLOR && - 
inst[0].DstReg.WriteMask == WRITEMASK_XYZW && - inst[0].SrcReg[0].File == PROGRAM_INPUT && - inst[0].SrcReg[0].Index == VARYING_SLOT_COL0 && - inst[0].SrcReg[0].Swizzle == SWIZZLE_XYZW) { - return GL_TRUE; - } - } - return GL_FALSE; -} - - -/** - * Returns a fragment program which implements the current pixel transfer ops. - */ -static struct gl_fragment_program * -get_glsl_pixel_transfer_program(struct st_context *st, - struct st_fragment_program *orig) -{ - int pixelMaps = 0, scaleAndBias = 0; - struct gl_context *ctx = st->ctx; - struct st_fragment_program *fp = (struct st_fragment_program *) - ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); - - if (!fp) - return NULL; - - if (ctx->Pixel.RedBias != 0.0 || ctx->Pixel.RedScale != 1.0 || - ctx->Pixel.GreenBias != 0.0 || ctx->Pixel.GreenScale != 1.0 || - ctx->Pixel.BlueBias != 0.0 || ctx->Pixel.BlueScale != 1.0 || - ctx->Pixel.AlphaBias != 0.0 || ctx->Pixel.AlphaScale != 1.0) { - scaleAndBias = 1; - } - - pixelMaps = ctx->Pixel.MapColorFlag; - - if (pixelMaps) { - /* create the colormap/texture now if not already done */ - if (!st->pixel_xfer.pixelmap_texture) { - st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx); - st->pixel_xfer.pixelmap_sampler_view = - st_create_texture_sampler_view(st->pipe, - st->pixel_xfer.pixelmap_texture); - } - } - - get_pixel_transfer_visitor(fp, orig->glsl_to_tgsi, - scaleAndBias, pixelMaps); - - return &fp->Base; -} - - -/** - * Make fragment shader for glDraw/CopyPixels. This shader is made - * by combining the pixel transfer shader with the user-defined shader. 
- * \param fpIn the current/incoming fragment program - * \param fpOut returns the combined fragment program - */ -void -st_make_drawpix_fragment_program(struct st_context *st, - struct gl_fragment_program *fpIn, - struct gl_fragment_program **fpOut) -{ - struct gl_program *newProg; - struct st_fragment_program *stfp = (struct st_fragment_program *) fpIn; - - if (is_passthrough_program(fpIn)) { - newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx, - &st->pixel_xfer.program->Base); - } - else if (stfp->glsl_to_tgsi != NULL) { - newProg = (struct gl_program *) get_glsl_pixel_transfer_program(st, stfp); - } - else { -#if 0 - /* debug */ - printf("Base program:\n"); - _mesa_print_program(&fpIn->Base); - printf("DrawPix program:\n"); - _mesa_print_program(&st->pixel_xfer.program->Base.Base); -#endif - newProg = _mesa_combine_programs(st->ctx, - &st->pixel_xfer.program->Base.Base, - &fpIn->Base); - } - -#if 0 - /* debug */ - printf("Combined DrawPixels program:\n"); - _mesa_print_program(newProg); - printf("InputsRead: 0x%x\n", newProg->InputsRead); - printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten); - _mesa_print_parameter_list(newProg->Parameters); -#endif - - *fpOut = (struct gl_fragment_program *) newProg; -} - - -/** * Create fragment program that does a TEX() instruction to get a Z and/or * stencil value value, then writes to FRAG_RESULT_DEPTH/FRAG_RESULT_STENCIL. * Used for glDrawPixels(GL_DEPTH_COMPONENT / GL_STENCIL_INDEX). * Pass fragment color through as-is. - * \return pointer to the gl_fragment program + * + * \return CSO of the fragment shader. 
*/ -struct gl_fragment_program * -st_make_drawpix_z_stencil_program(struct st_context *st, - GLboolean write_depth, - GLboolean write_stencil) +static void * +get_drawpix_z_stencil_program(struct st_context *st, + GLboolean write_depth, + GLboolean write_stencil) { - struct gl_context *ctx = st->ctx; - struct gl_program *p; - struct gl_fragment_program *fp; - GLuint ic = 0; + struct ureg_program *ureg; + struct ureg_src depth_sampler, stencil_sampler; + struct ureg_src texcoord, color; + struct ureg_dst out_color, out_depth, out_stencil; const GLuint shaderIndex = write_depth * 2 + write_stencil; + void *cso; - assert(shaderIndex < ARRAY_SIZE(st->drawpix.shaders)); + assert(shaderIndex < ARRAY_SIZE(st->drawpix.zs_shaders)); - if (st->drawpix.shaders[shaderIndex]) { + if (st->drawpix.zs_shaders[shaderIndex]) { /* already have the proper shader */ - return st->drawpix.shaders[shaderIndex]; + return st->drawpix.zs_shaders[shaderIndex]; } - /* - * Create shader now - */ - p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); - if (!p) + ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT); + if (ureg == NULL) return NULL; - p->NumInstructions = write_depth ? 3 : 1; - p->NumInstructions += write_stencil ? 
1 : 0; - - p->Instructions = _mesa_alloc_instructions(p->NumInstructions); - if (!p->Instructions) { - ctx->Driver.DeleteProgram(ctx, p); - return NULL; - } - _mesa_init_instructions(p->Instructions, p->NumInstructions); + ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, TRUE); if (write_depth) { - /* TEX result.depth, fragment.texcoord[0], texture[0], 2D; */ - p->Instructions[ic].Opcode = OPCODE_TEX; - p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT; - p->Instructions[ic].DstReg.Index = FRAG_RESULT_DEPTH; - p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Z; - p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; - p->Instructions[ic].SrcReg[0].Index = VARYING_SLOT_TEX0; - p->Instructions[ic].TexSrcUnit = 0; - p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX; - ic++; - /* MOV result.color, fragment.color; */ - p->Instructions[ic].Opcode = OPCODE_MOV; - p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT; - p->Instructions[ic].DstReg.Index = FRAG_RESULT_COLOR; - p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; - p->Instructions[ic].SrcReg[0].Index = VARYING_SLOT_COL0; - ic++; + color = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, + TGSI_INTERPOLATE_COLOR); + out_color = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); + + depth_sampler = ureg_DECL_sampler(ureg, 0); + out_depth = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); } if (write_stencil) { - /* TEX result.stencil, fragment.texcoord[0], texture[0], 2D; */ - p->Instructions[ic].Opcode = OPCODE_TEX; - p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT; - p->Instructions[ic].DstReg.Index = FRAG_RESULT_STENCIL; - p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Y; - p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; - p->Instructions[ic].SrcReg[0].Index = VARYING_SLOT_TEX0; - p->Instructions[ic].TexSrcUnit = 1; - p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX; - ic++; + stencil_sampler = ureg_DECL_sampler(ureg, 1); + out_stencil = ureg_DECL_output(ureg, TGSI_SEMANTIC_STENCIL, 0); } - /* 
END; */ - p->Instructions[ic++].Opcode = OPCODE_END; - - assert(ic == p->NumInstructions); + texcoord = ureg_DECL_fs_input(ureg, + st->needs_texcoord_semantic ? + TGSI_SEMANTIC_TEXCOORD : + TGSI_SEMANTIC_GENERIC, + 0, TGSI_INTERPOLATE_LINEAR); - p->InputsRead = VARYING_BIT_TEX0 | VARYING_BIT_COL0; - p->OutputsWritten = 0; if (write_depth) { - p->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_DEPTH); - p->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_COLOR); + ureg_TEX(ureg, ureg_writemask(out_depth, TGSI_WRITEMASK_Z), + TGSI_TEXTURE_2D, texcoord, depth_sampler); + ureg_MOV(ureg, out_color, color); } - if (write_stencil) - p->OutputsWritten |= BITFIELD64_BIT(FRAG_RESULT_STENCIL); - p->SamplersUsed = 0x1; /* sampler 0 (bit 0) is used */ if (write_stencil) - p->SamplersUsed |= 1 << 1; + ureg_TEX(ureg, ureg_writemask(out_stencil, TGSI_WRITEMASK_Y), + TGSI_TEXTURE_2D, texcoord, stencil_sampler); - fp = (struct gl_fragment_program *) p; + ureg_END(ureg); + cso = ureg_create_shader_and_destroy(ureg, st->pipe); /* save the new shader */ - st->drawpix.shaders[shaderIndex] = fp; - - return fp; + st->drawpix.zs_shaders[shaderIndex] = cso; + return cso; } @@ -668,6 +525,7 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, int num_sampler_view, void *driver_vp, void *driver_fp, + struct st_fp_variant *fpv, const GLfloat *color, GLboolean invertTex, GLboolean write_depth, GLboolean write_stencil) @@ -755,10 +613,9 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, cso_set_tesseval_shader_handle(cso, NULL); cso_set_geometry_shader_handle(cso, NULL); - /* texture sampling state: */ + /* user samplers, plus the drawpix samplers */ { struct pipe_sampler_state sampler; - const struct pipe_sampler_state *states[2] = {&sampler, &sampler}; memset(&sampler, 0, sizeof(sampler)); sampler.wrap_s = PIPE_TEX_WRAP_CLAMP; @@ -769,8 +626,25 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, sampler.mag_img_filter = 
PIPE_TEX_FILTER_NEAREST; sampler.normalized_coords = normalized; - cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, - num_sampler_view > 1 ? 2 : 1, states); + if (fpv) { + const struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS]; + uint num = MAX2(MAX2(fpv->drawpix_sampler, fpv->pixelmap_sampler) + 1, + st->state.num_samplers[PIPE_SHADER_FRAGMENT]); + uint i; + + for (i = 0; i < st->state.num_samplers[PIPE_SHADER_FRAGMENT]; i++) + samplers[i] = &st->state.samplers[PIPE_SHADER_FRAGMENT][i]; + + samplers[fpv->drawpix_sampler] = &sampler; + if (sv[1]) + samplers[fpv->pixelmap_sampler] = &sampler; + + cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, num, samplers); + } else { + const struct pipe_sampler_state *samplers[2] = {&sampler, &sampler}; + + cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, num_sampler_view, samplers); + } } /* viewport state: viewport matching window dims */ @@ -790,8 +664,21 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, cso_set_vertex_elements(cso, 3, st->velems_util_draw); cso_set_stream_outputs(st->cso_context, 0, NULL, NULL); - /* texture state: */ - cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, num_sampler_view, sv); + /* user textures, plus the drawpix textures */ + if (fpv) { + struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; + uint num = MAX2(MAX2(fpv->drawpix_sampler, fpv->pixelmap_sampler) + 1, + st->state.num_sampler_views[PIPE_SHADER_FRAGMENT]); + + memcpy(sampler_views, st->state.sampler_views[PIPE_SHADER_FRAGMENT], + sizeof(sampler_views)); + + sampler_views[fpv->drawpix_sampler] = sv[0]; + if (sv[1]) + sampler_views[fpv->pixelmap_sampler] = sv[1]; + cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, num, sampler_views); + } else + cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, num_sampler_view, sv); /* Compute Gallium window coords (y=0=top) with pixel zoom. 
* Recall that these coords are transformed by the current @@ -1048,30 +935,6 @@ get_color_fp_variant(struct st_context *st) /** - * Get fragment program variant for a glDrawPixels or glCopyPixels - * command for depth/stencil data. - */ -static struct st_fp_variant * -get_depth_stencil_fp_variant(struct st_context *st, GLboolean write_depth, - GLboolean write_stencil) -{ - struct st_fp_variant_key key; - struct st_fp_variant *fpv; - - memset(&key, 0, sizeof(key)); - - key.st = st; - key.drawpixels = 1; - key.drawpixels_z = write_depth; - key.drawpixels_stencil = write_stencil; - - fpv = st_get_fp_variant(st, st->fp, &key); - - return fpv; -} - - -/** * Clamp glDrawPixels width and height to the maximum texture size. */ static void @@ -1109,8 +972,8 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE; struct pipe_sampler_view *sv[2] = { NULL }; int num_sampler_view = 1; - struct st_fp_variant *fpv; struct gl_pixelstore_attrib clippedUnpack; + struct st_fp_variant *fpv = NULL; /* Mesa state should be up to date by now */ assert(ctx->NewState == 0x0); @@ -1144,31 +1007,27 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, * Get vertex/fragment shaders */ if (write_depth || write_stencil) { - fpv = get_depth_stencil_fp_variant(st, write_depth, write_stencil); - - driver_fp = fpv->driver_shader; - + driver_fp = get_drawpix_z_stencil_program(st, write_depth, + write_stencil); driver_vp = make_passthrough_vertex_shader(st, GL_TRUE); - color = ctx->Current.RasterColor; } else { fpv = get_color_fp_variant(st); driver_fp = fpv->driver_shader; - driver_vp = make_passthrough_vertex_shader(st, GL_FALSE); color = NULL; - if (st->pixel_xfer.pixelmap_enabled) { + if (ctx->Pixel.MapColorFlag) { pipe_sampler_view_reference(&sv[1], st->pixel_xfer.pixelmap_sampler_view); num_sampler_view++; } - } - /* update fragment program constants */ - st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT); + /* 
update fragment program constants */ + st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT); + } /* draw with textured quad */ { @@ -1197,7 +1056,7 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, sv, num_sampler_view, driver_vp, - driver_fp, + driver_fp, fpv, color, GL_FALSE, write_depth, write_stencil); pipe_sampler_view_reference(&sv[0], NULL); if (num_sampler_view > 1) @@ -1452,6 +1311,7 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, void *driver_vp, *driver_fp; struct pipe_resource *pt; struct pipe_sampler_view *sv[2] = { NULL }; + struct st_fp_variant *fpv = NULL; int num_sampler_view = 1; GLfloat *color; enum pipe_format srcFormat; @@ -1459,7 +1319,6 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, GLboolean invertTex = GL_FALSE; GLint readX, readY, readW, readH; struct gl_pixelstore_attrib pack = ctx->DefaultPacking; - struct st_fp_variant *fpv; st_validate_state(st); @@ -1491,19 +1350,22 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, * Get vertex/fragment shaders */ if (type == GL_COLOR) { + fpv = get_color_fp_variant(st); + rbRead = st_get_color_read_renderbuffer(ctx); color = NULL; - fpv = get_color_fp_variant(st); driver_fp = fpv->driver_shader; - driver_vp = make_passthrough_vertex_shader(st, GL_FALSE); - if (st->pixel_xfer.pixelmap_enabled) { + if (ctx->Pixel.MapColorFlag) { pipe_sampler_view_reference(&sv[1], st->pixel_xfer.pixelmap_sampler_view); num_sampler_view++; } + + /* update fragment program constants */ + st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT); } else { assert(type == GL_DEPTH); @@ -1511,15 +1373,10 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, Attachment[BUFFER_DEPTH].Renderbuffer); color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0]; - fpv = get_depth_stencil_fp_variant(st, GL_TRUE, GL_FALSE); - driver_fp = fpv->driver_shader; - + driver_fp = get_drawpix_z_stencil_program(st, GL_TRUE, GL_FALSE); driver_vp = 
make_passthrough_vertex_shader(st, GL_TRUE); } - /* update fragment program constants */ - st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT); - /* Choose the format for the temporary texture. */ srcFormat = rbRead->texture->format; srcBind = PIPE_BIND_SAMPLER_VIEW | @@ -1645,7 +1502,7 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, sv, num_sampler_view, driver_vp, - driver_fp, + driver_fp, fpv, color, invertTex, GL_FALSE, GL_FALSE); pipe_resource_reference(&pt, NULL); @@ -1666,12 +1523,12 @@ st_destroy_drawpix(struct st_context *st) { GLuint i; - for (i = 0; i < ARRAY_SIZE(st->drawpix.shaders); i++) { - if (st->drawpix.shaders[i]) - _mesa_reference_fragprog(st->ctx, &st->drawpix.shaders[i], NULL); + for (i = 0; i < ARRAY_SIZE(st->drawpix.zs_shaders); i++) { + if (st->drawpix.zs_shaders[i]) + cso_delete_fragment_shader(st->cso_context, + st->drawpix.zs_shaders[i]); } - st_reference_fragprog(st, &st->pixel_xfer.combined_prog, NULL); if (st->drawpix.vert_shaders[0]) cso_delete_vertex_shader(st->cso_context, st->drawpix.vert_shaders[0]); if (st->drawpix.vert_shaders[1]) diff --git a/src/mesa/state_tracker/st_cb_drawpixels.h b/src/mesa/state_tracker/st_cb_drawpixels.h index c707ace2f9f..f1fb32dd6cf 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.h +++ b/src/mesa/state_tracker/st_cb_drawpixels.h @@ -31,6 +31,7 @@ #include "main/compiler.h" +#include <stdbool.h> struct dd_function_table; struct st_context; @@ -40,15 +41,11 @@ extern void st_init_drawpixels_functions(struct dd_function_table *functions); extern void st_destroy_drawpix(struct st_context *st); -extern void -st_make_drawpix_fragment_program(struct st_context *st, - struct gl_fragment_program *fpIn, - struct gl_fragment_program **fpOut); - -extern struct gl_fragment_program * -st_make_drawpix_z_stencil_program(struct st_context *st, - GLboolean write_depth, - GLboolean write_stencil); - +extern const struct tgsi_token * +st_get_drawpix_shader(const struct tgsi_token *tokens, 
bool use_texcoord, + bool scale_and_bias, unsigned scale_const, + unsigned bias_const, bool pixel_maps, + unsigned drawpix_sampler, unsigned pixelmap_sampler, + unsigned texcoord_const); #endif /* ST_CB_DRAWPIXELS_H */ diff --git a/src/mesa/state_tracker/st_cb_drawpixels_shader.c b/src/mesa/state_tracker/st_cb_drawpixels_shader.c new file mode 100644 index 00000000000..749b46cfbf7 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_drawpixels_shader.c @@ -0,0 +1,278 @@ +/************************************************************************** + * + * Copyright (C) 2015 Advanced Micro Devices, Inc. + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "st_cb_drawpixels.h" +#include "tgsi/tgsi_transform.h" +#include "tgsi/tgsi_scan.h" + +struct tgsi_drawpix_transform { + struct tgsi_transform_context base; + struct tgsi_shader_info info; + bool use_texcoord; + bool scale_and_bias; + bool pixel_maps; + bool first_instruction_emitted; + unsigned scale_const; + unsigned bias_const; + unsigned color_temp; + unsigned drawpix_sampler; + unsigned pixelmap_sampler; + unsigned texcoord_const; +}; + +static inline struct tgsi_drawpix_transform * +tgsi_drawpix_transform(struct tgsi_transform_context *tctx) +{ + return (struct tgsi_drawpix_transform *)tctx; +} + +static void +set_src(struct tgsi_full_instruction *inst, unsigned i, unsigned file, unsigned index, + unsigned x, unsigned y, unsigned z, unsigned w) +{ + inst->Src[i].Register.File = file; + inst->Src[i].Register.Index = index; + inst->Src[i].Register.SwizzleX = x; + inst->Src[i].Register.SwizzleY = y; + inst->Src[i].Register.SwizzleZ = z; + inst->Src[i].Register.SwizzleW = w; +} + +#define SET_SRC(inst, i, file, index, x, y, z, w) \ + set_src(inst, i, file, index, TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, \ + TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w) + +static void +transform_instr(struct tgsi_transform_context *tctx, + struct tgsi_full_instruction *current_inst) +{ + struct tgsi_drawpix_transform *ctx = tgsi_drawpix_transform(tctx); + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + unsigned i, sem_texcoord = ctx->use_texcoord ? TGSI_SEMANTIC_TEXCOORD : + TGSI_SEMANTIC_GENERIC; + int texcoord_index = -1; + + if (ctx->first_instruction_emitted) + goto transform_inst; + + ctx->first_instruction_emitted = true; + + /* Add scale and bias constants. 
*/ + if (ctx->scale_and_bias) { + if (ctx->info.const_file_max[0] < (int)ctx->scale_const) { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Range.First = decl.Range.Last = ctx->scale_const; + tctx->emit_declaration(tctx, &decl); + } + + if (ctx->info.const_file_max[0] < (int)ctx->bias_const) { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Range.First = decl.Range.Last = ctx->bias_const; + tctx->emit_declaration(tctx, &decl); + } + } + + if (ctx->info.const_file_max[0] < (int)ctx->texcoord_const) { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Range.First = decl.Range.Last = ctx->texcoord_const; + tctx->emit_declaration(tctx, &decl); + } + + /* Add a new temp. */ + ctx->color_temp = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 1; + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.Range.First = decl.Range.Last = ctx->color_temp; + tctx->emit_declaration(tctx, &decl); + + /* Add TEXCOORD[texcoord_slot] if it's missing. */ + for (i = 0; i < ctx->info.num_inputs; i++) { + if (ctx->info.input_semantic_name[i] == sem_texcoord && + ctx->info.input_semantic_index[i] == 0) { + texcoord_index = i; + break; + } + } + + if (texcoord_index == -1) { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.Name = sem_texcoord; + decl.Declaration.Interpolate = 1; + decl.Interp.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.Range.First = decl.Range.Last = ctx->info.num_inputs; + texcoord_index = ctx->info.num_inputs; + tctx->emit_declaration(tctx, &decl); + } + + /* Declare the drawpix sampler if it's missing. 
*/ + if (!(ctx->info.samplers_declared & (1 << ctx->drawpix_sampler))) { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.Range.First = decl.Range.Last = ctx->drawpix_sampler; + tctx->emit_declaration(tctx, &decl); + } + + /* Declare the pixel map sampler if it's missing. */ + if (ctx->pixel_maps && + !(ctx->info.samplers_declared & (1 << ctx->pixelmap_sampler))) { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.Range.First = decl.Range.Last = ctx->pixelmap_sampler; + tctx->emit_declaration(tctx, &decl); + } + + /* Get initial pixel color from the texture. + * TEX temp, fragment.texcoord[0], texture[0], 2D; + */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.Texture = 1; + inst.Texture.Texture = TGSI_TEXTURE_2D; + + inst.Instruction.NumDstRegs = 1; + inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + inst.Dst[0].Register.Index = ctx->color_temp; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; + + inst.Instruction.NumSrcRegs = 2; + SET_SRC(&inst, 0, TGSI_FILE_INPUT, texcoord_index, X, Y, Z, W); + inst.Src[1].Register.File = TGSI_FILE_SAMPLER; + inst.Src[1].Register.Index = ctx->drawpix_sampler; + + tctx->emit_instruction(tctx, &inst); + + /* Apply the scale and bias. 
*/ + if (ctx->scale_and_bias) { + /* MAD temp, temp, scale, bias; */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MAD; + + inst.Instruction.NumDstRegs = 1; + inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + inst.Dst[0].Register.Index = ctx->color_temp; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; + + inst.Instruction.NumSrcRegs = 3; + SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->color_temp, X, Y, Z, W); + SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, ctx->scale_const, X, Y, Z, W); + SET_SRC(&inst, 2, TGSI_FILE_CONSTANT, ctx->bias_const, X, Y, Z, W); + + tctx->emit_instruction(tctx, &inst); + } + + if (ctx->pixel_maps) { + /* do four pixel map look-ups with two TEX instructions: */ + + /* TEX temp.xy, temp.xyyy, texture[1], 2D; */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.Texture = 1; + inst.Texture.Texture = TGSI_TEXTURE_2D; + + inst.Instruction.NumDstRegs = 1; + inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + inst.Dst[0].Register.Index = ctx->color_temp; + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XY; + + inst.Instruction.NumSrcRegs = 2; + SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->color_temp, X, Y, Y, Y); + inst.Src[1].Register.File = TGSI_FILE_SAMPLER; + inst.Src[1].Register.Index = ctx->pixelmap_sampler; + + tctx->emit_instruction(tctx, &inst); + + /* TEX temp.zw, temp.zwww, texture[1], 2D; */ + inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_ZW; + SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->color_temp, Z, W, W, W); + tctx->emit_instruction(tctx, &inst); + } + + /* Now, "color_temp" should be used in place of IN:COLOR0, + * and CONST[texcoord_slot] should be used in place of IN:TEXCOORD0. 
+ */ + +transform_inst: + + for (i = 0; i < current_inst->Instruction.NumSrcRegs; i++) { + struct tgsi_full_src_register *src = &current_inst->Src[i]; + unsigned reg = src->Register.Index; + + if (src->Register.File != TGSI_FILE_INPUT || src->Register.Indirect) + continue; + + if (ctx->info.input_semantic_name[reg] == TGSI_SEMANTIC_COLOR && + ctx->info.input_semantic_index[reg] == 0) { + src->Register.File = TGSI_FILE_TEMPORARY; + src->Register.Index = ctx->color_temp; + } else if (ctx->info.input_semantic_name[reg] == sem_texcoord && + ctx->info.input_semantic_index[reg] == 0) { + src->Register.File = TGSI_FILE_CONSTANT; + src->Register.Index = ctx->texcoord_const; + } + } + + tctx->emit_instruction(tctx, current_inst); +} + +const struct tgsi_token * +st_get_drawpix_shader(const struct tgsi_token *tokens, bool use_texcoord, + bool scale_and_bias, unsigned scale_const, + unsigned bias_const, bool pixel_maps, + unsigned drawpix_sampler, unsigned pixelmap_sampler, + unsigned texcoord_const) +{ + struct tgsi_drawpix_transform ctx; + struct tgsi_token *newtoks; + int newlen; + + memset(&ctx, 0, sizeof(ctx)); + ctx.base.transform_instruction = transform_instr; + ctx.use_texcoord = use_texcoord; + ctx.scale_and_bias = scale_and_bias; + ctx.scale_const = scale_const; + ctx.bias_const = bias_const; + ctx.pixel_maps = pixel_maps; + ctx.drawpix_sampler = drawpix_sampler; + ctx.pixelmap_sampler = pixelmap_sampler; + ctx.texcoord_const = texcoord_const; + tgsi_scan_shader(tokens, &ctx.info); + + newlen = tgsi_num_tokens(tokens) + 30; + newtoks = tgsi_alloc_tokens(newlen); + if (!newtoks) + return NULL; + + tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base); + return newtoks; +} diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index ff703fa41cb..2a2eb0992c8 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -456,7 +456,7 @@ st_update_renderbuffer_surface(struct st_context *st, 
surf_tmpl.u.tex.first_layer = first_layer; surf_tmpl.u.tex.last_layer = last_layer; - pipe_surface_reference(&strb->surface, NULL); + pipe_surface_release(pipe, &strb->surface); strb->surface = pipe->create_surface(pipe, resource, &surf_tmpl); } diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index 3029909d12d..708bdf5011e 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -105,29 +105,24 @@ st_new_program(struct gl_context *ctx, GLenum target, GLuint id) switch (target) { case GL_VERTEX_PROGRAM_ARB: { struct st_vertex_program *prog = ST_CALLOC_STRUCT(st_vertex_program); - return _mesa_init_vertex_program(ctx, &prog->Base, target, id); + return _mesa_init_gl_program(&prog->Base.Base, target, id); } - case GL_FRAGMENT_PROGRAM_ARB: { struct st_fragment_program *prog = ST_CALLOC_STRUCT(st_fragment_program); - return _mesa_init_fragment_program(ctx, &prog->Base, target, id); + return _mesa_init_gl_program(&prog->Base.Base, target, id); } - case GL_GEOMETRY_PROGRAM_NV: { struct st_geometry_program *prog = ST_CALLOC_STRUCT(st_geometry_program); - return _mesa_init_geometry_program(ctx, &prog->Base, target, id); + return _mesa_init_gl_program(&prog->Base.Base, target, id); } - case GL_TESS_CONTROL_PROGRAM_NV: { struct st_tessctrl_program *prog = ST_CALLOC_STRUCT(st_tessctrl_program); - return _mesa_init_tess_ctrl_program(ctx, &prog->Base, target, id); + return _mesa_init_gl_program(&prog->Base.Base, target, id); } - case GL_TESS_EVALUATION_PROGRAM_NV: { struct st_tesseval_program *prog = ST_CALLOC_STRUCT(st_tesseval_program); - return _mesa_init_tess_eval_program(ctx, &prog->Base, target, id); + return _mesa_init_gl_program(&prog->Base.Base, target, id); } - default: assert(0); return NULL; @@ -234,6 +229,8 @@ st_program_string_notify( struct gl_context *ctx, struct st_fragment_program *stfp = (struct st_fragment_program *) prog; st_release_fp_variants(st, stfp); + if 
(!st_translate_fragment_program(st, stfp)) + return false; if (st->fp == stfp) st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM; @@ -242,6 +239,8 @@ st_program_string_notify( struct gl_context *ctx, struct st_geometry_program *stgp = (struct st_geometry_program *) prog; st_release_gp_variants(st, stgp); + if (!st_translate_geometry_program(st, stgp)) + return false; if (st->gp == stgp) st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM; @@ -249,7 +248,9 @@ st_program_string_notify( struct gl_context *ctx, else if (target == GL_VERTEX_PROGRAM_ARB) { struct st_vertex_program *stvp = (struct st_vertex_program *) prog; - st_release_vp_variants( st, stvp ); + st_release_vp_variants(st, stvp); + if (!st_translate_vertex_program(st, stvp)) + return false; if (st->vp == stvp) st->dirty.st |= ST_NEW_VERTEX_PROGRAM; @@ -259,6 +260,8 @@ st_program_string_notify( struct gl_context *ctx, (struct st_tessctrl_program *) prog; st_release_tcp_variants(st, sttcp); + if (!st_translate_tessctrl_program(st, sttcp)) + return false; if (st->tcp == sttcp) st->dirty.st |= ST_NEW_TESSCTRL_PROGRAM; @@ -268,6 +271,8 @@ st_program_string_notify( struct gl_context *ctx, (struct st_tesseval_program *) prog; st_release_tep_variants(st, sttep); + if (!st_translate_tesseval_program(st, sttep)) + return false; if (st->tep == sttep) st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM; diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index a9ab5edcf49..bef7307bb27 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -224,8 +224,6 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe, st->ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; - st->pixel_xfer.cache = _mesa_new_program_cache(); - st->has_stencil_export = screen->get_param(screen, PIPE_CAP_SHADER_STENCIL_EXPORT); st->has_shader_model3 = screen->get_param(screen, PIPE_CAP_SM3); @@ -386,8 +384,8 @@ void st_destroy_context( struct st_context *st ) 
pipe_surface_reference(&st->state.framebuffer.cbufs[i], NULL); } pipe_surface_reference(&st->state.framebuffer.zsbuf, NULL); - - _mesa_delete_program_cache(st->ctx, st->pixel_xfer.cache); + pipe_sampler_view_reference(&st->pixel_xfer.pixelmap_sampler_view, NULL); + pipe_resource_reference(&st->pixel_xfer.pixelmap_texture, NULL); _vbo_DestroyContext(st->ctx); diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index a4cda29059d..f187d82449b 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -162,15 +162,8 @@ struct st_context struct gl_texture_object *default_texture; struct { - struct gl_program_cache *cache; - struct st_fragment_program *program; /**< cur pixel transfer prog */ - GLuint xfer_prog_sn; /**< pixel xfer program serial no. */ - GLuint user_prog_sn; /**< user fragment program serial no. */ - struct st_fragment_program *combined_prog; - GLuint combined_prog_sn; struct pipe_resource *pixelmap_texture; struct pipe_sampler_view *pixelmap_sampler_view; - boolean pixelmap_enabled; /**< use the pixelmap texture? 
*/ } pixel_xfer; /** for glBitmap */ @@ -184,7 +177,7 @@ struct st_context /** for glDraw/CopyPixels */ struct { - struct gl_fragment_program *shaders[4]; + void *zs_shaders[4]; void *vert_shaders[2]; /**< ureg shaders */ } drawpix; diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c index 50891c112cb..6d859c6ab5b 100644 --- a/src/mesa/state_tracker/st_debug.c +++ b/src/mesa/state_tracker/st_debug.c @@ -98,7 +98,7 @@ st_print_current(void) if (st->vp->Base.Base.Parameters) _mesa_print_parameter_list(st->vp->Base.Base.Parameters); - tgsi_dump( st->fp->variants[0].tgsi.tokens, 0 ); + tgsi_dump(st->fp->tgsi.tokens, 0); if (st->fp->Base.Base.Parameters) _mesa_print_parameter_list(st->fp->Base.Base.Parameters); } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 633e90ffa38..f481e8902d8 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -4334,216 +4334,6 @@ glsl_to_tgsi_visitor::renumber_registers(void) ralloc_free(first_reads); } -/** - * Returns a fragment program which implements the current pixel transfer ops. - * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c. - */ -extern "C" void -get_pixel_transfer_visitor(struct st_fragment_program *fp, - glsl_to_tgsi_visitor *original, - int scale_and_bias, int pixel_maps) -{ - glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); - struct st_context *st = st_context(original->ctx); - struct gl_program *prog = &fp->Base.Base; - struct gl_program_parameter_list *params = _mesa_new_parameter_list(); - st_src_reg coord, src0; - st_dst_reg dst0; - glsl_to_tgsi_instruction *inst; - - /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. 
*/ - v->ctx = original->ctx; - v->prog = prog; - v->shader_program = NULL; - v->shader = NULL; - v->glsl_version = original->glsl_version; - v->native_integers = original->native_integers; - v->options = original->options; - v->next_temp = original->next_temp; - v->num_address_regs = original->num_address_regs; - v->samplers_used = prog->SamplersUsed = original->samplers_used; - v->indirect_addr_consts = original->indirect_addr_consts; - memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); - v->num_immediates = original->num_immediates; - - /* - * Get initial pixel color from the texture. - * TEX colorTemp, fragment.texcoord[0], texture[0], 2D; - */ - coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type); - src0 = v->get_temp(glsl_type::vec4_type); - dst0 = st_dst_reg(src0); - inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, dst0, coord); - inst->sampler_array_size = 1; - inst->tex_target = TEXTURE_2D_INDEX; - - prog->InputsRead |= VARYING_BIT_TEX0; - prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ - v->samplers_used |= (1 << 0); - - if (scale_and_bias) { - static const gl_state_index scale_state[STATE_LENGTH] = - { STATE_INTERNAL, STATE_PT_SCALE, - (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; - static const gl_state_index bias_state[STATE_LENGTH] = - { STATE_INTERNAL, STATE_PT_BIAS, - (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; - GLint scale_p, bias_p; - st_src_reg scale, bias; - - scale_p = _mesa_add_state_reference(params, scale_state); - bias_p = _mesa_add_state_reference(params, bias_state); - - /* MAD colorTemp, colorTemp, scale, bias; */ - scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT); - bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT); - inst = v->emit_asm(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias); - } - - if (pixel_maps) { - st_src_reg temp = v->get_temp(glsl_type::vec4_type); - st_dst_reg temp_dst = st_dst_reg(temp); - - 
assert(st->pixel_xfer.pixelmap_texture); - (void) st; - - /* With a little effort, we can do four pixel map look-ups with - * two TEX instructions: - */ - - /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */ - temp_dst.writemask = WRITEMASK_XY; /* write R,G */ - inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, temp_dst, src0); - inst->sampler.index = 1; - inst->sampler_array_size = 1; - inst->tex_target = TEXTURE_2D_INDEX; - - /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */ - src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); - temp_dst.writemask = WRITEMASK_ZW; /* write B,A */ - inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, temp_dst, src0); - inst->sampler.index = 1; - inst->sampler_array_size = 1; - inst->tex_target = TEXTURE_2D_INDEX; - - prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */ - v->samplers_used |= (1 << 1); - - /* MOV colorTemp, temp; */ - inst = v->emit_asm(NULL, TGSI_OPCODE_MOV, dst0, temp); - } - - /* Now copy the instructions from the original glsl_to_tgsi_visitor into the - * new visitor. */ - foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) { - glsl_to_tgsi_instruction *newinst; - st_src_reg src_regs[4]; - - if (inst->dst[0].file == PROGRAM_OUTPUT) - prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index); - - for (int i = 0; i < 4; i++) { - src_regs[i] = inst->src[i]; - if (src_regs[i].file == PROGRAM_INPUT && - src_regs[i].index == VARYING_SLOT_COL0) { - src_regs[i].file = PROGRAM_TEMPORARY; - src_regs[i].index = src0.index; - } - else if (src_regs[i].file == PROGRAM_INPUT) - prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); - } - - newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2], src_regs[3]); - newinst->tex_target = inst->tex_target; - newinst->sampler_array_size = inst->sampler_array_size; - } - - /* Make modifications to fragment program info. 
*/ - prog->Parameters = _mesa_combine_parameter_lists(params, - original->prog->Parameters); - _mesa_free_parameter_list(params); - count_resources(v, prog); - fp->glsl_to_tgsi = v; -} - -/** - * Make fragment program for glBitmap: - * Sample the texture and kill the fragment if the bit is 0. - * This program will be combined with the user's fragment program. - * - * Based on make_bitmap_fragment_program in st_cb_bitmap.c. - */ -extern "C" void -get_bitmap_visitor(struct st_fragment_program *fp, - glsl_to_tgsi_visitor *original, int samplerIndex) -{ - glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); - struct st_context *st = st_context(original->ctx); - struct gl_program *prog = &fp->Base.Base; - st_src_reg coord, src0; - st_dst_reg dst0; - glsl_to_tgsi_instruction *inst; - - /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ - v->ctx = original->ctx; - v->prog = prog; - v->shader_program = NULL; - v->shader = NULL; - v->glsl_version = original->glsl_version; - v->native_integers = original->native_integers; - v->options = original->options; - v->next_temp = original->next_temp; - v->num_address_regs = original->num_address_regs; - v->samplers_used = prog->SamplersUsed = original->samplers_used; - v->indirect_addr_consts = original->indirect_addr_consts; - memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); - v->num_immediates = original->num_immediates; - - /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ - coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type); - src0 = v->get_temp(glsl_type::vec4_type); - dst0 = st_dst_reg(src0); - inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, dst0, coord); - inst->sampler.index = samplerIndex; - inst->sampler_array_size = 1; - inst->tex_target = TEXTURE_2D_INDEX; - - prog->InputsRead |= VARYING_BIT_TEX0; - prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */ - v->samplers_used |= (1 << samplerIndex); - - /* KIL if -tmp0 < 0 # texel=0 -> keep / 
texel=0 -> discard */ - src0.negate = NEGATE_XYZW; - if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM) - src0.swizzle = SWIZZLE_XXXX; - inst = v->emit_asm(NULL, TGSI_OPCODE_KILL_IF, undef_dst, src0); - - /* Now copy the instructions from the original glsl_to_tgsi_visitor into the - * new visitor. */ - foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) { - glsl_to_tgsi_instruction *newinst; - st_src_reg src_regs[4]; - - if (inst->dst[0].file == PROGRAM_OUTPUT) - prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index); - - for (int i = 0; i < 4; i++) { - src_regs[i] = inst->src[i]; - if (src_regs[i].file == PROGRAM_INPUT) - prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); - } - - newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2], src_regs[3]); - newinst->tex_target = inst->tex_target; - newinst->sampler_array_size = inst->sampler_array_size; - } - - /* Make modifications to fragment program info. */ - prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters); - count_resources(v, prog); - fp->glsl_to_tgsi = v; -} - /* ------------------------- TGSI conversion stuff -------------------------- */ struct label { unsigned branch_target; @@ -4852,7 +4642,7 @@ src_register(struct st_translate *t, const st_src_reg *reg) static struct ureg_dst translate_dst(struct st_translate *t, const st_dst_reg *dst_reg, - bool saturate, bool clamp_color) + bool saturate) { struct ureg_dst dst = dst_register(t, dst_reg->file, dst_reg->index, dst_reg->array_id); @@ -4864,28 +4654,6 @@ translate_dst(struct st_translate *t, if (saturate) dst = ureg_saturate(dst); - else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) { - /* Clamp colors for ARB_color_buffer_float. */ - switch (t->procType) { - case TGSI_PROCESSOR_VERTEX: - /* This can only occur with a compatibility profile, which doesn't - * support geometry shaders. 
*/ - if (dst_reg->index == VARYING_SLOT_COL0 || - dst_reg->index == VARYING_SLOT_COL1 || - dst_reg->index == VARYING_SLOT_BFC0 || - dst_reg->index == VARYING_SLOT_BFC1) { - dst = ureg_saturate(dst); - } - break; - - case TGSI_PROCESSOR_FRAGMENT: - if (dst_reg->index == FRAG_RESULT_COLOR || - dst_reg->index >= FRAG_RESULT_DATA0) { - dst = ureg_saturate(dst); - } - break; - } - } if (dst_reg->reladdr != NULL) { assert(dst_reg->file != PROGRAM_TEMPORARY); @@ -4991,8 +4759,7 @@ translate_tex_offset(struct st_translate *t, static void compile_tgsi_instruction(struct st_translate *t, - const glsl_to_tgsi_instruction *inst, - bool clamp_dst_color_output) + const glsl_to_tgsi_instruction *inst) { struct ureg_program *ureg = t->ureg; GLuint i; @@ -5010,8 +4777,7 @@ compile_tgsi_instruction(struct st_translate *t, for (i = 0; i < num_dst; i++) dst[i] = translate_dst(t, &inst->dst[i], - inst->saturate, - clamp_dst_color_output); + inst->saturate); for (i = 0; i < num_src; i++) src[i] = translate_src(t, &inst->src[i]); @@ -5286,16 +5052,6 @@ emit_face_var(struct gl_context *ctx, struct st_translate *t) t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp); } -static void -emit_edgeflags(struct st_translate *t) -{ - struct ureg_program *ureg = t->ureg; - struct ureg_dst edge_dst = t->outputs[t->outputMapping[VARYING_SLOT_EDGE]]; - struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; - - ureg_MOV(ureg, edge_dst, edge_src); -} - static bool find_array(unsigned attr, struct array_decl *arrays, unsigned count, unsigned *array_id, unsigned *array_size) @@ -5353,9 +5109,7 @@ st_translate_program( const GLuint outputMapping[], const GLuint outputSlotToAttr[], const ubyte outputSemanticName[], - const ubyte outputSemanticIndex[], - boolean passthrough_edgeflags, - boolean clamp_color) + const ubyte outputSemanticIndex[]) { struct st_translate *t; unsigned i; @@ -5544,8 +5298,6 @@ st_translate_program( t->outputs[i] = ureg_writemask(t->outputs[i], 
TGSI_WRITEMASK_X); } } - if (passthrough_edgeflags) - emit_edgeflags(t); } /* Declare address register. @@ -5639,7 +5391,7 @@ st_translate_program( unsigned num_ubos = program->shader->NumUniformBlocks; for (i = 0; i < num_ubos; i++) { - unsigned size = program->shader->UniformBlocks[i].UniformBufferSize; + unsigned size = program->shader->UniformBlocks[i]->UniformBufferSize; unsigned num_const_vecs = (size + 15) / 16; unsigned first, last; assert(num_const_vecs > 0); @@ -5696,7 +5448,7 @@ st_translate_program( */ foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) { set_insn_start(t, ureg_get_instruction_number(ureg)); - compile_tgsi_instruction(t, inst, clamp_color); + compile_tgsi_instruction(t, inst); } /* Fix up all emitted labels: diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h index 4af747fa9de..729295bcb52 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.h +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h @@ -52,17 +52,9 @@ enum pipe_error st_translate_program( const GLuint outputMapping[], const GLuint outputSlotToAttr[], const ubyte outputSemanticName[], - const ubyte outputSemanticIndex[], - boolean passthrough_edgeflags, - boolean clamp_color); + const ubyte outputSemanticIndex[]); void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v); -void get_pixel_transfer_visitor(struct st_fragment_program *fp, - struct glsl_to_tgsi_visitor *original, - int scale_and_bias, int pixel_maps); -void get_bitmap_visitor(struct st_fragment_program *fp, - struct glsl_to_tgsi_visitor *original, - int samplerIndex); GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 896e239ee68..4b9dc994ea5 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -283,8 +283,7 @@ st_translate_texture_target( GLuint textarget, static 
struct ureg_dst translate_dst( struct st_translate *t, const struct prog_dst_register *DstReg, - boolean saturate, - boolean clamp_color) + boolean saturate) { struct ureg_dst dst = dst_register( t, DstReg->File, @@ -295,27 +294,6 @@ translate_dst( struct st_translate *t, if (saturate) dst = ureg_saturate( dst ); - else if (clamp_color && DstReg->File == PROGRAM_OUTPUT) { - /* Clamp colors for ARB_color_buffer_float. */ - switch (t->procType) { - case TGSI_PROCESSOR_VERTEX: - /* This can only occur with a compatibility profile, which doesn't - * support geometry shaders. */ - if (DstReg->Index == VARYING_SLOT_COL0 || - DstReg->Index == VARYING_SLOT_COL1 || - DstReg->Index == VARYING_SLOT_BFC0 || - DstReg->Index == VARYING_SLOT_BFC1) { - dst = ureg_saturate(dst); - } - break; - - case TGSI_PROCESSOR_FRAGMENT: - if (DstReg->Index >= FRAG_RESULT_COLOR) { - dst = ureg_saturate(dst); - } - break; - } - } if (DstReg->RelAddr) dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) ); @@ -649,8 +627,7 @@ static void compile_instruction( struct gl_context *ctx, struct st_translate *t, - const struct prog_instruction *inst, - boolean clamp_dst_color_output) + const struct prog_instruction *inst) { struct ureg_program *ureg = t->ureg; GLuint i; @@ -665,8 +642,7 @@ compile_instruction( if (num_dst) dst[0] = translate_dst( t, &inst->DstReg, - inst->Saturate, - clamp_dst_color_output); + inst->Saturate); for (i = 0; i < num_src; i++) src[i] = translate_src( t, &inst->SrcReg[i] ); @@ -974,18 +950,6 @@ emit_face_var( struct st_translate *t, } -static void -emit_edgeflags( struct st_translate *t, - const struct gl_program *program ) -{ - struct ureg_program *ureg = t->ureg; - struct ureg_dst edge_dst = t->outputs[t->outputMapping[VARYING_SLOT_EDGE]]; - struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; - - ureg_MOV( ureg, edge_dst, edge_src ); -} - - /** * Translate Mesa program to TGSI format. 
* \param program the program to translate @@ -1019,9 +983,7 @@ st_translate_mesa_program( GLuint numOutputs, const GLuint outputMapping[], const ubyte outputSemanticName[], - const ubyte outputSemanticIndex[], - boolean passthrough_edgeflags, - boolean clamp_color) + const ubyte outputSemanticIndex[]) { struct st_translate translate, *t; unsigned i; @@ -1125,8 +1087,6 @@ st_translate_mesa_program( t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X); } } - if (passthrough_edgeflags) - emit_edgeflags( t, program ); } /* Declare address register. @@ -1231,7 +1191,7 @@ st_translate_mesa_program( */ for (i = 0; i < program->NumInstructions; i++) { set_insn_start( t, ureg_get_instruction_number( ureg )); - compile_instruction( ctx, t, &program->Instructions[i], clamp_color ); + compile_instruction(ctx, t, &program->Instructions[i]); } /* Fix up all emitted labels: diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h index 62bb654e95a..ed7a3adfe1a 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.h +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h @@ -58,9 +58,7 @@ st_translate_mesa_program( GLuint numOutputs, const GLuint outputMapping[], const ubyte outputSemanticName[], - const ubyte outputSemanticIndex[], - boolean passthrough_edgeflags, - boolean clamp_color); + const ubyte outputSemanticIndex[]); unsigned st_translate_texture_target(GLuint textarget, GLboolean shadow); diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index a07f8fec309..6a69ba7aa26 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -43,6 +43,8 @@ #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_emulate.h" +#include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_ureg.h" #include "st_debug.h" @@ -92,6 +94,11 @@ st_release_vp_variants( struct st_context *st, } stvp->variants = NULL; + + if (stvp->tgsi.tokens) { + 
tgsi_free_tokens(stvp->tgsi.tokens); + stvp->tgsi.tokens = NULL; + } } @@ -107,8 +114,6 @@ delete_fp_variant(struct st_context *st, struct st_fp_variant *fpv) cso_delete_fragment_shader(st->cso_context, fpv->driver_shader); if (fpv->parameters) _mesa_free_parameter_list(fpv->parameters); - if (fpv->tgsi.tokens) - ureg_free_tokens(fpv->tgsi.tokens); free(fpv); } @@ -128,6 +133,11 @@ st_release_fp_variants(struct st_context *st, struct st_fragment_program *stfp) } stfp->variants = NULL; + + if (stfp->tgsi.tokens) { + ureg_free_tokens(stfp->tgsi.tokens); + stfp->tgsi.tokens = NULL; + } } @@ -160,6 +170,11 @@ st_release_gp_variants(struct st_context *st, struct st_geometry_program *stgp) } stgp->variants = NULL; + + if (stgp->tgsi.tokens) { + ureg_free_tokens(stgp->tgsi.tokens); + stgp->tgsi.tokens = NULL; + } } @@ -192,6 +207,11 @@ st_release_tcp_variants(struct st_context *st, struct st_tessctrl_program *sttcp } sttcp->variants = NULL; + + if (sttcp->tgsi.tokens) { + ureg_free_tokens(sttcp->tgsi.tokens); + sttcp->tgsi.tokens = NULL; + } } @@ -224,28 +244,34 @@ st_release_tep_variants(struct st_context *st, struct st_tesseval_program *sttep } sttep->variants = NULL; + + if (sttep->tgsi.tokens) { + ureg_free_tokens(sttep->tgsi.tokens); + sttep->tgsi.tokens = NULL; + } } /** - * Translate a Mesa vertex shader into a TGSI shader. - * \param outputMapping to map vertex program output registers (VARYING_SLOT_x) - * to TGSI output slots - * \param tokensOut destination for TGSI tokens - * \return pointer to cached pipe_shader object. + * Translate a vertex program. 
*/ -void -st_prepare_vertex_program(struct gl_context *ctx, +bool +st_translate_vertex_program(struct st_context *st, struct st_vertex_program *stvp) { - struct st_context *st = st_context(ctx); - GLuint attr; + struct ureg_program *ureg; + enum pipe_error error; + unsigned num_outputs = 0; + unsigned attr; + unsigned input_to_index[VERT_ATTRIB_MAX] = {0}; + unsigned output_slot_to_attr[VARYING_SLOT_MAX] = {0}; + ubyte output_semantic_name[VARYING_SLOT_MAX] = {0}; + ubyte output_semantic_index[VARYING_SLOT_MAX] = {0}; stvp->num_inputs = 0; - stvp->num_outputs = 0; if (stvp->Base.IsPositionInvariant) - _mesa_insert_mvp_code(ctx, &stvp->Base); + _mesa_insert_mvp_code(st->ctx, &stvp->Base); /* * Determine number of inputs, the mappings between VERT_ATTRIB_x @@ -253,7 +279,7 @@ st_prepare_vertex_program(struct gl_context *ctx, */ for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) { if ((stvp->Base.Base.InputsRead & BITFIELD64_BIT(attr)) != 0) { - stvp->input_to_index[attr] = stvp->num_inputs; + input_to_index[attr] = stvp->num_inputs; stvp->index_to_input[stvp->num_inputs] = attr; stvp->num_inputs++; if ((stvp->Base.Base.DoubleInputsRead & BITFIELD64_BIT(attr)) != 0) { @@ -264,7 +290,7 @@ st_prepare_vertex_program(struct gl_context *ctx, } } /* bit of a hack, presetup potentially unused edgeflag input */ - stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs; + input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs; stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG; /* Compute mapping of vertex program outputs to slots. 
@@ -274,62 +300,62 @@ st_prepare_vertex_program(struct gl_context *ctx, stvp->result_to_output[attr] = ~0; } else { - unsigned slot = stvp->num_outputs++; + unsigned slot = num_outputs++; stvp->result_to_output[attr] = slot; - stvp->output_slot_to_attr[slot] = attr; + output_slot_to_attr[slot] = attr; switch (attr) { case VARYING_SLOT_POS: - stvp->output_semantic_name[slot] = TGSI_SEMANTIC_POSITION; - stvp->output_semantic_index[slot] = 0; + output_semantic_name[slot] = TGSI_SEMANTIC_POSITION; + output_semantic_index[slot] = 0; break; case VARYING_SLOT_COL0: - stvp->output_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - stvp->output_semantic_index[slot] = 0; + output_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + output_semantic_index[slot] = 0; break; case VARYING_SLOT_COL1: - stvp->output_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - stvp->output_semantic_index[slot] = 1; + output_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + output_semantic_index[slot] = 1; break; case VARYING_SLOT_BFC0: - stvp->output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR; - stvp->output_semantic_index[slot] = 0; + output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR; + output_semantic_index[slot] = 0; break; case VARYING_SLOT_BFC1: - stvp->output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR; - stvp->output_semantic_index[slot] = 1; + output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR; + output_semantic_index[slot] = 1; break; case VARYING_SLOT_FOGC: - stvp->output_semantic_name[slot] = TGSI_SEMANTIC_FOG; - stvp->output_semantic_index[slot] = 0; + output_semantic_name[slot] = TGSI_SEMANTIC_FOG; + output_semantic_index[slot] = 0; break; case VARYING_SLOT_PSIZ: - stvp->output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE; - stvp->output_semantic_index[slot] = 0; + output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE; + output_semantic_index[slot] = 0; break; case VARYING_SLOT_CLIP_DIST0: - stvp->output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST; - stvp->output_semantic_index[slot] = 0; + 
output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST; + output_semantic_index[slot] = 0; break; case VARYING_SLOT_CLIP_DIST1: - stvp->output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST; - stvp->output_semantic_index[slot] = 1; + output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST; + output_semantic_index[slot] = 1; break; case VARYING_SLOT_EDGE: assert(0); break; case VARYING_SLOT_CLIP_VERTEX: - stvp->output_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX; - stvp->output_semantic_index[slot] = 0; + output_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX; + output_semantic_index[slot] = 0; break; case VARYING_SLOT_LAYER: - stvp->output_semantic_name[slot] = TGSI_SEMANTIC_LAYER; - stvp->output_semantic_index[slot] = 0; + output_semantic_name[slot] = TGSI_SEMANTIC_LAYER; + output_semantic_index[slot] = 0; break; case VARYING_SLOT_VIEWPORT: - stvp->output_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX; - stvp->output_semantic_index[slot] = 0; + output_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX; + output_semantic_index[slot] = 0; break; case VARYING_SLOT_TEX0: @@ -341,8 +367,8 @@ st_prepare_vertex_program(struct gl_context *ctx, case VARYING_SLOT_TEX6: case VARYING_SLOT_TEX7: if (st->needs_texcoord_semantic) { - stvp->output_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD; - stvp->output_semantic_index[slot] = attr - VARYING_SLOT_TEX0; + output_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD; + output_semantic_index[slot] = attr - VARYING_SLOT_TEX0; break; } /* fall through */ @@ -350,55 +376,24 @@ st_prepare_vertex_program(struct gl_context *ctx, default: assert(attr >= VARYING_SLOT_VAR0 || (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7)); - stvp->output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - stvp->output_semantic_index[slot] = + output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + output_semantic_index[slot] = st_get_generic_varying_index(st, attr); break; } } } /* similar hack to above, presetup potentially unused edgeflag output */ - 
stvp->result_to_output[VARYING_SLOT_EDGE] = stvp->num_outputs; - stvp->output_semantic_name[stvp->num_outputs] = TGSI_SEMANTIC_EDGEFLAG; - stvp->output_semantic_index[stvp->num_outputs] = 0; -} - - -/** - * Translate a vertex program to create a new variant. - */ -static struct st_vp_variant * -st_translate_vertex_program(struct st_context *st, - struct st_vertex_program *stvp, - const struct st_vp_variant_key *key) -{ - struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant); - struct pipe_context *pipe = st->pipe; - struct ureg_program *ureg; - enum pipe_error error; - unsigned num_outputs; - - st_prepare_vertex_program(st->ctx, stvp); + stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs; + output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG; + output_semantic_index[num_outputs] = 0; if (!stvp->glsl_to_tgsi) - { _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT); - } ureg = ureg_create_with_screen(TGSI_PROCESSOR_VERTEX, st->pipe->screen); - if (ureg == NULL) { - free(vpv); - return NULL; - } - - vpv->key = *key; - - vpv->num_inputs = stvp->num_inputs; - num_outputs = stvp->num_outputs; - if (key->passthrough_edgeflags) { - vpv->num_inputs++; - num_outputs++; - } + if (ureg == NULL) + return false; if (ST_DEBUG & DEBUG_MESA) { _mesa_print_program(&stvp->Base.Base); @@ -406,15 +401,15 @@ st_translate_vertex_program(struct st_context *st, debug_printf("\n"); } - if (stvp->glsl_to_tgsi) + if (stvp->glsl_to_tgsi) { error = st_translate_program(st->ctx, TGSI_PROCESSOR_VERTEX, ureg, stvp->glsl_to_tgsi, &stvp->Base.Base, /* inputs */ - vpv->num_inputs, - stvp->input_to_index, + stvp->num_inputs, + input_to_index, NULL, /* inputSlotToAttr */ NULL, /* input semantic name */ NULL, /* input semantic index */ @@ -423,43 +418,75 @@ st_translate_vertex_program(struct st_context *st, /* outputs */ num_outputs, stvp->result_to_output, - stvp->output_slot_to_attr, - stvp->output_semantic_name, - stvp->output_semantic_index, - key->passthrough_edgeflags, - 
key->clamp_color); - else + output_slot_to_attr, + output_semantic_name, + output_semantic_index); + + st_translate_stream_output_info(stvp->glsl_to_tgsi, + stvp->result_to_output, + &stvp->tgsi.stream_output); + + free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi); + stvp->glsl_to_tgsi = NULL; + } else error = st_translate_mesa_program(st->ctx, TGSI_PROCESSOR_VERTEX, ureg, &stvp->Base.Base, /* inputs */ - vpv->num_inputs, - stvp->input_to_index, + stvp->num_inputs, + input_to_index, NULL, /* input semantic name */ NULL, /* input semantic index */ NULL, /* outputs */ num_outputs, stvp->result_to_output, - stvp->output_semantic_name, - stvp->output_semantic_index, - key->passthrough_edgeflags, - key->clamp_color); + output_semantic_name, + output_semantic_index); + + if (error) { + debug_printf("%s: failed to translate Mesa program:\n", __func__); + _mesa_print_program(&stvp->Base.Base); + debug_assert(0); + return false; + } + + stvp->tgsi.tokens = ureg_get_tokens(ureg, NULL); + ureg_destroy(ureg); + return stvp->tgsi.tokens != NULL; +} - if (error) - goto fail; +static struct st_vp_variant * +st_create_vp_variant(struct st_context *st, + struct st_vertex_program *stvp, + const struct st_vp_variant_key *key) +{ + struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant); + struct pipe_context *pipe = st->pipe; - vpv->tgsi.tokens = ureg_get_tokens( ureg, NULL ); - if (!vpv->tgsi.tokens) - goto fail; + vpv->key = *key; + vpv->tgsi.tokens = tgsi_dup_tokens(stvp->tgsi.tokens); + vpv->tgsi.stream_output = stvp->tgsi.stream_output; + vpv->num_inputs = stvp->num_inputs; - ureg_destroy( ureg ); + /* Emulate features. */ + if (key->clamp_color || key->passthrough_edgeflags) { + const struct tgsi_token *tokens; + unsigned flags = + (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) | + (key->passthrough_edgeflags ? 
TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0); - if (stvp->glsl_to_tgsi) { - st_translate_stream_output_info(stvp->glsl_to_tgsi, - stvp->result_to_output, - &vpv->tgsi.stream_output); + tokens = tgsi_emulate(vpv->tgsi.tokens, flags); + + if (tokens) { + tgsi_free_tokens(vpv->tgsi.tokens); + vpv->tgsi.tokens = tokens; + + if (key->passthrough_edgeflags) + vpv->num_inputs++; + } else + fprintf(stderr, "mesa: cannot emulate deprecated features\n"); } if (ST_DEBUG & DEBUG_TGSI) { @@ -469,14 +496,6 @@ st_translate_vertex_program(struct st_context *st, vpv->driver_shader = pipe->create_vs_state(pipe, &vpv->tgsi); return vpv; - -fail: - debug_printf("%s: failed to translate Mesa program:\n", __func__); - _mesa_print_program(&stvp->Base.Base); - debug_assert(0); - - ureg_destroy( ureg ); - return NULL; } @@ -499,7 +518,7 @@ st_get_vp_variant(struct st_context *st, if (!vpv) { /* create now */ - vpv = st_translate_vertex_program(st, stvp, key); + vpv = st_create_vp_variant(st, stvp, key); if (vpv) { /* insert into list */ vpv->next = stvp->variants; @@ -533,19 +552,12 @@ st_translate_interp(enum glsl_interp_qualifier glsl_qual, bool is_color) /** - * Translate a Mesa fragment shader into a TGSI shader using extra info in - * the key. - * \return new fragment program variant + * Translate a Mesa fragment shader into a TGSI shader. 
*/ -static struct st_fp_variant * +bool st_translate_fragment_program(struct st_context *st, - struct st_fragment_program *stfp, - const struct st_fp_variant_key *key) + struct st_fragment_program *stfp) { - struct pipe_context *pipe = st->pipe; - struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant); - GLboolean deleteFP = GL_FALSE; - GLuint outputMapping[FRAG_RESULT_MAX]; GLuint inputMapping[VARYING_SLOT_MAX]; GLuint inputSlotToAttr[VARYING_SLOT_MAX]; @@ -565,40 +577,8 @@ st_translate_fragment_program(struct st_context *st, ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; uint fs_num_outputs = 0; - if (!variant) - return NULL; - - assert(!(key->bitmap && key->drawpixels)); memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr)); - if (key->bitmap) { - /* glBitmap drawing */ - struct gl_fragment_program *fp; /* we free this temp program below */ - - st_make_bitmap_fragment_program(st, &stfp->Base, - &fp, &variant->bitmap_sampler); - - variant->parameters = _mesa_clone_parameter_list(fp->Base.Parameters); - stfp = st_fragment_program(fp); - deleteFP = GL_TRUE; - } - else if (key->drawpixels) { - /* glDrawPixels drawing */ - struct gl_fragment_program *fp; /* we free this temp program below */ - - if (key->drawpixels_z || key->drawpixels_stencil) { - fp = st_make_drawpix_z_stencil_program(st, key->drawpixels_z, - key->drawpixels_stencil); - } - else { - /* RGBA */ - st_make_drawpix_fragment_program(st, &stfp->Base, &fp); - variant->parameters = _mesa_clone_parameter_list(fp->Base.Parameters); - deleteFP = GL_TRUE; - } - stfp = st_fragment_program(fp); - } - if (!stfp->glsl_to_tgsi) _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT); @@ -620,8 +600,7 @@ st_translate_fragment_program(struct st_context *st, interpLocation[slot] = TGSI_INTERPOLATE_LOC_CENTER; if (stfp->Base.Base.SystemValuesRead & (SYSTEM_BIT_SAMPLE_ID | - SYSTEM_BIT_SAMPLE_POS) || - key->persample_shading) + SYSTEM_BIT_SAMPLE_POS)) interpLocation[slot] = 
TGSI_INTERPOLATE_LOC_SAMPLE; switch (attr) { @@ -805,10 +784,8 @@ st_translate_fragment_program(struct st_context *st, } ureg = ureg_create_with_screen(TGSI_PROCESSOR_FRAGMENT, st->pipe->screen); - if (ureg == NULL) { - free(variant); - return NULL; - } + if (ureg == NULL) + return false; if (ST_DEBUG & DEBUG_MESA) { _mesa_print_program(&stfp->Base.Base); @@ -841,7 +818,7 @@ st_translate_fragment_program(struct st_context *st, } } - if (stfp->glsl_to_tgsi) + if (stfp->glsl_to_tgsi) { st_translate_program(st->ctx, TGSI_PROCESSOR_FRAGMENT, ureg, @@ -860,9 +837,11 @@ st_translate_fragment_program(struct st_context *st, outputMapping, NULL, fs_output_semantic_name, - fs_output_semantic_index, FALSE, - key->clamp_color ); - else + fs_output_semantic_index); + + free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi); + stfp->glsl_to_tgsi = NULL; + } else st_translate_mesa_program(st->ctx, TGSI_PROCESSOR_FRAGMENT, ureg, @@ -877,31 +856,134 @@ st_translate_fragment_program(struct st_context *st, fs_num_outputs, outputMapping, fs_output_semantic_name, - fs_output_semantic_index, FALSE, - key->clamp_color); + fs_output_semantic_index); + + stfp->tgsi.tokens = ureg_get_tokens(ureg, NULL); + ureg_destroy(ureg); + return stfp->tgsi.tokens != NULL; +} + +static struct st_fp_variant * +st_create_fp_variant(struct st_context *st, + struct st_fragment_program *stfp, + const struct st_fp_variant_key *key) +{ + struct pipe_context *pipe = st->pipe; + struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant); + struct pipe_shader_state tgsi = {0}; + + if (!variant) + return NULL; + + tgsi.tokens = stfp->tgsi.tokens; - variant->tgsi.tokens = ureg_get_tokens( ureg, NULL ); - ureg_destroy( ureg ); + assert(!(key->bitmap && key->drawpixels)); + + /* Emulate features. */ + if (key->clamp_color || key->persample_shading) { + const struct tgsi_token *tokens; + unsigned flags = + (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) | + (key->persample_shading ? 
TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0); + + tokens = tgsi_emulate(tgsi.tokens, flags); + + if (tokens) + tgsi.tokens = tokens; + else + fprintf(stderr, "mesa: cannot emulate deprecated features\n"); + } + + /* glBitmap */ + if (key->bitmap) { + const struct tgsi_token *tokens; + + variant->bitmap_sampler = ffs(~stfp->Base.Base.SamplersUsed) - 1; + + tokens = st_get_bitmap_shader(tgsi.tokens, + variant->bitmap_sampler, + st->needs_texcoord_semantic, + st->bitmap.tex_format == + PIPE_FORMAT_L8_UNORM); + + if (tokens) { + if (tgsi.tokens != stfp->tgsi.tokens) + tgsi_free_tokens(tgsi.tokens); + tgsi.tokens = tokens; + variant->parameters = + _mesa_clone_parameter_list(stfp->Base.Base.Parameters); + } else + fprintf(stderr, "mesa: cannot create a shader for glBitmap\n"); + } + + /* glDrawPixels (color only) */ + if (key->drawpixels) { + const struct tgsi_token *tokens; + unsigned scale_const = 0, bias_const = 0, texcoord_const = 0; + + /* Find the first unused slot. */ + variant->drawpix_sampler = ffs(~stfp->Base.Base.SamplersUsed) - 1; + + if (key->pixelMaps) { + unsigned samplers_used = stfp->Base.Base.SamplersUsed | + (1 << variant->drawpix_sampler); + + variant->pixelmap_sampler = ffs(~samplers_used) - 1; + } + + variant->parameters = + _mesa_clone_parameter_list(stfp->Base.Base.Parameters); + + if (key->scaleAndBias) { + static const gl_state_index scale_state[STATE_LENGTH] = + { STATE_INTERNAL, STATE_PT_SCALE }; + static const gl_state_index bias_state[STATE_LENGTH] = + { STATE_INTERNAL, STATE_PT_BIAS }; + + scale_const = _mesa_add_state_reference(variant->parameters, + scale_state); + bias_const = _mesa_add_state_reference(variant->parameters, + bias_state); + } + + { + static const gl_state_index state[STATE_LENGTH] = + { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 }; + + texcoord_const = _mesa_add_state_reference(variant->parameters, + state); + } + + tokens = st_get_drawpix_shader(tgsi.tokens, + st->needs_texcoord_semantic, + key->scaleAndBias, 
scale_const, + bias_const, key->pixelMaps, + variant->drawpix_sampler, + variant->pixelmap_sampler, + texcoord_const); + + if (tokens) { + if (tgsi.tokens != stfp->tgsi.tokens) + tgsi_free_tokens(tgsi.tokens); + tgsi.tokens = tokens; + } else + fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n"); + } if (ST_DEBUG & DEBUG_TGSI) { - tgsi_dump(variant->tgsi.tokens, 0/*TGSI_DUMP_VERBOSE*/); + tgsi_dump(tgsi.tokens, 0); debug_printf("\n"); } /* fill in variant */ - variant->driver_shader = pipe->create_fs_state(pipe, &variant->tgsi); + variant->driver_shader = pipe->create_fs_state(pipe, &tgsi); variant->key = *key; - if (deleteFP) { - /* Free the temporary program made above */ - struct gl_fragment_program *fp = &stfp->Base; - _mesa_reference_fragprog(st->ctx, &fp, NULL); - } - + if (tgsi.tokens != stfp->tgsi.tokens) + tgsi_free_tokens(tgsi.tokens); return variant; } - /** * Translate fragment program if needed. */ @@ -921,7 +1003,7 @@ st_get_fp_variant(struct st_context *st, if (!fpv) { /* create new */ - fpv = st_translate_fragment_program(st, stfp, key); + fpv = st_create_fp_variant(st, stfp, key); if (fpv) { /* insert into list */ fpv->next = stfp->variants; @@ -1191,9 +1273,7 @@ st_translate_program_common(struct st_context *st, outputMapping, outputSlotToAttr, output_semantic_name, - output_semantic_index, - FALSE, - FALSE); + output_semantic_index); out_state->tokens = ureg_get_tokens(ureg, NULL); ureg_destroy(ureg); @@ -1217,19 +1297,15 @@ st_translate_program_common(struct st_context *st, /** * Translate a geometry program to create a new variant. 
*/ -static struct st_gp_variant * +bool st_translate_geometry_program(struct st_context *st, - struct st_geometry_program *stgp, - const struct st_gp_variant_key *key) + struct st_geometry_program *stgp) { - struct pipe_context *pipe = st->pipe; struct ureg_program *ureg; - struct st_gp_variant *gpv; - struct pipe_shader_state state; ureg = ureg_create_with_screen(TGSI_PROCESSOR_GEOMETRY, st->pipe->screen); if (ureg == NULL) - return NULL; + return false; ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, stgp->Base.InputType); ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, stgp->Base.OutputType); @@ -1238,19 +1314,29 @@ st_translate_geometry_program(struct st_context *st, ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS, stgp->Base.Invocations); st_translate_program_common(st, &stgp->Base.Base, stgp->glsl_to_tgsi, ureg, - TGSI_PROCESSOR_GEOMETRY, &state); + TGSI_PROCESSOR_GEOMETRY, &stgp->tgsi); + + free_glsl_to_tgsi_visitor(stgp->glsl_to_tgsi); + stgp->glsl_to_tgsi = NULL; + return true; +} + + +static struct st_gp_variant * +st_create_gp_variant(struct st_context *st, + struct st_geometry_program *stgp, + const struct st_gp_variant_key *key) +{ + struct pipe_context *pipe = st->pipe; + struct st_gp_variant *gpv; gpv = CALLOC_STRUCT(st_gp_variant); - if (!gpv) { - ureg_free_tokens(state.tokens); + if (!gpv) return NULL; - } /* fill in new variant */ - gpv->driver_shader = pipe->create_gs_state(pipe, &state); + gpv->driver_shader = pipe->create_gs_state(pipe, &stgp->tgsi); gpv->key = *key; - - ureg_free_tokens(state.tokens); return gpv; } @@ -1274,7 +1360,7 @@ st_get_gp_variant(struct st_context *st, if (!gpv) { /* create new */ - gpv = st_translate_geometry_program(st, stgp, key); + gpv = st_create_gp_variant(st, stgp, key); if (gpv) { /* insert into list */ gpv->next = stgp->variants; @@ -1289,38 +1375,43 @@ st_get_gp_variant(struct st_context *st, /** * Translate a tessellation control program to create a new variant. 
*/ -static struct st_tcp_variant * +bool st_translate_tessctrl_program(struct st_context *st, - struct st_tessctrl_program *sttcp, - const struct st_tcp_variant_key *key) + struct st_tessctrl_program *sttcp) { - struct pipe_context *pipe = st->pipe; struct ureg_program *ureg; - struct st_tcp_variant *tcpv; - struct pipe_shader_state state; - ureg = ureg_create_with_screen(TGSI_PROCESSOR_TESS_CTRL, pipe->screen); - if (ureg == NULL) { - return NULL; - } + ureg = ureg_create_with_screen(TGSI_PROCESSOR_TESS_CTRL, st->pipe->screen); + if (ureg == NULL) + return false; ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT, sttcp->Base.VerticesOut); st_translate_program_common(st, &sttcp->Base.Base, sttcp->glsl_to_tgsi, - ureg, TGSI_PROCESSOR_TESS_CTRL, &state); + ureg, TGSI_PROCESSOR_TESS_CTRL, &sttcp->tgsi); + + free_glsl_to_tgsi_visitor(sttcp->glsl_to_tgsi); + sttcp->glsl_to_tgsi = NULL; + return true; +} + + +static struct st_tcp_variant * +st_create_tcp_variant(struct st_context *st, + struct st_tessctrl_program *sttcp, + const struct st_tcp_variant_key *key) +{ + struct pipe_context *pipe = st->pipe; + struct st_tcp_variant *tcpv; tcpv = CALLOC_STRUCT(st_tcp_variant); - if (!tcpv) { - ureg_free_tokens(state.tokens); + if (!tcpv) return NULL; - } /* fill in new variant */ - tcpv->driver_shader = pipe->create_tcs_state(pipe, &state); + tcpv->driver_shader = pipe->create_tcs_state(pipe, &sttcp->tgsi); tcpv->key = *key; - - ureg_free_tokens(state.tokens); return tcpv; } @@ -1344,7 +1435,7 @@ st_get_tcp_variant(struct st_context *st, if (!tcpv) { /* create new */ - tcpv = st_translate_tessctrl_program(st, sttcp, key); + tcpv = st_create_tcp_variant(st, sttcp, key); if (tcpv) { /* insert into list */ tcpv->next = sttcp->variants; @@ -1359,20 +1450,15 @@ st_get_tcp_variant(struct st_context *st, /** * Translate a tessellation evaluation program to create a new variant. 
*/ -static struct st_tep_variant * +bool st_translate_tesseval_program(struct st_context *st, - struct st_tesseval_program *sttep, - const struct st_tep_variant_key *key) + struct st_tesseval_program *sttep) { - struct pipe_context *pipe = st->pipe; struct ureg_program *ureg; - struct st_tep_variant *tepv; - struct pipe_shader_state state; - ureg = ureg_create_with_screen(TGSI_PROCESSOR_TESS_EVAL, pipe->screen); - if (ureg == NULL) { - return NULL; - } + ureg = ureg_create_with_screen(TGSI_PROCESSOR_TESS_EVAL, st->pipe->screen); + if (ureg == NULL) + return false; if (sttep->Base.PrimitiveMode == GL_ISOLINES) ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES); @@ -1400,19 +1486,29 @@ st_translate_tesseval_program(struct st_context *st, ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE, sttep->Base.PointMode); st_translate_program_common(st, &sttep->Base.Base, sttep->glsl_to_tgsi, - ureg, TGSI_PROCESSOR_TESS_EVAL, &state); + ureg, TGSI_PROCESSOR_TESS_EVAL, &sttep->tgsi); + + free_glsl_to_tgsi_visitor(sttep->glsl_to_tgsi); + sttep->glsl_to_tgsi = NULL; + return true; +} + + +static struct st_tep_variant * +st_create_tep_variant(struct st_context *st, + struct st_tesseval_program *sttep, + const struct st_tep_variant_key *key) +{ + struct pipe_context *pipe = st->pipe; + struct st_tep_variant *tepv; tepv = CALLOC_STRUCT(st_tep_variant); - if (!tepv) { - ureg_free_tokens(state.tokens); + if (!tepv) return NULL; - } /* fill in new variant */ - tepv->driver_shader = pipe->create_tes_state(pipe, &state); + tepv->driver_shader = pipe->create_tes_state(pipe, &sttep->tgsi); tepv->key = *key; - - ureg_free_tokens(state.tokens); return tepv; } @@ -1436,7 +1532,7 @@ st_get_tep_variant(struct st_context *st, if (!tepv) { /* create new */ - tepv = st_translate_tesseval_program(st, sttep, key); + tepv = st_create_tep_variant(st, sttep, key); if (tepv) { /* insert into list */ tepv->next = sttep->variants; diff --git a/src/mesa/state_tracker/st_program.h 
b/src/mesa/state_tracker/st_program.h index 7013993fe38..d9b53ac008c 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -59,8 +59,6 @@ struct st_fp_variant_key GLuint drawpixels:1; /**< glDrawPixels variant */ GLuint scaleAndBias:1; /**< glDrawPixels w/ scale and/or bias? */ GLuint pixelMaps:1; /**< glDrawPixels w/ pixel lookup map? */ - GLuint drawpixels_z:1; /**< glDrawPixels(GL_DEPTH) */ - GLuint drawpixels_stencil:1; /**< glDrawPixels(GL_STENCIL) */ /** for ARB_color_buffer_float */ GLuint clamp_color:1; @@ -78,8 +76,6 @@ struct st_fp_variant /** Parameters which generated this version of fragment program */ struct st_fp_variant_key key; - struct pipe_shader_state tgsi; - /** Driver's compiled shader */ void *driver_shader; @@ -87,6 +83,10 @@ struct st_fp_variant struct gl_program_parameter_list *parameters; uint bitmap_sampler; + /** For glDrawPixels variants */ + unsigned drawpix_sampler; + unsigned pixelmap_sampler; + /** next in linked list */ struct st_fp_variant *next; }; @@ -98,6 +98,7 @@ struct st_fp_variant struct st_fragment_program { struct gl_fragment_program Base; + struct pipe_shader_state tgsi; struct glsl_to_tgsi_visitor* glsl_to_tgsi; struct st_fp_variant *variants; @@ -153,20 +154,16 @@ struct st_vp_variant struct st_vertex_program { struct gl_vertex_program Base; /**< The Mesa vertex program */ + struct pipe_shader_state tgsi; struct glsl_to_tgsi_visitor* glsl_to_tgsi; /** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */ - GLuint input_to_index[VERT_ATTRIB_MAX]; /** maps a TGSI input index back to a Mesa VERT_ATTRIB_x */ GLuint index_to_input[PIPE_MAX_SHADER_INPUTS]; GLuint num_inputs; /** Maps VARYING_SLOT_x to slot */ GLuint result_to_output[VARYING_SLOT_MAX]; - GLuint output_slot_to_attr[VARYING_SLOT_MAX]; - ubyte output_semantic_name[VARYING_SLOT_MAX]; - ubyte output_semantic_index[VARYING_SLOT_MAX]; - GLuint num_outputs; /** List of translated variants of this vertex program. 
*/ @@ -203,6 +200,7 @@ struct st_gp_variant struct st_geometry_program { struct gl_geometry_program Base; /**< The Mesa geometry program */ + struct pipe_shader_state tgsi; struct glsl_to_tgsi_visitor* glsl_to_tgsi; struct st_gp_variant *variants; @@ -238,6 +236,7 @@ struct st_tcp_variant struct st_tessctrl_program { struct gl_tess_ctrl_program Base; /**< The Mesa tess ctrl program */ + struct pipe_shader_state tgsi; struct glsl_to_tgsi_visitor* glsl_to_tgsi; struct st_tcp_variant *variants; @@ -273,6 +272,7 @@ struct st_tep_variant struct st_tesseval_program { struct gl_tess_eval_program Base; /**< The Mesa tess eval program */ + struct pipe_shader_state tgsi; struct glsl_to_tgsi_visitor* glsl_to_tgsi; struct st_tep_variant *variants; @@ -414,16 +414,6 @@ st_get_tep_variant(struct st_context *st, struct st_tesseval_program *stgp, const struct st_tep_variant_key *key); - -extern void -st_prepare_vertex_program(struct gl_context *ctx, - struct st_vertex_program *stvp); - -extern GLboolean -st_prepare_fragment_program(struct gl_context *ctx, - struct st_fragment_program *stfp); - - extern void st_release_vp_variants( struct st_context *st, struct st_vertex_program *stvp ); @@ -447,6 +437,25 @@ st_release_tep_variants(struct st_context *st, extern void st_destroy_program_variants(struct st_context *st); +extern bool +st_translate_vertex_program(struct st_context *st, + struct st_vertex_program *stvp); + +extern bool +st_translate_fragment_program(struct st_context *st, + struct st_fragment_program *stfp); + +extern bool +st_translate_geometry_program(struct st_context *st, + struct st_geometry_program *stgp); + +extern bool +st_translate_tessctrl_program(struct st_context *st, + struct st_tessctrl_program *sttcp); + +extern bool +st_translate_tesseval_program(struct st_context *st, + struct st_tesseval_program *sttep); extern void st_print_current_vertex_program(void); diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c index c130ab3f93d..6f29abbe1ba 100644 --- 
a/src/mesa/tnl/t_draw.c +++ b/src/mesa/tnl/t_draw.c @@ -35,6 +35,7 @@ #include "main/mtypes.h" #include "main/macros.h" #include "main/enums.h" +#include "util/half_float.h" #include "t_context.h" #include "tnl.h" diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c index e3eb286e482..5e1a760eb2c 100644 --- a/src/mesa/vbo/vbo_context.c +++ b/src/mesa/vbo/vbo_context.c @@ -33,7 +33,6 @@ #include "vbo.h" #include "vbo_context.h" -#define NR_MAT_ATTRIBS 12 static GLuint check_size( const GLfloat *attr ) { @@ -44,32 +43,47 @@ static GLuint check_size( const GLfloat *attr ) } +/** + * Helper for initializing a vertex array. + */ +static void +init_array(struct gl_context *ctx, struct gl_client_array *cl, + unsigned size, const void *pointer) +{ + memset(cl, 0, sizeof(*cl)); + + cl->Size = size; + cl->Type = GL_FLOAT; + cl->Format = GL_RGBA; + cl->Stride = 0; + cl->StrideB = 0; + cl->_ElementSize = cl->Size * sizeof(GLfloat); + cl->Ptr = pointer; + cl->Enabled = 1; + + _mesa_reference_buffer_object(ctx, &cl->BufferObj, + ctx->Shared->NullBufferObj); +} + + +/** + * Set up the vbo->currval arrays to point at the context's current + * vertex attributes (with strides = 0). 
+ */ static void init_legacy_currval(struct gl_context *ctx) { struct vbo_context *vbo = vbo_context(ctx); - struct gl_client_array *arrays = &vbo->currval[VBO_ATTRIB_POS]; GLuint i; - memset(arrays, 0, sizeof(*arrays) * VERT_ATTRIB_FF_MAX); - /* Set up a constant (StrideB == 0) array for each current * attribute: */ for (i = 0; i < VERT_ATTRIB_FF_MAX; i++) { - struct gl_client_array *cl = &arrays[i]; + struct gl_client_array *cl = &vbo->currval[VERT_ATTRIB_FF(i)]; - /* Size will have to be determined at runtime: - */ - cl->Size = check_size(ctx->Current.Attrib[i]); - cl->Stride = 0; - cl->StrideB = 0; - cl->Enabled = 1; - cl->Type = GL_FLOAT; - cl->Format = GL_RGBA; - cl->Ptr = (const void *)ctx->Current.Attrib[i]; - cl->_ElementSize = cl->Size * sizeof(GLfloat); - _mesa_reference_buffer_object(ctx, &cl->BufferObj, - ctx->Shared->NullBufferObj); + init_array(ctx, cl, + check_size(ctx->Current.Attrib[i]), + ctx->Current.Attrib[i]); } } @@ -77,26 +91,12 @@ static void init_legacy_currval(struct gl_context *ctx) static void init_generic_currval(struct gl_context *ctx) { struct vbo_context *vbo = vbo_context(ctx); - struct gl_client_array *arrays = &vbo->currval[VBO_ATTRIB_GENERIC0]; GLuint i; - memset(arrays, 0, sizeof(*arrays) * VERT_ATTRIB_GENERIC_MAX); - for (i = 0; i < VERT_ATTRIB_GENERIC_MAX; i++) { - struct gl_client_array *cl = &arrays[i]; + struct gl_client_array *cl = &vbo->currval[VBO_ATTRIB_GENERIC0 + i]; - /* This will have to be determined at runtime: - */ - cl->Size = 1; - cl->Type = GL_FLOAT; - cl->Format = GL_RGBA; - cl->Ptr = (const void *)ctx->Current.Attrib[VERT_ATTRIB_GENERIC0 + i]; - cl->Stride = 0; - cl->StrideB = 0; - cl->Enabled = 1; - cl->_ElementSize = cl->Size * sizeof(GLfloat); - _mesa_reference_buffer_object(ctx, &cl->BufferObj, - ctx->Shared->NullBufferObj); + init_array(ctx, cl, 1, ctx->Current.Attrib[VERT_ATTRIB_GENERIC0 + i]); } } @@ -104,46 +104,34 @@ static void init_generic_currval(struct gl_context *ctx) static void 
init_mat_currval(struct gl_context *ctx) { struct vbo_context *vbo = vbo_context(ctx); - struct gl_client_array *arrays = - &vbo->currval[VBO_ATTRIB_MAT_FRONT_AMBIENT]; GLuint i; - assert(NR_MAT_ATTRIBS == MAT_ATTRIB_MAX); - - memset(arrays, 0, sizeof(*arrays) * NR_MAT_ATTRIBS); - /* Set up a constant (StrideB == 0) array for each current * attribute: */ - for (i = 0; i < NR_MAT_ATTRIBS; i++) { - struct gl_client_array *cl = &arrays[i]; + for (i = 0; i < MAT_ATTRIB_MAX; i++) { + struct gl_client_array *cl = + &vbo->currval[VBO_ATTRIB_MAT_FRONT_AMBIENT + i]; + unsigned size; /* Size is fixed for the material attributes, for others will * be determined at runtime: */ - switch (i - VERT_ATTRIB_GENERIC0) { + switch (i) { case MAT_ATTRIB_FRONT_SHININESS: case MAT_ATTRIB_BACK_SHININESS: - cl->Size = 1; - break; + size = 1; + break; case MAT_ATTRIB_FRONT_INDEXES: case MAT_ATTRIB_BACK_INDEXES: - cl->Size = 3; - break; + size = 3; + break; default: - cl->Size = 4; - break; + size = 4; + break; } - cl->Ptr = (const void *)ctx->Light.Material.Attrib[i]; - cl->Type = GL_FLOAT; - cl->Format = GL_RGBA; - cl->Stride = 0; - cl->StrideB = 0; - cl->Enabled = 1; - cl->_ElementSize = cl->Size * sizeof(GLfloat); - _mesa_reference_buffer_object(ctx, &cl->BufferObj, - ctx->Shared->NullBufferObj); + init_array(ctx, cl, size, ctx->Light.Material.Attrib[i]); } } @@ -175,7 +163,7 @@ GLboolean _vbo_CreateContext( struct gl_context *ctx ) for (i = 0; i < ARRAY_SIZE(vbo->map_vp_none); i++) vbo->map_vp_none[i] = i; /* map material attribs to generic slots */ - for (i = 0; i < NR_MAT_ATTRIBS; i++) + for (i = 0; i < MAT_ATTRIB_MAX; i++) vbo->map_vp_none[VERT_ATTRIB_GENERIC(i)] = VBO_ATTRIB_MAT_FRONT_AMBIENT + i; diff --git a/src/mesa/vbo/vbo_exec.h b/src/mesa/vbo/vbo_exec.h index 80f3015925d..00378eb7984 100644 --- a/src/mesa/vbo/vbo_exec.h +++ b/src/mesa/vbo/vbo_exec.h @@ -79,7 +79,7 @@ struct vbo_exec_copied_vtx { struct vbo_exec_context { - struct gl_context *ctx; + struct gl_context *ctx; 
GLvertexformat vtxfmt; GLvertexformat vtxfmt_noop; GLboolean validating; /**< if we're in the middle of state validation */ @@ -97,15 +97,17 @@ struct vbo_exec_context GLuint buffer_used; /* in bytes */ fi_type vertex[VBO_ATTRIB_MAX*4]; /* current vertex */ - GLuint vert_count; - GLuint max_vert; + GLuint vert_count; /**< Number of vertices currently in buffer */ + GLuint max_vert; /**< Max number of vertices allowed in buffer */ struct vbo_exec_copied_vtx copied; - GLubyte attrsz[VBO_ATTRIB_MAX]; - GLenum attrtype[VBO_ATTRIB_MAX]; - GLubyte active_sz[VBO_ATTRIB_MAX]; + GLubyte attrsz[VBO_ATTRIB_MAX]; /**< nr. of attrib components (1..4) */ + GLenum attrtype[VBO_ATTRIB_MAX]; /**< GL_FLOAT, GL_DOUBLE, GL_INT, etc */ + GLubyte active_sz[VBO_ATTRIB_MAX]; /**< attrib size (nr. 32-bit words) */ + /** pointers into the current 'vertex' array, declared above */ fi_type *attrptr[VBO_ATTRIB_MAX]; + struct gl_client_array arrays[VERT_ATTRIB_MAX]; /* According to program mode, the values above plus current @@ -115,7 +117,6 @@ struct vbo_exec_context const struct gl_client_array *inputs[VERT_ATTRIB_MAX]; } vtx; - struct { GLboolean recalculate_maps; struct vbo_exec_eval1_map map1[VERT_ATTRIB_MAX]; @@ -131,7 +132,7 @@ struct vbo_exec_context GLboolean recalculate_inputs; } array; - /* Which flags to set in vbo_exec_BeginVertices() */ + /* Which flags to set in vbo_exec_begin_vertices() */ GLbitfield begin_vertices_flags; #ifdef DEBUG @@ -147,8 +148,6 @@ void vbo_exec_init( struct gl_context *ctx ); void vbo_exec_destroy( struct gl_context *ctx ); void vbo_exec_invalidate_state( struct gl_context *ctx, GLuint new_state ); -void vbo_exec_BeginVertices( struct gl_context *ctx ); - /* Internal functions: */ diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c index 583a2f9b79f..7ae08fe3062 100644 --- a/src/mesa/vbo/vbo_exec_api.c +++ b/src/mesa/vbo/vbo_exec_api.c @@ -375,13 +375,16 @@ vbo_exec_wrap_upgrade_vertex(struct vbo_exec_context *exec, * This is when a 
vertex attribute transitions to a different size. * For example, we saw a bunch of glTexCoord2f() calls and now we got a * glTexCoord4f() call. We promote the array from size=2 to size=4. + * \param newSize size of new vertex (number of 32-bit words). */ static void -vbo_exec_fixup_vertex(struct gl_context *ctx, GLuint attr, GLuint newSize, GLenum newType) +vbo_exec_fixup_vertex(struct gl_context *ctx, GLuint attr, + GLuint newSize, GLenum newType) { struct vbo_exec_context *exec = &vbo_context(ctx)->exec; - if (newSize > exec->vtx.attrsz[attr] || newType != exec->vtx.attrtype[attr]) { + if (newSize > exec->vtx.attrsz[attr] || + newType != exec->vtx.attrtype[attr]) { /* New size is larger. Need to flush existing vertices and get * an enlarged vertex format. */ @@ -411,20 +414,49 @@ vbo_exec_fixup_vertex(struct gl_context *ctx, GLuint attr, GLuint newSize, GLenu /** + * Called upon first glVertex, glColor, glTexCoord, etc. + */ +static void +vbo_exec_begin_vertices(struct gl_context *ctx) +{ + struct vbo_exec_context *exec = &vbo_context(ctx)->exec; + + vbo_exec_vtx_map( exec ); + + assert((ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) == 0); + assert(exec->begin_vertices_flags); + + ctx->Driver.NeedFlush |= exec->begin_vertices_flags; +} + + +/** * This macro is used to implement all the glVertex, glColor, glTexCoord, * glVertexAttrib, etc functions. 
+ * \param A attribute index + * \param N attribute size (1..4) + * \param T type (GL_FLOAT, GL_DOUBLE, GL_INT, GL_UNSIGNED_INT) + * \param C cast type (fi_type or double) + * \param V0, V1, v2, V3 attribute value */ #define ATTR_UNION( A, N, T, C, V0, V1, V2, V3 ) \ do { \ struct vbo_exec_context *exec = &vbo_context(ctx)->exec; \ int sz = (sizeof(C) / sizeof(GLfloat)); \ - if (unlikely(!(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT))) \ - vbo_exec_BeginVertices(ctx); \ \ + assert(sz == 1 || sz == 2); \ + \ + if (unlikely(!(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT))) { \ + vbo_exec_begin_vertices(ctx); \ + } \ + \ + /* check if attribute size or type is changing */ \ if (unlikely(exec->vtx.active_sz[A] != N * sz) || \ - unlikely(exec->vtx.attrtype[A] != T)) \ + unlikely(exec->vtx.attrtype[A] != T)) { \ vbo_exec_fixup_vertex(ctx, A, N * sz, T); \ + } \ \ + /* store vertex attribute in vertex buffer */ \ { \ C *dest = (C *)exec->vtx.attrptr[A]; \ if (N>0) dest[0] = V0; \ @@ -438,6 +470,7 @@ do { \ /* This is a glVertex call */ \ GLuint i; \ \ + /* copy 32-bit words */ \ for (i = 0; i < exec->vtx.vertex_size; i++) \ exec->vtx.buffer_ptr[i] = exec->vtx.vertex[i]; \ \ @@ -1149,22 +1182,6 @@ void vbo_exec_vtx_destroy( struct vbo_exec_context *exec ) /** - * Called upon first glVertex, glColor, glTexCoord, etc. - */ -void vbo_exec_BeginVertices( struct gl_context *ctx ) -{ - struct vbo_exec_context *exec = &vbo_context(ctx)->exec; - - vbo_exec_vtx_map( exec ); - - assert((ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) == 0); - assert(exec->begin_vertices_flags); - - ctx->Driver.NeedFlush |= exec->begin_vertices_flags; -} - - -/** * If inside glBegin()/glEnd(), it should assert(0). 
Otherwise, if * FLUSH_STORED_VERTICES bit in \p flags is set flushes any buffered * vertices, if FLUSH_UPDATE_CURRENT bit is set updates @@ -1197,7 +1214,7 @@ void vbo_exec_FlushVertices( struct gl_context *ctx, GLuint flags ) /* Flush (draw), and make sure VBO is left unmapped when done */ vbo_exec_FlushVertices_internal(exec, GL_TRUE); - /* Need to do this to ensure vbo_exec_BeginVertices gets called again: + /* Need to do this to ensure vbo_exec_begin_vertices gets called again: */ ctx->Driver.NeedFlush &= ~(FLUSH_UPDATE_CURRENT | flags); diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index 2bfb0c32b73..174cbc37c26 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -53,10 +53,10 @@ vbo_exec_debug_verts( struct vbo_exec_context *exec ) for (i = 0 ; i < exec->vtx.prim_count ; i++) { struct _mesa_prim *prim = &exec->vtx.prim[i]; printf(" prim %d: %s%s %d..%d %s %s\n", - i, + i, _mesa_lookup_prim_by_nr(prim->mode), prim->weak ? " (weak)" : "", - prim->start, + prim->start, prim->start + prim->count, prim->begin ? "BEGIN" : "(wrap)", prim->end ? 
"END" : "(wrap)"); @@ -79,7 +79,6 @@ vbo_copy_vertices( struct vbo_exec_context *exec ) exec->vtx.prim[exec->vtx.prim_count-1].start * exec->vtx.vertex_size); - switch (exec->ctx->Driver.CurrentExecPrimitive) { case GL_POINTS: return 0; @@ -219,7 +218,7 @@ vbo_exec_bind_arrays( struct gl_context *ctx ) exec->vtx.inputs[attr] = &arrays[attr]; if (_mesa_is_bufferobj(exec->vtx.bufferobj)) { - /* a real buffer obj: Ptr is an offset, not a pointer*/ + /* a real buffer obj: Ptr is an offset, not a pointer */ assert(exec->vtx.bufferobj->Mappings[MAP_INTERNAL].Pointer); assert(offset >= 0); arrays[attr].Ptr = (GLubyte *) @@ -259,7 +258,7 @@ vbo_exec_vtx_unmap( struct vbo_exec_context *exec ) { if (_mesa_is_bufferobj(exec->vtx.bufferobj)) { struct gl_context *ctx = exec->ctx; - + if (ctx->Driver.FlushMappedBufferRange) { GLintptr offset = exec->vtx.buffer_used - exec->vtx.bufferobj->Mappings[MAP_INTERNAL].Offset; @@ -277,7 +276,7 @@ vbo_exec_vtx_unmap( struct vbo_exec_context *exec ) assert(exec->vtx.buffer_used <= VBO_VERT_BUFFER_SIZE); assert(exec->vtx.buffer_ptr != NULL); - + ctx->Driver.UnmapBuffer(ctx, exec->vtx.bufferobj, MAP_INTERNAL); exec->vtx.buffer_map = NULL; exec->vtx.buffer_ptr = NULL; @@ -299,7 +298,7 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec ) GL_MAP_FLUSH_EXPLICIT_BIT | MESA_MAP_NOWAIT_BIT; const GLenum usage = GL_STREAM_DRAW_ARB; - + if (!_mesa_is_bufferobj(exec->vtx.bufferobj)) return; @@ -323,7 +322,7 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec ) exec->vtx.buffer_ptr = exec->vtx.buffer_map = NULL; } } - + if (!exec->vtx.buffer_map) { /* Need to allocate a new VBO */ exec->vtx.buffer_used = 0; @@ -381,14 +380,14 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, GLboolean keepUnmapped) if (0) vbo_exec_debug_verts( exec ); - if (exec->vtx.prim_count && + if (exec->vtx.prim_count && exec->vtx.vert_count) { - exec->vtx.copied.nr = vbo_copy_vertices( exec ); + exec->vtx.copied.nr = vbo_copy_vertices( exec ); if (exec->vtx.copied.nr != 
exec->vtx.vert_count) { struct gl_context *ctx = exec->ctx; - + /* Before the update_state() as this may raise _NEW_VARYING_VP_INPUTS * from _mesa_set_varying_vp_inputs(). */ @@ -405,7 +404,7 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, GLboolean keepUnmapped) printf("%s %d %d\n", __func__, exec->vtx.prim_count, exec->vtx.vert_count); - vbo_context(ctx)->draw_prims( ctx, + vbo_context(ctx)->draw_prims( ctx, exec->vtx.prim, exec->vtx.prim_count, NULL, @@ -433,7 +432,7 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, GLboolean keepUnmapped) if (keepUnmapped || exec->vtx.vertex_size == 0) exec->vtx.max_vert = 0; else - exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) / + exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) / (exec->vtx.vertex_size * sizeof(GLfloat))); exec->vtx.buffer_ptr = exec->vtx.buffer_map; diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c index 1a70d168c55..fdc677f9a07 100644 --- a/src/mesa/vbo/vbo_save_api.c +++ b/src/mesa/vbo/vbo_save_api.c @@ -648,7 +648,8 @@ _save_upgrade_vertex(struct gl_context *ctx, GLuint attr, GLuint newsz) /* Recalculate all the attrptr[] values: */ - for (i = 0, tmp = save->vertex; i < VBO_ATTRIB_MAX; i++) { + tmp = save->vertex; + for (i = 0; i < VBO_ATTRIB_MAX; i++) { if (save->attrsz[i]) { save->attrptr[i] = tmp; tmp += save->attrsz[i]; @@ -1543,7 +1544,7 @@ vbo_print_vertex_list(struct gl_context *ctx, void *data, FILE *f) node->vertex_store->bufferobj : NULL; (void) ctx; - fprintf(f, "VBO-VERTEX-LIST, %u vertices %d primitives, %d vertsize " + fprintf(f, "VBO-VERTEX-LIST, %u vertices, %d primitives, %d vertsize, " "buffer %p\n", node->count, node->prim_count, node->vertex_size, buffer); |