diff options
author | Kristian Høgsberg Kristensen <[email protected]> | 2016-02-24 12:50:27 -0800 |
---|---|---|
committer | Kristian Høgsberg Kristensen <[email protected]> | 2016-02-24 13:04:54 -0800 |
commit | 59f57289959702e528b68bdd0d06488089517a00 (patch) | |
tree | a266656ac6129f5ad14b9d0d0c69c7077466c3f3 /src/mesa | |
parent | 25c2470b24ce8411f6747eb887137b2511b6d529 (diff) | |
parent | c95d5c5f6fbfe4a96276e67ed279562b33432fb5 (diff) |
Merge remote-tracking branch 'origin/master' into vulkan
Diffstat (limited to 'src/mesa')
114 files changed, 3338 insertions, 1252 deletions
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index 6669f295399..a6c12c64828 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -405,6 +405,7 @@ STATETRACKER_FILES = \ state_tracker/st_atom_depth.c \ state_tracker/st_atom_framebuffer.c \ state_tracker/st_atom.h \ + state_tracker/st_atom_image.c \ state_tracker/st_atom_msaa.c \ state_tracker/st_atom_pixeltransfer.c \ state_tracker/st_atom_rasterizer.c \ @@ -427,6 +428,8 @@ STATETRACKER_FILES = \ state_tracker/st_cb_bufferobjects.h \ state_tracker/st_cb_clear.c \ state_tracker/st_cb_clear.h \ + state_tracker/st_cb_compute.c \ + state_tracker/st_cb_compute.h \ state_tracker/st_cb_condrender.c \ state_tracker/st_cb_condrender.h \ state_tracker/st_cb_copyimage.c \ diff --git a/src/mesa/drivers/common/meta_copy_image.c b/src/mesa/drivers/common/meta_copy_image.c index 6534d432e61..57c3f686b0c 100644 --- a/src/mesa/drivers/common/meta_copy_image.c +++ b/src/mesa/drivers/common/meta_copy_image.c @@ -203,7 +203,7 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx, src_tex_image = wrap_renderbuffer(ctx, src_renderbuffer); } if (!make_view(ctx, src_tex_image, &src_view_tex_image, &src_view_texture, - dst_tex_image->InternalFormat)) + dst_internal_format)) goto cleanup; } diff --git a/src/mesa/drivers/dri/i915/intel_tex_validate.c b/src/mesa/drivers/dri/i915/intel_tex_validate.c index 224a67fc6eb..c2111e5e209 100644 --- a/src/mesa/drivers/dri/i915/intel_tex_validate.c +++ b/src/mesa/drivers/dri/i915/intel_tex_validate.c @@ -1,6 +1,7 @@ #include "main/mtypes.h" #include "main/macros.h" #include "main/samplerobj.h" +#include "main/teximage.h" #include "main/texobj.h" #include "intel_context.h" diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 45f7a91295d..8f92fd7cfd2 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -175,6 +175,7 @@ i965_FILES = \ gen6_blorp.cpp \ gen6_cc.c \ gen6_clip_state.c \ + gen6_constant_state.c \ gen6_depth_state.c \ gen6_depthstencil.c \ gen6_gs_state.c \ diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index c7cb3944870..05fff91ed57 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -70,8 +70,13 @@ brw_blorp_blit_miptrees(struct brw_context *brw, * the destination buffer because we use the standard render path to render * to destination color buffers, and the standard render path is * fast-color-aware. + * Lossless compression is only introduced for gen9 onwards whereas + * blorp is not supported even for gen8. Therefore it should be impossible + * to end up here with single sampled compressed surfaces. */ - intel_miptree_resolve_color(brw, src_mt); + assert(!intel_miptree_is_lossless_compressed(brw, src_mt)); + assert(!intel_miptree_is_lossless_compressed(brw, dst_mt)); + intel_miptree_resolve_color(brw, src_mt, 0); intel_miptree_slice_resolve_depth(brw, src_mt, src_level, src_layer); intel_miptree_slice_resolve_depth(brw, dst_mt, dst_level, dst_layer); diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c index d9f181a1cbf..0d2aca5b78a 100644 --- a/src/mesa/drivers/dri/i965/brw_compute.c +++ b/src/mesa/drivers/dri/i965/brw_compute.c @@ -35,6 +35,84 @@ static void +prepare_indirect_gpgpu_walker(struct brw_context *brw) +{ + GLintptr indirect_offset = brw->compute.num_work_groups_offset; + drm_intel_bo *bo = brw->compute.num_work_groups_bo; + + brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo, + I915_GEM_DOMAIN_VERTEX, 0, + indirect_offset + 0); + brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo, + I915_GEM_DOMAIN_VERTEX, 0, + indirect_offset + 4); + brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo, + I915_GEM_DOMAIN_VERTEX, 0, + indirect_offset + 8); + + if (brw->gen > 7) + return; + + /* Clear upper 32-bits of SRC0 and all 64-bits of SRC1 */ + BEGIN_BATCH(7); + OUT_BATCH(MI_LOAD_REGISTER_IMM | (7 - 2)); + OUT_BATCH(MI_PREDICATE_SRC0 + 4); + OUT_BATCH(0u); + OUT_BATCH(MI_PREDICATE_SRC1 + 0); + OUT_BATCH(0u); + OUT_BATCH(MI_PREDICATE_SRC1 + 4); + OUT_BATCH(0u); + ADVANCE_BATCH(); + + /* Load compute_dispatch_indirect_x_size into SRC0 */ + brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + indirect_offset + 0); + + /* predicate = (compute_dispatch_indirect_x_size == 0); */ + BEGIN_BATCH(1); + OUT_BATCH(GEN7_MI_PREDICATE | + MI_PREDICATE_LOADOP_LOAD | + MI_PREDICATE_COMBINEOP_SET | + MI_PREDICATE_COMPAREOP_SRCS_EQUAL); + ADVANCE_BATCH(); + + /* Load compute_dispatch_indirect_y_size into SRC0 */ + brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + indirect_offset + 4); + + /* predicate |= (compute_dispatch_indirect_y_size == 0); */ + BEGIN_BATCH(1); + OUT_BATCH(GEN7_MI_PREDICATE | + MI_PREDICATE_LOADOP_LOAD | + MI_PREDICATE_COMBINEOP_OR | + MI_PREDICATE_COMPAREOP_SRCS_EQUAL); + ADVANCE_BATCH(); + + /* Load compute_dispatch_indirect_z_size into SRC0 */ + brw_load_register_mem(brw, MI_PREDICATE_SRC0, bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + indirect_offset + 8); + + /* predicate |= (compute_dispatch_indirect_z_size == 0); */ + BEGIN_BATCH(1); + OUT_BATCH(GEN7_MI_PREDICATE | + MI_PREDICATE_LOADOP_LOAD | + MI_PREDICATE_COMBINEOP_OR | + MI_PREDICATE_COMPAREOP_SRCS_EQUAL); + ADVANCE_BATCH(); + + /* predicate = !predicate; */ + BEGIN_BATCH(1); + OUT_BATCH(GEN7_MI_PREDICATE | + MI_PREDICATE_LOADOP_LOADINV | + MI_PREDICATE_COMBINEOP_OR | + MI_PREDICATE_COMPAREOP_FALSE); + ADVANCE_BATCH(); +} + +static void brw_emit_gpgpu_walker(struct brw_context *brw) { const struct brw_cs_prog_data *prog_data = brw->cs.prog_data; @@ -45,20 +123,10 @@ brw_emit_gpgpu_walker(struct brw_context *brw) if (brw->compute.num_work_groups_bo == NULL) { indirect_flag = 0; } else { - GLintptr indirect_offset = brw->compute.num_work_groups_offset; - drm_intel_bo *bo = brw->compute.num_work_groups_bo; - - indirect_flag = GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE; - - brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo, - I915_GEM_DOMAIN_VERTEX, 0, - indirect_offset + 0); - brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo, - I915_GEM_DOMAIN_VERTEX, 0, - indirect_offset + 4); - brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo, - I915_GEM_DOMAIN_VERTEX, 0, - indirect_offset + 8); + indirect_flag = + GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE | + (brw->gen == 7 ? GEN7_GPGPU_PREDICATE_ENABLE : 0); + prepare_indirect_gpgpu_walker(brw); } const unsigned simd_size = prog_data->simd_size; diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 44d2fe4d9e4..31b6b2a3641 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -208,7 +208,11 @@ intel_update_state(struct gl_context * ctx, GLuint new_state) if (!tex_obj || !tex_obj->mt) continue; intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt); - intel_miptree_resolve_color(brw, tex_obj->mt); + /* Sampling engine understands lossless compression and resolving + * those surfaces should be skipped for performance reasons. + */ + intel_miptree_resolve_color(brw, tex_obj->mt, + INTEL_MIPTREE_IGNORE_CCS_E); brw_render_cache_set_check_flush(brw, tex_obj->mt->bo); } @@ -223,7 +227,13 @@ intel_update_state(struct gl_context * ctx, GLuint new_state) tex_obj = intel_texture_object(u->TexObj); if (tex_obj && tex_obj->mt) { - intel_miptree_resolve_color(brw, tex_obj->mt); + /* Access to images is implemented using indirect messages + * against data port. Normal render target write understands + * lossless compression but unfortunately the typed/untyped + * read/write interface doesn't. Therefore the compressed + * surfaces need to be resolved prior to accessing them. + */ + intel_miptree_resolve_color(brw, tex_obj->mt, 0); brw_render_cache_set_check_flush(brw, tex_obj->mt->bo); } } @@ -252,7 +262,11 @@ intel_update_state(struct gl_context * ctx, GLuint new_state) _mesa_get_srgb_format_linear(mt->format) == mt->format) continue; - intel_miptree_resolve_color(brw, mt); + /* Lossless compression is not supported for SRGB formats, it + * should be impossible to get here with such surfaces. + */ + assert(!intel_miptree_is_lossless_compressed(brw, mt)); + intel_miptree_resolve_color(brw, mt, 0); brw_render_cache_set_check_flush(brw, mt->bo); } } @@ -710,6 +724,7 @@ brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_thread ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations; ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations; ctx->Const.MaxComputeWorkGroupInvocations = max_invocations; + ctx->Const.MaxComputeSharedMemorySize = 64 * 1024; } /** @@ -1227,7 +1242,7 @@ intel_resolve_for_dri2_flush(struct brw_context *brw, if (rb == NULL || rb->mt == NULL) continue; if (rb->mt->num_samples <= 1) - intel_miptree_resolve_color(brw, rb->mt); + intel_miptree_resolve_color(brw, rb->mt, 0); else intel_renderbuffer_downsample(brw, rb); } diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 8416a1d4708..8ef5afea149 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -2729,6 +2729,7 @@ enum brw_wm_barycentric_interp_mode { # define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE (1 << 8) # define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7) # define GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE (1 << 6) +# define GEN9_PS_RENDER_TARGET_RESOLVE_FULL (3 << 6) # define HSW_PS_UAV_ACCESS_ENABLE (1 << 5) # define GEN7_PS_POSOFFSET_NONE (0 << 3) # define GEN7_PS_POSOFFSET_CENTROID (2 << 3) @@ -2939,6 +2940,7 @@ enum brw_wm_barycentric_interp_mode { #define GPGPU_WALKER 0x7105 /* GEN7 DW0 */ # define GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE (1 << 10) +# define GEN7_GPGPU_PREDICATE_ENABLE (1 << 8) /* GEN8+ DW2 */ # define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT 0 # define GPGPU_WALKER_INDIRECT_LENGTH_MASK INTEL_MASK(15, 0) diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c index 05872255865..38a27da898c 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.c +++ b/src/mesa/drivers/dri/i965/brw_device_info.c @@ -384,20 +384,19 @@ static const struct brw_device_info brw_device_info_bxt = { .gt = 1, .has_llc = false, - /* XXX: These are preliminary thread counts and URB sizes. */ .num_slices = 1, - .max_vs_threads = 56, - .max_hs_threads = 56, - .max_ds_threads = 56, - .max_gs_threads = 56, + .max_vs_threads = 112, + .max_hs_threads = 112, + .max_ds_threads = 112, + .max_gs_threads = 112, .max_wm_threads = 64 * 3, - .max_cs_threads = 28, + .max_cs_threads = 6 * 6, .urb = { - .size = 64, + .size = 192, .min_vs_entries = 34, - .max_vs_entries = 640, - .max_hs_entries = 80, - .max_ds_entries = 80, + .max_vs_entries = 704, + .max_hs_entries = 256, + .max_ds_entries = 416, .max_gs_entries = 256, } }; diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 23e71fd9ba6..afa8a4e9eae 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -391,6 +391,15 @@ brw_try_draw_prims(struct gl_context *ctx, if (ctx->NewState) _mesa_update_state(ctx); + /* We have to validate the textures *before* checking for fallbacks; + * otherwise, the software fallback won't be able to rely on the + * texture state, the firstLevel and lastLevel fields won't be + * set in the intel texture object (they'll both be 0), and the + * software fallback will segfault if it attempts to access any + * texture level other than level 0. + */ + brw_validate_textures(brw); + /* Find the highest sampler unit used by each shader program. A bit-count * won't work since ARB programs use the texture unit number as the sampler * index. @@ -406,15 +415,6 @@ brw_try_draw_prims(struct gl_context *ctx, brw->vs.base.sampler_count = _mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed); - /* We have to validate the textures *before* checking for fallbacks; - * otherwise, the software fallback won't be able to rely on the - * texture state, the firstLevel and lastLevel fields won't be - * set in the intel texture object (they'll both be 0), and the - * software fallback will segfault if it attempts to access any - * texture level other than level 0. - */ - brw_validate_textures(brw); - intel_prepare_render(brw); /* This workaround has to happen outside of brw_upload_render_state() diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index bb22cfa5fab..81a83400ea0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3524,6 +3524,36 @@ fs_visitor::lower_integer_multiplication() return progress; } +bool +fs_visitor::lower_minmax() +{ + assert(devinfo->gen < 6); + + bool progress = false; + + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { + const fs_builder ibld(this, block, inst); + + if (inst->opcode == BRW_OPCODE_SEL && + inst->predicate == BRW_PREDICATE_NONE) { + /* FIXME: Using CMP doesn't preserve the NaN propagation semantics of + * the original SEL.L/GE instruction + */ + ibld.CMP(ibld.null_reg_d(), inst->src[0], inst->src[1], + inst->conditional_mod); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->conditional_mod = BRW_CONDITIONAL_NONE; + + progress = true; + } + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + static void setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key, fs_reg *dst, fs_reg color, unsigned components) @@ -5134,7 +5164,7 @@ fs_visitor::optimize() if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) { char filename[64]; - snprintf(filename, 64, "%s%d-%s-00-start", + snprintf(filename, 64, "%s%d-%s-00-00-start", stage_abbrev, dispatch_width, nir->info.name); backend_shader::dump_instructions(filename); @@ -5187,6 +5217,13 @@ fs_visitor::optimize() OPT(opt_combine_constants); OPT(lower_integer_multiplication); + if (devinfo->gen <= 5 && OPT(lower_minmax)) { + OPT(opt_cmod_propagation); + OPT(opt_cse); + OPT(opt_copy_propagate); + OPT(dead_code_eliminate); + } + lower_uniform_pull_constant_loads(); validate(); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 89fbbfc90e4..f1a81c13ef9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -175,6 +175,7 @@ public: bool lower_load_payload(); bool lower_logical_sends(); bool lower_integer_multiplication(); + bool lower_minmax(); bool lower_simd_width(); bool opt_combine_constants(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h index dd3c383a17d..9a3cc3a50c2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h @@ -369,20 +369,14 @@ namespace brw { * * Generally useful to get the minimum or maximum of two values. */ - void + instruction * emit_minmax(const dst_reg &dst, const src_reg &src0, const src_reg &src1, brw_conditional_mod mod) const { assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L); - if (shader->devinfo->gen >= 6) { - set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), - fix_unsigned_negate(src1))); - } else { - CMP(null_reg_d(), src0, src1, mod); - set_predicate(BRW_PREDICATE_NORMAL, - SEL(dst, src0, src1)); - } + return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), + fix_unsigned_negate(src1))); } /** diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index fd25307c587..9dbe13df514 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -87,7 +87,7 @@ public: void setup_initial_values(); void run(); - void dump_block_data() const; + void dump_block_data() const UNUSED; void *mem_ctx; cfg_t *cfg; diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 956cf53fd6c..8611b8dc443 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1015,28 +1015,14 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) case nir_op_fmin: case nir_op_imin: case nir_op_umin: - if (devinfo->gen >= 6) { - inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_L; - } else { - bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_L); - inst = bld.SEL(result, op[0], op[1]); - inst->predicate = BRW_PREDICATE_NORMAL; - } + inst = bld.emit_minmax(result, op[0], op[1], BRW_CONDITIONAL_L); inst->saturate = instr->dest.saturate; break; case nir_op_fmax: case nir_op_imax: case nir_op_umax: - if (devinfo->gen >= 6) { - inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_GE; - } else { - bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_GE); - inst = bld.SEL(result, op[0], op[1]); - inst->predicate = BRW_PREDICATE_NORMAL; - } + inst = bld.emit_minmax(result, op[0], op[1], BRW_CONDITIONAL_GE); inst->saturate = instr->dest.saturate; break; @@ -1318,7 +1304,7 @@ fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst, static brw_reg_type get_image_base_type(const glsl_type *type) { - switch ((glsl_base_type)type->sampler_type) { + switch ((glsl_base_type)type->sampled_type) { case GLSL_TYPE_UINT: return BRW_REGISTER_TYPE_UD; case GLSL_TYPE_INT: diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp index 9b0750026c4..4adffdd75fb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp @@ -1143,7 +1143,7 @@ namespace brw { if (rsize && pred) set_predicate(pred, bld.SEL(tmp, tmp, brw_imm_d(0))); - return tmp; + return retype(tmp, src0.type); } } } diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index b2b07e7e58e..488fa6c0c45 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -228,7 +228,9 @@ get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb, unsigned int x_align, y_align; unsigned int x_scaledown, y_scaledown; - if (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE) { + /* Only single sampled surfaces need to (and actually can) be resolved. */ + if (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE || + intel_miptree_is_lossless_compressed(brw, irb->mt)) { /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render * Target(s)", beneath the "Fast Color Clear" bullet (p327): * @@ -872,7 +874,10 @@ brw_meta_resolve_color(struct brw_context *brw, * bits to let us select the type of resolve. For fast clear resolves, it * turns out we can use the same value as pre-SKL though. */ - set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE); + if (intel_miptree_is_lossless_compressed(brw, mt)) + set_fast_clear_op(brw, GEN9_PS_RENDER_TARGET_RESOLVE_FULL); + else + set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE); mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; get_resolve_rect(brw, mt, &rect); diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index f44ccd6e071..6b85eac77d6 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -76,6 +76,7 @@ extern const struct brw_tracked_state brw_tcs_samplers; extern const struct brw_tracked_state brw_tes_samplers; extern const struct brw_tracked_state brw_gs_samplers; extern const struct brw_tracked_state brw_cs_samplers; +extern const struct brw_tracked_state brw_cs_texture_surfaces; extern const struct brw_tracked_state brw_vs_ubo_surfaces; extern const struct brw_tracked_state brw_vs_abo_surfaces; extern const struct brw_tracked_state brw_vs_image_surfaces; diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index cac06fa8918..c6aa1344270 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -387,8 +387,27 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) /* We need to make sure that the programs get regenerated, since * any offsets leftover in brw_context will no longer be valid. */ - brw->NewGLState |= ~0; - brw->ctx.NewDriverState |= ~0ull; + brw->NewGLState = ~0; + brw->ctx.NewDriverState = ~0ull; + brw->state.pipelines[BRW_RENDER_PIPELINE].mesa = ~0; + brw->state.pipelines[BRW_RENDER_PIPELINE].brw = ~0ull; + brw->state.pipelines[BRW_COMPUTE_PIPELINE].mesa = ~0; + brw->state.pipelines[BRW_COMPUTE_PIPELINE].brw = ~0ull; + + /* Also, NULL out any stale program pointers. */ + brw->vs.prog_data = NULL; + brw->vs.base.prog_data = NULL; + brw->tcs.prog_data = NULL; + brw->tcs.base.prog_data = NULL; + brw->tes.prog_data = NULL; + brw->tes.base.prog_data = NULL; + brw->gs.prog_data = NULL; + brw->gs.base.prog_data = NULL; + brw->wm.prog_data = NULL; + brw->wm.base.prog_data = NULL; + brw->cs.prog_data = NULL; + brw->cs.base.prog_data = NULL; + intel_batchbuffer_flush(brw); } diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index ee75ca88549..a91d07446ce 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -280,7 +280,7 @@ static const struct brw_tracked_state *gen7_compute_atoms[] = &brw_cs_pull_constants, &brw_cs_ubo_surfaces, &brw_cs_abo_surfaces, - &brw_texture_surfaces, + &brw_cs_texture_surfaces, &brw_cs_work_groups_surface, &brw_cs_samplers, &brw_cs_state, @@ -395,7 +395,7 @@ static const struct brw_tracked_state *gen8_compute_atoms[] = &brw_cs_pull_constants, &brw_cs_ubo_surfaces, &brw_cs_abo_surfaces, - &brw_texture_surfaces, + &brw_cs_texture_surfaces, &brw_cs_work_groups_surface, &brw_cs_samplers, &brw_cs_state, diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 2e79de6a883..8136339332b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -26,6 +26,7 @@ #include "brw_cfg.h" #include "brw_vs.h" #include "brw_nir.h" +#include "brw_vec4_builder.h" #include "brw_vec4_live_variables.h" #include "brw_dead_control_flow.h" #include "program/prog_parameter.h" @@ -1555,11 +1556,6 @@ vec4_vs_visitor::setup_attributes(int payload_reg) } } - if (vs_prog_data->uses_drawid) { - attribute_map[VERT_ATTRIB_MAX + 1] = payload_reg + nr_attributes; - nr_attributes++; - } - /* VertexID is stored by the VF as the last vertex element, but we * don't represent it with a flag in inputs_read, so we call it * VERT_ATTRIB_MAX. @@ -1570,6 +1566,11 @@ vec4_vs_visitor::setup_attributes(int payload_reg) nr_attributes++; } + if (vs_prog_data->uses_drawid) { + attribute_map[VERT_ATTRIB_MAX + 1] = payload_reg + nr_attributes; + nr_attributes++; + } + lower_attributes_to_hw_regs(attribute_map, false /* interleaved */); return payload_reg + vs_prog_data->nr_attributes; @@ -1624,6 +1625,36 @@ vec4_vs_visitor::setup_payload(void) this->first_non_payload_grf = reg; } +bool +vec4_visitor::lower_minmax() +{ + assert(devinfo->gen < 6); + + bool progress = false; + + foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { + const vec4_builder ibld(this, block, inst); + + if (inst->opcode == BRW_OPCODE_SEL && + inst->predicate == BRW_PREDICATE_NONE) { + /* FIXME: Using CMP doesn't preserve the NaN propagation semantics of + * the original SEL.L/GE instruction + */ + ibld.CMP(ibld.null_reg_d(), inst->src[0], inst->src[1], + inst->conditional_mod); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->conditional_mod = BRW_CONDITIONAL_NONE; + + progress = true; + } + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + src_reg vec4_visitor::get_timestamp() { @@ -1861,7 +1892,7 @@ vec4_visitor::run() if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) { char filename[64]; - snprintf(filename, 64, "%s-%s-00-start", + snprintf(filename, 64, "%s-%s-00-00-start", stage_abbrev, nir->info.name); backend_shader::dump_instructions(filename); @@ -1896,6 +1927,13 @@ vec4_visitor::run() OPT(dead_code_eliminate); } + if (devinfo->gen <= 5 && OPT(lower_minmax)) { + OPT(opt_cmod_propagation); + OPT(opt_cse); + OPT(opt_copy_propagation); + OPT(dead_code_eliminate); + } + if (failed) return false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 6d388b04fdb..7b86e1bc050 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -304,6 +304,8 @@ public: void resolve_ud_negate(src_reg *reg); + bool lower_minmax(); + src_reg get_timestamp(); void dump_instruction(backend_instruction *inst); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_builder.h b/src/mesa/drivers/dri/i965/brw_vec4_builder.h index be1427c7db7..3a8617e05ac 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_builder.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_builder.h @@ -66,7 +66,8 @@ namespace brw { * instruction passed as argument. */ vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst) : - shader(shader), block(block), cursor(inst) + shader(shader), block(block), cursor(inst), + force_writemask_all(inst->force_writemask_all) { annotation.str = inst->annotation; annotation.ir = inst->ir; @@ -298,18 +299,14 @@ namespace brw { * * Generally useful to get the minimum or maximum of two values. */ - void + instruction * emit_minmax(const dst_reg &dst, const src_reg &src0, const src_reg &src1, brw_conditional_mod mod) const { - if (shader->devinfo->gen >= 6) { - set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), - fix_unsigned_negate(src1))); - } else { - CMP(null_reg_d(), src0, src1, mod); - set_predicate(BRW_PREDICATE_NORMAL, - SEL(dst, src0, src1)); - } + assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L); + + return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), + fix_unsigned_negate(src1))); } /** diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 3bb768f82dc..8418a3cdc01 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -679,18 +679,8 @@ vec4_instruction * vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, src_reg src0, src_reg src1) { - vec4_instruction *inst; - - if (devinfo->gen >= 6) { - inst = emit(BRW_OPCODE_SEL, dst, src0, src1); - inst->conditional_mod = conditionalmod; - } else { - emit(CMP(dst, src0, src1, conditionalmod)); - - inst = emit(BRW_OPCODE_SEL, dst, src0, src1); - inst->predicate = BRW_PREDICATE_NORMAL; - } - + vec4_instruction *inst = emit(BRW_OPCODE_SEL, dst, src0, src1); + inst->conditional_mod = conditionalmod; return inst; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 5ab2f7f09df..f1b8fc4c84d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -872,16 +872,12 @@ brw_update_texture_surfaces(struct brw_context *brw) /* BRW_NEW_FRAGMENT_PROGRAM */ struct gl_program *fs = (struct gl_program *) brw->fragment_program; - /* BRW_NEW_COMPUTE_PROGRAM */ - struct gl_program *cs = (struct gl_program *) brw->compute_program; - /* _NEW_TEXTURE */ update_stage_texture_surfaces(brw, vs, &brw->vs.base, false); update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false); update_stage_texture_surfaces(brw, tes, &brw->tes.base, false); update_stage_texture_surfaces(brw, gs, &brw->gs.base, false); update_stage_texture_surfaces(brw, fs, &brw->wm.base, false); - update_stage_texture_surfaces(brw, cs, &brw->cs.base, false); /* emit alternate set of surface state for gather. this * allows the surface format to be overriden for only the @@ -897,8 +893,6 @@ brw_update_texture_surfaces(struct brw_context *brw) update_stage_texture_surfaces(brw, gs, &brw->gs.base, true); if (fs && fs->UsesGather) update_stage_texture_surfaces(brw, fs, &brw->wm.base, true); - if (cs && cs->UsesGather) - update_stage_texture_surfaces(brw, cs, &brw->cs.base, true); } brw->ctx.NewDriverState |= BRW_NEW_SURFACES; @@ -908,7 +902,6 @@ const struct brw_tracked_state brw_texture_surfaces = { .dirty = { .mesa = _NEW_TEXTURE, .brw = BRW_NEW_BATCH | - BRW_NEW_COMPUTE_PROGRAM | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | BRW_NEW_GEOMETRY_PROGRAM | @@ -923,6 +916,37 @@ const struct brw_tracked_state brw_texture_surfaces = { .emit = brw_update_texture_surfaces, }; +static void +brw_update_cs_texture_surfaces(struct brw_context *brw) +{ + /* BRW_NEW_COMPUTE_PROGRAM */ + struct gl_program *cs = (struct gl_program *) brw->compute_program; + + /* _NEW_TEXTURE */ + update_stage_texture_surfaces(brw, cs, &brw->cs.base, false); + + /* emit alternate set of surface state for gather. this + * allows the surface format to be overriden for only the + * gather4 messages. + */ + if (brw->gen < 8) { + if (cs && cs->UsesGather) + update_stage_texture_surfaces(brw, cs, &brw->cs.base, true); + } + + brw->ctx.NewDriverState |= BRW_NEW_SURFACES; +} + +const struct brw_tracked_state brw_cs_texture_surfaces = { + .dirty = { + .mesa = _NEW_TEXTURE, + .brw = BRW_NEW_BATCH | + BRW_NEW_COMPUTE_PROGRAM, + }, + .emit = brw_update_cs_texture_surfaces, +}; + + void brw_upload_ubo_surfaces(struct brw_context *brw, struct gl_shader *shader, @@ -1340,6 +1364,11 @@ brw_upload_image_surfaces(struct brw_context *brw, } brw->ctx.NewDriverState |= BRW_NEW_SURFACES; + /* This may have changed the image metadata dependent on the context + * image unit state and passed to the program as uniforms, make sure + * that push and pull constants are reuploaded. + */ + brw->NewGLState |= _NEW_PROGRAM_CONSTANTS; } } diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c b/src/mesa/drivers/dri/i965/gen6_constant_state.c new file mode 100644 index 00000000000..6c0c32b26f7 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c @@ -0,0 +1,190 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" +#include "program/prog_parameter.h" + +void +gen7_upload_constant_state(struct brw_context *brw, + const struct brw_stage_state *stage_state, + bool active, unsigned opcode) +{ + uint32_t mocs = brw->gen < 8 ? GEN7_MOCS_L3 : 0; + + /* Disable if the shader stage is inactive or there are no push constants. */ + active = active && stage_state->push_const_size != 0; + + int dwords = brw->gen >= 8 ? 11 : 7; + BEGIN_BATCH(dwords); + OUT_BATCH(opcode << 16 | (dwords - 2)); + + /* Workaround for SKL+ (we use option #2 until we have a need for more + * constant buffers). This comes from the documentation for 3DSTATE_CONSTANT_* + * + * The driver must ensure The following case does not occur without a flush + * to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length equal to + * zero committed followed by a 3DSTATE_CONSTANT_* with buffer 0 read length + * not equal to zero committed. Possible ways to avoid this condition + * include: + * 1. always force buffer 3 to have a non zero read length + * 2. always force buffer 0 to a zero read length + */ + if (brw->gen >= 9 && active) { + OUT_BATCH(0); + OUT_BATCH(stage_state->push_const_size); + } else { + OUT_BATCH(active ? stage_state->push_const_size : 0); + OUT_BATCH(0); + } + /* Pointer to the constant buffer. Covered by the set of state flags + * from gen6_prepare_wm_contants + */ + if (brw->gen >= 9 && active) { + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + /* XXX: When using buffers other than 0, you need to specify the + * graphics virtual address regardless of INSPM/debug bits + */ + OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_RENDER, 0, + stage_state->push_const_offset); + OUT_BATCH(0); + OUT_BATCH(0); + } else if (brw->gen >= 8) { + OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + } else { + OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + } + + ADVANCE_BATCH(); + + /* On SKL+ the new constants don't take effect until the next corresponding + * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed so we need to ensure + * that is sent + */ + if (brw->gen >= 9) + brw->ctx.NewDriverState |= BRW_NEW_SURFACES; +} + +/** + * Creates a streamed BO containing the push constants for the VS or GS on + * gen6+. + * + * Push constants are constant values (such as GLSL uniforms) that are + * pre-loaded into a shader stage's register space at thread spawn time. + * + * Not all GLSL uniforms will be uploaded as push constants: The hardware has + * a limitation of 32 or 64 EU registers (256 or 512 floats) per stage to be + * uploaded as push constants, while GL 4.4 requires at least 1024 components + * to be usable for the VS. Plus, currently we always use pull constants + * instead of push constants when doing variable-index array access. + * + * See brw_curbe.c for the equivalent gen4/5 code. + */ +void +gen6_upload_push_constants(struct brw_context *brw, + const struct gl_program *prog, + const struct brw_stage_prog_data *prog_data, + struct brw_stage_state *stage_state, + enum aub_state_struct_type type) +{ + struct gl_context *ctx = &brw->ctx; + + if (prog_data->nr_params == 0) { + stage_state->push_const_size = 0; + } else { + /* Updates the ParamaterValues[i] pointers for all parameters of the + * basic type of PROGRAM_STATE_VAR. + */ + /* XXX: Should this happen somewhere before to get our state flag set? */ + if (prog) + _mesa_load_state_parameters(ctx, prog->Parameters); + + gl_constant_value *param; + int i; + + param = brw_state_batch(brw, type, + prog_data->nr_params * sizeof(gl_constant_value), + 32, &stage_state->push_const_offset); + + STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); + + /* _NEW_PROGRAM_CONSTANTS + * + * Also _NEW_TRANSFORM -- we may reference clip planes other than as a + * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS + * wouldn't be set for them. + */ + for (i = 0; i < prog_data->nr_params; i++) { + param[i] = *prog_data->param[i]; + } + + if (0) { + fprintf(stderr, "%s constants:\n", + _mesa_shader_stage_to_string(stage_state->stage)); + for (i = 0; i < prog_data->nr_params; i++) { + if ((i & 7) == 0) + fprintf(stderr, "g%d: ", + prog_data->dispatch_grf_start_reg + i / 8); + fprintf(stderr, "%8f ", param[i].f); + if ((i & 7) == 7) + fprintf(stderr, "\n"); + } + if ((i & 7) != 0) + fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + } + + stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8; + /* We can only push 32 registers of constants at a time. */ + + /* From the SNB PRM (vol2, part 1, section 3.2.1.4: 3DSTATE_CONSTANT_VS: + * + * "The sum of all four read length fields (each incremented to + * represent the actual read length) must be less than or equal to + * 32" + * + * From the IVB PRM (vol2, part 1, section 3.2.1.3: 3DSTATE_CONSTANT_VS: + * + * "The sum of all four read length fields must be less than or + * equal to the size of 64" + * + * The other shader stages all match the VS's limits. + */ + assert(stage_state->push_const_size <= 32); + } +} diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index 4bc0a8598d6..c18610ca38c 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -33,95 +33,6 @@ #include "program/prog_statevars.h" #include "intel_batchbuffer.h" -/** - * Creates a streamed BO containing the push constants for the VS or GS on - * gen6+. - * - * Push constants are constant values (such as GLSL uniforms) that are - * pre-loaded into a shader stage's register space at thread spawn time. - * - * Not all GLSL uniforms will be uploaded as push constants: The hardware has - * a limitation of 32 or 64 EU registers (256 or 512 floats) per stage to be - * uploaded as push constants, while GL 4.4 requires at least 1024 components - * to be usable for the VS. Plus, currently we always use pull constants - * instead of push constants when doing variable-index array access. - * - * See brw_curbe.c for the equivalent gen4/5 code. - */ -void -gen6_upload_push_constants(struct brw_context *brw, - const struct gl_program *prog, - const struct brw_stage_prog_data *prog_data, - struct brw_stage_state *stage_state, - enum aub_state_struct_type type) -{ - struct gl_context *ctx = &brw->ctx; - - if (prog_data->nr_params == 0) { - stage_state->push_const_size = 0; - } else { - /* Updates the ParamaterValues[i] pointers for all parameters of the - * basic type of PROGRAM_STATE_VAR. - */ - /* XXX: Should this happen somewhere before to get our state flag set? */ - if (prog) - _mesa_load_state_parameters(ctx, prog->Parameters); - - gl_constant_value *param; - unsigned i; - - param = brw_state_batch(brw, type, - prog_data->nr_params * sizeof(gl_constant_value), - 32, &stage_state->push_const_offset); - - STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); - - /* _NEW_PROGRAM_CONSTANTS - * - * Also _NEW_TRANSFORM -- we may reference clip planes other than as a - * side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS - * wouldn't be set for them. - */ - for (i = 0; i < prog_data->nr_params; i++) { - param[i] = *prog_data->param[i]; - } - - if (0) { - fprintf(stderr, "%s constants:\n", - _mesa_shader_stage_to_string(stage_state->stage)); - for (i = 0; i < prog_data->nr_params; i++) { - if ((i & 7) == 0) - fprintf(stderr, "g%d: ", - prog_data->dispatch_grf_start_reg + i / 8); - fprintf(stderr, "%8f ", param[i].f); - if ((i & 7) == 7) - fprintf(stderr, "\n"); - } - if ((i & 7) != 0) - fprintf(stderr, "\n"); - fprintf(stderr, "\n"); - } - - stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8; - /* We can only push 32 registers of constants at a time. */ - - /* From the SNB PRM (vol2, part 1, section 3.2.1.4: 3DSTATE_CONSTANT_VS: - * - * "The sum of all four read length fields (each incremented to - * represent the actual read length) must be less than or equal to - * 32" - * - * From the IVB PRM (vol2, part 1, section 3.2.1.3: 3DSTATE_CONSTANT_VS: - * - * "The sum of all four read length fields must be less than or - * equal to the size of 64" - * - * The other shader stages all match the VS's limits. - */ - assert(stage_state->push_const_size <= 32); - } -} - static void gen6_upload_vs_push_constants(struct brw_context *brw) { diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c index 0c1813f9048..c4babc24f43 100644 --- a/src/mesa/drivers/dri/i965/gen7_l3_state.c +++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c @@ -298,7 +298,12 @@ static struct brw_l3_weights get_pipeline_state_l3_weights(const struct brw_context *brw) { const struct brw_stage_state *stage_states[] = { - &brw->vs.base, &brw->gs.base, &brw->wm.base, &brw->cs.base + [MESA_SHADER_VERTEX] = &brw->vs.base, + [MESA_SHADER_TESS_CTRL] = &brw->tcs.base, + [MESA_SHADER_TESS_EVAL] = &brw->tes.base, + [MESA_SHADER_GEOMETRY] = &brw->gs.base, + [MESA_SHADER_FRAGMENT] = &brw->wm.base, + [MESA_SHADER_COMPUTE] = &brw->cs.base }; bool needs_dc = false, needs_slm = false; diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index a18dc697651..0daecdc9ed2 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -29,81 +29,6 @@ #include "program/prog_statevars.h" #include "intel_batchbuffer.h" - -void -gen7_upload_constant_state(struct brw_context *brw, - const struct brw_stage_state *stage_state, - bool active, unsigned opcode) -{ - uint32_t mocs = brw->gen < 8 ? GEN7_MOCS_L3 : 0; - - /* Disable if the shader stage is inactive or there are no push constants. */ - active = active && stage_state->push_const_size != 0; - - int dwords = brw->gen >= 8 ? 11 : 7; - BEGIN_BATCH(dwords); - OUT_BATCH(opcode << 16 | (dwords - 2)); - - /* Workaround for SKL+ (we use option #2 until we have a need for more - * constant buffers). This comes from the documentation for 3DSTATE_CONSTANT_* - * - * The driver must ensure The following case does not occur without a flush - * to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length equal to - * zero committed followed by a 3DSTATE_CONSTANT_* with buffer 0 read length - * not equal to zero committed. Possible ways to avoid this condition - * include: - * 1. always force buffer 3 to have a non zero read length - * 2. always force buffer 0 to a zero read length - */ - if (brw->gen >= 9 && active) { - OUT_BATCH(0); - OUT_BATCH(stage_state->push_const_size); - } else { - OUT_BATCH(active ? stage_state->push_const_size : 0); - OUT_BATCH(0); - } - - /* Pointer to the constant buffer. Covered by the set of state flags - * from gen6_prepare_wm_contants - */ - if (brw->gen >= 9 && active) { - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - /* XXX: When using buffers other than 0, you need to specify the - * graphics virtual address regardless of INSPM/debug bits - */ - OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_RENDER, 0, - stage_state->push_const_offset); - OUT_BATCH(0); - OUT_BATCH(0); - } else if (brw->gen>= 8) { - OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - } else { - OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - } - - ADVANCE_BATCH(); - - /* On SKL+ the new constants don't take effect until the next corresponding - * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed so we need to ensure - * that is sent - */ - if (brw->gen >= 9) - brw->ctx.NewDriverState |= BRW_NEW_SURFACES; -} - static void upload_vs_state(struct brw_context *brw) { diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index fc8f701a24c..0a52815b8a6 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -197,6 +197,28 @@ gen8_emit_fast_clear_color(struct brw_context *brw, surf[7] |= mt->fast_clear_color_value; } +static uint32_t +gen8_get_aux_mode(const struct brw_context *brw, + const struct intel_mipmap_tree *mt, + uint32_t surf_type) +{ + if (mt->mcs_mt == NULL) + return GEN8_SURFACE_AUX_MODE_NONE; + + /* + * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE): + * "When MCS is enabled for non-MSRT, HALIGN_16 must be used" + * + * From the hardware spec for GEN9: + * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN + * 16 must be used." + */ + if (brw->gen >= 9 || mt->num_samples == 1) + assert(mt->halign == 16); + + return GEN8_SURFACE_AUX_MODE_MCS; +} + static void gen8_emit_texture_surface_state(struct brw_context *brw, struct intel_mipmap_tree *mt, @@ -209,13 +231,13 @@ gen8_emit_texture_surface_state(struct brw_context *brw, bool rw, bool for_gather) { const unsigned depth = max_layer - min_layer; - struct intel_mipmap_tree *aux_mt = NULL; - uint32_t aux_mode = GEN8_SURFACE_AUX_MODE_NONE; + struct intel_mipmap_tree *aux_mt = mt->mcs_mt; uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB; int surf_index = surf_offset - &brw->wm.base.surf_offset[0]; unsigned tiling_mode, pitch; const unsigned tr_mode = surface_tiling_resource_mode(mt->tr_mode); const uint32_t surf_type = translate_tex_target(target); + uint32_t aux_mode = gen8_get_aux_mode(brw, mt, surf_type); if (mt->format == MESA_FORMAT_S_UINT8) { tiling_mode = GEN8_SURFACE_TILING_W; @@ -229,20 +251,9 @@ gen8_emit_texture_surface_state(struct brw_context *brw, * buffer should always have been resolved before it is used as a texture * so there is no need for it. */ - if (mt->mcs_mt && mt->num_samples > 1) { - aux_mt = mt->mcs_mt; - aux_mode = GEN8_SURFACE_AUX_MODE_MCS; - - /* - * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE): - * "When MCS is enabled for non-MSRT, HALIGN_16 must be used" - * - * From the hardware spec for GEN9: - * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN - * 16 must be used." - */ - if (brw->gen >= 9 || mt->num_samples == 1) - assert(mt->halign == 16); + if (mt->num_samples <= 1) { + aux_mt = NULL; + aux_mode = GEN8_SURFACE_AUX_MODE_NONE; } uint32_t *surf = allocate_surface_state(brw, surf_offset, surf_index); @@ -418,8 +429,6 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, struct gl_context *ctx = &brw->ctx; struct intel_renderbuffer *irb = intel_renderbuffer(rb); struct intel_mipmap_tree *mt = irb->mt; - struct intel_mipmap_tree *aux_mt = NULL; - uint32_t aux_mode = GEN8_SURFACE_AUX_MODE_NONE; unsigned width = mt->logical_width0; unsigned height = mt->logical_height0; unsigned pitch = mt->pitch; @@ -472,21 +481,8 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, __func__, _mesa_get_format_name(rb_format)); } - if (mt->mcs_mt) { - aux_mt = mt->mcs_mt; - aux_mode = GEN8_SURFACE_AUX_MODE_MCS; - - /* - * From the BDW PRM, Volume 2d, page 260 (RENDER_SURFACE_STATE): - * "When MCS is enabled for non-MSRT, HALIGN_16 must be used" - * - * From the hardware spec for GEN9: - * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN - * 16 must be used." - */ - if (brw->gen >= 9 || mt->num_samples == 1) - assert(mt->halign == 16); - } + struct intel_mipmap_tree *aux_mt = mt->mcs_mt; + const uint32_t aux_mode = gen8_get_aux_mode(brw, mt, surf_type); uint32_t *surf = allocate_surface_state(brw, &offset, surf_index); diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 6d29fbdde21..72cf9af5b53 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -317,8 +317,8 @@ intel_miptree_blit(struct brw_context *brw, */ intel_miptree_slice_resolve_depth(brw, src_mt, src_level, src_slice); intel_miptree_slice_resolve_depth(brw, dst_mt, dst_level, dst_slice); - intel_miptree_resolve_color(brw, src_mt); - intel_miptree_resolve_color(brw, dst_mt); + intel_miptree_resolve_color(brw, src_mt, 0); + intel_miptree_resolve_color(brw, dst_mt, 0); if (src_flip) src_y = minify(src_mt->physical_height0, src_level - src_mt->first_level) - src_y - height; diff --git a/src/mesa/drivers/dri/i965/intel_copy_image.c b/src/mesa/drivers/dri/i965/intel_copy_image.c index dbbac1c95a2..08b7623e63d 100644 --- a/src/mesa/drivers/dri/i965/intel_copy_image.c +++ b/src/mesa/drivers/dri/i965/intel_copy_image.c @@ -270,11 +270,11 @@ intel_copy_image_sub_data(struct gl_context *ctx, */ intel_miptree_all_slices_resolve_hiz(brw, src_mt); intel_miptree_all_slices_resolve_depth(brw, src_mt); - intel_miptree_resolve_color(brw, src_mt); + intel_miptree_resolve_color(brw, src_mt, 0); intel_miptree_all_slices_resolve_hiz(brw, dst_mt); intel_miptree_all_slices_resolve_depth(brw, dst_mt); - intel_miptree_resolve_color(brw, dst_mt); + intel_miptree_resolve_color(brw, dst_mt, 0); _mesa_get_format_block_size(src_mt->format, &bw, &bh); diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 108dd87dd8b..6c233d84df9 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -161,8 +161,9 @@ intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt, } } -static bool -intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling) +bool +intel_tiling_supports_non_msrt_mcs(const struct brw_context *brw, + unsigned tiling) { /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render * Target(s)", beneath the "Fast Color Clear" bullet (p326): @@ -200,9 +201,9 @@ intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling) * - MCS and Lossless compression is supported for TiledY/TileYs/TileYf * non-MSRTs only. */ -static bool +bool intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw, - struct intel_mipmap_tree *mt) + const struct intel_mipmap_tree *mt) { /* MCS support does not exist prior to Gen7 */ if (brw->gen < 7) @@ -266,6 +267,32 @@ intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw, return true; } +/* On Gen9 support for color buffer compression was extended to single + * sampled surfaces. This is a helper considering both auxiliary buffer + * type and number of samples telling if the given miptree represents + * the new single sampled case - also called lossless compression. + */ +bool +intel_miptree_is_lossless_compressed(const struct brw_context *brw, + const struct intel_mipmap_tree *mt) +{ + /* Only available from Gen9 onwards. */ + if (brw->gen < 9) + return false; + + /* Compression always requires auxiliary buffer. */ + if (!mt->mcs_mt) + return false; + + /* Single sample compression is represented re-using msaa compression + * layout type: "Compressed Multisampled Surfaces". + */ + if (mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS) + return false; + + /* And finally distinguish between msaa and single sample case. */ + return mt->num_samples <= 1; +} /** * Determine depth format corresponding to a depth+stencil format, @@ -609,22 +636,21 @@ intel_get_yf_ys_bo_size(struct intel_mipmap_tree *mt, unsigned *alignment, return size; } -struct intel_mipmap_tree * -intel_miptree_create(struct brw_context *brw, - GLenum target, - mesa_format format, - GLuint first_level, - GLuint last_level, - GLuint width0, - GLuint height0, - GLuint depth0, - GLuint num_samples, - uint32_t layout_flags) +static struct intel_mipmap_tree * +miptree_create(struct brw_context *brw, + GLenum target, + mesa_format format, + GLuint first_level, + GLuint last_level, + GLuint width0, + GLuint height0, + GLuint depth0, + GLuint num_samples, + uint32_t layout_flags) { struct intel_mipmap_tree *mt; mesa_format tex_format = format; mesa_format etc_format = MESA_FORMAT_NONE; - GLuint total_width, total_height; uint32_t alloc_flags = 0; format = intel_lower_compressed_format(brw, format); @@ -645,21 +671,8 @@ intel_miptree_create(struct brw_context *brw, return NULL; } - total_width = mt->total_width; - total_height = mt->total_height; - - if (format == MESA_FORMAT_S_UINT8) { - /* Align to size of W tile, 64x64. */ - total_width = ALIGN(total_width, 64); - total_height = ALIGN(total_height, 64); - } - - bool y_or_x = false; - - if (mt->tiling == (I915_TILING_Y | I915_TILING_X)) { - y_or_x = true; + if (mt->tiling == (I915_TILING_Y | I915_TILING_X)) mt->tiling = I915_TILING_Y; - } if (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD) alloc_flags |= BO_ALLOC_FOR_RENDER; @@ -675,26 +688,61 @@ intel_miptree_create(struct brw_context *brw, mt->bo = drm_intel_bo_alloc_for_render(brw->bufmgr, "miptree", size, alignment); } else { - mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree", - total_width, total_height, mt->cpp, - &mt->tiling, &pitch, - alloc_flags); + if (format == MESA_FORMAT_S_UINT8) { + /* Align to size of W tile, 64x64. */ + mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree", + ALIGN(mt->total_width, 64), + ALIGN(mt->total_height, 64), + mt->cpp, &mt->tiling, &pitch, + alloc_flags); + } else { + mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree", + mt->total_width, mt->total_height, + mt->cpp, &mt->tiling, &pitch, + alloc_flags); + } } mt->pitch = pitch; + return mt; +} + +struct intel_mipmap_tree * +intel_miptree_create(struct brw_context *brw, + GLenum target, + mesa_format format, + GLuint first_level, + GLuint last_level, + GLuint width0, + GLuint height0, + GLuint depth0, + GLuint num_samples, + uint32_t layout_flags) +{ + struct intel_mipmap_tree *mt = miptree_create( + brw, target, format, + first_level, last_level, + width0, height0, depth0, num_samples, + layout_flags); + /* If the BO is too large to fit in the aperture, we need to use the * BLT engine to support it. Prior to Sandybridge, the BLT paths can't * handle Y-tiling, so we need to fall back to X. */ - if (brw->gen < 6 && y_or_x && mt->bo->size >= brw->max_gtt_map_object_size) { + if (brw->gen < 6 && mt->bo->size >= brw->max_gtt_map_object_size && + mt->tiling == I915_TILING_Y) { + unsigned long pitch = mt->pitch; + const uint32_t alloc_flags = + (layout_flags & MIPTREE_LAYOUT_ACCELERATED_UPLOAD) ? + BO_ALLOC_FOR_RENDER : 0; perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n", mt->total_width, mt->total_height); mt->tiling = I915_TILING_X; drm_intel_bo_unreference(mt->bo); mt->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "miptree", - total_width, total_height, mt->cpp, + mt->total_width, mt->total_height, mt->cpp, &mt->tiling, &pitch, alloc_flags); mt->pitch = pitch; } @@ -1402,6 +1450,27 @@ intel_miptree_copy_teximage(struct brw_context *brw, intel_obj->needs_validate = true; } +static void +intel_miptree_init_mcs(struct brw_context *brw, + struct intel_mipmap_tree *mt, + int init_value) +{ + /* From the Ivy Bridge PRM, Vol 2 Part 1 p326: + * + * When MCS buffer is enabled and bound to MSRT, it is required that it + * is cleared prior to any rendering. + * + * Since we don't use the MCS buffer for any purpose other than rendering, + * it makes sense to just clear it immediately upon allocation. + * + * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff. + */ + void *data = intel_miptree_map_raw(brw, mt->mcs_mt); + memset(data, init_value, mt->mcs_mt->total_height * mt->mcs_mt->pitch); + intel_miptree_unmap_raw(mt->mcs_mt); + mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR; +} + static bool intel_miptree_alloc_mcs(struct brw_context *brw, struct intel_mipmap_tree *mt, @@ -1447,31 +1516,18 @@ intel_miptree_alloc_mcs(struct brw_context *brw, */ const uint32_t mcs_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD | MIPTREE_LAYOUT_TILING_Y; - mt->mcs_mt = intel_miptree_create(brw, - mt->target, - format, - mt->first_level, - mt->last_level, - mt->logical_width0, - mt->logical_height0, - mt->logical_depth0, - 0 /* num_samples */, - mcs_flags); - - /* From the Ivy Bridge PRM, Vol 2 Part 1 p326: - * - * When MCS buffer is enabled and bound to MSRT, it is required that it - * is cleared prior to any rendering. - * - * Since we don't use the MCS buffer for any purpose other than rendering, - * it makes sense to just clear it immediately upon allocation. - * - * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff. - */ - void *data = intel_miptree_map_raw(brw, mt->mcs_mt); - memset(data, 0xff, mt->mcs_mt->total_height * mt->mcs_mt->pitch); - intel_miptree_unmap_raw(mt->mcs_mt); - mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR; + mt->mcs_mt = miptree_create(brw, + mt->target, + format, + mt->first_level, + mt->last_level, + mt->logical_width0, + mt->logical_height0, + mt->logical_depth0, + 0 /* num_samples */, + mcs_flags); + + intel_miptree_init_mcs(brw, mt, 0xFF); return mt->mcs_mt; } @@ -1520,16 +1576,16 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, if (brw->gen >= 8) { layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16; } - mt->mcs_mt = intel_miptree_create(brw, - mt->target, - format, - mt->first_level, - mt->last_level, - mcs_width, - mcs_height, - mt->logical_depth0, - 0 /* num_samples */, - layout_flags); + mt->mcs_mt = miptree_create(brw, + mt->target, + format, + mt->first_level, + mt->last_level, + mcs_width, + mcs_height, + mt->logical_depth0, + 0 /* num_samples */, + layout_flags); return mt->mcs_mt; } @@ -1991,8 +2047,17 @@ intel_miptree_all_slices_resolve_depth(struct brw_context *brw, void intel_miptree_resolve_color(struct brw_context *brw, - struct intel_mipmap_tree *mt) + struct intel_mipmap_tree *mt, + int flags) { + /* From gen9 onwards there is new compression scheme for single sampled + * surfaces called "lossless compressed". These don't need to be always + * resolved. + */ + if ((flags & INTEL_MIPTREE_IGNORE_CCS_E) && + intel_miptree_is_lossless_compressed(brw, mt)) + return; + switch (mt->fast_clear_state) { case INTEL_FAST_CLEAR_STATE_NO_MCS: case INTEL_FAST_CLEAR_STATE_RESOLVED: @@ -2001,8 +2066,10 @@ intel_miptree_resolve_color(struct brw_context *brw, case INTEL_FAST_CLEAR_STATE_UNRESOLVED: case INTEL_FAST_CLEAR_STATE_CLEAR: /* Fast color clear resolves only make sense for non-MSAA buffers. */ - if (mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE) + if (mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE || + intel_miptree_is_lossless_compressed(brw, mt)) { brw_meta_resolve_color(brw, mt); + } break; } } @@ -2029,7 +2096,7 @@ intel_miptree_make_shareable(struct brw_context *brw, assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE); if (mt->mcs_mt) { - intel_miptree_resolve_color(brw, mt); + intel_miptree_resolve_color(brw, mt, 0); intel_miptree_release(&mt->mcs_mt); mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS; } @@ -2137,7 +2204,7 @@ intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt) /* CPU accesses to color buffers don't understand fast color clears, so * resolve any pending fast color clears before we map. */ - intel_miptree_resolve_color(brw, mt); + intel_miptree_resolve_color(brw, mt, 0); drm_intel_bo *bo = mt->bo; diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 64f73ea9ae5..7cdfb37ff41 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -667,6 +667,18 @@ intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt, unsigned *width_px, unsigned *height); bool +intel_miptree_is_lossless_compressed(const struct brw_context *brw, + const struct intel_mipmap_tree *mt); + +bool +intel_tiling_supports_non_msrt_mcs(const struct brw_context *brw, + unsigned tiling); + +bool +intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw, + const struct intel_mipmap_tree *mt); + +bool intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, struct intel_mipmap_tree *mt); @@ -884,9 +896,19 @@ intel_miptree_used_for_rendering(struct intel_mipmap_tree *mt) mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_UNRESOLVED; } +/** + * Flag values telling color resolve pass which special types of buffers + * can be ignored. + * + * INTEL_MIPTREE_IGNORE_CCS_E: Lossless compressed (single-sample + * compression scheme since gen9) + */ +#define INTEL_MIPTREE_IGNORE_CCS_E (1 << 0) + void intel_miptree_resolve_color(struct brw_context *brw, - struct intel_mipmap_tree *mt); + struct intel_mipmap_tree *mt, + int flags); void intel_miptree_make_shareable(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c index 699e48a297c..54a741395eb 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c @@ -257,7 +257,7 @@ do_blit_bitmap( struct gl_context *ctx, /* The blitter has no idea about fast color clears, so we need to resolve * the miptree before we do anything. */ - intel_miptree_resolve_color(brw, irb->mt); + intel_miptree_resolve_color(brw, irb->mt, 0); /* Chop it all into chunks that can be digested by hardware: */ for (py = 0; py < height; py += DY) { diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c index 10d14623fe1..31030b1b4ea 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -155,7 +155,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, /* Since we are going to read raw data to the miptree, we need to resolve * any pending fast color clears before we start. */ - intel_miptree_resolve_color(brw, irb->mt); + intel_miptree_resolve_color(brw, irb->mt, 0); bo = irb->mt->bo; diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index 5d32a4ce650..e21c3ac543f 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -423,7 +423,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx, /* Since we are going to write raw data to the miptree, we need to resolve * any pending fast color clears before we start. */ - intel_miptree_resolve_color(brw, image->mt); + intel_miptree_resolve_color(brw, image->mt, 0); bo = image->mt->bo; diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c index 970ded1e66b..573f701acdd 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c @@ -140,7 +140,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, /* Since we are going to write raw data to the miptree, we need to resolve * any pending fast color clears before we start. */ - intel_miptree_resolve_color(brw, image->mt); + intel_miptree_resolve_color(brw, image->mt, 0); bo = image->mt->bo; diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c index 2cf9c13be33..3a811e10359 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_validate.c +++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c @@ -24,6 +24,7 @@ #include "main/mtypes.h" #include "main/macros.h" #include "main/samplerobj.h" +#include "main/teximage.h" #include "main/texobj.h" #include "brw_context.h" diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c index 08f13178f84..61f703667aa 100644 --- a/src/mesa/main/attrib.c +++ b/src/mesa/main/attrib.c @@ -766,7 +766,7 @@ pop_texture_group(struct gl_context *ctx, struct texture_state *texstate) _mesa_set_enable(ctx, GL_TEXTURE_2D, !!(unit->Enabled & TEXTURE_2D_BIT)); _mesa_set_enable(ctx, GL_TEXTURE_3D, !!(unit->Enabled & TEXTURE_3D_BIT)); if (ctx->Extensions.ARB_texture_cube_map) { - _mesa_set_enable(ctx, GL_TEXTURE_CUBE_MAP_ARB, + _mesa_set_enable(ctx, GL_TEXTURE_CUBE_MAP, !!(unit->Enabled & TEXTURE_CUBE_BIT)); } if (ctx->Extensions.NV_texture_rectangle) { @@ -837,7 +837,7 @@ pop_texture_group(struct gl_context *ctx, struct texture_state *texstate) /* don't restore state for unsupported targets to prevent * raising GL errors. */ - if (obj->Target == GL_TEXTURE_CUBE_MAP_ARB && + if (obj->Target == GL_TEXTURE_CUBE_MAP && !ctx->Extensions.ARB_texture_cube_map) { continue; } diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index de1aba44c1b..9aec42508a7 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -3007,8 +3007,8 @@ set_atomic_buffer_binding(struct gl_context *ctx, _mesa_reference_buffer_object(ctx, &binding->BufferObject, bufObj); if (bufObj == ctx->Shared->NullBufferObj) { - binding->Offset = -1; - binding->Size = -1; + binding->Offset = 0; + binding->Size = 0; } else { binding->Offset = offset; binding->Size = size; diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index 83e238ae825..26dafd1b786 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -58,10 +58,7 @@ supported_buffer_bitmask(const struct gl_context *ctx, if (_mesa_is_user_fbo(fb)) { /* A user-created renderbuffer */ - GLuint i; - for (i = 0; i < ctx->Const.MaxColorAttachments; i++) { - mask |= (BUFFER_BIT_COLOR0 << i); - } + mask = ((1 << ctx->Const.MaxColorAttachments) - 1) << BUFFER_COLOR0; } else { /* A window system framebuffer */ @@ -159,6 +156,9 @@ draw_buffer_enum_to_bitmask(const struct gl_context *ctx, GLenum buffer) case GL_COLOR_ATTACHMENT7_EXT: return BUFFER_BIT_COLOR7; default: + /* not an error, but also not supported */ + if (buffer >= GL_COLOR_ATTACHMENT8 && buffer <= GL_COLOR_ATTACHMENT31) + return 1 << BUFFER_COUNT; /* error */ return BAD_MASK; } @@ -171,7 +171,7 @@ draw_buffer_enum_to_bitmask(const struct gl_context *ctx, GLenum buffer) * renderbuffer (a BUFFER_* value). * return -1 for an invalid buffer. */ -static GLint +static gl_buffer_index read_buffer_enum_to_index(GLenum buffer) { switch (buffer) { @@ -214,6 +214,9 @@ read_buffer_enum_to_index(GLenum buffer) case GL_COLOR_ATTACHMENT7_EXT: return BUFFER_COLOR7; default: + /* not an error, but also not supported */ + if (buffer >= GL_COLOR_ATTACHMENT8 && buffer <= GL_COLOR_ATTACHMENT31) + return BUFFER_COUNT; /* error */ return -1; } @@ -221,7 +224,7 @@ read_buffer_enum_to_index(GLenum buffer) /** - * Called by glDrawBuffer(). + * Called by glDrawBuffer() and glNamedFramebufferDrawBuffer(). * Specify which renderbuffer(s) to draw into for the first color output. * <buffer> can name zero, one, two or four renderbuffers! * \sa _mesa_DrawBuffers @@ -242,9 +245,9 @@ read_buffer_enum_to_index(GLenum buffer) * * See the GL_EXT_framebuffer_object spec for more info. */ -void -_mesa_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, - GLenum buffer, const char *caller) +static void +draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum buffer, const char *caller) { GLbitfield destMask; @@ -293,7 +296,7 @@ void GLAPIENTRY _mesa_DrawBuffer(GLenum buffer) { GET_CURRENT_CONTEXT(ctx); - _mesa_draw_buffer(ctx, ctx->DrawBuffer, buffer, "glDrawBuffer"); + draw_buffer(ctx, ctx->DrawBuffer, buffer, "glDrawBuffer"); } @@ -312,22 +315,22 @@ _mesa_NamedFramebufferDrawBuffer(GLuint framebuffer, GLenum buf) else fb = ctx->WinSysDrawBuffer; - _mesa_draw_buffer(ctx, fb, buf, "glNamedFramebufferDrawBuffer"); + draw_buffer(ctx, fb, buf, "glNamedFramebufferDrawBuffer"); } /** - * Called by glDrawBuffersARB; specifies the destination color renderbuffers - * for N fragment program color outputs. + * Called by glDrawBuffersARB() and glNamedFramebufferDrawBuffers() to specify + * the destination color renderbuffers for N fragment program color outputs. * \sa _mesa_DrawBuffer * \param n number of outputs * \param buffers array [n] of renderbuffer names. Unlike glDrawBuffer, the * names cannot specify more than one buffer. For example, * GL_FRONT_AND_BACK is illegal. */ -void -_mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb, - GLsizei n, const GLenum *buffers, const char *caller) +static void +draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb, + GLsizei n, const GLenum *buffers, const char *caller) { GLuint output; GLbitfield usedBufferMask, supportedMask; @@ -502,7 +505,7 @@ void GLAPIENTRY _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) { GET_CURRENT_CONTEXT(ctx); - _mesa_draw_buffers(ctx, ctx->DrawBuffer, n, buffers, "glDrawBuffers"); + draw_buffers(ctx, ctx->DrawBuffer, n, buffers, "glDrawBuffers"); } @@ -522,7 +525,7 @@ _mesa_NamedFramebufferDrawBuffers(GLuint framebuffer, GLsizei n, else fb = ctx->WinSysDrawBuffer; - _mesa_draw_buffers(ctx, fb, n, bufs, "glNamedFramebufferDrawBuffers"); + draw_buffers(ctx, fb, n, bufs, "glNamedFramebufferDrawBuffers"); } @@ -545,8 +548,8 @@ updated_drawbuffers(struct gl_context *ctx, struct gl_framebuffer *fb) /** - * Helper function to set the GL_DRAW_BUFFER state in the context and - * current FBO. Called via glDrawBuffer(), glDrawBuffersARB() + * Helper function to set the GL_DRAW_BUFFER state for the given context and + * FBO. Called via glDrawBuffer(), glDrawBuffersARB() * * All error checking will have been done prior to calling this function * so nothing should go wrong at this point. @@ -662,14 +665,17 @@ _mesa_update_draw_buffers(struct gl_context *ctx) /** * Like \sa _mesa_drawbuffers(), this is a helper function for setting - * GL_READ_BUFFER state in the context and current FBO. + * GL_READ_BUFFER state for the given context and FBO. + * Note that all error checking should have been done before calling + * this function. * \param ctx the rendering context + * \param fb the framebuffer object to update * \param buffer GL_FRONT, GL_BACK, GL_COLOR_ATTACHMENT0, etc. * \param bufferIndex the numerical index corresponding to 'buffer' */ void _mesa_readbuffer(struct gl_context *ctx, struct gl_framebuffer *fb, - GLenum buffer, GLint bufferIndex) + GLenum buffer, gl_buffer_index bufferIndex) { if ((fb == ctx->ReadBuffer) && _mesa_is_winsys_fbo(fb)) { /* Only update the per-context READ_BUFFER state if we're bound to @@ -687,15 +693,16 @@ _mesa_readbuffer(struct gl_context *ctx, struct gl_framebuffer *fb, /** - * Called by glReadBuffer to set the source renderbuffer for reading pixels. + * Called by glReadBuffer and glNamedFramebufferReadBuffer to set the source + * renderbuffer for reading pixels. * \param mode color buffer such as GL_FRONT, GL_BACK, etc. */ -void -_mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, - GLenum buffer, const char *caller) +static void +read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, + GLenum buffer, const char *caller) { GLbitfield supportedMask; - GLint srcBuffer; + gl_buffer_index srcBuffer; FLUSH_VERTICES(ctx, 0); @@ -740,7 +747,7 @@ void GLAPIENTRY _mesa_ReadBuffer(GLenum buffer) { GET_CURRENT_CONTEXT(ctx); - _mesa_read_buffer(ctx, ctx->ReadBuffer, buffer, "glReadBuffer"); + read_buffer(ctx, ctx->ReadBuffer, buffer, "glReadBuffer"); } @@ -759,5 +766,5 @@ _mesa_NamedFramebufferReadBuffer(GLuint framebuffer, GLenum src) else fb = ctx->WinSysReadBuffer; - _mesa_read_buffer(ctx, fb, src, "glNamedFramebufferReadBuffer"); + read_buffer(ctx, fb, src, "glNamedFramebufferReadBuffer"); } diff --git a/src/mesa/main/buffers.h b/src/mesa/main/buffers.h index 5aa79fda54b..9df08154688 100644 --- a/src/mesa/main/buffers.h +++ b/src/mesa/main/buffers.h @@ -34,13 +34,11 @@ #include "glheader.h" +#include "mtypes.h" struct gl_context; struct gl_framebuffer; -extern void -_mesa_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, - GLenum buffer, const char *caller); extern void GLAPIENTRY _mesa_DrawBuffer( GLenum mode ); @@ -48,10 +46,6 @@ _mesa_DrawBuffer( GLenum mode ); extern void GLAPIENTRY _mesa_NamedFramebufferDrawBuffer(GLuint framebuffer, GLenum buf); -extern void -_mesa_draw_buffers(struct gl_context *ctx, struct gl_framebuffer *fb, - GLsizei n, const GLenum *buffers, const char *caller); - extern void GLAPIENTRY _mesa_DrawBuffers(GLsizei n, const GLenum *buffers); @@ -66,16 +60,12 @@ _mesa_drawbuffers(struct gl_context *ctx, struct gl_framebuffer *fb, extern void _mesa_readbuffer(struct gl_context *ctx, struct gl_framebuffer *fb, - GLenum buffer, GLint bufferIndex); + GLenum buffer, gl_buffer_index bufferIndex); extern void _mesa_update_draw_buffers(struct gl_context *ctx); -extern void -_mesa_read_buffer(struct gl_context *ctx, struct gl_framebuffer *fb, - GLenum buffer, const char *caller); - extern void GLAPIENTRY _mesa_ReadBuffer( GLenum mode ); diff --git a/src/mesa/main/clear.c b/src/mesa/main/clear.c index 3bfcc5c0e39..92f69ab9b59 100644 --- a/src/mesa/main/clear.c +++ b/src/mesa/main/clear.c @@ -325,18 +325,6 @@ _mesa_ClearBufferiv(GLenum buffer, GLint drawbuffer, const GLint *value) _mesa_update_state( ctx ); } - /* Page 498 of the PDF, section '17.4.3.1 Clearing Individual Buffers' - * of the OpenGL 4.5 spec states: - * - * "An INVALID_ENUM error is generated by ClearBufferiv and - * ClearNamedFramebufferiv if buffer is not COLOR or STENCIL." - */ - if (buffer == GL_DEPTH || buffer == GL_DEPTH_STENCIL) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glClearBufferiv(buffer=GL_DEPTH || GL_DEPTH_STENCIL)"); - return; - } - switch (buffer) { case GL_STENCIL: /* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says: @@ -386,26 +374,13 @@ _mesa_ClearBufferiv(GLenum buffer, GLint drawbuffer, const GLint *value) } } break; - case GL_DEPTH: - /* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says: - * - * "The result of ClearBuffer is undefined if no conversion between - * the type of the specified value and the type of the buffer being - * cleared is defined (for example, if ClearBufferiv is called for a - * fixed- or floating-point buffer, or if ClearBufferfv is called - * for a signed or unsigned integer buffer). This is not an error." + default: + /* Page 498 of the PDF, section '17.4.3.1 Clearing Individual Buffers' + * of the OpenGL 4.5 spec states: * - * In this case we take "undefined" and "not an error" to mean "ignore." - * Note that we still need to generate an error for the invalid - * drawbuffer case (see the GL_STENCIL case above). + * "An INVALID_ENUM error is generated by ClearBufferiv and + * ClearNamedFramebufferiv if buffer is not COLOR or STENCIL." */ - if (drawbuffer != 0) { - _mesa_error(ctx, GL_INVALID_VALUE, "glClearBufferiv(drawbuffer=%d)", - drawbuffer); - return; - } - return; - default: _mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferiv(buffer=%s)", _mesa_enum_to_string(buffer)); return; @@ -470,32 +445,13 @@ _mesa_ClearBufferuiv(GLenum buffer, GLint drawbuffer, const GLuint *value) } } break; - case GL_DEPTH: - case GL_STENCIL: - /* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says: - * - * "The result of ClearBuffer is undefined if no conversion between - * the type of the specified value and the type of the buffer being - * cleared is defined (for example, if ClearBufferiv is called for a - * fixed- or floating-point buffer, or if ClearBufferfv is called - * for a signed or unsigned integer buffer). This is not an error." - * - * In this case we take "undefined" and "not an error" to mean "ignore." - * Even though we could do something sensible for GL_STENCIL, page 263 - * (page 279 of the PDF) says: - * - * "Only ClearBufferiv should be used to clear stencil buffers." + default: + /* Page 498 of the PDF, section '17.4.3.1 Clearing Individual Buffers' + * of the OpenGL 4.5 spec states: * - * Note that we still need to generate an error for the invalid - * drawbuffer case (see the GL_STENCIL case in _mesa_ClearBufferiv). + * "An INVALID_ENUM error is generated by ClearBufferuiv and + * ClearNamedFramebufferuiv if buffer is not COLOR." */ - if (drawbuffer != 0) { - _mesa_error(ctx, GL_INVALID_VALUE, "glClearBufferuiv(drawbuffer=%d)", - drawbuffer); - return; - } - return; - default: _mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferuiv(buffer=%s)", _mesa_enum_to_string(buffer)); return; @@ -587,26 +543,13 @@ _mesa_ClearBufferfv(GLenum buffer, GLint drawbuffer, const GLfloat *value) } } break; - case GL_STENCIL: - /* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says: - * - * "The result of ClearBuffer is undefined if no conversion between - * the type of the specified value and the type of the buffer being - * cleared is defined (for example, if ClearBufferiv is called for a - * fixed- or floating-point buffer, or if ClearBufferfv is called - * for a signed or unsigned integer buffer). This is not an error." + default: + /* Page 498 of the PDF, section '17.4.3.1 Clearing Individual Buffers' + * of the OpenGL 4.5 spec states: * - * In this case we take "undefined" and "not an error" to mean "ignore." - * Note that we still need to generate an error for the invalid - * drawbuffer case (see the GL_DEPTH case above). + * "An INVALID_ENUM error is generated by ClearBufferfv and + * ClearNamedFramebufferfv if buffer is not COLOR or DEPTH." */ - if (drawbuffer != 0) { - _mesa_error(ctx, GL_INVALID_VALUE, "glClearBufferfv(drawbuffer=%d)", - drawbuffer); - return; - } - return; - default: _mesa_error(ctx, GL_INVALID_ENUM, "glClearBufferfv(buffer=%s)", _mesa_enum_to_string(buffer)); return; diff --git a/src/mesa/main/compute.c b/src/mesa/main/compute.c index 53e7a500f61..b71430f2b12 100644 --- a/src/mesa/main/compute.c +++ b/src/mesa/main/compute.c @@ -41,6 +41,9 @@ _mesa_DispatchCompute(GLuint num_groups_x, if (!_mesa_validate_DispatchCompute(ctx, num_groups)) return; + if (num_groups_x == 0u || num_groups_y == 0u || num_groups_z == 0u) + return; + ctx->Driver.DispatchCompute(ctx, num_groups); } diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h index 2d53e2fa72f..820ae072da6 100644 --- a/src/mesa/main/config.h +++ b/src/mesa/main/config.h @@ -288,17 +288,6 @@ #define PERFQUERY_HAVE_GPA_EXTENDED_COUNTERS 0 /*@}*/ -/** For GL_ARB_compute_shader */ -/*@{*/ -#define MAX_COMPUTE_UNIFORM_BLOCKS 12 -#define MAX_COMPUTE_TEXTURE_IMAGE_UNITS 16 -#define MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS 8 -#define MAX_COMPUTE_ATOMIC_COUNTERS 8 -#define MAX_COMPUTE_SHARED_MEMORY_SIZE 32768 -#define MAX_COMPUTE_UNIFORM_COMPONENTS 512 -#define MAX_COMPUTE_IMAGE_UNIFORMS 8 -/*@}*/ - /** For GL_ARB_pipeline_statistics_query */ #define MAX_PIPELINE_STATISTICS 11 diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 9388a1ca51d..26eee28db4e 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -1600,9 +1600,6 @@ _mesa_check_init_viewport(struct gl_context *ctx, GLuint width, GLuint height) static void handle_first_current(struct gl_context *ctx) { - GLenum buffer; - GLint bufferIndex; - if (ctx->Version == 0) { /* probably in the process of tearing down the context */ return; @@ -1617,6 +1614,8 @@ handle_first_current(struct gl_context *ctx) * For GLES it is always GL_BACK which has a magic interpretation */ if (!ctx->HasConfig && _mesa_is_desktop_gl(ctx)) { if (ctx->DrawBuffer != _mesa_get_incomplete_framebuffer()) { + GLenum buffer; + if (ctx->DrawBuffer->Visual.doubleBufferMode) buffer = GL_BACK; else @@ -1627,6 +1626,9 @@ handle_first_current(struct gl_context *ctx) } if (ctx->ReadBuffer != _mesa_get_incomplete_framebuffer()) { + gl_buffer_index bufferIndex; + GLenum buffer; + if (ctx->ReadBuffer->Visual.doubleBufferMode) { buffer = GL_BACK; bufferIndex = BUFFER_BACK_LEFT; diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 19ef3042548..3f5aa5db051 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -35,6 +35,7 @@ #include "glheader.h" +struct gl_bitmap_atlas; struct gl_buffer_object; struct gl_context; struct gl_display_list; @@ -154,6 +155,14 @@ struct dd_function_table { GLint x, GLint y, GLsizei width, GLsizei height, const struct gl_pixelstore_attrib *unpack, const GLubyte *bitmap ); + + /** + * Called by display list code for optimized glCallLists/glBitmap rendering + * The driver must support texture rectangles of width 1024 or more. + */ + void (*DrawAtlasBitmaps)(struct gl_context *ctx, + const struct gl_bitmap_atlas *atlas, + GLuint count, const GLubyte *ids); /*@}*/ diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index 0e25efbae72..afd2d83cb59 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -72,6 +72,9 @@ #include "vbo/vbo.h" +#define USE_BITMAP_ATLAS 1 + + /** * Other parts of Mesa (such as the VBO module) can plug into the display @@ -606,6 +609,261 @@ void mesa_print_display_list(GLuint list); /** + * Does the given display list only contain a single glBitmap call? + */ +static bool +is_bitmap_list(const struct gl_display_list *dlist) +{ + const Node *n = dlist->Head; + if (n[0].opcode == OPCODE_BITMAP) { + n += InstSize[OPCODE_BITMAP]; + if (n[0].opcode == OPCODE_END_OF_LIST) + return true; + } + return false; +} + + +/** + * Is the given display list an empty list? + */ +static bool +is_empty_list(const struct gl_display_list *dlist) +{ + const Node *n = dlist->Head; + return n[0].opcode == OPCODE_END_OF_LIST; +} + + +/** + * Delete/free a gl_bitmap_atlas. Called during context tear-down. + */ +void +_mesa_delete_bitmap_atlas(struct gl_context *ctx, struct gl_bitmap_atlas *atlas) +{ + if (atlas->texObj) { + ctx->Driver.DeleteTexture(ctx, atlas->texObj); + } + free(atlas->glyphs); +} + + +/** + * Lookup a gl_bitmap_atlas by listBase ID. + */ +static struct gl_bitmap_atlas * +lookup_bitmap_atlas(struct gl_context *ctx, GLuint listBase) +{ + struct gl_bitmap_atlas *atlas; + + assert(listBase > 0); + atlas = _mesa_HashLookup(ctx->Shared->BitmapAtlas, listBase); + return atlas; +} + + +/** + * Create new bitmap atlas and insert into hash table. + */ +static struct gl_bitmap_atlas * +alloc_bitmap_atlas(struct gl_context *ctx, GLuint listBase) +{ + struct gl_bitmap_atlas *atlas; + + assert(listBase > 0); + assert(_mesa_HashLookup(ctx->Shared->BitmapAtlas, listBase) == NULL); + + atlas = calloc(1, sizeof(*atlas)); + if (atlas) { + _mesa_HashInsert(ctx->Shared->BitmapAtlas, listBase, atlas); + } + + return atlas; +} + + +/** + * Try to build a bitmap atlas. This involves examining a sequence of + * display lists which contain glBitmap commands and putting the bitmap + * images into a texture map (the atlas). + * If we succeed, gl_bitmap_atlas::complete will be set to true. + * If we fail, gl_bitmap_atlas::incomplete will be set to true. + */ +static void +build_bitmap_atlas(struct gl_context *ctx, struct gl_bitmap_atlas *atlas, + GLuint listBase) +{ + unsigned i, row_height = 0, xpos = 0, ypos = 0; + GLubyte *map; + GLint map_stride; + + assert(atlas); + assert(!atlas->complete); + assert(atlas->numBitmaps > 0); + + /* We use a rectangle texture (non-normalized coords) for the atlas */ + assert(ctx->Extensions.NV_texture_rectangle); + assert(ctx->Const.MaxTextureRectSize >= 1024); + + atlas->texWidth = 1024; + atlas->texHeight = 0; /* determined below */ + + atlas->glyphs = malloc(atlas->numBitmaps * sizeof(atlas->glyphs[0])); + if (!atlas->glyphs) { + /* give up */ + atlas->incomplete = true; + return; + } + + /* Loop over the display lists. They should all contain a single glBitmap + * call. If not, bail out. Also, compute the position and sizes of each + * bitmap in the atlas to determine the texture atlas size. + */ + for (i = 0; i < atlas->numBitmaps; i++) { + const struct gl_display_list *list = _mesa_lookup_list(ctx, listBase + i); + const Node *n; + struct gl_bitmap_glyph *g = &atlas->glyphs[i]; + unsigned bitmap_width, bitmap_height; + float bitmap_xmove, bitmap_ymove, bitmap_xorig, bitmap_yorig; + + if (!list || is_empty_list(list)) { + /* stop here */ + atlas->numBitmaps = i; + break; + } + + if (!is_bitmap_list(list)) { + /* This list does not contain exactly one glBitmap command. Give up. */ + atlas->incomplete = true; + return; + } + + /* get bitmap info from the display list command */ + n = list->Head; + assert(n[0].opcode == OPCODE_BITMAP); + bitmap_width = n[1].i; + bitmap_height = n[2].i; + bitmap_xorig = n[3].f; + bitmap_yorig = n[4].f; + bitmap_xmove = n[5].f; + bitmap_ymove = n[6].f; + + if (xpos + bitmap_width > atlas->texWidth) { + /* advance to the next row of the texture */ + xpos = 0; + ypos += row_height; + row_height = 0; + } + + /* save the bitmap's position in the atlas */ + g->x = xpos; + g->y = ypos; + g->w = bitmap_width; + g->h = bitmap_height; + g->xorig = bitmap_xorig; + g->yorig = bitmap_yorig; + g->xmove = bitmap_xmove; + g->ymove = bitmap_ymove; + + xpos += bitmap_width; + + /* keep track of tallest bitmap in the row */ + row_height = MAX2(row_height, bitmap_height); + } + + /* Now we know the texture height */ + atlas->texHeight = ypos + row_height; + + if (atlas->texHeight == 0) { + /* no glyphs found, give up */ + goto fail; + } + else if (atlas->texHeight > ctx->Const.MaxTextureRectSize) { + /* too large, give up */ + goto fail; + } + + /* Create atlas texture (texture ID is irrelevant) */ + atlas->texObj = ctx->Driver.NewTextureObject(ctx, 999, GL_TEXTURE_RECTANGLE); + if (!atlas->texObj) { + goto out_of_memory; + } + + atlas->texObj->Sampler.MinFilter = GL_NEAREST; + atlas->texObj->Sampler.MagFilter = GL_NEAREST; + atlas->texObj->MaxLevel = 0; + atlas->texObj->Immutable = GL_TRUE; + + atlas->texImage = _mesa_get_tex_image(ctx, atlas->texObj, + GL_TEXTURE_RECTANGLE, 0); + if (!atlas->texImage) { + goto out_of_memory; + } + + _mesa_init_teximage_fields(ctx, atlas->texImage, + atlas->texWidth, atlas->texHeight, 1, 0, + GL_ALPHA, MESA_FORMAT_A_UNORM8); + + /* alloc image storage */ + if (!ctx->Driver.AllocTextureImageBuffer(ctx, atlas->texImage)) { + goto out_of_memory; + } + + /* map teximage, load with bitmap glyphs */ + ctx->Driver.MapTextureImage(ctx, atlas->texImage, 0, + 0, 0, atlas->texWidth, atlas->texHeight, + GL_MAP_WRITE_BIT, &map, &map_stride); + if (!map) { + goto out_of_memory; + } + + /* Background/clear pixels are 0xff, foreground/set pixels are 0x0 */ + memset(map, 0xff, map_stride * atlas->texHeight); + + for (i = 0; i < atlas->numBitmaps; i++) { + const struct gl_display_list *list = _mesa_lookup_list(ctx, listBase + i); + const Node *n = list->Head; + + assert(n[0].opcode == OPCODE_BITMAP || + n[0].opcode == OPCODE_END_OF_LIST); + + if (n[0].opcode == OPCODE_BITMAP) { + unsigned bitmap_width = n[1].i; + unsigned bitmap_height = n[2].i; + unsigned xpos = atlas->glyphs[i].x; + unsigned ypos = atlas->glyphs[i].y; + const void *bitmap_image = get_pointer(&n[7]); + + assert(atlas->glyphs[i].w == bitmap_width); + assert(atlas->glyphs[i].h == bitmap_height); + + /* put the bitmap image into the texture image */ + _mesa_expand_bitmap(bitmap_width, bitmap_height, + &ctx->DefaultPacking, bitmap_image, + map + map_stride * ypos + xpos, /* dest addr */ + map_stride, 0x0); + } + } + + ctx->Driver.UnmapTextureImage(ctx, atlas->texImage, 0); + + atlas->complete = true; + + return; + +out_of_memory: + _mesa_error(ctx, GL_OUT_OF_MEMORY, "Display list bitmap atlas"); +fail: + if (atlas->texObj) { + ctx->Driver.DeleteTexture(ctx, atlas->texObj); + } + free(atlas->glyphs); + atlas->glyphs = NULL; + atlas->incomplete = true; +} + + +/** * Allocate a gl_display_list object with an initial block of storage. * \param count how many display list nodes/tokens to allocate */ @@ -856,6 +1114,30 @@ _mesa_delete_list(struct gl_context *ctx, struct gl_display_list *dlist) /** + * Called by _mesa_HashWalk() to check if a display list which is being + * deleted belongs to a bitmap texture atlas. + */ +static void +check_atlas_for_deleted_list(GLuint atlas_id, void *data, void *userData) +{ + struct gl_bitmap_atlas *atlas = (struct gl_bitmap_atlas *) data; + GLuint list_id = *((GLuint *) userData); /* the list being deleted */ + + /* See if the list_id falls in the range contained in this texture atlas */ + if (atlas->complete && + list_id >= atlas_id && + list_id < atlas_id + atlas->numBitmaps) { + /* Mark the atlas as incomplete so it doesn't get used. But don't + * delete it yet since we don't want to try to recreate it in the next + * glCallLists. + */ + atlas->complete = false; + atlas->incomplete = true; + } +} + + +/** * Destroy a display list and remove from hash table. * \param list - display list number */ @@ -871,6 +1153,16 @@ destroy_list(struct gl_context *ctx, GLuint list) if (!dlist) return; + if (is_bitmap_list(dlist)) { + /* If we're destroying a simple glBitmap display list, there's a + * chance that we're destroying a bitmap image that's in a texture + * atlas. Examine all atlases to see if that's the case. There's + * usually few (if any) atlases so this isn't expensive. + */ + _mesa_HashWalk(ctx->Shared->BitmapAtlas, + check_atlas_for_deleted_list, &list); + } + _mesa_delete_list(ctx, dlist); _mesa_HashRemove(ctx->Shared->DisplayList, list); } @@ -8895,6 +9187,18 @@ _mesa_DeleteLists(GLuint list, GLsizei range) _mesa_error(ctx, GL_INVALID_VALUE, "glDeleteLists"); return; } + + if (range > 1) { + /* We may be deleting a set of bitmap lists. See if there's a + * bitmap atlas to free. + */ + struct gl_bitmap_atlas *atlas = lookup_bitmap_atlas(ctx, list); + if (atlas) { + _mesa_delete_bitmap_atlas(ctx, atlas); + _mesa_HashRemove(ctx->Shared->BitmapAtlas, list); + } + } + for (i = list; i < list + range; i++) { destroy_list(ctx, i); } @@ -8936,6 +9240,24 @@ _mesa_GenLists(GLsizei range) } } + if (USE_BITMAP_ATLAS && + range > 16 && + ctx->Driver.DrawAtlasBitmaps) { + /* "range > 16" is a rough heuristic to guess when glGenLists might be + * used to allocate display lists for glXUseXFont or wglUseFontBitmaps. + * Create the empty atlas now. + */ + struct gl_bitmap_atlas *atlas = lookup_bitmap_atlas(ctx, base); + if (!atlas) { + atlas = alloc_bitmap_atlas(ctx, base); + } + if (atlas) { + /* Atlas _should_ be new/empty now, but clobbering is OK */ + assert(atlas->numBitmaps == 0); + atlas->numBitmaps = range; + } + } + mtx_unlock(&ctx->Shared->Mutex); return base; @@ -9085,6 +9407,65 @@ _mesa_CallList(GLuint list) /** + * Try to execute a glCallLists() command where the display lists contain + * glBitmap commands with a texture atlas. + * \return true for success, false otherwise + */ +static bool +render_bitmap_atlas(struct gl_context *ctx, GLsizei n, GLenum type, + const void *lists) +{ + struct gl_bitmap_atlas *atlas; + int i; + + if (!USE_BITMAP_ATLAS || + !ctx->Current.RasterPosValid || + ctx->List.ListBase == 0 || + type != GL_UNSIGNED_BYTE || + !ctx->Driver.DrawAtlasBitmaps) { + /* unsupported */ + return false; + } + + atlas = lookup_bitmap_atlas(ctx, ctx->List.ListBase); + + if (!atlas) { + /* Even if glGenLists wasn't called, we can still try to create + * the atlas now. + */ + atlas = alloc_bitmap_atlas(ctx, ctx->List.ListBase); + } + + if (atlas && !atlas->complete && !atlas->incomplete) { + /* Try to build the bitmap atlas now. + * If the atlas was created in glGenLists, we'll have recorded the + * number of lists (bitmaps). Otherwise, take a guess at 256. + */ + if (atlas->numBitmaps == 0) + atlas->numBitmaps = 256; + build_bitmap_atlas(ctx, atlas, ctx->List.ListBase); + } + + if (!atlas || !atlas->complete) { + return false; + } + + /* check that all display list IDs are in the atlas */ + for (i = 0; i < n; i++) { + const GLubyte *ids = (const GLubyte *) lists; + + if (ids[i] >= atlas->numBitmaps) { + return false; + } + } + + ctx->Driver.DrawAtlasBitmaps(ctx, atlas, n, (const GLubyte *) lists); + + return true; +} + + +/** * Execute glCallLists: call multiple display lists. */ void GLAPIENTRY @@ -9123,6 +9504,10 @@ _mesa_CallLists(GLsizei n, GLenum type, const GLvoid * lists) return; } + if (render_bitmap_atlas(ctx, n, type, lists)) { + return; + } + /* Save the CompileFlag status, turn it off, execute display list, * and restore the CompileFlag. */ diff --git a/src/mesa/main/dlist.h b/src/mesa/main/dlist.h index 7a23208ba5a..22b696f50c1 100644 --- a/src/mesa/main/dlist.h +++ b/src/mesa/main/dlist.h @@ -36,6 +36,44 @@ #include "main/mtypes.h" +/** + * Describes the location and size of a glBitmap image in a texture atlas. + */ +struct gl_bitmap_glyph +{ + unsigned short x, y, w, h; /**< position and size in the texture */ + float xorig, yorig; /**< bitmap origin */ + float xmove, ymove; /**< rasterpos move */ +}; + + +/** + * Describes a set of glBitmap display lists which live in a texture atlas. + * The idea is when we see a code sequence of glListBase(b), glCallLists(n) + * we're probably drawing bitmap font glyphs. We try to put all the bitmap + * glyphs into one texture map then render the glCallLists as a textured + * quadstrip. + */ +struct gl_bitmap_atlas +{ + bool complete; /**< Is the atlas ready to use? */ + bool incomplete; /**< Did we fail to construct this atlas? */ + + unsigned numBitmaps; + unsigned texWidth, texHeight; + struct gl_texture_object *texObj; + struct gl_texture_image *texImage; + + unsigned glyphHeight; + + struct gl_bitmap_glyph *glyphs; +}; + +void +_mesa_delete_bitmap_atlas(struct gl_context *ctx, + struct gl_bitmap_atlas *atlas); + + GLboolean GLAPIENTRY _mesa_IsList(GLuint list); diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c index 3fd3c2747ea..3985457f21a 100644 --- a/src/mesa/main/enable.c +++ b/src/mesa/main/enable.c @@ -749,7 +749,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state) return; /* GL_ARB_texture_cube_map */ - case GL_TEXTURE_CUBE_MAP_ARB: + case GL_TEXTURE_CUBE_MAP: if (ctx->API != API_OPENGL_COMPAT && ctx->API != API_OPENGLES) goto invalid_enum_error; CHECK_EXTENSION(ARB_texture_cube_map, cap); @@ -1450,7 +1450,7 @@ _mesa_IsEnabled( GLenum cap ) return ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled; /* GL_ARB_texture_cube_map */ - case GL_TEXTURE_CUBE_MAP_ARB: + case GL_TEXTURE_CUBE_MAP: CHECK_EXTENSION(ARB_texture_cube_map); return is_texture_enabled(ctx, TEXTURE_CUBE_BIT); diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index d1e3a99fdc0..68f36178f32 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -224,6 +224,7 @@ EXT(EXT_subtexture , dummy_true EXT(EXT_texture , dummy_true , GLL, x , x , x , 1996) EXT(EXT_texture3D , dummy_true , GLL, x , x , x , 1996) EXT(EXT_texture_array , EXT_texture_array , GLL, GLC, x , x , 2006) +EXT(EXT_texture_border_clamp , ARB_texture_border_clamp , x , x , x , ES2, 2014) EXT(EXT_texture_compression_dxt1 , ANGLE_texture_compression_dxt , GLL, GLC, ES1, ES2, 2004) EXT(EXT_texture_compression_latc , EXT_texture_compression_latc , GLL, x , x , x , 2006) EXT(EXT_texture_compression_rgtc , ARB_texture_compression_rgtc , GLL, GLC, x , x , 2004) @@ -325,6 +326,7 @@ EXT(OES_point_sprite , ARB_point_sprite EXT(OES_query_matrix , dummy_true , x , x , ES1, x , 2003) EXT(OES_read_format , dummy_true , GLL, GLC, ES1, x , 2003) EXT(OES_rgb8_rgba8 , dummy_true , x , x , ES1, ES2, 2005) +EXT(OES_shader_image_atomic , ARB_shader_image_load_store , x , x , x , 31, 2015) EXT(OES_single_precision , dummy_true , x , x , ES1, x , 2003) EXT(OES_standard_derivatives , OES_standard_derivatives , x , x , x , ES2, 2005) EXT(OES_stencil1 , dummy_false , x , x , x , x , 2005) @@ -333,6 +335,7 @@ EXT(OES_stencil8 , dummy_true EXT(OES_stencil_wrap , dummy_true , x , x , ES1, x , 2002) EXT(OES_surfaceless_context , dummy_true , x , x , ES1, ES2, 2012) EXT(OES_texture_3D , dummy_true , x , x , x , ES2, 2005) +EXT(OES_texture_border_clamp , ARB_texture_border_clamp , x , x , x , ES2, 2014) EXT(OES_texture_cube_map , ARB_texture_cube_map , x , x , ES1, x , 2007) EXT(OES_texture_env_crossbar , ARB_texture_env_crossbar , x , x , ES1, x , 2005) EXT(OES_texture_float , OES_texture_float , x , x , x , ES2, 2005) @@ -341,6 +344,7 @@ EXT(OES_texture_half_float , OES_texture_half_float EXT(OES_texture_half_float_linear , OES_texture_half_float_linear , x , x , x , ES2, 2005) EXT(OES_texture_mirrored_repeat , dummy_true , x , x , ES1, x , 2005) EXT(OES_texture_npot , ARB_texture_non_power_of_two , x , x , ES1, ES2, 2005) +EXT(OES_texture_stencil8 , ARB_texture_stencil8 , x , x , x , 30, 2014) EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample , x , x , ES1, 31, 2014) EXT(OES_vertex_array_object , dummy_true , x , x , ES1, ES2, 2010) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 1b9b692f001..1f10050c891 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -811,7 +811,7 @@ test_attachment_completeness(const struct gl_context *ctx, GLenum format, break; } - baseFormat = _mesa_get_format_base_format(texImage->TexFormat); + baseFormat = texImage->_BaseFormat; if (format == GL_COLOR) { if (!_mesa_is_legal_color_format(ctx, baseFormat)) { @@ -868,8 +868,7 @@ test_attachment_completeness(const struct gl_context *ctx, GLenum format, } } else if (att->Type == GL_RENDERBUFFER_EXT) { - const GLenum baseFormat = - _mesa_get_format_base_format(att->Renderbuffer->Format); + const GLenum baseFormat = att->Renderbuffer->_BaseFormat; assert(att->Renderbuffer); if (!att->Renderbuffer->InternalFormat || diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c index 5fdabd5b97f..d16d69c3795 100644 --- a/src/mesa/main/format_utils.c +++ b/src/mesa/main/format_utils.c @@ -179,6 +179,63 @@ _mesa_compute_rgba2base2rgba_component_mapping(GLenum baseFormat, uint8_t *map) } } + +/** + * Special case conversion function to swap r/b channels from the source + * image to the dest image. + */ +static void +convert_ubyte_rgba_to_bgra(size_t width, size_t height, + const uint8_t *src, size_t src_stride, + uint8_t *dst, size_t dst_stride) +{ + int row; + + if (sizeof(void *) == 8 && + src_stride % 8 == 0 && + dst_stride % 8 == 0 && + (GLsizeiptr) src % 8 == 0 && + (GLsizeiptr) dst % 8 == 0) { + /* use 64-bit word to swizzle two 32-bit pixels. We need 8-byte + * alignment for src/dst addresses and strides. + */ + for (row = 0; row < height; row++) { + const GLuint64 *s = (const GLuint64 *) src; + GLuint64 *d = (GLuint64 *) dst; + int i; + for (i = 0; i < width/2; i++) { + d[i] = ( (s[i] & 0xff00ff00ff00ff00) | + ((s[i] & 0xff000000ff) << 16) | + ((s[i] & 0xff000000ff0000) >> 16)); + } + if (width & 1) { + /* handle the case of odd widths */ + const GLuint s = ((const GLuint *) src)[width - 1]; + GLuint *d = (GLuint *) dst + width - 1; + *d = ( (s & 0xff00ff00) | + ((s & 0xff) << 16) | + ((s & 0xff0000) >> 16)); + } + src += src_stride; + dst += dst_stride; + } + } else { + for (row = 0; row < height; row++) { + const GLuint *s = (const GLuint *) src; + GLuint *d = (GLuint *) dst; + int i; + for (i = 0; i < width; i++) { + d[i] = ( (s[i] & 0xff00ff00) | + ((s[i] & 0xff) << 16) | + ((s[i] & 0xff0000) >> 16)); + } + src += src_stride; + dst += dst_stride; + } + } +} + + /** * This can be used to convert between most color formats. * @@ -299,11 +356,18 @@ _mesa_format_convert(void *void_dst, uint32_t dst_format, size_t dst_stride, return; } else if (src_array_format == RGBA8_UBYTE) { assert(!_mesa_is_format_integer_color(dst_format)); - for (row = 0; row < height; ++row) { - _mesa_pack_ubyte_rgba_row(dst_format, width, - (const uint8_t (*)[4])src, dst); - src += src_stride; - dst += dst_stride; + + if (dst_format == MESA_FORMAT_B8G8R8A8_UNORM) { + convert_ubyte_rgba_to_bgra(width, height, src, src_stride, + dst, dst_stride); + } + else { + for (row = 0; row < height; ++row) { + _mesa_pack_ubyte_rgba_row(dst_format, width, + (const uint8_t (*)[4])src, dst); + src += src_stride; + dst += dst_stride; + } } return; } else if (src_array_format == RGBA32_UINT && diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c index 85f7b6b5664..816f12bf9e2 100644 --- a/src/mesa/main/formatquery.c +++ b/src/mesa/main/formatquery.c @@ -131,11 +131,14 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname, internalformat, buffer); break; case GL_NUM_SAMPLE_COUNTS: { - if (_mesa_is_gles3(ctx) && _mesa_is_enum_format_integer(internalformat)) { + if ((ctx->API == API_OPENGLES2 && ctx->Version == 30) && + _mesa_is_enum_format_integer(internalformat)) { /* From GL ES 3.0 specification, section 6.1.15 page 236: "Since * multisampling is not supported for signed and unsigned integer * internal formats, the value of NUM_SAMPLE_COUNTS will be zero * for such formats. + * + * Such a restriction no longer exists in GL ES 3.1. */ buffer[0] = 0; count = 1; diff --git a/src/mesa/main/genmipmap.c b/src/mesa/main/genmipmap.c index 4ec8385ec2f..6c2d31dbcf3 100644 --- a/src/mesa/main/genmipmap.c +++ b/src/mesa/main/genmipmap.c @@ -123,7 +123,7 @@ _mesa_generate_texture_mipmap(struct gl_context *ctx, GLuint face; for (face = 0; face < 6; face++) { ctx->Driver.GenerateMipmap(ctx, - GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB + face, texObj); + GL_TEXTURE_CUBE_MAP_POSITIVE_X + face, texObj); } } else { diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index 9005dc5897d..f40c5705813 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -642,7 +642,7 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu case GL_TEXTURE_1D: case GL_TEXTURE_2D: case GL_TEXTURE_3D: - case GL_TEXTURE_CUBE_MAP_ARB: + case GL_TEXTURE_CUBE_MAP: case GL_TEXTURE_RECTANGLE_NV: case GL_TEXTURE_EXTERNAL_OES: v->value_bool = _mesa_IsEnabled(d->pname); diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index 164095c103c..07d2d20df7a 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -455,13 +455,13 @@ descriptor=[ # GL_ARB_compute_shader / GLES 3.1 [ "MAX_COMPUTE_WORK_GROUP_INVOCATIONS", "CONTEXT_INT(Const.MaxComputeWorkGroupInvocations), extra_ARB_compute_shader_es31" ], - [ "MAX_COMPUTE_UNIFORM_BLOCKS", "CONST(MAX_COMPUTE_UNIFORM_BLOCKS), extra_ARB_compute_shader_es31" ], - [ "MAX_COMPUTE_TEXTURE_IMAGE_UNITS", "CONST(MAX_COMPUTE_TEXTURE_IMAGE_UNITS), extra_ARB_compute_shader_es31" ], - [ "MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS), extra_ARB_compute_shader_es31" ], - [ "MAX_COMPUTE_ATOMIC_COUNTERS", "CONST(MAX_COMPUTE_ATOMIC_COUNTERS), extra_ARB_compute_shader_es31" ], - [ "MAX_COMPUTE_SHARED_MEMORY_SIZE", "CONST(MAX_COMPUTE_SHARED_MEMORY_SIZE), extra_ARB_compute_shader_es31" ], - [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONST(MAX_COMPUTE_UNIFORM_COMPONENTS), extra_ARB_compute_shader_es31" ], - [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxUniformBlocks), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_TEXTURE_IMAGE_UNITS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_SHARED_MEMORY_SIZE", "CONTEXT_INT(Const.MaxComputeSharedMemorySize), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents), extra_ARB_compute_shader_es31" ], + [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms), extra_ARB_compute_shader_es31" ], [ "DISPATCH_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_compute_shader_es31" ], [ "MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxCombinedUniformComponents), extra_ARB_compute_shader_es31" ], diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c index f5284447b6e..987cd0db45c 100644 --- a/src/mesa/main/glformats.c +++ b/src/mesa/main/glformats.c @@ -3153,6 +3153,14 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx, } break; + case GL_STENCIL_INDEX: + if (!_mesa_has_OES_texture_stencil8(ctx) || + type != GL_UNSIGNED_BYTE || + internalFormat != GL_STENCIL_INDEX8) { + return GL_INVALID_OPERATION; + } + break; + case GL_ALPHA: case GL_LUMINANCE: case GL_LUMINANCE_ALPHA: diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c index 99f253cd373..4d6ab6f2b56 100644 --- a/src/mesa/main/image.c +++ b/src/mesa/main/image.c @@ -408,9 +408,7 @@ _mesa_expand_bitmap(GLsizei width, GLsizei height, const GLint srcStride = _mesa_image_row_stride(unpack, width, GL_COLOR_INDEX, GL_BITMAP); GLint row, col; - -#define SET_PIXEL(COL, ROW) \ - destBuffer[(ROW) * destStride + (COL)] = onValue; + GLubyte *dstRow = destBuffer; for (row = 0; row < height; row++) { const GLubyte *src = srcRow; @@ -421,7 +419,7 @@ _mesa_expand_bitmap(GLsizei width, GLsizei height, for (col = 0; col < width; col++) { if (*src & mask) { - SET_PIXEL(col, row); + dstRow[col] = onValue; } if (mask == 128U) { @@ -443,7 +441,7 @@ _mesa_expand_bitmap(GLsizei width, GLsizei height, for (col = 0; col < width; col++) { if (*src & mask) { - SET_PIXEL(col, row); + dstRow[col] = onValue; } if (mask == 1U) { @@ -461,9 +459,8 @@ _mesa_expand_bitmap(GLsizei width, GLsizei height, } srcRow += srcStride; + dstRow += destStride; } /* row */ - -#undef SET_PIXEL } diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index 230ebbc67f4..14cd58870f7 100644 --- a/src/mesa/main/imports.c +++ b/src/mesa/main/imports.c @@ -89,7 +89,7 @@ _mesa_align_malloc(size_t bytes, unsigned long alignment) if (err) return NULL; return mem; -#elif defined(_WIN32) && defined(_MSC_VER) +#elif defined(_WIN32) return _aligned_malloc(bytes, alignment); #else uintptr_t ptr, buf; @@ -131,7 +131,7 @@ _mesa_align_calloc(size_t bytes, unsigned long alignment) } return mem; -#elif defined(_WIN32) && defined(_MSC_VER) +#elif defined(_WIN32) void *mem; mem = _aligned_malloc(bytes, alignment); @@ -178,7 +178,7 @@ _mesa_align_free(void *ptr) { #if defined(HAVE_POSIX_MEMALIGN) free(ptr); -#elif defined(_WIN32) && defined(_MSC_VER) +#elif defined(_WIN32) _aligned_free(ptr); #else if (ptr) { @@ -196,7 +196,7 @@ void * _mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize, unsigned long alignment) { -#if defined(_WIN32) && defined(_MSC_VER) +#if defined(_WIN32) (void) oldSize; return _aligned_realloc(oldBuffer, newSize, alignment); #else diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h index ad7af5c1d8c..d96d666e15f 100644 --- a/src/mesa/main/imports.h +++ b/src/mesa/main/imports.h @@ -83,9 +83,6 @@ typedef union { GLfloat f; GLint i; GLuint u; } fi_type; #if defined(_MSC_VER) -#if _MSC_VER < 1800 /* Not req'd on VS2013 and above */ -#define strtoll(p, e, b) _strtoi64(p, e, b) -#endif /* _MSC_VER < 1800 */ #define strcasecmp(s1, s2) _stricmp(s1, s2) #endif /*@}*/ diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c index 50469956c6e..5a02780b960 100644 --- a/src/mesa/main/mipmap.c +++ b/src/mesa/main/mipmap.c @@ -1715,12 +1715,12 @@ _mesa_generate_mipmap_level(GLenum target, dstWidth, dstData[0]); break; case GL_TEXTURE_2D: - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: make_2d_mipmap(datatype, comps, border, srcWidth, srcHeight, srcData[0], srcRowStride, dstWidth, dstHeight, dstData[0], dstRowStride); @@ -1838,12 +1838,7 @@ _mesa_prepare_mipmap_level(struct gl_context *ctx, for (face = 0; face < numFaces; face++) { struct gl_texture_image *dstImage; - GLenum target; - - if (numFaces == 1) - target = texObj->Target; - else - target = GL_TEXTURE_CUBE_MAP_POSITIVE_X + face; + const GLenum target = _mesa_cube_face_target(texObj->Target, face); dstImage = _mesa_get_tex_image(ctx, texObj, target, level); if (!dstImage) { @@ -2024,7 +2019,7 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target, /* only two types of compressed textures at this time */ assert(texObj->Target == GL_TEXTURE_2D || texObj->Target == GL_TEXTURE_2D_ARRAY || - texObj->Target == GL_TEXTURE_CUBE_MAP_ARB || + texObj->Target == GL_TEXTURE_CUBE_MAP || texObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY); /* diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index a66b56c62bf..2ca9cbf808a 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -44,6 +44,7 @@ #include "math/m_matrix.h" /* GLmatrix */ #include "compiler/shader_enums.h" #include "main/formats.h" /* MESA_FORMAT_COUNT */ +#include "compiler/glsl/list.h" #ifdef __cplusplus @@ -1872,6 +1873,8 @@ typedef enum PROGRAM_UNDEFINED, /**< Invalid/TBD value */ PROGRAM_IMMEDIATE, /**< Immediate value, used by TGSI */ PROGRAM_BUFFER, /**< for shader buffers, compile-time only */ + PROGRAM_MEMORY, /**< for shared, global and local memory */ + PROGRAM_IMAGE, /**< for shader images, compile-time only */ PROGRAM_FILE_MAX } gl_register_file; @@ -2044,6 +2047,11 @@ struct gl_compute_program * Size specified using local_size_{x,y,z}. */ unsigned LocalSize[3]; + + /** + * Size of shared variables accessed by the compute shader. + */ + unsigned SharedSize; }; @@ -2769,6 +2777,13 @@ struct gl_shader_program struct gl_uniform_storage **UniformRemapTable; /** + * Sometimes there are empty slots left over in UniformRemapTable after we + * allocate slots to explicit locations. This list stores the blocks of + * continuous empty slots inside UniformRemapTable. + */ + struct exec_list EmptyUniformLocations; + + /** * Size of the gl_ClipDistance array that is output from the last pipeline * stage before the fragment shader. */ @@ -3044,6 +3059,7 @@ struct gl_shared_state mtx_t Mutex; /**< for thread safety */ GLint RefCount; /**< Reference count */ struct _mesa_HashTable *DisplayList; /**< Display lists hash table */ + struct _mesa_HashTable *BitmapAtlas; /**< For optimized glBitmap text */ struct _mesa_HashTable *TexObjects; /**< Texture objects hash table */ /** Default texture objects (shared by all texture units) */ @@ -3727,6 +3743,7 @@ struct gl_constants GLuint MaxComputeWorkGroupCount[3]; /* Array of x, y, z dimensions */ GLuint MaxComputeWorkGroupSize[3]; /* Array of x, y, z dimensions */ GLuint MaxComputeWorkGroupInvocations; + GLuint MaxComputeSharedMemorySize; /** GL_ARB_gpu_shader5 */ GLfloat MinFragmentInterpolationOffset; diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c index 0e4a0af0b0f..af17be2a3f4 100644 --- a/src/mesa/main/pipelineobj.c +++ b/src/mesa/main/pipelineobj.c @@ -964,8 +964,5 @@ _mesa_GetProgramPipelineInfoLog(GLuint pipeline, GLsizei bufSize, return; } - if (pipe->InfoLog) - _mesa_copy_string(infoLog, bufSize, length, pipe->InfoLog); - else - *length = 0; + _mesa_copy_string(infoLog, bufSize, length, pipe->InfoLog); } diff --git a/src/mesa/main/program_resource.c b/src/mesa/main/program_resource.c index 50659b0cd21..0d9f8aecf08 100644 --- a/src/mesa/main/program_resource.c +++ b/src/mesa/main/program_resource.c @@ -357,10 +357,6 @@ _mesa_GetProgramResourceiv(GLuint program, GLenum programInterface, return; } - /* No need to write any properties, user requested none. */ - if (bufSize == 0) - return; - _mesa_get_program_resourceiv(shProg, programInterface, index, propCount, props, bufSize, length, params); } diff --git a/src/mesa/main/readpix.c b/src/mesa/main/readpix.c index 470182ab23d..882d863c1c7 100644 --- a/src/mesa/main/readpix.c +++ b/src/mesa/main/readpix.c @@ -582,7 +582,7 @@ read_rgba_pixels( struct gl_context *ctx, void *luminance; uint32_t luminance_format; - luminance_stride = width * sizeof(GL_FLOAT); + luminance_stride = width * sizeof(GLfloat); if (format == GL_LUMINANCE_ALPHA) luminance_stride *= 2; luminance_bytes = height * luminance_stride; diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c index fe15508696e..ca366d967ab 100644 --- a/src/mesa/main/samplerobj.c +++ b/src/mesa/main/samplerobj.c @@ -1518,7 +1518,8 @@ _mesa_GetSamplerParameterIiv(GLuint sampler, GLenum pname, GLint *params) sampObj = _mesa_lookup_samplerobj(ctx, sampler); if (!sampObj) { - _mesa_error(ctx, GL_INVALID_VALUE, + _mesa_error(ctx, (_mesa_is_gles(ctx) ? + GL_INVALID_OPERATION : GL_INVALID_VALUE), "glGetSamplerParameterIiv(sampler %u)", sampler); return; @@ -1593,7 +1594,8 @@ _mesa_GetSamplerParameterIuiv(GLuint sampler, GLenum pname, GLuint *params) sampObj = _mesa_lookup_samplerobj(ctx, sampler); if (!sampObj) { - _mesa_error(ctx, GL_INVALID_VALUE, + _mesa_error(ctx, (_mesa_is_gles(ctx) ? + GL_INVALID_OPERATION : GL_INVALID_VALUE), "glGetSamplerParameterIuiv(sampler %u)", sampler); return; diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index df92c0912af..cdf15b48a0d 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -2124,6 +2124,7 @@ _mesa_copy_linked_program_data(gl_shader_stage type, int i; for (i = 0; i < 3; i++) dst_cp->LocalSize[i] = src->Comp.LocalSize[i]; + dst_cp->SharedSize = src->Comp.SharedSize; break; } default: diff --git a/src/mesa/main/shared.c b/src/mesa/main/shared.c index b9f7bb65fb6..49e5f028045 100644 --- a/src/mesa/main/shared.c +++ b/src/mesa/main/shared.c @@ -65,6 +65,7 @@ _mesa_alloc_shared_state(struct gl_context *ctx) mtx_init(&shared->Mutex, mtx_plain); shared->DisplayList = _mesa_NewHashTable(); + shared->BitmapAtlas = _mesa_NewHashTable(); shared->TexObjects = _mesa_NewHashTable(); shared->Programs = _mesa_NewHashTable(); @@ -144,6 +145,18 @@ delete_displaylist_cb(GLuint id, void *data, void *userData) /** + * Callback for deleting a bitmap atlas. Called by _mesa_HashDeleteAll(). + */ +static void +delete_bitmap_atlas_cb(GLuint id, void *data, void *userData) +{ + struct gl_bitmap_atlas *atlas = (struct gl_bitmap_atlas *) data; + struct gl_context *ctx = (struct gl_context *) userData; + _mesa_delete_bitmap_atlas(ctx, atlas); +} + + +/** * Callback for deleting a texture object. Called by _mesa_HashDeleteAll(). */ static void @@ -309,6 +322,8 @@ free_shared_state(struct gl_context *ctx, struct gl_shared_state *shared) */ _mesa_HashDeleteAll(shared->DisplayList, delete_displaylist_cb, ctx); _mesa_DeleteHashTable(shared->DisplayList); + _mesa_HashDeleteAll(shared->BitmapAtlas, delete_bitmap_atlas_cb, ctx); + _mesa_DeleteHashTable(shared->BitmapAtlas); _mesa_HashWalk(shared->ShaderObjects, free_shader_program_data_cb, ctx); _mesa_HashDeleteAll(shared->ShaderObjects, delete_shader_cb, ctx); diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index e6412962251..24e3d189091 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -2436,6 +2436,16 @@ const struct function gles3_functions_possible[] = { { "glGetFragDataIndexEXT", 30, -1 }, { "glBindFragDataLocationEXT", 30, -1 }, + /* GL_OES_texture_border_clamp */ + { "glTexParameterIivOES", 30, -1 }, + { "glTexParameterIuivOES", 30, -1 }, + { "glGetTexParameterIivOES", 30, -1 }, + { "glGetTexParameterIuivOES", 30, -1 }, + { "glSamplerParameterIivOES", 30, -1 }, + { "glSamplerParameterIuivOES", 30, -1 }, + { "glGetSamplerParameterIivOES", 30, -1 }, + { "glGetSamplerParameterIuivOES", 30, -1 }, + { NULL, 0, -1 } }; diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index b273aaac2a1..06bc8f1ba15 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -869,12 +869,12 @@ legal_getteximage_target(struct gl_context *ctx, GLenum target, bool dsa) * the targets from table 8.19 (for GetTexImage and GetnTexImage *only*), * or TEXTURE_CUBE_MAP (for GetTextureImage *only*)." (Emphasis added.) */ - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: return dsa ? GL_FALSE : ctx->Extensions.ARB_texture_cube_map; case GL_TEXTURE_CUBE_MAP: return dsa ? GL_TRUE : GL_FALSE; @@ -886,7 +886,7 @@ legal_getteximage_target(struct gl_context *ctx, GLenum target, bool dsa) /** * Wrapper for _mesa_select_tex_image() which can handle target being - * GL_TEXTURE_CUBE_MAP_ARB in which case we use zoffset to select a cube face. + * GL_TEXTURE_CUBE_MAP in which case we use zoffset to select a cube face. * This can happen for glGetTextureImage and glGetTextureSubImage (DSA * functions). */ diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 50141be8693..8a4c6286cbe 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -116,20 +116,6 @@ adjust_for_oes_float_texture(GLenum format, GLenum type) return format; } -/** - * For cube map faces, return a face index in [0,5]. - * For other targets return 0; - */ -GLuint -_mesa_tex_target_to_face(GLenum target) -{ - if (_mesa_is_cube_face(target)) - return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - else - return 0; -} - - /** * Install gl_texture_image in a gl_texture_object according to the target @@ -273,15 +259,15 @@ proxy_target(GLenum target) case GL_TEXTURE_3D: case GL_PROXY_TEXTURE_3D: return GL_PROXY_TEXTURE_3D; - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_ARB: - case GL_PROXY_TEXTURE_CUBE_MAP_ARB: - return GL_PROXY_TEXTURE_CUBE_MAP_ARB; + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + case GL_TEXTURE_CUBE_MAP: + case GL_PROXY_TEXTURE_CUBE_MAP: + return GL_PROXY_TEXTURE_CUBE_MAP; case GL_TEXTURE_RECTANGLE_NV: case GL_PROXY_TEXTURE_RECTANGLE_NV: return GL_PROXY_TEXTURE_RECTANGLE_NV; @@ -472,13 +458,13 @@ _mesa_max_texture_levels(struct gl_context *ctx, GLenum target) case GL_PROXY_TEXTURE_3D: return ctx->Const.Max3DTextureLevels; case GL_TEXTURE_CUBE_MAP: - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: - case GL_PROXY_TEXTURE_CUBE_MAP_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + case GL_PROXY_TEXTURE_CUBE_MAP: return ctx->Extensions.ARB_texture_cube_map ? ctx->Const.MaxCubeTextureLevels : 0; case GL_TEXTURE_RECTANGLE_NV: @@ -1016,7 +1002,7 @@ _mesa_legal_texture_dimensions(struct gl_context *ctx, GLenum target, case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - case GL_PROXY_TEXTURE_CUBE_MAP_ARB: + case GL_PROXY_TEXTURE_CUBE_MAP: maxSize = 1 << (ctx->Const.MaxCubeTextureLevels - 1); maxSize >>= level; if (width != height) @@ -2299,8 +2285,10 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions, } if (baseFormat == GL_DEPTH_COMPONENT || baseFormat == GL_DEPTH_STENCIL || + baseFormat == GL_STENCIL_INDEX || rb_base_format == GL_DEPTH_COMPONENT || rb_base_format == GL_DEPTH_STENCIL || + rb_base_format == GL_STENCIL_INDEX || ((baseFormat == GL_LUMINANCE_ALPHA || baseFormat == GL_ALPHA) && rb_base_format != GL_RGBA) || diff --git a/src/mesa/main/teximage.h b/src/mesa/main/teximage.h index 5df36c59a28..17f2c908ecc 100644 --- a/src/mesa/main/teximage.h +++ b/src/mesa/main/teximage.h @@ -43,10 +43,63 @@ extern "C" { static inline GLboolean _mesa_is_cube_face(GLenum target) { - return (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB && - target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB); + return (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X && + target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z); } + +/** + * Return number of faces for a texture target. This will be 6 for + * cube maps and 1 otherwise. + * NOTE: this function is not used for cube map arrays which operate + * more like 2D arrays than cube maps. + */ +static inline GLuint +_mesa_num_tex_faces(GLenum target) +{ + switch (target) { + case GL_TEXTURE_CUBE_MAP: + case GL_PROXY_TEXTURE_CUBE_MAP: + return 6; + default: + return 1; + } +} + + +/** + * If the target is GL_TEXTURE_CUBE_MAP, return one of the + * GL_TEXTURE_CUBE_MAP_POSITIVE/NEGATIVE_X/Y/Z targets corresponding to + * the face parameter. + * Else, return target as-is. + */ +static inline GLenum +_mesa_cube_face_target(GLenum target, unsigned face) +{ + if (target == GL_TEXTURE_CUBE_MAP) { + assert(face < 6); + return GL_TEXTURE_CUBE_MAP_POSITIVE_X + face; + } + else { + return target; + } +} + + +/** + * For cube map faces, return a face index in [0,5]. + * For other targets return 0; + */ +static inline GLuint +_mesa_tex_target_to_face(GLenum target) +{ + if (_mesa_is_cube_face(target)) + return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; + else + return 0; +} + + /** Are any of the dimensions of given texture equal to zero? */ static inline GLboolean _mesa_is_zero_size_texture(const struct gl_texture_image *texImage) @@ -131,9 +184,6 @@ extern GLboolean _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target, GLenum intFormat, GLenum *error); -extern GLuint -_mesa_tex_target_to_face(GLenum target); - extern GLint _mesa_get_texture_dimensions(GLenum target); diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index e926c7b6cd2..d8407f04340 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -171,16 +171,16 @@ _mesa_get_current_tex_object(struct gl_context *ctx, GLenum target) return texUnit->CurrentTex[TEXTURE_3D_INDEX]; case GL_PROXY_TEXTURE_3D: return ctx->Texture.ProxyTex[TEXTURE_3D_INDEX]; - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + case GL_TEXTURE_CUBE_MAP: return ctx->Extensions.ARB_texture_cube_map ? texUnit->CurrentTex[TEXTURE_CUBE_INDEX] : NULL; - case GL_PROXY_TEXTURE_CUBE_MAP_ARB: + case GL_PROXY_TEXTURE_CUBE_MAP: return ctx->Extensions.ARB_texture_cube_map ? ctx->Texture.ProxyTex[TEXTURE_CUBE_INDEX] : NULL; case GL_TEXTURE_CUBE_MAP_ARRAY: @@ -239,7 +239,7 @@ _mesa_get_current_tex_object(struct gl_context *ctx, GLenum target) * \param shared the shared GL state structure to contain the texture object * \param name integer name for the texture object * \param target either GL_TEXTURE_1D, GL_TEXTURE_2D, GL_TEXTURE_3D, - * GL_TEXTURE_CUBE_MAP_ARB or GL_TEXTURE_RECTANGLE_NV. zero is ok for the sake + * GL_TEXTURE_CUBE_MAP or GL_TEXTURE_RECTANGLE_NV. zero is ok for the sake * of GenTextures() * * \return pointer to new texture object. @@ -270,7 +270,7 @@ _mesa_initialize_texture_object( struct gl_context *ctx, target == GL_TEXTURE_1D || target == GL_TEXTURE_2D || target == GL_TEXTURE_3D || - target == GL_TEXTURE_CUBE_MAP_ARB || + target == GL_TEXTURE_CUBE_MAP || target == GL_TEXTURE_RECTANGLE_NV || target == GL_TEXTURE_1D_ARRAY_EXT || target == GL_TEXTURE_2D_ARRAY_EXT || @@ -513,7 +513,7 @@ valid_texture_object(const struct gl_texture_object *tex) case GL_TEXTURE_1D: case GL_TEXTURE_2D: case GL_TEXTURE_3D: - case GL_TEXTURE_CUBE_MAP_ARB: + case GL_TEXTURE_CUBE_MAP: case GL_TEXTURE_RECTANGLE_NV: case GL_TEXTURE_1D_ARRAY_EXT: case GL_TEXTURE_2D_ARRAY_EXT: @@ -725,7 +725,7 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx, case GL_TEXTURE_3D: maxLevels = ctx->Const.Max3DTextureLevels; break; - case GL_TEXTURE_CUBE_MAP_ARB: + case GL_TEXTURE_CUBE_MAP: case GL_TEXTURE_CUBE_MAP_ARRAY: maxLevels = ctx->Const.MaxCubeTextureLevels; break; @@ -768,7 +768,7 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx, return; } - if (t->Target == GL_TEXTURE_CUBE_MAP_ARB) { + if (t->Target == GL_TEXTURE_CUBE_MAP) { /* Make sure that all six cube map level 0 images are the same size and * format. * Note: we know that the image's width==height (we enforce that @@ -1036,12 +1036,7 @@ _mesa_get_fallback_texture(struct gl_context *ctx, gl_texture_index tex) /* need a loop here just for cube maps */ for (face = 0; face < numFaces; face++) { - GLenum faceTarget; - - if (target == GL_TEXTURE_CUBE_MAP) - faceTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + face; - else - faceTarget = target; + const GLenum faceTarget = _mesa_cube_face_target(target, face); /* initialize level[0] texture image */ texImage = _mesa_get_tex_image(ctx, texObj, faceTarget, 0); diff --git a/src/mesa/main/texobj.h b/src/mesa/main/texobj.h index 8421337de4d..378d87a1cbb 100644 --- a/src/mesa/main/texobj.h +++ b/src/mesa/main/texobj.h @@ -120,25 +120,6 @@ _mesa_unlock_texture(struct gl_context *ctx, struct gl_texture_object *texObj) } -/** - * Return number of faces for a texture target. This will be 6 for - * cube maps (and cube map arrays) and 1 otherwise. - * NOTE: this function is not used for cube map arrays which operate - * more like 2D arrays than cube maps. - */ -static inline GLuint -_mesa_num_tex_faces(GLenum target) -{ - switch (target) { - case GL_TEXTURE_CUBE_MAP: - case GL_PROXY_TEXTURE_CUBE_MAP: - return 6; - default: - return 1; - } -} - - /** Is the texture "complete" with respect to the given sampler state? */ static inline GLboolean _mesa_is_texture_complete(const struct gl_texture_object *texObj, diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 89f286cc05e..20770a77e15 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -72,7 +72,7 @@ validate_texture_wrap_mode(struct gl_context * ctx, GLenum target, GLenum wrap) break; case GL_CLAMP_TO_BORDER: - supported = is_desktop_gl && e->ARB_texture_border_clamp + supported = ctx->API != API_OPENGLES && e->ARB_texture_border_clamp && (target != GL_TEXTURE_EXTERNAL_OES); break; @@ -500,9 +500,7 @@ set_tex_parameteri(struct gl_context *ctx, goto invalid_pname; case GL_DEPTH_STENCIL_TEXTURE_MODE: - if ((_mesa_is_desktop_gl(ctx) && - ctx->Extensions.ARB_stencil_texturing) || - _mesa_is_gles31(ctx)) { + if (_mesa_has_ARB_stencil_texturing(ctx) || _mesa_is_gles31(ctx)) { bool stencil = params[0] == GL_STENCIL_INDEX; if (!stencil && params[0] != GL_DEPTH_COMPONENT) goto invalid_param; @@ -719,7 +717,8 @@ set_tex_parameterf(struct gl_context *ctx, break; case GL_TEXTURE_BORDER_COLOR: - if (!_mesa_is_desktop_gl(ctx)) + if (ctx->API == API_OPENGLES || + !ctx->Extensions.ARB_texture_border_clamp) goto invalid_pname; if (!target_allows_setting_sampler_parameters(texObj->Target)) @@ -1215,12 +1214,12 @@ legal_get_tex_level_parameter_target(struct gl_context *ctx, GLenum target, return GL_TRUE; case GL_TEXTURE_2D_ARRAY_EXT: return ctx->Extensions.EXT_texture_array; - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: return ctx->Extensions.ARB_texture_cube_map; case GL_TEXTURE_2D_MULTISAMPLE: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: @@ -1237,7 +1236,7 @@ legal_get_tex_level_parameter_target(struct gl_context *ctx, GLenum target, case GL_PROXY_TEXTURE_2D: case GL_PROXY_TEXTURE_3D: return GL_TRUE; - case GL_PROXY_TEXTURE_CUBE_MAP_ARB: + case GL_PROXY_TEXTURE_CUBE_MAP: return ctx->Extensions.ARB_texture_cube_map; case GL_TEXTURE_CUBE_MAP_ARRAY_ARB: case GL_PROXY_TEXTURE_CUBE_MAP_ARRAY_ARB: @@ -1312,6 +1311,7 @@ get_tex_level_parameter_image(struct gl_context *ctx, dummy_image.TexFormat = MESA_FORMAT_NONE; dummy_image.InternalFormat = GL_RGBA; dummy_image._BaseFormat = GL_NONE; + dummy_image.FixedSampleLocations = GL_TRUE; img = &dummy_image; } @@ -1736,7 +1736,8 @@ get_tex_parameterfv(struct gl_context *ctx, *params = ENUM_TO_FLOAT(obj->Sampler.WrapR); break; case GL_TEXTURE_BORDER_COLOR: - if (!_mesa_is_desktop_gl(ctx)) + if (ctx->API == API_OPENGLES || + !ctx->Extensions.ARB_texture_border_clamp) goto invalid_pname; if (ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP)) @@ -1819,7 +1820,7 @@ get_tex_parameterfv(struct gl_context *ctx, *params = (GLfloat) obj->DepthMode; break; case GL_DEPTH_STENCIL_TEXTURE_MODE: - if (!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_stencil_texturing) + if (!_mesa_has_ARB_stencil_texturing(ctx) && !_mesa_is_gles31(ctx)) goto invalid_pname; *params = (GLfloat) (obj->StencilSampling ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT); @@ -1970,7 +1971,8 @@ get_tex_parameteriv(struct gl_context *ctx, *params = (GLint) obj->Sampler.WrapR; break; case GL_TEXTURE_BORDER_COLOR: - if (!_mesa_is_desktop_gl(ctx)) + if (ctx->API == API_OPENGLES || + !ctx->Extensions.ARB_texture_border_clamp) goto invalid_pname; { @@ -2054,7 +2056,7 @@ get_tex_parameteriv(struct gl_context *ctx, *params = (GLint) obj->DepthMode; break; case GL_DEPTH_STENCIL_TEXTURE_MODE: - if (!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_stencil_texturing) + if (!_mesa_has_ARB_stencil_texturing(ctx) && !_mesa_is_gles31(ctx)) goto invalid_pname; *params = (GLint) (obj->StencilSampling ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT); diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c index 9d88554d945..9ee5c6974df 100644 --- a/src/mesa/main/texstate.c +++ b/src/mesa/main/texstate.c @@ -786,7 +786,7 @@ alloc_proxy_textures( struct gl_context *ctx ) GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_1D_ARRAY_EXT, GL_TEXTURE_EXTERNAL_OES, - GL_TEXTURE_CUBE_MAP_ARB, + GL_TEXTURE_CUBE_MAP, GL_TEXTURE_3D, GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_2D, diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index d7671738b18..c33b1095900 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -87,9 +87,6 @@ enum { * Texture image storage function. */ typedef GLboolean (*StoreTexImageFunc)(TEXSTORE_PARAMS); -static const GLubyte map_identity[6] = { 0, 1, 2, 3, ZERO, ONE }; -static const GLubyte map_3210[6] = { 3, 2, 1, 0, ZERO, ONE }; -static const GLubyte map_1032[6] = { 1, 0, 3, 2, ZERO, ONE }; /** diff --git a/src/mesa/main/textureview.c b/src/mesa/main/textureview.c index 04b7d73da5c..316d8280338 100644 --- a/src/mesa/main/textureview.c +++ b/src/mesa/main/textureview.c @@ -211,10 +211,7 @@ initialize_texture_fields(struct gl_context *ctx, for (level = 0; level < levels; level++) { for (face = 0; face < numFaces; face++) { struct gl_texture_image *texImage; - GLenum faceTarget = target; - - if (target == GL_TEXTURE_CUBE_MAP) - faceTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + face; + const GLenum faceTarget = _mesa_cube_face_target(target, face); texImage = _mesa_get_tex_image(ctx, texObj, faceTarget, level); @@ -536,9 +533,7 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture, newViewNumLevels = MIN2(numlevels, origTexObj->NumLevels - minlevel); newViewNumLayers = MIN2(numlayers, origTexObj->NumLayers - minlayer); - faceTarget = origTexObj->Target; - if (faceTarget == GL_TEXTURE_CUBE_MAP) - faceTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + minlayer; + faceTarget = _mesa_cube_face_target(origTexObj->Target, minlayer); /* Get a reference to what will become this View's base level */ origTexImage = _mesa_select_tex_image(origTexObj, faceTarget, minlevel); diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c index eed241271df..db53377d705 100644 --- a/src/mesa/program/prog_statevars.c +++ b/src/mesa/program/prog_statevars.c @@ -40,7 +40,7 @@ #include "prog_statevars.h" #include "prog_parameter.h" #include "main/samplerobj.h" -#include "framebuffer.h" +#include "main/framebuffer.h" #define ONE_DIV_SQRT_LN2 (1.201122408786449815) @@ -353,7 +353,7 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index state[], } return; case STATE_NUM_SAMPLES: - ((int *)value)[0] = _mesa_geometric_samples(ctx->DrawBuffer); + ((int *)value)[0] = MAX2(1, _mesa_geometric_samples(ctx->DrawBuffer)); return; case STATE_DEPTH_RANGE: value[0] = ctx->ViewportArray[0].Near; /* near */ diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c index 4b89ade1b15..622621bdcbb 100644 --- a/src/mesa/state_tracker/st_atom.c +++ b/src/mesa/state_tracker/st_atom.c @@ -38,9 +38,9 @@ /** - * This is used to initialize st->atoms[]. + * This is used to initialize st->render_atoms[]. */ -static const struct st_tracked_state *atoms[] = +static const struct st_tracked_state *render_atoms[] = { &st_update_depth_stencil_alpha, &st_update_clip, @@ -85,6 +85,11 @@ static const struct st_tracked_state *atoms[] = &st_bind_tes_ssbos, &st_bind_fs_ssbos, &st_bind_gs_ssbos, + &st_bind_vs_images, + &st_bind_tcs_images, + &st_bind_tes_images, + &st_bind_gs_images, + &st_bind_fs_images, &st_update_pixel_transfer, &st_update_tess, @@ -93,6 +98,22 @@ static const struct st_tracked_state *atoms[] = }; +/** + * This is used to initialize st->compute_atoms[]. + */ +static const struct st_tracked_state *compute_atoms[] = +{ + &st_update_cp, + &st_update_compute_texture, + &st_update_sampler, /* depends on update_compute_texture for swizzle */ + &st_update_cs_constants, + &st_bind_cs_ubos, + &st_bind_cs_atomics, + &st_bind_cs_ssbos, + &st_bind_cs_images, +}; + + void st_init_atoms( struct st_context *st ) { /* no-op */ @@ -178,20 +199,41 @@ static void check_attrib_edgeflag(struct st_context *st) * Update all derived state: */ -void st_validate_state( struct st_context *st ) +void st_validate_state( struct st_context *st, enum st_pipeline pipeline ) { - struct st_state_flags *state = &st->dirty; + const struct st_tracked_state **atoms; + struct st_state_flags *state; + GLuint num_atoms; GLuint i; + /* Get pipeline state. */ + switch (pipeline) { + case ST_PIPELINE_RENDER: + atoms = render_atoms; + num_atoms = ARRAY_SIZE(render_atoms); + state = &st->dirty; + break; + case ST_PIPELINE_COMPUTE: + atoms = compute_atoms; + num_atoms = ARRAY_SIZE(compute_atoms); + state = &st->dirty_cp; + break; + default: + unreachable("Invalid pipeline specified"); + } + /* Get Mesa driver state. */ st->dirty.st |= st->ctx->NewDriverState; + st->dirty_cp.st |= st->ctx->NewDriverState; st->ctx->NewDriverState = 0; - check_attrib_edgeflag(st); + if (pipeline == ST_PIPELINE_RENDER) { + check_attrib_edgeflag(st); - check_program_state( st ); + check_program_state(st); - st_manager_validate_framebuffers(st); + st_manager_validate_framebuffers(st); + } if (state->st == 0 && state->mesa == 0) return; @@ -211,7 +253,7 @@ void st_validate_state( struct st_context *st ) memset(&examined, 0, sizeof(examined)); prev = *state; - for (i = 0; i < ARRAY_SIZE(atoms); i++) { + for (i = 0; i < num_atoms; i++) { const struct st_tracked_state *atom = atoms[i]; struct st_state_flags generated; @@ -242,7 +284,7 @@ void st_validate_state( struct st_context *st ) } else { - for (i = 0; i < ARRAY_SIZE(atoms); i++) { + for (i = 0; i < num_atoms; i++) { if (check_state(state, &atoms[i]->dirty)) atoms[i]->update( st ); } diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h index 3a9153c80cb..31bb2dd98bf 100644 --- a/src/mesa/state_tracker/st_atom.h +++ b/src/mesa/state_tracker/st_atom.h @@ -36,6 +36,9 @@ #include "main/glheader.h" +#include "state_tracker/st_api.h" +#include "state_tracker/st_context.h" + struct st_context; struct st_tracked_state; @@ -43,7 +46,7 @@ void st_init_atoms( struct st_context *st ); void st_destroy_atoms( struct st_context *st ); -void st_validate_state( struct st_context *st ); +void st_validate_state( struct st_context *st, enum st_pipeline pipeline ); extern const struct st_tracked_state st_update_array; @@ -55,6 +58,7 @@ extern const struct st_tracked_state st_update_gp; extern const struct st_tracked_state st_update_tep; extern const struct st_tracked_state st_update_tcp; extern const struct st_tracked_state st_update_vp; +extern const struct st_tracked_state st_update_cp; extern const struct st_tracked_state st_update_rasterizer; extern const struct st_tracked_state st_update_polygon_stipple; extern const struct st_tracked_state st_update_viewport; @@ -68,26 +72,37 @@ extern const struct st_tracked_state st_update_vertex_texture; extern const struct st_tracked_state st_update_geometry_texture; extern const struct st_tracked_state st_update_tessctrl_texture; extern const struct st_tracked_state st_update_tesseval_texture; +extern const struct st_tracked_state st_update_compute_texture; extern const struct st_tracked_state st_update_fs_constants; extern const struct st_tracked_state st_update_gs_constants; extern const struct st_tracked_state st_update_tes_constants; extern const struct st_tracked_state st_update_tcs_constants; extern const struct st_tracked_state st_update_vs_constants; +extern const struct st_tracked_state st_update_cs_constants; extern const struct st_tracked_state st_bind_fs_ubos; extern const struct st_tracked_state st_bind_vs_ubos; extern const struct st_tracked_state st_bind_gs_ubos; extern const struct st_tracked_state st_bind_tcs_ubos; extern const struct st_tracked_state st_bind_tes_ubos; +extern const struct st_tracked_state st_bind_cs_ubos; extern const struct st_tracked_state st_bind_fs_atomics; extern const struct st_tracked_state st_bind_vs_atomics; extern const struct st_tracked_state st_bind_gs_atomics; extern const struct st_tracked_state st_bind_tcs_atomics; extern const struct st_tracked_state st_bind_tes_atomics; +extern const struct st_tracked_state st_bind_cs_atomics; extern const struct st_tracked_state st_bind_fs_ssbos; extern const struct st_tracked_state st_bind_vs_ssbos; extern const struct st_tracked_state st_bind_gs_ssbos; extern const struct st_tracked_state st_bind_tcs_ssbos; extern const struct st_tracked_state st_bind_tes_ssbos; +extern const struct st_tracked_state st_bind_cs_ssbos; +extern const struct st_tracked_state st_bind_fs_images; +extern const struct st_tracked_state st_bind_vs_images; +extern const struct st_tracked_state st_bind_gs_images; +extern const struct st_tracked_state st_bind_tcs_images; +extern const struct st_tracked_state st_bind_tes_images; +extern const struct st_tracked_state st_bind_cs_images; extern const struct st_tracked_state st_update_pixel_transfer; extern const struct st_tracked_state st_update_tess; diff --git a/src/mesa/state_tracker/st_atom_atomicbuf.c b/src/mesa/state_tracker/st_atom_atomicbuf.c index 1c30d1fb701..a27dbe0460d 100644 --- a/src/mesa/state_tracker/st_atom_atomicbuf.c +++ b/src/mesa/state_tracker/st_atom_atomicbuf.c @@ -58,9 +58,11 @@ st_bind_atomics(struct st_context *st, st_buffer_object(binding->BufferObject); struct pipe_shader_buffer sb = { 0 }; - sb.buffer = st_obj->buffer; - sb.buffer_offset = binding->Offset; - sb.buffer_size = st_obj->buffer->width0 - binding->Offset; + if (st_obj && st_obj->buffer) { + sb.buffer = st_obj->buffer; + sb.buffer_offset = binding->Offset; + sb.buffer_size = st_obj->buffer->width0 - binding->Offset; + } st->pipe->set_shader_buffers(st->pipe, shader_type, atomic->Binding, 1, &sb); @@ -156,3 +158,21 @@ const struct st_tracked_state st_bind_tes_atomics = { }, bind_tes_atomics }; + +static void +bind_cs_atomics(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; + + st_bind_atomics(st, prog, PIPE_SHADER_COMPUTE); +} + +const struct st_tracked_state st_bind_cs_atomics = { + "st_bind_cs_atomics", + { + 0, + ST_NEW_COMPUTE_PROGRAM | ST_NEW_ATOMIC_BUFFER, + }, + bind_cs_atomics +}; diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c index 66811d29c29..407dfd31c80 100644 --- a/src/mesa/state_tracker/st_atom_constbuf.c +++ b/src/mesa/state_tracker/st_atom_constbuf.c @@ -61,7 +61,8 @@ void st_upload_constants( struct st_context *st, shader_type == PIPE_SHADER_FRAGMENT || shader_type == PIPE_SHADER_GEOMETRY || shader_type == PIPE_SHADER_TESS_CTRL || - shader_type == PIPE_SHADER_TESS_EVAL); + shader_type == PIPE_SHADER_TESS_EVAL || + shader_type == PIPE_SHADER_COMPUTE); /* update constants */ if (params && params->NumParameters) { @@ -226,6 +227,28 @@ const struct st_tracked_state st_update_tes_constants = { update_tes_constants /* update */ }; +/* Compute shader: + */ +static void update_cs_constants(struct st_context *st ) +{ + struct st_compute_program *cp = st->cp; + struct gl_program_parameter_list *params; + + if (cp) { + params = cp->Base.Base.Parameters; + st_upload_constants( st, params, PIPE_SHADER_COMPUTE ); + } +} + +const struct st_tracked_state st_update_cs_constants = { + "st_update_cs_constants", /* name */ + { /* dirty */ + _NEW_PROGRAM_CONSTANTS, /* mesa */ + ST_NEW_COMPUTE_PROGRAM, /* st */ + }, + update_cs_constants /* update */ +}; + static void st_bind_ubos(struct st_context *st, struct gl_shader *shader, unsigned shader_type) @@ -363,3 +386,24 @@ const struct st_tracked_state st_bind_tes_ubos = { }, bind_tes_ubos }; + +static void bind_cs_ubos(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; + + if (!prog) + return; + + st_bind_ubos(st, prog->_LinkedShaders[MESA_SHADER_COMPUTE], + PIPE_SHADER_COMPUTE); +} + +const struct st_tracked_state st_bind_cs_ubos = { + "st_bind_cs_ubos", + { + 0, + ST_NEW_COMPUTE_PROGRAM | ST_NEW_UNIFORM_BUFFER, + }, + bind_cs_ubos +}; diff --git a/src/mesa/state_tracker/st_atom_image.c b/src/mesa/state_tracker/st_atom_image.c new file mode 100644 index 00000000000..4b48bc30b69 --- /dev/null +++ b/src/mesa/state_tracker/st_atom_image.c @@ -0,0 +1,236 @@ +/************************************************************************** + * + * Copyright 2016 Ilia Mirkin. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/imports.h" +#include "program/prog_parameter.h" +#include "program/prog_print.h" +#include "compiler/glsl/ir_uniform.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_surface.h" + +#include "st_cb_texture.h" +#include "st_debug.h" +#include "st_texture.h" +#include "st_context.h" +#include "st_atom.h" +#include "st_program.h" +#include "st_format.h" + +static void +st_bind_images(struct st_context *st, struct gl_shader *shader, + unsigned shader_type) +{ + unsigned i; + struct pipe_image_view images[MAX_IMAGE_UNIFORMS]; + struct gl_program_constants *c = &st->ctx->Const.Program[shader->Stage]; + + if (!shader || !st->pipe->set_shader_images) + return; + + for (i = 0; i < shader->NumImages; i++) { + struct gl_image_unit *u = &st->ctx->ImageUnits[shader->ImageUnits[i]]; + struct st_texture_object *stObj = st_texture_object(u->TexObj); + struct pipe_image_view *img = &images[i]; + + if (!stObj || + !st_finalize_texture(st->ctx, st->pipe, u->TexObj) || + !stObj->pt) { + memset(img, 0, sizeof(*img)); + continue; + } + + img->resource = stObj->pt; + img->format = st_mesa_format_to_pipe_format(st, u->_ActualFormat); + if (stObj->pt->target == PIPE_BUFFER) { + unsigned base, size; + unsigned f, n; + const struct util_format_description *desc + = util_format_description(img->format); + + base = stObj->base.BufferOffset; + assert(base < stObj->pt->width0); + size = MIN2(stObj->pt->width0 - base, (unsigned)stObj->base.BufferSize); + + f = (base / (desc->block.bits / 8)) * desc->block.width; + n = (size / (desc->block.bits / 8)) * desc->block.width; + assert(n > 0); + img->u.buf.first_element = f; + img->u.buf.last_element = f + (n - 1); + } else { + img->u.tex.level = u->Level + stObj->base.MinLevel; + if (stObj->pt->target == PIPE_TEXTURE_3D) { + if (u->Layered) { + img->u.tex.first_layer = 0; + img->u.tex.last_layer = u_minify(stObj->pt->depth0, img->u.tex.level) - 1; + } else { + img->u.tex.first_layer = u->_Layer; + img->u.tex.last_layer = u->_Layer; + } + } else { + img->u.tex.first_layer = u->_Layer + stObj->base.MinLayer; + img->u.tex.last_layer = u->_Layer + stObj->base.MinLayer; + if (u->Layered && img->resource->array_size > 1) { + if (stObj->base.Immutable) + img->u.tex.last_layer += stObj->base.NumLayers - 1; + else + img->u.tex.last_layer += img->resource->array_size - 1; + } + } + } + } + st->pipe->set_shader_images(st->pipe, shader_type, 0, shader->NumImages, + images); + /* clear out any stale shader images */ + if (shader->NumImages < c->MaxImageUniforms) + st->pipe->set_shader_images( + st->pipe, shader_type, + shader->NumImages, + c->MaxImageUniforms - shader->NumImages, + NULL); +} + +static void bind_vs_images(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; + + if (!prog) + return; + + st_bind_images(st, prog->_LinkedShaders[MESA_SHADER_VERTEX], PIPE_SHADER_VERTEX); +} + +const struct st_tracked_state st_bind_vs_images = { + "st_bind_vs_images", + { + 0, + ST_NEW_VERTEX_PROGRAM | ST_NEW_IMAGE_UNITS, + }, + bind_vs_images +}; + +static void bind_fs_images(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT]; + + if (!prog) + return; + + st_bind_images(st, prog->_LinkedShaders[MESA_SHADER_FRAGMENT], PIPE_SHADER_FRAGMENT); +} + +const struct st_tracked_state st_bind_fs_images = { + "st_bind_fs_images", + { + 0, + ST_NEW_FRAGMENT_PROGRAM | ST_NEW_IMAGE_UNITS, + }, + bind_fs_images +}; + +static void bind_gs_images(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]; + + if (!prog) + return; + + st_bind_images(st, prog->_LinkedShaders[MESA_SHADER_GEOMETRY], PIPE_SHADER_GEOMETRY); +} + +const struct st_tracked_state st_bind_gs_images = { + "st_bind_gs_images", + { + 0, + ST_NEW_GEOMETRY_PROGRAM | ST_NEW_IMAGE_UNITS, + }, + bind_gs_images +}; + +static void bind_tcs_images(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL]; + + if (!prog) + return; + + st_bind_images(st, prog->_LinkedShaders[MESA_SHADER_TESS_CTRL], PIPE_SHADER_TESS_CTRL); +} + +const struct st_tracked_state st_bind_tcs_images = { + "st_bind_tcs_images", + { + 0, + ST_NEW_TESSCTRL_PROGRAM | ST_NEW_IMAGE_UNITS, + }, + bind_tcs_images +}; + +static void bind_tes_images(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL]; + + if (!prog) + return; + + st_bind_images(st, prog->_LinkedShaders[MESA_SHADER_TESS_EVAL], PIPE_SHADER_TESS_EVAL); +} + +const struct st_tracked_state st_bind_tes_images = { + "st_bind_tes_images", + { + 0, + ST_NEW_TESSEVAL_PROGRAM | ST_NEW_IMAGE_UNITS, + }, + bind_tes_images +}; + +static void bind_cs_images(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; + + if (!prog) + return; + + st_bind_images(st, prog->_LinkedShaders[MESA_SHADER_COMPUTE], PIPE_SHADER_COMPUTE); +} + +const struct st_tracked_state st_bind_cs_images = { + "st_bind_cs_images", + { + 0, + ST_NEW_COMPUTE_PROGRAM | ST_NEW_IMAGE_UNITS, + }, + bind_cs_images +}; diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c index 94231cf1946..82dcf5ee0ca 100644 --- a/src/mesa/state_tracker/st_atom_sampler.c +++ b/src/mesa/state_tracker/st_atom_sampler.c @@ -321,6 +321,14 @@ update_samplers(struct st_context *st) st->state.samplers[PIPE_SHADER_TESS_EVAL], &st->state.num_samplers[PIPE_SHADER_TESS_EVAL]); } + if (ctx->ComputeProgram._Current) { + update_shader_samplers(st, + PIPE_SHADER_COMPUTE, + &ctx->ComputeProgram._Current->Base, + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits, + st->state.samplers[PIPE_SHADER_COMPUTE], + &st->state.num_samplers[PIPE_SHADER_COMPUTE]); + } } diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index c8650a5899a..a88f0352746 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -255,3 +255,35 @@ const struct st_tracked_state st_update_tep = { }, update_tep /* update */ }; + + + +static void +update_cp( struct st_context *st ) +{ + struct st_compute_program *stcp; + + if (!st->ctx->ComputeProgram._Current) { + cso_set_compute_shader_handle(st->cso_context, NULL); + return; + } + + stcp = st_compute_program(st->ctx->ComputeProgram._Current); + assert(stcp->Base.Base.Target == GL_COMPUTE_PROGRAM_NV); + + st->cp_variant = st_get_cp_variant(st, &stcp->tgsi, &stcp->variants); + + st_reference_compprog(st, &st->cp, stcp); + + cso_set_compute_shader_handle(st->cso_context, + st->cp_variant->driver_shader); +} + +const struct st_tracked_state st_update_cp = { + "st_update_cp", /* name */ + { /* dirty */ + 0, /* mesa */ + ST_NEW_COMPUTE_PROGRAM /* st */ + }, + update_cp /* update */ +}; diff --git a/src/mesa/state_tracker/st_atom_storagebuf.c b/src/mesa/state_tracker/st_atom_storagebuf.c index f165cc3e0a1..37b4c4d256d 100644 --- a/src/mesa/state_tracker/st_atom_storagebuf.c +++ b/src/mesa/state_tracker/st_atom_storagebuf.c @@ -194,3 +194,24 @@ const struct st_tracked_state st_bind_tes_ssbos = { }, bind_tes_ssbos }; + +static void bind_cs_ssbos(struct st_context *st) +{ + struct gl_shader_program *prog = + st->ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; + + if (!prog) + return; + + st_bind_ssbos(st, prog->_LinkedShaders[MESA_SHADER_COMPUTE], + PIPE_SHADER_COMPUTE); +} + +const struct st_tracked_state st_bind_cs_ssbos = { + "st_bind_cs_ssbos", + { + 0, + ST_NEW_COMPUTE_PROGRAM | ST_NEW_STORAGE_BUFFER, + }, + bind_cs_ssbos +}; diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index 3e3775200a8..4b7ad77b47a 100644 --- a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -32,6 +32,7 @@ */ +#include "main/context.h" #include "main/macros.h" #include "main/mtypes.h" #include "main/samplerobj.h" @@ -147,9 +148,7 @@ compute_texture_format_swizzle(GLenum baseFormat, GLenum depthMode, else return SWIZZLE_XYZW; case GL_STENCIL_INDEX: - return SWIZZLE_XYZW; case GL_DEPTH_STENCIL: - /* fall-through */ case GL_DEPTH_COMPONENT: /* Now examine the depth mode */ switch (depthMode) { @@ -193,15 +192,29 @@ compute_texture_format_swizzle(GLenum baseFormat, GLenum depthMode, static unsigned -get_texture_format_swizzle(const struct st_texture_object *stObj, +get_texture_format_swizzle(const struct st_context *st, + const struct st_texture_object *stObj, unsigned glsl_version) { GLenum baseFormat = _mesa_texture_base_format(&stObj->base); unsigned tex_swizzle; if (baseFormat != GL_NONE) { + GLenum depth_mode = stObj->base.DepthMode; + /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures + * with depth component data specified with a sized internal format. + */ + if (_mesa_is_gles3(st->ctx) && + util_format_is_depth_or_stencil(stObj->pt->format)) { + const struct st_texture_image *firstImage = + st_texture_image_const(_mesa_base_tex_image(&stObj->base)); + if (firstImage->base.InternalFormat != GL_DEPTH_COMPONENT && + firstImage->base.InternalFormat != GL_DEPTH_STENCIL && + firstImage->base.InternalFormat != GL_STENCIL_INDEX) + depth_mode = GL_RED; + } tex_swizzle = compute_texture_format_swizzle(baseFormat, - stObj->base.DepthMode, + depth_mode, stObj->pt->format, glsl_version); } @@ -221,10 +234,11 @@ get_texture_format_swizzle(const struct st_texture_object *stObj, * \param stObj the st texture object, */ static boolean -check_sampler_swizzle(const struct st_texture_object *stObj, +check_sampler_swizzle(const struct st_context *st, + const struct st_texture_object *stObj, struct pipe_sampler_view *sv, unsigned glsl_version) { - unsigned swizzle = get_texture_format_swizzle(stObj, glsl_version); + unsigned swizzle = get_texture_format_swizzle(st, stObj, glsl_version); return ((sv->swizzle_r != GET_SWZ(swizzle, 0)) || (sv->swizzle_g != GET_SWZ(swizzle, 1)) || @@ -251,13 +265,13 @@ static unsigned last_layer(struct st_texture_object *stObj) } static struct pipe_sampler_view * -st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe, +st_create_texture_sampler_view_from_stobj(struct st_context *st, struct st_texture_object *stObj, enum pipe_format format, unsigned glsl_version) { struct pipe_sampler_view templ; - unsigned swizzle = get_texture_format_swizzle(stObj, glsl_version); + unsigned swizzle = get_texture_format_swizzle(st, stObj, glsl_version); u_sampler_view_default_template(&templ, stObj->pt, @@ -297,7 +311,7 @@ st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe, templ.swizzle_a = GET_SWZ(swizzle, 3); } - return pipe->create_sampler_view(pipe, stObj->pt, &templ); + return st->pipe->create_sampler_view(st->pipe, stObj->pt, &templ); } @@ -327,7 +341,7 @@ st_get_texture_sampler_view_from_stobj(struct st_context *st, /* if sampler view has changed dereference it */ if (*sv) { - if (check_sampler_swizzle(stObj, *sv, glsl_version) || + if (check_sampler_swizzle(st, stObj, *sv, glsl_version) || (format != (*sv)->format) || gl_target_to_pipe(stObj->base.Target) != (*sv)->target || stObj->base.MinLevel + stObj->base.BaseLevel != (*sv)->u.tex.first_level || @@ -339,7 +353,7 @@ st_get_texture_sampler_view_from_stobj(struct st_context *st, } if (!*sv) { - *sv = st_create_texture_sampler_view_from_stobj(st->pipe, stObj, + *sv = st_create_texture_sampler_view_from_stobj(st, stObj, format, glsl_version); } else if ((*sv)->context != st->pipe) { @@ -534,6 +548,22 @@ update_tesseval_textures(struct st_context *st) } +static void +update_compute_textures(struct st_context *st) +{ + const struct gl_context *ctx = st->ctx; + + if (ctx->ComputeProgram._Current) { + update_textures(st, + MESA_SHADER_COMPUTE, + &ctx->ComputeProgram._Current->Base, + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits, + st->state.sampler_views[PIPE_SHADER_COMPUTE], + &st->state.num_sampler_views[PIPE_SHADER_COMPUTE]); + } +} + + const struct st_tracked_state st_update_fragment_texture = { "st_update_texture", /* name */ { /* dirty */ @@ -582,3 +612,13 @@ const struct st_tracked_state st_update_tesseval_texture = { }, update_tesseval_textures /* update */ }; + + +const struct st_tracked_state st_update_compute_texture = { + "st_update_compute_texture", /* name */ + { /* dirty */ + _NEW_TEXTURE, /* mesa */ + ST_NEW_COMPUTE_PROGRAM | ST_NEW_SAMPLER_VIEWS, /* st */ + }, + update_compute_textures /* update */ +}; diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 627b8cbd598..4fd2dfef8cc 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -33,6 +33,7 @@ #include "main/imports.h" #include "main/image.h" #include "main/bufferobj.h" +#include "main/dlist.h" #include "main/macros.h" #include "main/pbo.h" #include "program/program.h" @@ -41,6 +42,7 @@ #include "st_context.h" #include "st_atom.h" #include "st_atom_constbuf.h" +#include "st_draw.h" #include "st_program.h" #include "st_cb_bitmap.h" #include "st_texture.h" @@ -49,7 +51,6 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "util/u_inlines.h" -#include "util/u_draw_quad.h" #include "util/u_simple_shaders.h" #include "util/u_upload_mgr.h" #include "program/prog_instruction.h" @@ -183,7 +184,8 @@ make_bitmap_texture(struct gl_context *ctx, GLsizei width, GLsizei height, static void setup_render_state(struct gl_context *ctx, struct pipe_sampler_view *sv, - const GLfloat *color) + const GLfloat *color, + bool atlas) { struct st_context *st = st_context(ctx); struct cso_context *cso = st->cso_context; @@ -194,7 +196,7 @@ setup_render_state(struct gl_context *ctx, key.st = st->has_shareable_shaders ? NULL : st; key.bitmap = GL_TRUE; key.clamp_color = st->clamp_frag_color_in_shader && - st->ctx->Color._ClampFragmentColor; + ctx->Color._ClampFragmentColor; fpv = st_get_fp_variant(st, st->fp, &key); @@ -214,18 +216,15 @@ setup_render_state(struct gl_context *ctx, COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], colorSave); } - cso_save_rasterizer(cso); - cso_save_fragment_samplers(cso); - cso_save_fragment_sampler_views(cso); - cso_save_viewport(cso); - cso_save_fragment_shader(cso); - cso_save_stream_outputs(cso); - cso_save_vertex_shader(cso); - cso_save_tessctrl_shader(cso); - cso_save_tesseval_shader(cso); - cso_save_geometry_shader(cso); - cso_save_vertex_elements(cso); - cso_save_aux_vertex_buffer_slot(cso); + cso_save_state(cso, (CSO_BIT_RASTERIZER | + CSO_BIT_FRAGMENT_SAMPLERS | + CSO_BIT_FRAGMENT_SAMPLER_VIEWS | + CSO_BIT_VIEWPORT | + CSO_BIT_STREAM_OUTPUTS | + CSO_BIT_VERTEX_ELEMENTS | + CSO_BIT_AUX_VERTEX_BUFFER_SLOT | + CSO_BITS_ALL_SHADERS)); + /* rasterizer state: just scissor */ st->bitmap.rasterizer.scissor = ctx->Scissor.EnableFlags & 1; @@ -251,7 +250,10 @@ setup_render_state(struct gl_context *ctx, for (i = 0; i < st->state.num_samplers[PIPE_SHADER_FRAGMENT]; i++) { samplers[i] = &st->state.samplers[PIPE_SHADER_FRAGMENT][i]; } - samplers[fpv->bitmap_sampler] = &st->bitmap.sampler; + if (atlas) + samplers[fpv->bitmap_sampler] = &st->bitmap.atlas_sampler; + else + samplers[fpv->bitmap_sampler] = &st->bitmap.sampler; cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, num, (const struct pipe_sampler_state **) samplers); } @@ -268,21 +270,12 @@ setup_render_state(struct gl_context *ctx, } /* viewport state: viewport matching window dims */ - { - const GLboolean invert = st->state.fb_orientation == Y_0_TOP; - const GLfloat width = (GLfloat)st->state.framebuffer.width; - const GLfloat height = (GLfloat)st->state.framebuffer.height; - struct pipe_viewport_state vp; - vp.scale[0] = 0.5f * width; - vp.scale[1] = height * (invert ? -0.5f : 0.5f); - vp.scale[2] = 0.5f; - vp.translate[0] = 0.5f * width; - vp.translate[1] = 0.5f * height; - vp.translate[2] = 0.5f; - cso_set_viewport(cso, &vp); - } + cso_set_viewport_dims(cso, st->state.framebuffer.width, + st->state.framebuffer.height, + st->state.fb_orientation == Y_0_TOP); + + cso_set_vertex_elements(cso, 3, st->util_velems); - cso_set_vertex_elements(cso, 3, st->velems_util_draw); cso_set_stream_outputs(st->cso_context, 0, NULL, NULL); } @@ -296,18 +289,7 @@ restore_render_state(struct gl_context *ctx) struct st_context *st = st_context(ctx); struct cso_context *cso = st->cso_context; - cso_restore_rasterizer(cso); - cso_restore_fragment_samplers(cso); - cso_restore_fragment_sampler_views(cso); - cso_restore_viewport(cso); - cso_restore_fragment_shader(cso); - cso_restore_vertex_shader(cso); - cso_restore_tessctrl_shader(cso); - cso_restore_tesseval_shader(cso); - cso_restore_geometry_shader(cso); - cso_restore_vertex_elements(cso); - cso_restore_aux_vertex_buffer_slot(cso); - cso_restore_stream_outputs(cso); + cso_restore_state(cso); } @@ -322,7 +304,6 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, { struct st_context *st = st_context(ctx); struct pipe_context *pipe = st->pipe; - struct pipe_resource *vbuf = NULL; const float fb_width = (float) st->state.framebuffer.width; const float fb_height = (float) st->state.framebuffer.height; const float x0 = (float) x; @@ -335,8 +316,6 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, const float clip_y0 = y0 / fb_height * 2.0f - 1.0f; const float clip_x1 = x1 / fb_width * 2.0f - 1.0f; const float clip_y1 = y1 / fb_height * 2.0f - 1.0f; - float (*vertices)[3][4]; /**< vertex pos + color + texcoord */ - unsigned offset, i; /* limit checks */ { @@ -349,7 +328,7 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, assert(height <= (GLsizei) maxSize); } - setup_render_state(ctx, sv, color); + setup_render_state(ctx, sv, color, false); /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */ z = z * 2.0f - 1.0f; @@ -360,62 +339,13 @@ draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, tBot = (float) height; } - u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), 4, - &offset, &vbuf, (void **) &vertices); - if (!vbuf) { + if (!st_draw_quad(st, clip_x0, clip_y0, clip_x1, clip_y1, z, + sLeft, tBot, sRight, tTop, color, 0)) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBitmap"); - restore_render_state(ctx); - return; } - /* Positions are in clip coords since we need to do clipping in case - * the bitmap quad goes beyond the window bounds. - */ - vertices[0][0][0] = clip_x0; - vertices[0][0][1] = clip_y0; - vertices[0][2][0] = sLeft; - vertices[0][2][1] = tTop; - - vertices[1][0][0] = clip_x1; - vertices[1][0][1] = clip_y0; - vertices[1][2][0] = sRight; - vertices[1][2][1] = tTop; - - vertices[2][0][0] = clip_x1; - vertices[2][0][1] = clip_y1; - vertices[2][2][0] = sRight; - vertices[2][2][1] = tBot; - - vertices[3][0][0] = clip_x0; - vertices[3][0][1] = clip_y1; - vertices[3][2][0] = sLeft; - vertices[3][2][1] = tBot; - - /* same for all verts: */ - for (i = 0; i < 4; i++) { - vertices[i][0][2] = z; - vertices[i][0][3] = 1.0f; - vertices[i][1][0] = color[0]; - vertices[i][1][1] = color[1]; - vertices[i][1][2] = color[2]; - vertices[i][1][3] = color[3]; - vertices[i][2][2] = 0.0; /*R*/ - vertices[i][2][3] = 1.0; /*Q*/ - } - - u_upload_unmap(st->uploader); - - util_draw_vertex_buffer(pipe, st->cso_context, vbuf, - cso_get_aux_vertex_buffer_slot(st->cso_context), - offset, - PIPE_PRIM_TRIANGLE_FAN, - 4, /* verts */ - 3); /* attribs/vert */ - restore_render_state(ctx); - pipe_resource_reference(&vbuf, NULL); - /* We uploaded modified constants, need to invalidate them. */ st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS; } @@ -555,7 +485,7 @@ accum_bitmap(struct gl_context *ctx, struct st_context *st = ctx->st; struct bitmap_cache *cache = st->bitmap.cache; int px = -999, py = -999; - const GLfloat z = st->ctx->Current.RasterPos[2]; + const GLfloat z = ctx->Current.RasterPos[2]; if (width > BITMAP_CACHE_WIDTH || height > BITMAP_CACHE_HEIGHT) @@ -566,7 +496,7 @@ accum_bitmap(struct gl_context *ctx, py = y - cache->ypos; if (px < 0 || px + width > BITMAP_CACHE_WIDTH || py < 0 || py + height > BITMAP_CACHE_HEIGHT || - !TEST_EQ_4V(st->ctx->Current.RasterColor, cache->color) || + !TEST_EQ_4V(ctx->Current.RasterColor, cache->color) || ((fabs(z - cache->zpos) > Z_EPSILON))) { /* This bitmap would extend beyond cache bounds, or the bitmap * color is changing @@ -584,7 +514,7 @@ accum_bitmap(struct gl_context *ctx, cache->ypos = y - py; cache->zpos = z; cache->empty = GL_FALSE; - COPY_4FV(cache->color, st->ctx->Current.RasterColor); + COPY_4FV(cache->color, ctx->Current.RasterColor); } assert(px != -999); @@ -645,6 +575,9 @@ init_bitmap_state(struct st_context *st) st->bitmap.sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; st->bitmap.sampler.normalized_coords = st->internal_target == PIPE_TEXTURE_2D; + st->bitmap.atlas_sampler = st->bitmap.sampler; + st->bitmap.atlas_sampler.normalized_coords = 0; + /* init baseline rasterizer state once */ memset(&st->bitmap.rasterizer, 0, sizeof(st->bitmap.rasterizer)); st->bitmap.rasterizer.half_pixel_center = 1; @@ -713,7 +646,7 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y, * explicitly uploaded in the draw_bitmap_quad() function. */ if ((st->dirty.mesa & ~_NEW_PROGRAM_CONSTANTS) || st->dirty.st) { - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); } if (UseBitmapCache && accum_bitmap(ctx, x, y, width, height, unpack, bitmap)) @@ -728,8 +661,7 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y, if (sv) { draw_bitmap_quad(ctx, x, y, ctx->Current.RasterPos[2], - width, height, sv, - st->ctx->Current.RasterColor); + width, height, sv, ctx->Current.RasterColor); pipe_sampler_view_reference(&sv, NULL); } @@ -740,11 +672,139 @@ st_Bitmap(struct gl_context *ctx, GLint x, GLint y, } +/** + * Called via ctx->Driver.DrawAtlasBitmap() + */ +static void +st_DrawAtlasBitmaps(struct gl_context *ctx, + const struct gl_bitmap_atlas *atlas, + GLuint count, const GLubyte *ids) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct st_texture_object *stObj = st_texture_object(atlas->texObj); + struct pipe_sampler_view *sv; + /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */ + const float z = ctx->Current.RasterPos[2] * 2.0f - 1.0f; + const float *color = ctx->Current.RasterColor; + const float clip_x_scale = 2.0f / st->state.framebuffer.width; + const float clip_y_scale = 2.0f / st->state.framebuffer.height; + const unsigned num_verts = count * 4; + const unsigned num_vert_bytes = num_verts * sizeof(struct st_util_vertex); + struct st_util_vertex *verts; + struct pipe_vertex_buffer vb = {0}; + unsigned i; + + if (!st->bitmap.cache) { + init_bitmap_state(st); + } + + st_flush_bitmap_cache(st); + + st_validate_state(st, ST_PIPELINE_RENDER); + + sv = st_create_texture_sampler_view(pipe, stObj->pt); + + setup_render_state(ctx, sv, color, true); + + vb.stride = sizeof(struct st_util_vertex); + + u_upload_alloc(st->uploader, 0, num_vert_bytes, 4, + &vb.buffer_offset, &vb.buffer, (void **) &verts); + + /* build quads vertex data */ + for (i = 0; i < count; i++) { + const GLfloat epsilon = 0.0001F; + const struct gl_bitmap_glyph *g = &atlas->glyphs[ids[i]]; + const float xmove = g->xmove, ymove = g->ymove; + const float xorig = g->xorig, yorig = g->yorig; + const float s0 = g->x, t0 = g->y; + const float s1 = s0 + g->w, t1 = t0 + g->h; + const float x0 = IFLOOR(ctx->Current.RasterPos[0] - xorig + epsilon); + const float y0 = IFLOOR(ctx->Current.RasterPos[1] - yorig + epsilon); + const float x1 = x0 + g->w, y1 = y0 + g->h; + const float clip_x0 = x0 * clip_x_scale - 1.0f; + const float clip_y0 = y0 * clip_y_scale - 1.0f; + const float clip_x1 = x1 * clip_x_scale - 1.0f; + const float clip_y1 = y1 * clip_y_scale - 1.0f; + + /* lower-left corner */ + verts->x = clip_x0; + verts->y = clip_y0; + verts->z = z; + verts->r = color[0]; + verts->g = color[1]; + verts->b = color[2]; + verts->a = color[3]; + verts->s = s0; + verts->t = t0; + verts++; + + /* lower-right corner */ + verts->x = clip_x1; + verts->y = clip_y0; + verts->z = z; + verts->r = color[0]; + verts->g = color[1]; + verts->b = color[2]; + verts->a = color[3]; + verts->s = s1; + verts->t = t0; + verts++; + + /* upper-right corner */ + verts->x = clip_x1; + verts->y = clip_y1; + verts->z = z; + verts->r = color[0]; + verts->g = color[1]; + verts->b = color[2]; + verts->a = color[3]; + verts->s = s1; + verts->t = t1; + verts++; + + /* upper-left corner */ + verts->x = clip_x0; + verts->y = clip_y1; + verts->z = z; + verts->r = color[0]; + verts->g = color[1]; + verts->b = color[2]; + verts->a = color[3]; + verts->s = s0; + verts->t = t1; + verts++; + + /* Update the raster position */ + ctx->Current.RasterPos[0] += xmove; + ctx->Current.RasterPos[1] += ymove; + } + + u_upload_unmap(st->uploader); + + cso_set_vertex_buffers(st->cso_context, + cso_get_aux_vertex_buffer_slot(st->cso_context), + 1, &vb); + + cso_draw_arrays(st->cso_context, PIPE_PRIM_QUADS, 0, num_verts); + + restore_render_state(ctx); + + pipe_resource_reference(&vb.buffer, NULL); + + /* We uploaded modified constants, need to invalidate them. */ + st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS; +} + + + /** Per-context init */ void st_init_bitmap_functions(struct dd_function_table *functions) { functions->Bitmap = st_Bitmap; + functions->DrawAtlasBitmaps = st_DrawAtlasBitmaps; } diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index 7b6d10e76b1..55801469f23 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -44,6 +44,7 @@ #include "st_cb_bitmap.h" #include "st_cb_clear.h" #include "st_cb_fbo.h" +#include "st_draw.h" #include "st_format.h" #include "st_program.h" @@ -55,8 +56,6 @@ #include "util/u_framebuffer.h" #include "util/u_inlines.h" #include "util/u_simple_shaders.h" -#include "util/u_draw_quad.h" -#include "util/u_upload_mgr.h" #include "cso_cache/cso_context.h" @@ -169,67 +168,6 @@ set_vertex_shader_layered(struct st_context *st) /** - * Draw a screen-aligned quadrilateral. - * Coords are clip coords with y=0=bottom. - */ -static void -draw_quad(struct st_context *st, - float x0, float y0, float x1, float y1, GLfloat z, - unsigned num_instances, - const union pipe_color_union *color) -{ - struct cso_context *cso = st->cso_context; - struct pipe_vertex_buffer vb = {0}; - GLuint i; - float (*vertices)[2][4]; /**< vertex pos + color */ - - vb.stride = 8 * sizeof(float); - - u_upload_alloc(st->uploader, 0, 4 * sizeof(vertices[0]), 4, - &vb.buffer_offset, &vb.buffer, - (void **) &vertices); - if (!vb.buffer) { - return; - } - - /* Convert Z from [0,1] to [-1,1] range */ - z = z * 2.0f - 1.0f; - - /* positions */ - vertices[0][0][0] = x0; - vertices[0][0][1] = y0; - - vertices[1][0][0] = x1; - vertices[1][0][1] = y0; - - vertices[2][0][0] = x1; - vertices[2][0][1] = y1; - - vertices[3][0][0] = x0; - vertices[3][0][1] = y1; - - /* same for all verts: */ - for (i = 0; i < 4; i++) { - vertices[i][0][2] = z; - vertices[i][0][3] = 1.0; - vertices[i][1][0] = color->f[0]; - vertices[i][1][1] = color->f[1]; - vertices[i][1][2] = color->f[2]; - vertices[i][1][3] = color->f[3]; - } - - u_upload_unmap(st->uploader); - - /* draw */ - cso_set_vertex_buffers(cso, cso_get_aux_vertex_buffer_slot(cso), 1, &vb); - cso_draw_arrays_instanced(cso, PIPE_PRIM_TRIANGLE_FAN, 0, 4, - 0, num_instances); - pipe_resource_reference(&vb.buffer, NULL); -} - - - -/** * Do glClear by drawing a quadrilateral. * The vertices of the quad will be computed from the * ctx->DrawBuffer->_X/Ymin/max fields. @@ -238,6 +176,7 @@ static void clear_with_quad(struct gl_context *ctx, unsigned clear_buffers) { struct st_context *st = st_context(ctx); + struct cso_context *cso = st->cso_context; const struct gl_framebuffer *fb = ctx->DrawBuffer; const GLfloat fb_width = (GLfloat) fb->Width; const GLfloat fb_height = (GLfloat) fb->Height; @@ -257,21 +196,17 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers) x1, y1); */ - cso_save_blend(st->cso_context); - cso_save_stencil_ref(st->cso_context); - cso_save_depth_stencil_alpha(st->cso_context); - cso_save_rasterizer(st->cso_context); - cso_save_sample_mask(st->cso_context); - cso_save_min_samples(st->cso_context); - cso_save_viewport(st->cso_context); - cso_save_fragment_shader(st->cso_context); - cso_save_stream_outputs(st->cso_context); - cso_save_vertex_shader(st->cso_context); - cso_save_tessctrl_shader(st->cso_context); - cso_save_tesseval_shader(st->cso_context); - cso_save_geometry_shader(st->cso_context); - cso_save_vertex_elements(st->cso_context); - cso_save_aux_vertex_buffer_slot(st->cso_context); + cso_save_state(cso, (CSO_BIT_BLEND | + CSO_BIT_STENCIL_REF | + CSO_BIT_DEPTH_STENCIL_ALPHA | + CSO_BIT_RASTERIZER | + CSO_BIT_SAMPLE_MASK | + CSO_BIT_MIN_SAMPLES | + CSO_BIT_VIEWPORT | + CSO_BIT_STREAM_OUTPUTS | + CSO_BIT_VERTEX_ELEMENTS | + CSO_BIT_AUX_VERTEX_BUFFER_SLOT | + CSO_BITS_ALL_SHADERS)); /* blend state: RGBA masking */ { @@ -298,10 +233,10 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers) blend.rt[i].colormask |= PIPE_MASK_A; } - if (st->ctx->Color.DitherFlag) + if (ctx->Color.DitherFlag) blend.dither = 1; } - cso_set_blend(st->cso_context, &blend); + cso_set_blend(cso, &blend); } /* depth_stencil state: always pass/set to ref value */ @@ -325,64 +260,49 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers) depth_stencil.stencil[0].valuemask = 0xff; depth_stencil.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff; stencil_ref.ref_value[0] = ctx->Stencil.Clear; - cso_set_stencil_ref(st->cso_context, &stencil_ref); + cso_set_stencil_ref(cso, &stencil_ref); } - cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil); + cso_set_depth_stencil_alpha(cso, &depth_stencil); } - cso_set_vertex_elements(st->cso_context, 2, st->velems_util_draw); - cso_set_stream_outputs(st->cso_context, 0, NULL, NULL); - cso_set_sample_mask(st->cso_context, ~0); - cso_set_min_samples(st->cso_context, 1); - cso_set_rasterizer(st->cso_context, &st->clear.raster); + cso_set_vertex_elements(cso, 2, st->util_velems); + cso_set_stream_outputs(cso, 0, NULL, NULL); + cso_set_sample_mask(cso, ~0); + cso_set_min_samples(cso, 1); + cso_set_rasterizer(cso, &st->clear.raster); /* viewport state: viewport matching window dims */ - { - const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP); - struct pipe_viewport_state vp; - vp.scale[0] = 0.5f * fb_width; - vp.scale[1] = fb_height * (invert ? -0.5f : 0.5f); - vp.scale[2] = 0.5f; - vp.translate[0] = 0.5f * fb_width; - vp.translate[1] = 0.5f * fb_height; - vp.translate[2] = 0.5f; - cso_set_viewport(st->cso_context, &vp); - } + cso_set_viewport_dims(st->cso_context, fb_width, fb_height, + st_fb_orientation(fb) == Y_0_TOP); set_fragment_shader(st); - cso_set_tessctrl_shader_handle(st->cso_context, NULL); - cso_set_tesseval_shader_handle(st->cso_context, NULL); + cso_set_tessctrl_shader_handle(cso, NULL); + cso_set_tesseval_shader_handle(cso, NULL); if (num_layers > 1) set_vertex_shader_layered(st); else set_vertex_shader(st); - /* We can't translate the clear color to the colorbuffer format, + /* draw quad matching scissor rect. + * + * Note: if we're only clearing depth/stencil we still setup vertices + * with color, but they'll be ignored. + * + * We can't translate the clear color to the colorbuffer format, * because different colorbuffers may have different formats. */ - - /* draw quad matching scissor rect */ - draw_quad(st, x0, y0, x1, y1, (GLfloat) ctx->Depth.Clear, num_layers, - (union pipe_color_union*)&ctx->Color.ClearColor); + if (!st_draw_quad(st, x0, y0, x1, y1, + ctx->Depth.Clear * 2.0f - 1.0f, + 0.0f, 0.0f, 0.0f, 0.0f, + (const float *) &ctx->Color.ClearColor.f, + num_layers)) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glClear"); + } /* Restore pipe state */ - cso_restore_blend(st->cso_context); - cso_restore_stencil_ref(st->cso_context); - cso_restore_depth_stencil_alpha(st->cso_context); - cso_restore_rasterizer(st->cso_context); - cso_restore_sample_mask(st->cso_context); - cso_restore_min_samples(st->cso_context); - cso_restore_viewport(st->cso_context); - cso_restore_fragment_shader(st->cso_context); - cso_restore_vertex_shader(st->cso_context); - cso_restore_tessctrl_shader(st->cso_context); - cso_restore_tesseval_shader(st->cso_context); - cso_restore_geometry_shader(st->cso_context); - cso_restore_vertex_elements(st->cso_context); - cso_restore_aux_vertex_buffer_slot(st->cso_context); - cso_restore_stream_outputs(st->cso_context); + cso_restore_state(cso); } @@ -470,7 +390,7 @@ st_Clear(struct gl_context *ctx, GLbitfield mask) st_flush_bitmap_cache(st); /* This makes sure the pipe has the latest scissor, etc values */ - st_validate_state( st ); + st_validate_state( st, ST_PIPELINE_RENDER ); if (mask & BUFFER_BITS_COLOR) { for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { diff --git a/src/mesa/state_tracker/st_cb_compute.c b/src/mesa/state_tracker/st_cb_compute.c new file mode 100644 index 00000000000..364159d62d8 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_compute.c @@ -0,0 +1,85 @@ +/************************************************************************** + * + * Copyright 2016 Samuel Pitoiset + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/state.h" +#include "st_atom.h" +#include "st_context.h" +#include "st_cb_bufferobjects.h" +#include "st_cb_compute.h" + +#include "pipe/p_context.h" + +static void st_dispatch_compute_common(struct gl_context *ctx, + const GLuint *num_groups, + struct pipe_resource *indirect, + GLintptr indirect_offset) +{ + struct gl_shader_program *prog = + ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct pipe_grid_info info = { 0 }; + + if (ctx->NewState) + _mesa_update_state(ctx); + + if (st->dirty_cp.st || ctx->NewDriverState) + st_validate_state(st, ST_PIPELINE_COMPUTE); + + for (unsigned i = 0; i < 3; i++) { + info.block[i] = prog->Comp.LocalSize[i]; + info.grid[i] = num_groups ? num_groups[i] : 0; + } + + if (indirect) { + info.indirect = indirect; + info.indirect_offset = indirect_offset; + } + + pipe->launch_grid(pipe, &info); +} + +static void st_dispatch_compute(struct gl_context *ctx, + const GLuint *num_groups) +{ + st_dispatch_compute_common(ctx, num_groups, NULL, 0); +} + +static void st_dispatch_compute_indirect(struct gl_context *ctx, + GLintptr indirect_offset) +{ + struct gl_buffer_object *indirect_buffer = ctx->DispatchIndirectBuffer; + struct pipe_resource *indirect = st_buffer_object(indirect_buffer)->buffer; + + st_dispatch_compute_common(ctx, NULL, indirect, indirect_offset); +} + +void st_init_compute_functions(struct dd_function_table *functions) +{ + functions->DispatchCompute = st_dispatch_compute; + functions->DispatchComputeIndirect = st_dispatch_compute_indirect; +} diff --git a/src/mesa/state_tracker/st_cb_compute.h b/src/mesa/state_tracker/st_cb_compute.h new file mode 100644 index 00000000000..78ec756a69e --- /dev/null +++ b/src/mesa/state_tracker/st_cb_compute.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2016 Samuel Pitoiset + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef ST_CB_COMPUTE_H +#define ST_CB_COMPUTE_H + +#include "main/compiler.h" + +struct dd_function_table; + +extern void +st_init_compute_functions(struct dd_function_table *functions); + +#endif /* ST_CB_COMPUTE_H */ diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index fd58886a782..51d4ae51918 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -35,6 +35,7 @@ #include "main/bufferobj.h" #include "main/blit.h" #include "main/format_pack.h" +#include "main/framebuffer.h" #include "main/macros.h" #include "main/mtypes.h" #include "main/pack.h" @@ -56,6 +57,7 @@ #include "st_cb_fbo.h" #include "st_context.h" #include "st_debug.h" +#include "st_draw.h" #include "st_format.h" #include "st_program.h" #include "st_texture.h" @@ -63,16 +65,45 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "tgsi/tgsi_ureg.h" -#include "util/u_draw_quad.h" #include "util/u_format.h" #include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_tile.h" -#include "util/u_upload_mgr.h" #include "cso_cache/cso_context.h" /** + * We have a simple glDrawPixels cache to try to optimize the case where the + * same image is drawn over and over again. It basically works as follows: + * + * 1. After we construct a texture map with the image and draw it, we do + * not discard the texture. We keep it around, plus we note the + * glDrawPixels width, height, format, etc. parameters and keep a copy + * of the image in a malloc'd buffer. + * + * 2. On the next glDrawPixels we check if the parameters match the previous + * call. If those match, we check if the image matches the previous image + * via a memcmp() call. If everything matches, we re-use the previous + * texture, thereby avoiding the cost creating a new texture and copying + * the image to it. + * + * The effectiveness of this cache depends upon: + * 1. If the memcmp() finds a difference, it happens relatively quickly. + Hopefully, not just the last pixels differ! + * 2. If the memcmp() finds no difference, doing that check is faster than + * creating and loading a texture. + * + * Notes: + * 1. We don't support any pixel unpacking parameters. + * 2. We don't try to cache images in Pixel Buffer Objects. + * 3. Instead of saving the whole image, perhaps some sort of reliable + * checksum function could be used instead. + */ +#define USE_DRAWPIXELS_CACHE 1 + + + +/** * Create fragment program that does a TEX() instruction to get a Z and/or * stencil value value, then writes to FRAG_RESULT_DEPTH/FRAG_RESULT_STENCIL. * Used for glDrawPixels(GL_DEPTH_COMPONENT / GL_STENCIL_INDEX). @@ -162,22 +193,22 @@ make_passthrough_vertex_shader(struct st_context *st, return NULL; /* MOV result.pos, vertex.pos; */ - ureg_MOV(ureg, + ureg_MOV(ureg, ureg_DECL_output( ureg, TGSI_SEMANTIC_POSITION, 0 ), ureg_DECL_vs_input( ureg, 0 )); - - /* MOV result.texcoord0, vertex.attr[1]; */ - ureg_MOV(ureg, - ureg_DECL_output( ureg, texcoord_semantic, 0 ), - ureg_DECL_vs_input( ureg, 1 )); - + if (passColor) { - /* MOV result.color0, vertex.attr[2]; */ - ureg_MOV(ureg, + /* MOV result.color0, vertex.attr[1]; */ + ureg_MOV(ureg, ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, 0 ), - ureg_DECL_vs_input( ureg, 2 )); + ureg_DECL_vs_input( ureg, 1 )); } + /* MOV result.texcoord0, vertex.attr[2]; */ + ureg_MOV(ureg, + ureg_DECL_output( ureg, texcoord_semantic, 0 ), + ureg_DECL_vs_input( ureg, 2 )); + ureg_END( ureg ); st->drawpix.vert_shaders[passColor] = @@ -347,6 +378,39 @@ make_texture(struct st_context *st, enum pipe_format pipeFormat; GLenum baseInternalFormat; +#if USE_DRAWPIXELS_CACHE + const GLint bpp = _mesa_bytes_per_pixel(format, type); + + /* Check if the glDrawPixels() parameters and state matches the cache */ + if (width == st->drawpix_cache.width && + height == st->drawpix_cache.height && + format == st->drawpix_cache.format && + type == st->drawpix_cache.type && + pixels == st->drawpix_cache.user_pointer && + !_mesa_is_bufferobj(unpack->BufferObj) && + (unpack->RowLength == 0 || unpack->RowLength == width) && + unpack->SkipPixels == 0 && + unpack->SkipRows == 0 && + unpack->SwapBytes == GL_FALSE && + st->drawpix_cache.image) { + /* check if the pixel data is the same */ + if (memcmp(pixels, st->drawpix_cache.image, width * height * bpp) == 0) { + /* OK, re-use the cached texture */ + return st->drawpix_cache.texture; + } + } + + /* discard the cached image and texture (if there is one) */ + st->drawpix_cache.width = 0; + st->drawpix_cache.height = 0; + st->drawpix_cache.user_pointer = NULL; + if (st->drawpix_cache.image) { + free(st->drawpix_cache.image); + st->drawpix_cache.image = NULL; + } + pipe_resource_reference(&st->drawpix_cache.texture, NULL); +#endif + /* Choose a pixel format for the temp texture which will hold the * image to draw. */ @@ -437,107 +501,29 @@ make_texture(struct st_context *st, _mesa_unmap_pbo_source(ctx, unpack); - return pt; -} - - -/** - * Draw quad with texcoords and optional color. - * Coords are gallium window coords with y=0=top. - * \param color may be null - * \param invertTex if true, flip texcoords vertically - */ -static void -draw_quad(struct gl_context *ctx, GLfloat x0, GLfloat y0, GLfloat z, - GLfloat x1, GLfloat y1, const GLfloat *color, - GLboolean invertTex, GLfloat maxXcoord, GLfloat maxYcoord) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - GLfloat (*verts)[3][4]; /* four verts, three attribs, XYZW */ - struct pipe_resource *buf = NULL; - unsigned offset; - - u_upload_alloc(st->uploader, 0, 4 * sizeof(verts[0]), 4, &offset, - &buf, (void **) &verts); - if (!buf) { - return; - } - - /* setup vertex data */ - { - const struct gl_framebuffer *fb = st->ctx->DrawBuffer; - const GLfloat fb_width = (GLfloat) fb->Width; - const GLfloat fb_height = (GLfloat) fb->Height; - const GLfloat clip_x0 = x0 / fb_width * 2.0f - 1.0f; - const GLfloat clip_y0 = y0 / fb_height * 2.0f - 1.0f; - const GLfloat clip_x1 = x1 / fb_width * 2.0f - 1.0f; - const GLfloat clip_y1 = y1 / fb_height * 2.0f - 1.0f; - const GLfloat sLeft = 0.0f, sRight = maxXcoord; - const GLfloat tTop = invertTex ? maxYcoord : 0.0f; - const GLfloat tBot = invertTex ? 0.0f : maxYcoord; - GLuint i; - - /* upper-left */ - verts[0][0][0] = clip_x0; /* v[0].attr[0].x */ - verts[0][0][1] = clip_y0; /* v[0].attr[0].y */ - - /* upper-right */ - verts[1][0][0] = clip_x1; - verts[1][0][1] = clip_y0; - - /* lower-right */ - verts[2][0][0] = clip_x1; - verts[2][0][1] = clip_y1; - - /* lower-left */ - verts[3][0][0] = clip_x0; - verts[3][0][1] = clip_y1; - - verts[0][1][0] = sLeft; /* v[0].attr[1].S */ - verts[0][1][1] = tTop; /* v[0].attr[1].T */ - verts[1][1][0] = sRight; - verts[1][1][1] = tTop; - verts[2][1][0] = sRight; - verts[2][1][1] = tBot; - verts[3][1][0] = sLeft; - verts[3][1][1] = tBot; - - /* same for all verts: */ - if (color) { - for (i = 0; i < 4; i++) { - verts[i][0][2] = z; /* v[i].attr[0].z */ - verts[i][0][3] = 1.0f; /* v[i].attr[0].w */ - verts[i][2][0] = color[0]; /* v[i].attr[2].r */ - verts[i][2][1] = color[1]; /* v[i].attr[2].g */ - verts[i][2][2] = color[2]; /* v[i].attr[2].b */ - verts[i][2][3] = color[3]; /* v[i].attr[2].a */ - verts[i][1][2] = 0.0f; /* v[i].attr[1].R */ - verts[i][1][3] = 1.0f; /* v[i].attr[1].Q */ - } - } - else { - for (i = 0; i < 4; i++) { - verts[i][0][2] = z; /*Z*/ - verts[i][0][3] = 1.0f; /*W*/ - verts[i][1][2] = 0.0f; /*R*/ - verts[i][1][3] = 1.0f; /*Q*/ - } +#if USE_DRAWPIXELS_CACHE + /* Save the glDrawPixels parameter and image in the cache */ + if ((unpack->RowLength == 0 || unpack->RowLength == width) && + unpack->SkipPixels == 0 && + unpack->SkipRows == 0) { + st->drawpix_cache.width = width; + st->drawpix_cache.height = height; + st->drawpix_cache.format = format; + st->drawpix_cache.type = type; + st->drawpix_cache.user_pointer = pixels; + assert(!st->drawpix_cache.image); + st->drawpix_cache.image = malloc(width * height * bpp); + if (st->drawpix_cache.image) { + memcpy(st->drawpix_cache.image, pixels, width * height * bpp); } + st->drawpix_cache.texture = pt; } +#endif - u_upload_unmap(st->uploader); - util_draw_vertex_buffer(pipe, st->cso_context, buf, - cso_get_aux_vertex_buffer_slot(st->cso_context), - offset, - PIPE_PRIM_QUADS, - 4, /* verts */ - 3); /* attribs/vert */ - pipe_resource_reference(&buf, NULL); + return pt; } - static void draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, GLsizei width, GLsizei height, @@ -554,9 +540,12 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, struct st_context *st = st_context(ctx); struct pipe_context *pipe = st->pipe; struct cso_context *cso = st->cso_context; + const unsigned fb_width = _mesa_geometric_width(ctx->DrawBuffer); + const unsigned fb_height = _mesa_geometric_height(ctx->DrawBuffer); GLfloat x0, y0, x1, y1; GLsizei maxSize; boolean normalized = sv[0]->texture->target == PIPE_TEXTURE_2D; + unsigned cso_state_mask; assert(sv[0]->texture->target == st->internal_target); @@ -569,22 +558,19 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, assert(width <= maxSize); assert(height <= maxSize); - cso_save_rasterizer(cso); - cso_save_viewport(cso); - cso_save_fragment_samplers(cso); - cso_save_fragment_sampler_views(cso); - cso_save_fragment_shader(cso); - cso_save_stream_outputs(cso); - cso_save_vertex_shader(cso); - cso_save_tessctrl_shader(cso); - cso_save_tesseval_shader(cso); - cso_save_geometry_shader(cso); - cso_save_vertex_elements(cso); - cso_save_aux_vertex_buffer_slot(cso); + cso_state_mask = (CSO_BIT_RASTERIZER | + CSO_BIT_VIEWPORT | + CSO_BIT_FRAGMENT_SAMPLERS | + CSO_BIT_FRAGMENT_SAMPLER_VIEWS | + CSO_BIT_STREAM_OUTPUTS | + CSO_BIT_VERTEX_ELEMENTS | + CSO_BIT_AUX_VERTEX_BUFFER_SLOT | + CSO_BITS_ALL_SHADERS); if (write_stencil) { - cso_save_depth_stencil_alpha(cso); - cso_save_blend(cso); + cso_state_mask |= (CSO_BIT_DEPTH_STENCIL_ALPHA | + CSO_BIT_BLEND); } + cso_save_state(cso, cso_state_mask); /* rasterizer state: just scissor */ { @@ -694,28 +680,17 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, } /* viewport state: viewport matching window dims */ - { - const float w = (float) ctx->DrawBuffer->Width; - const float h = (float) ctx->DrawBuffer->Height; - struct pipe_viewport_state vp; - vp.scale[0] = 0.5f * w; - vp.scale[1] = -0.5f * h; - vp.scale[2] = 0.5f; - vp.translate[0] = 0.5f * w; - vp.translate[1] = 0.5f * h; - vp.translate[2] = 0.5f; - cso_set_viewport(cso, &vp); - } + cso_set_viewport_dims(cso, fb_width, fb_height, TRUE); - cso_set_vertex_elements(cso, 3, st->velems_util_draw); - cso_set_stream_outputs(st->cso_context, 0, NULL, NULL); + cso_set_vertex_elements(cso, 3, st->util_velems); + cso_set_stream_outputs(cso, 0, NULL, NULL); /* Compute Gallium window coords (y=0=top) with pixel zoom. * Recall that these coords are transformed by the current * vertex shader and viewport transformation. */ if (st_fb_orientation(ctx->DrawBuffer) == Y_0_BOTTOM) { - y = ctx->DrawBuffer->Height - (int) (y + height * ctx->Pixel.ZoomY); + y = fb_height - (int) (y + height * ctx->Pixel.ZoomY); invertTex = !invertTex; } @@ -727,27 +702,27 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */ z = z * 2.0f - 1.0f; - draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex, - normalized ? ((GLfloat) width / sv[0]->texture->width0) : (GLfloat)width, - normalized ? ((GLfloat) height / sv[0]->texture->height0) : (GLfloat)height); + { + const float clip_x0 = x0 / (float) fb_width * 2.0f - 1.0f; + const float clip_y0 = y0 / (float) fb_height * 2.0f - 1.0f; + const float clip_x1 = x1 / (float) fb_width * 2.0f - 1.0f; + const float clip_y1 = y1 / (float) fb_height * 2.0f - 1.0f; + const float maxXcoord = normalized ? + ((float) width / sv[0]->texture->width0) : (float) width; + const float maxYcoord = normalized + ? ((float) height / sv[0]->texture->height0) : (float) height; + const float sLeft = 0.0f, sRight = maxXcoord; + const float tTop = invertTex ? maxYcoord : 0.0f; + const float tBot = invertTex ? 0.0f : maxYcoord; + + if (!st_draw_quad(st, clip_x0, clip_y0, clip_x1, clip_y1, z, + sLeft, tBot, sRight, tTop, color, 0)) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels"); + } + } /* restore state */ - cso_restore_rasterizer(cso); - cso_restore_viewport(cso); - cso_restore_fragment_samplers(cso); - cso_restore_fragment_sampler_views(cso); - cso_restore_fragment_shader(cso); - cso_restore_vertex_shader(cso); - cso_restore_tessctrl_shader(cso); - cso_restore_tesseval_shader(cso); - cso_restore_geometry_shader(cso); - cso_restore_vertex_elements(cso); - cso_restore_aux_vertex_buffer_slot(cso); - cso_restore_stream_outputs(cso); - if (write_stencil) { - cso_restore_depth_stencil_alpha(cso); - cso_restore_blend(cso); - } + cso_restore_state(cso); } @@ -956,7 +931,7 @@ get_color_fp_variant(struct st_context *st) ctx->Pixel.AlphaScale != 1.0); key.pixelMaps = ctx->Pixel.MapColorFlag; key.clamp_color = st->clamp_frag_color_in_shader && - st->ctx->Color._ClampFragmentColor; + ctx->Color._ClampFragmentColor; fpv = st_get_fp_variant(st, st->fp, &key); @@ -1060,7 +1035,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, { void *driver_vp, *driver_fp; struct st_context *st = st_context(ctx); - const GLfloat *color; struct pipe_context *pipe = st->pipe; GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE; struct pipe_sampler_view *sv[2] = { NULL }; @@ -1074,7 +1048,7 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, st_flush_bitmap_cache(st); - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); /* Limit the size of the glDrawPixels to the max texture size. * Strictly speaking, that's not correct but since we don't handle @@ -1106,7 +1080,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, driver_fp = get_drawpix_z_stencil_program(st, write_depth, write_stencil); driver_vp = make_passthrough_vertex_shader(st, GL_TRUE); - color = ctx->Current.RasterColor; } else { fpv = get_color_fp_variant(st); @@ -1114,7 +1087,6 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, driver_fp = fpv->driver_shader; driver_vp = make_passthrough_vertex_shader(st, GL_FALSE); - color = NULL; if (ctx->Pixel.MapColorFlag) { pipe_sampler_view_reference(&sv[1], st->pixel_xfer.pixelmap_sampler_view); @@ -1172,12 +1144,15 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, num_sampler_view, driver_vp, driver_fp, fpv, - color, GL_FALSE, write_depth, write_stencil); + ctx->Current.RasterColor, + GL_FALSE, write_depth, write_stencil); pipe_sampler_view_reference(&sv[0], NULL); if (num_sampler_view > 1) pipe_sampler_view_reference(&sv[1], NULL); +#if !USE_DRAWPIXELS_CACHE pipe_resource_reference(&pt, NULL); +#endif } @@ -1427,7 +1402,6 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, struct pipe_sampler_view *sv[2] = { NULL }; struct st_fp_variant *fpv = NULL; int num_sampler_view = 1; - GLfloat *color; enum pipe_format srcFormat; unsigned srcBind; GLboolean invertTex = GL_FALSE; @@ -1436,7 +1410,7 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, st_flush_bitmap_cache(st); - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); if (type == GL_DEPTH_STENCIL) { /* XXX make this more efficient */ @@ -1469,7 +1443,6 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, fpv = get_color_fp_variant(st); rbRead = st_get_color_read_renderbuffer(ctx); - color = NULL; driver_fp = fpv->driver_shader; driver_vp = make_passthrough_vertex_shader(st, GL_FALSE); @@ -1490,7 +1463,6 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, assert(type == GL_DEPTH); rbRead = st_renderbuffer(ctx->ReadBuffer-> Attachment[BUFFER_DEPTH].Renderbuffer); - color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0]; driver_fp = get_drawpix_z_stencil_program(st, GL_TRUE, GL_FALSE); driver_vp = make_passthrough_vertex_shader(st, GL_TRUE); @@ -1622,7 +1594,8 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, num_sampler_view, driver_vp, driver_fp, fpv, - color, invertTex, GL_FALSE, GL_FALSE); + ctx->Current.Attrib[VERT_ATTRIB_COLOR0], + invertTex, GL_FALSE, GL_FALSE); pipe_resource_reference(&pt, NULL); pipe_sampler_view_reference(&sv[0], NULL); diff --git a/src/mesa/state_tracker/st_cb_drawtex.c b/src/mesa/state_tracker/st_cb_drawtex.c index e6ab77fb521..a7926295277 100644 --- a/src/mesa/state_tracker/st_cb_drawtex.c +++ b/src/mesa/state_tracker/st_cb_drawtex.c @@ -49,6 +49,7 @@ struct cached_shader /** * Simple linear list cache. * Most of the time there'll only be one cached shader. + * XXX This should be per-st_context state. */ static struct cached_shader CachedShaders[MAX_SHADERS]; static GLuint NumCachedShaders = 0; @@ -99,13 +100,14 @@ lookup_shader(struct pipe_context *pipe, return CachedShaders[i].handle; } + static void st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, GLfloat width, GLfloat height) { struct st_context *st = ctx->st; struct pipe_context *pipe = st->pipe; - struct cso_context *cso = ctx->st->cso_context; + struct cso_context *cso = st->cso_context; struct pipe_resource *vbuffer = NULL; GLuint i, numTexCoords, numAttribs; GLboolean emitColor; @@ -116,7 +118,7 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, st_flush_bitmap_cache(st); - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); /* determine if we need vertex color */ if (ctx->FragmentProgram._Current->Base.InputsRead & VARYING_BIT_COL0) @@ -150,7 +152,7 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, const GLfloat x0 = x, y0 = y, x1 = x + width, y1 = y + height; GLfloat *vbuf = NULL; - GLuint attr; + GLuint tex_attr; u_upload_alloc(st->uploader, 0, numAttribs * 4 * 4 * sizeof(GLfloat), 4, @@ -158,12 +160,12 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, if (!vbuffer) { return; } - + z = CLAMP(z, 0.0f, 1.0f); /* positions (in clip coords) */ { - const struct gl_framebuffer *fb = st->ctx->DrawBuffer; + const struct gl_framebuffer *fb = ctx->DrawBuffer; const GLfloat fb_width = (GLfloat)fb->Width; const GLfloat fb_height = (GLfloat)fb->Height; @@ -190,10 +192,10 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, SET_ATTRIB(3, 1, c[0], c[1], c[2], c[3]); semantic_names[1] = TGSI_SEMANTIC_COLOR; semantic_indexes[1] = 0; - attr = 2; + tex_attr = 2; } else { - attr = 1; + tex_attr = 1; } /* texcoords */ @@ -210,17 +212,17 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, const GLfloat t1 = (obj->CropRect[1] + obj->CropRect[3]) / ht; /*printf("crop texcoords: %g, %g .. %g, %g\n", s0, t0, s1, t1);*/ - SET_ATTRIB(0, attr, s0, t0, 0.0f, 1.0f); /* lower left */ - SET_ATTRIB(1, attr, s1, t0, 0.0f, 1.0f); /* lower right */ - SET_ATTRIB(2, attr, s1, t1, 0.0f, 1.0f); /* upper right */ - SET_ATTRIB(3, attr, s0, t1, 0.0f, 1.0f); /* upper left */ + SET_ATTRIB(0, tex_attr, s0, t0, 0.0f, 1.0f); /* lower left */ + SET_ATTRIB(1, tex_attr, s1, t0, 0.0f, 1.0f); /* lower right */ + SET_ATTRIB(2, tex_attr, s1, t1, 0.0f, 1.0f); /* upper right */ + SET_ATTRIB(3, tex_attr, s0, t1, 0.0f, 1.0f); /* upper left */ - semantic_names[attr] = st->needs_texcoord_semantic ? + semantic_names[tex_attr] = st->needs_texcoord_semantic ? TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC; /* XXX: should this use semantic index i instead of 0 ? */ - semantic_indexes[attr] = 0; + semantic_indexes[tex_attr] = 0; - attr++; + tex_attr++; } } @@ -229,15 +231,14 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, #undef SET_ATTRIB } - - cso_save_viewport(cso); - cso_save_stream_outputs(cso); - cso_save_vertex_shader(cso); - cso_save_tessctrl_shader(cso); - cso_save_tesseval_shader(cso); - cso_save_geometry_shader(cso); - cso_save_vertex_elements(cso); - cso_save_aux_vertex_buffer_slot(cso); + cso_save_state(cso, (CSO_BIT_VIEWPORT | + CSO_BIT_STREAM_OUTPUTS | + CSO_BIT_VERTEX_SHADER | + CSO_BIT_TESSCTRL_SHADER | + CSO_BIT_TESSEVAL_SHADER | + CSO_BIT_GEOMETRY_SHADER | + CSO_BIT_VERTEX_ELEMENTS | + CSO_BIT_AUX_VERTEX_BUFFER_SLOT)); { void *vs = lookup_shader(pipe, numAttribs, @@ -255,11 +256,11 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; } cso_set_vertex_elements(cso, numAttribs, velements); - cso_set_stream_outputs(st->cso_context, 0, NULL, NULL); + cso_set_stream_outputs(cso, 0, NULL, NULL); /* viewport state: viewport matching window dims */ { - const struct gl_framebuffer *fb = st->ctx->DrawBuffer; + const struct gl_framebuffer *fb = ctx->DrawBuffer; const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP); const GLfloat width = (GLfloat)fb->Width; const GLfloat height = (GLfloat)fb->Height; @@ -273,7 +274,6 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, cso_set_viewport(cso, &vp); } - util_draw_vertex_buffer(pipe, cso, vbuffer, cso_get_aux_vertex_buffer_slot(cso), offset, /* offset */ @@ -281,18 +281,10 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, 4, /* verts */ numAttribs); /* attribs/vert */ - pipe_resource_reference(&vbuffer, NULL); /* restore state */ - cso_restore_viewport(cso); - cso_restore_vertex_shader(cso); - cso_restore_tessctrl_shader(cso); - cso_restore_tesseval_shader(cso); - cso_restore_geometry_shader(cso); - cso_restore_vertex_elements(cso); - cso_restore_aux_vertex_buffer_slot(cso); - cso_restore_stream_outputs(cso); + cso_restore_state(cso); } diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 2a2eb0992c8..82ab914503b 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -44,6 +44,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" +#include "st_atom.h" #include "st_context.h" #include "st_cb_fbo.h" #include "st_cb_flush.h" @@ -711,9 +712,17 @@ st_ReadBuffer(struct gl_context *ctx, GLenum buffer) (void) buffer; - /* add the renderbuffer on demand */ - if (fb->_ColorReadBufferIndex >= 0) + /* Check if we need to allocate a front color buffer. + * Front buffers are often allocated on demand (other color buffers are + * always allocated in advance). + */ + if ((fb->_ColorReadBufferIndex == BUFFER_FRONT_LEFT || + fb->_ColorReadBufferIndex == BUFFER_FRONT_RIGHT) && + fb->Attachment[fb->_ColorReadBufferIndex].Type == GL_NONE) { + /* add the buffer */ st_manager_add_color_renderbuffer(st, fb, fb->_ColorReadBufferIndex); + st_validate_state(st, ST_PIPELINE_RENDER); + } } diff --git a/src/mesa/state_tracker/st_cb_msaa.c b/src/mesa/state_tracker/st_cb_msaa.c index e9955b62b8f..d581f2121b0 100644 --- a/src/mesa/state_tracker/st_cb_msaa.c +++ b/src/mesa/state_tracker/st_cb_msaa.c @@ -44,7 +44,7 @@ st_GetSamplePosition(struct gl_context *ctx, { struct st_context *st = st_context(ctx); - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); if (st->pipe->get_sample_position) st->pipe->get_sample_position(st->pipe, (unsigned) fb->Visual.samples, diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index ca493d84715..27cc0f3d154 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -74,6 +74,9 @@ st_bind_program(struct gl_context *ctx, GLenum target, struct gl_program *prog) case GL_TESS_EVALUATION_PROGRAM_NV: st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM; break; + case GL_COMPUTE_PROGRAM_NV: + st->dirty_cp.st |= ST_NEW_COMPUTE_PROGRAM; + break; } } @@ -92,6 +95,7 @@ st_use_program(struct gl_context *ctx, struct gl_shader_program *shProg) st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM; st->dirty.st |= ST_NEW_TESSCTRL_PROGRAM; st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM; + st->dirty_cp.st |= ST_NEW_COMPUTE_PROGRAM; } @@ -123,6 +127,10 @@ st_new_program(struct gl_context *ctx, GLenum target, GLuint id) struct st_tesseval_program *prog = ST_CALLOC_STRUCT(st_tesseval_program); return _mesa_init_gl_program(&prog->Base.Base, target, id); } + case GL_COMPUTE_PROGRAM_NV: { + struct st_compute_program *prog = ST_CALLOC_STRUCT(st_compute_program); + return _mesa_init_gl_program(&prog->Base.Base, target, id); + } default: assert(0); return NULL; @@ -195,6 +203,17 @@ st_delete_program(struct gl_context *ctx, struct gl_program *prog) free_glsl_to_tgsi_visitor(sttep->glsl_to_tgsi); } break; + case GL_COMPUTE_PROGRAM_NV: + { + struct st_compute_program *stcp = + (struct st_compute_program *) prog; + + st_release_cp_variants(st, stcp); + + if (stcp->glsl_to_tgsi) + free_glsl_to_tgsi_visitor(stcp->glsl_to_tgsi); + } + break; default: assert(0); /* problem */ } @@ -272,6 +291,17 @@ st_program_string_notify( struct gl_context *ctx, if (st->tep == sttep) st->dirty.st |= ST_NEW_TESSEVAL_PROGRAM; } + else if (target == GL_COMPUTE_PROGRAM_NV) { + struct st_compute_program *stcp = + (struct st_compute_program *) prog; + + st_release_cp_variants(st, stcp); + if (!st_translate_compute_program(st, stcp)) + return false; + + if (st->cp == stcp) + st->dirty_cp.st |= ST_NEW_COMPUTE_PROGRAM; + } if (ST_DEBUG & DEBUG_PRECOMPILE || st->shader_has_one_variant[stage]) diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c index 747b41464ae..eec72f8a412 100644 --- a/src/mesa/state_tracker/st_cb_rasterpos.c +++ b/src/mesa/state_tracker/st_cb_rasterpos.c @@ -248,7 +248,7 @@ st_RasterPos(struct gl_context *ctx, const GLfloat v[4]) draw_set_rasterize_stage(st->draw, st->rastpos_stage); /* make sure everything's up to date */ - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); /* This will get set only if rastpos_point(), above, gets called */ ctx->Current.RasterPosValid = GL_FALSE; diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index bb36e6969d6..5153c4bbba1 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -81,7 +81,7 @@ needs_integer_signed_unsigned_conversion(const struct gl_context *ctx, * we do here should be free in such cases. */ static void -st_readpixels(struct gl_context *ctx, GLint x, GLint y, +st_ReadPixels(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, const struct gl_pixelstore_attrib *pack, @@ -104,7 +104,7 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y, /* Validate state (to be sure we have up-to-date framebuffer surfaces) * and flush the bitmap cache prior to reading. */ - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); st_flush_bitmap_cache(st); if (!st->prefer_blit_based_texture_transfer) { @@ -257,5 +257,5 @@ fallback: void st_init_readpixels_functions(struct dd_function_table *functions) { - functions->ReadPixels = st_readpixels; + functions->ReadPixels = st_ReadPixels; } diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index d53126a9441..cfec627f10c 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -1272,10 +1272,11 @@ create_pbo_upload_fs(struct st_context *st) ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X)); } + /* temp0.w = 0 */ + ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_W), ureg_imm1u(ureg, 0)); + /* out = txf(sampler, temp0.x) */ - ureg_TXF(ureg, out, TGSI_TEXTURE_BUFFER, - ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X), - sampler); + ureg_TXF(ureg, out, TGSI_TEXTURE_BUFFER, ureg_src(temp0), sampler); ureg_release_temporary(ureg, temp0); @@ -1297,6 +1298,7 @@ try_pbo_upload_common(struct gl_context *ctx, unsigned image_height) { struct st_context *st = st_context(ctx); + struct cso_context *cso = st->cso_context; struct pipe_context *pipe = st->pipe; unsigned depth = surface->u.tex.last_layer - surface->u.tex.first_layer + 1; unsigned skip_pixels = 0; @@ -1333,6 +1335,20 @@ try_pbo_upload_common(struct gl_context *ctx, return false; } + cso_save_state(cso, (CSO_BIT_FRAGMENT_SAMPLER_VIEWS | + CSO_BIT_FRAGMENT_SAMPLERS | + CSO_BIT_VERTEX_ELEMENTS | + CSO_BIT_AUX_VERTEX_BUFFER_SLOT | + CSO_BIT_FRAMEBUFFER | + CSO_BIT_VIEWPORT | + CSO_BIT_BLEND | + CSO_BIT_DEPTH_STENCIL_ALPHA | + CSO_BIT_RASTERIZER | + CSO_BIT_STREAM_OUTPUTS | + CSO_BITS_ALL_SHADERS)); + cso_save_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT); + + /* Set up the sampler_view */ { unsigned first_element = buf_offset; @@ -1340,14 +1356,17 @@ try_pbo_upload_common(struct gl_context *ctx, + (upload_height - 1 + (depth - 1) * image_height) * stride; struct pipe_sampler_view templ; struct pipe_sampler_view *sampler_view; + struct pipe_sampler_state sampler = {0}; + const struct pipe_sampler_state *samplers[1] = {&sampler}; /* This should be ensured by Mesa before calling our callbacks */ assert((last_element + 1) * bytes_per_pixel <= buffer->width0); if (last_element - first_element > ctx->Const.MaxTextureBufferSize - 1) - return false; + goto fail; memset(&templ, 0, sizeof(templ)); + templ.target = PIPE_BUFFER; templ.format = src_format; templ.u.buf.first_element = first_element; templ.u.buf.last_element = last_element; @@ -1358,13 +1377,13 @@ try_pbo_upload_common(struct gl_context *ctx, sampler_view = pipe->create_sampler_view(pipe, buffer, &templ); if (sampler_view == NULL) - return false; + goto fail; - cso_save_fragment_sampler_views(st->cso_context); - cso_set_sampler_views(st->cso_context, PIPE_SHADER_FRAGMENT, 1, - &sampler_view); + cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 1, &sampler_view); pipe_sampler_view_reference(&sampler_view, NULL); + + cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 1, samplers); } /* Upload vertices */ @@ -1386,7 +1405,7 @@ try_pbo_upload_common(struct gl_context *ctx, u_upload_alloc(st->uploader, 0, 8 * sizeof(float), 4, &vbo.buffer_offset, &vbo.buffer, (void **) &verts); if (!verts) - goto fail_vertex_upload; + goto fail; verts[0] = x0; verts[1] = y0; @@ -1401,30 +1420,28 @@ try_pbo_upload_common(struct gl_context *ctx, velem.src_offset = 0; velem.instance_divisor = 0; - velem.vertex_buffer_index = cso_get_aux_vertex_buffer_slot(st->cso_context); + velem.vertex_buffer_index = cso_get_aux_vertex_buffer_slot(cso); velem.src_format = PIPE_FORMAT_R32G32_FLOAT; - cso_save_vertex_elements(st->cso_context); - cso_set_vertex_elements(st->cso_context, 1, &velem); + cso_set_vertex_elements(cso, 1, &velem); - cso_save_aux_vertex_buffer_slot(st->cso_context); - cso_set_vertex_buffers(st->cso_context, velem.vertex_buffer_index, - 1, &vbo); + cso_set_vertex_buffers(cso, velem.vertex_buffer_index, 1, &vbo); pipe_resource_reference(&vbo.buffer, NULL); } /* Upload constants */ + /* Note: the user buffer must be valid until draw time */ + struct { + int32_t xoffset; + int32_t yoffset; + int32_t stride; + int32_t image_size; + } constants; + { struct pipe_constant_buffer cb; - struct { - int32_t xoffset; - int32_t yoffset; - int32_t stride; - int32_t image_size; - } constants; - constants.xoffset = -xoffset + skip_pixels; constants.yoffset = -yoffset; constants.stride = stride; @@ -1434,10 +1451,10 @@ try_pbo_upload_common(struct gl_context *ctx, cb.buffer = NULL; cb.user_buffer = NULL; u_upload_data(st->constbuf_uploader, 0, sizeof(constants), - st->ctx->Const.UniformBufferOffsetAlignment, + ctx->Const.UniformBufferOffsetAlignment, &constants, &cb.buffer_offset, &cb.buffer); if (!cb.buffer) - goto fail_constant_upload; + goto fail; u_upload_unmap(st->constbuf_uploader); } else { @@ -1447,8 +1464,7 @@ try_pbo_upload_common(struct gl_context *ctx, } cb.buffer_size = sizeof(constants); - cso_save_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT); - cso_set_constant_buffer(st->cso_context, PIPE_SHADER_FRAGMENT, 0, &cb); + cso_set_constant_buffer(cso, PIPE_SHADER_FRAGMENT, 0, &cb); pipe_resource_reference(&cb.buffer, NULL); } @@ -1462,80 +1478,52 @@ try_pbo_upload_common(struct gl_context *ctx, fb.nr_cbufs = 1; pipe_surface_reference(&fb.cbufs[0], surface); - cso_save_framebuffer(st->cso_context); - cso_set_framebuffer(st->cso_context, &fb); + cso_set_framebuffer(cso, &fb); pipe_surface_reference(&fb.cbufs[0], NULL); } - /* Viewport state */ - { - struct pipe_viewport_state vp; - vp.scale[0] = 0.5f * surface->width; - vp.scale[1] = 0.5f * surface->height; - vp.scale[2] = 1.0f; - vp.translate[0] = 0.5f * surface->width; - vp.translate[1] = 0.5f * surface->height; - vp.translate[2] = 0.0f; - - cso_save_viewport(st->cso_context); - cso_set_viewport(st->cso_context, &vp); - } + cso_set_viewport_dims(cso, surface->width, surface->height, FALSE); /* Blend state */ - cso_save_blend(st->cso_context); - cso_set_blend(st->cso_context, &st->pbo_upload.blend); + cso_set_blend(cso, &st->pbo_upload.blend); + + /* Depth/stencil/alpha state */ + { + struct pipe_depth_stencil_alpha_state dsa; + memset(&dsa, 0, sizeof(dsa)); + cso_set_depth_stencil_alpha(cso, &dsa); + } /* Rasterizer state */ - cso_save_rasterizer(st->cso_context); - cso_set_rasterizer(st->cso_context, &st->pbo_upload.raster); + cso_set_rasterizer(cso, &st->pbo_upload.raster); /* Set up the shaders */ - cso_save_vertex_shader(st->cso_context); - cso_set_vertex_shader_handle(st->cso_context, st->pbo_upload.vs); + cso_set_vertex_shader_handle(cso, st->pbo_upload.vs); - cso_save_geometry_shader(st->cso_context); - cso_set_geometry_shader_handle(st->cso_context, - depth != 1 ? st->pbo_upload.gs : NULL); + cso_set_geometry_shader_handle(cso, depth != 1 ? st->pbo_upload.gs : NULL); - cso_save_tessctrl_shader(st->cso_context); - cso_set_tessctrl_shader_handle(st->cso_context, NULL); + cso_set_tessctrl_shader_handle(cso, NULL); - cso_save_tesseval_shader(st->cso_context); - cso_set_tesseval_shader_handle(st->cso_context, NULL); + cso_set_tesseval_shader_handle(cso, NULL); - cso_save_fragment_shader(st->cso_context); - cso_set_fragment_shader_handle(st->cso_context, st->pbo_upload.fs); + cso_set_fragment_shader_handle(cso, st->pbo_upload.fs); /* Disable stream output */ - cso_save_stream_outputs(st->cso_context); - cso_set_stream_outputs(st->cso_context, 0, NULL, 0); + cso_set_stream_outputs(cso, 0, NULL, 0); if (depth == 1) { - cso_draw_arrays(st->cso_context, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); + cso_draw_arrays(cso, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); } else { - cso_draw_arrays_instanced(st->cso_context, PIPE_PRIM_TRIANGLE_STRIP, + cso_draw_arrays_instanced(cso, PIPE_PRIM_TRIANGLE_STRIP, 0, 4, 0, depth); } success = true; - cso_restore_framebuffer(st->cso_context); - cso_restore_viewport(st->cso_context); - cso_restore_blend(st->cso_context); - cso_restore_rasterizer(st->cso_context); - cso_restore_vertex_shader(st->cso_context); - cso_restore_geometry_shader(st->cso_context); - cso_restore_tessctrl_shader(st->cso_context); - cso_restore_tesseval_shader(st->cso_context); - cso_restore_fragment_shader(st->cso_context); - cso_restore_stream_outputs(st->cso_context); - cso_restore_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT); -fail_constant_upload: - cso_restore_vertex_elements(st->cso_context); - cso_restore_aux_vertex_buffer_slot(st->cso_context); -fail_vertex_upload: - cso_restore_fragment_sampler_views(st->cso_context); +fail: + cso_restore_state(cso); + cso_restore_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT); return success; } @@ -2752,7 +2740,7 @@ st_finalize_texture(struct gl_context *ctx, { struct st_context *st = st_context(ctx); struct st_texture_object *stObj = st_texture_object(tObj); - const GLuint nr_faces = (stObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; + const GLuint nr_faces = _mesa_num_tex_faces(stObj->base.Target); GLuint face; const struct st_texture_image *firstImage; enum pipe_format firstImageFormat; diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 9016846b148..e3ddee660f7 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -43,6 +43,7 @@ #include "st_cb_blit.h" #include "st_cb_bufferobjects.h" #include "st_cb_clear.h" +#include "st_cb_compute.h" #include "st_cb_condrender.h" #include "st_cb_copyimage.h" #include "st_cb_drawpixels.h" @@ -138,8 +139,11 @@ void st_invalidate_state(struct gl_context * ctx, GLbitfield new_state) st->dirty.st |= ST_NEW_VERTEX_PROGRAM; } + /* Invalidate render and compute pipelines. */ st->dirty.mesa |= new_state; st->dirty.st |= ST_NEW_MESA; + st->dirty_cp.mesa |= new_state; + st->dirty_cp.st |= ST_NEW_MESA; /* This is the only core Mesa module we depend upon. * No longer use swrast, swsetup, tnl. @@ -182,6 +186,10 @@ st_destroy_context_priv(struct st_context *st) u_upload_destroy(st->constbuf_uploader); } + /* free glDrawPixels cache data */ + free(st->drawpix_cache.image); + pipe_resource_reference(&st->drawpix_cache.texture, NULL); + cso_destroy_context(st->cso_context); free( st ); } @@ -208,8 +216,11 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe, /* state tracker needs the VBO module */ _vbo_CreateContext(ctx); + /* Initialize render and compute pipelines flags */ st->dirty.mesa = ~0; st->dirty.st = ~0; + st->dirty_cp.mesa = ~0; + st->dirty_cp.st = ~0; /* Create upload manager for vertex data for glBitmap, glDrawPixels, * glClear, etc. @@ -241,16 +252,30 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe, else st->internal_target = PIPE_TEXTURE_RECT; - /* Vertex element objects used for drawing rectangles for glBitmap, - * glDrawPixels, glClear, etc. + /* Setup vertex element info for 'struct st_util_vertex'. */ - for (i = 0; i < ARRAY_SIZE(st->velems_util_draw); i++) { - memset(&st->velems_util_draw[i], 0, sizeof(struct pipe_vertex_element)); - st->velems_util_draw[i].src_offset = i * 4 * sizeof(float); - st->velems_util_draw[i].instance_divisor = 0; - st->velems_util_draw[i].vertex_buffer_index = - cso_get_aux_vertex_buffer_slot(st->cso_context); - st->velems_util_draw[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + { + const unsigned slot = cso_get_aux_vertex_buffer_slot(st->cso_context); + + /* If this assertion ever fails all state tracker calls to + * cso_get_aux_vertex_buffer_slot() should be audited. This + * particular call would have to be moved to just before each + * drawing call. + */ + assert(slot == 0); + + STATIC_ASSERT(sizeof(struct st_util_vertex) == 9 * sizeof(float)); + + memset(&st->util_velems, 0, sizeof(st->util_velems)); + st->util_velems[0].src_offset = 0; + st->util_velems[0].vertex_buffer_index = slot; + st->util_velems[0].src_format = PIPE_FORMAT_R32G32B32_FLOAT; + st->util_velems[1].src_offset = 3 * sizeof(float); + st->util_velems[1].vertex_buffer_index = slot; + st->util_velems[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + st->util_velems[2].src_offset = 7 * sizeof(float); + st->util_velems[2].vertex_buffer_index = slot; + st->util_velems[2].src_format = PIPE_FORMAT_R32G32_FLOAT; } /* we want all vertex data to be placed in buffer objects */ @@ -262,9 +287,9 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe, /* Need these flags: */ - st->ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; + ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - st->ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; + ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; st->has_stencil_export = screen->get_param(screen, PIPE_CAP_SHADER_STENCIL_EXPORT); @@ -328,8 +353,8 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe, /* called after _mesa_create_context/_mesa_init_point, fix default user * settable max point size up */ - st->ctx->Point.MaxSize = MAX2(ctx->Const.MaxPointSize, - ctx->Const.MaxPointSizeAA); + ctx->Point.MaxSize = MAX2(ctx->Const.MaxPointSize, + ctx->Const.MaxPointSizeAA); /* For vertex shaders, make sure not to emit saturate when SM 3.0 is not supported */ ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoSat = !st->has_shader_model3; @@ -377,6 +402,7 @@ static void st_init_driver_flags(struct gl_driver_flags *f) f->NewTextureBuffer = ST_NEW_SAMPLER_VIEWS; f->NewAtomicBuffer = ST_NEW_ATOMIC_BUFFER; f->NewShaderStorageBuffer = ST_NEW_STORAGE_BUFFER; + f->NewImageUnits = ST_NEW_IMAGE_UNITS; } struct st_context *st_create_context(gl_api api, struct pipe_context *pipe, @@ -440,6 +466,7 @@ void st_destroy_context( struct st_context *st ) st_reference_vertprog(st, &st->vp, NULL); st_reference_tesscprog(st, &st->tcp, NULL); st_reference_tesseprog(st, &st->tep, NULL); + st_reference_compprog(st, &st->cp, NULL); /* release framebuffer surfaces */ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { @@ -449,7 +476,7 @@ void st_destroy_context( struct st_context *st ) pipe_sampler_view_reference(&st->pixel_xfer.pixelmap_sampler_view, NULL); pipe_resource_reference(&st->pixel_xfer.pixelmap_texture, NULL); - _vbo_DestroyContext(st->ctx); + _vbo_DestroyContext(ctx); st_destroy_program_variants(st); @@ -503,6 +530,7 @@ void st_init_driver_functions(struct pipe_screen *screen, st_init_flush_functions(screen, functions); st_init_string_functions(functions); st_init_viewport_functions(functions); + st_init_compute_functions(functions); st_init_xformfb_functions(functions); st_init_syncobj_functions(functions); diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 9a80f4bae70..f960c64cbe8 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -64,6 +64,8 @@ struct u_upload_mgr; #define ST_NEW_SAMPLER_VIEWS (1 << 11) #define ST_NEW_ATOMIC_BUFFER (1 << 12) #define ST_NEW_STORAGE_BUFFER (1 << 13) +#define ST_NEW_COMPUTE_PROGRAM (1 << 14) +#define ST_NEW_IMAGE_UNITS (1 << 15) struct st_state_flags { @@ -78,6 +80,23 @@ struct st_tracked_state { }; +/** + * Enumeration of state tracker pipelines. + */ +enum st_pipeline { + ST_PIPELINE_RENDER, + ST_PIPELINE_COMPUTE, +}; + + +/** For drawing quads for glClear, glDraw/CopyPixels, glBitmap, etc. */ +struct st_util_vertex +{ + float x, y, z; + float r, g, b, a; + float s, t; +}; + struct st_context { @@ -153,6 +172,7 @@ struct st_context char renderer[100]; struct st_state_flags dirty; + struct st_state_flags dirty_cp; GLboolean vertdata_edgeflags; GLboolean edgeflag_culls_prims; @@ -165,12 +185,14 @@ struct st_context struct st_geometry_program *gp; /**< Currently bound geometry program */ struct st_tessctrl_program *tcp; /**< Currently bound tess control program */ struct st_tesseval_program *tep; /**< Currently bound tess eval program */ + struct st_compute_program *cp; /**< Currently bound compute program */ struct st_vp_variant *vp_variant; struct st_fp_variant *fp_variant; struct st_basic_variant *gp_variant; struct st_basic_variant *tcp_variant; struct st_basic_variant *tep_variant; + struct st_basic_variant *cp_variant; struct gl_texture_object *default_texture; @@ -183,6 +205,7 @@ struct st_context struct { struct pipe_rasterizer_state rasterizer; struct pipe_sampler_state sampler; + struct pipe_sampler_state atlas_sampler; enum pipe_format tex_format; void *vs; struct bitmap_cache *cache; @@ -194,6 +217,14 @@ struct st_context void *vert_shaders[2]; /**< ureg shaders */ } drawpix; + struct { + GLsizei width, height; + GLenum format, type; + const void *user_pointer; /**< Last user 'pixels' pointer */ + void *image; /**< Copy of the glDrawPixels image data */ + struct pipe_resource *texture; + } drawpix_cache; + /** for glClear */ struct { struct pipe_rasterizer_state raster; @@ -217,8 +248,8 @@ struct st_context bool use_gs; } pbo_upload; - /** used for anything using util_draw_vertex_buffer */ - struct pipe_vertex_element velems_util_draw[3]; + /** for drawing with st_util_vertex */ + struct pipe_vertex_element util_velems[3]; void *passthrough_fs; /**< simple pass-through frag shader */ diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 10e294cd147..2de6620602d 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -60,7 +60,7 @@ #include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_prim.h" -#include "util/u_draw_quad.h" +#include "util/u_draw.h" #include "util/u_upload_mgr.h" #include "draw/draw_context.h" #include "cso_cache/cso_context.h" @@ -202,7 +202,7 @@ st_draw_vbo(struct gl_context *ctx, /* Validate state. */ if (st->dirty.st || ctx->NewDriverState) { - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); #if 0 if (MESA_VERBOSE & VERBOSE_GLSL) { @@ -315,7 +315,7 @@ st_indirect_draw_vbo(struct gl_context *ctx, /* Validate state. */ if (st->dirty.st || ctx->NewDriverState) { - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); } if (st->vertex_array_out_of_memory) { @@ -398,3 +398,93 @@ st_destroy_draw(struct st_context *st) { draw_destroy(st->draw); } + + +/** + * Draw a quad with given position, texcoords and color. + */ +bool +st_draw_quad(struct st_context *st, + float x0, float y0, float x1, float y1, float z, + float s0, float t0, float s1, float t1, + const float *color, + unsigned num_instances) +{ + struct pipe_vertex_buffer vb = {0}; + struct st_util_vertex *verts; + + vb.stride = sizeof(struct st_util_vertex); + + u_upload_alloc(st->uploader, 0, 4 * sizeof(struct st_util_vertex), 4, + &vb.buffer_offset, &vb.buffer, (void **) &verts); + if (!vb.buffer) { + return false; + } + + /* lower-left */ + verts[0].x = x0; + verts[0].y = y1; + verts[0].z = z; + verts[0].r = color[0]; + verts[0].g = color[1]; + verts[0].b = color[2]; + verts[0].a = color[3]; + verts[0].s = s0; + verts[0].t = t0; + + /* lower-right */ + verts[1].x = x1; + verts[1].y = y1; + verts[1].z = z; + verts[1].r = color[0]; + verts[1].g = color[1]; + verts[1].b = color[2]; + verts[1].a = color[3]; + verts[1].s = s1; + verts[1].t = t0; + + /* upper-right */ + verts[2].x = x1; + verts[2].y = y0; + verts[2].z = z; + verts[2].r = color[0]; + verts[2].g = color[1]; + verts[2].b = color[2]; + verts[2].a = color[3]; + verts[2].s = s1; + verts[2].t = t1; + + /* upper-left */ + verts[3].x = x0; + verts[3].y = y0; + verts[3].z = z; + verts[3].r = color[0]; + verts[3].g = color[1]; + verts[3].b = color[2]; + verts[3].a = color[3]; + verts[3].s = s0; + verts[3].t = t1; + + u_upload_unmap(st->uploader); + + /* At the time of writing, cso_get_aux_vertex_buffer_slot() always returns + * zero. If that ever changes we need to audit the calls to that function + * and make sure the slot number is used consistently everywhere. + */ + assert(cso_get_aux_vertex_buffer_slot(st->cso_context) == 0); + + cso_set_vertex_buffers(st->cso_context, + cso_get_aux_vertex_buffer_slot(st->cso_context), + 1, &vb); + + if (num_instances > 1) { + cso_draw_arrays_instanced(st->cso_context, PIPE_PRIM_TRIANGLE_FAN, 0, 4, + 0, num_instances); + } else { + cso_draw_arrays(st->cso_context, PIPE_PRIM_TRIANGLE_FAN, 0, 4); + } + + pipe_resource_reference(&vb.buffer, NULL); + + return true; +} diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h index a973c8a4a5d..d85c3b7facd 100644 --- a/src/mesa/state_tracker/st_draw.h +++ b/src/mesa/state_tracker/st_draw.h @@ -85,4 +85,11 @@ pointer_to_offset(const void *ptr) } +bool +st_draw_quad(struct st_context *st, + float x0, float y0, float x1, float y1, float z, + float s0, float t0, float s1, float t1, + const float *color, + unsigned num_instances); + #endif diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c index b6e6dea5b27..9f48945d74d 100644 --- a/src/mesa/state_tracker/st_draw_feedback.c +++ b/src/mesa/state_tracker/st_draw_feedback.c @@ -140,7 +140,7 @@ st_feedback_draw_vbo(struct gl_context *ctx, st_flush_bitmap_cache(st); - st_validate_state(st); + st_validate_state(st, ST_PIPELINE_RENDER); if (!index_bounds_valid) vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims); diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index feabe6290eb..24c64447f44 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -75,6 +75,7 @@ static int _clamp(int a, int min, int max) void st_init_limits(struct pipe_screen *screen, struct gl_constants *c, struct gl_extensions *extensions) { + int supported_irs; unsigned sh; boolean can_ubo = TRUE; @@ -174,9 +175,19 @@ void st_init_limits(struct pipe_screen *screen, pc = &c->Program[MESA_SHADER_TESS_EVAL]; options = &c->ShaderCompilerOptions[MESA_SHADER_TESS_EVAL]; break; + case PIPE_SHADER_COMPUTE: + pc = &c->Program[MESA_SHADER_COMPUTE]; + options = &c->ShaderCompilerOptions[MESA_SHADER_COMPUTE]; + + if (!screen->get_param(screen, PIPE_CAP_COMPUTE)) + continue; + supported_irs = + screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_SUPPORTED_IRS); + if (!(supported_irs & (1 << PIPE_SHADER_IR_TGSI))) + continue; + break; default: - /* compute shader, etc. */ - continue; + assert(0); } pc->MaxTextureImageUnits = @@ -223,6 +234,9 @@ void st_init_limits(struct pipe_screen *screen, screen, sh, PIPE_SHADER_CAP_MAX_SHADER_BUFFERS) / 2; pc->MaxShaderStorageBlocks = pc->MaxAtomicBuffers; + pc->MaxImageUniforms = screen->get_shader_param( + screen, sh, PIPE_SHADER_CAP_MAX_SHADER_IMAGES); + /* Gallium doesn't really care about local vs. env parameters so use the * same limits. */ @@ -261,6 +275,9 @@ void st_init_limits(struct pipe_screen *screen, options->LowerClipDistance = true; options->LowerBufferInterfaceBlocks = true; + + if (sh == PIPE_SHADER_COMPUTE) + options->LowerShaderSharedVariables = true; } c->LowerTessLevel = true; @@ -270,7 +287,8 @@ void st_init_limits(struct pipe_screen *screen, c->Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits + c->Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits + c->Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits + - c->Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits, + c->Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits + + c->Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits, MAX_COMBINED_TEXTURE_IMAGE_UNITS); /* This depends on program constants. */ @@ -330,7 +348,8 @@ void st_init_limits(struct pipe_screen *screen, c->Program[MESA_SHADER_TESS_CTRL].MaxUniformBlocks + c->Program[MESA_SHADER_TESS_EVAL].MaxUniformBlocks + c->Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks + - c->Program[MESA_SHADER_FRAGMENT].MaxUniformBlocks; + c->Program[MESA_SHADER_FRAGMENT].MaxUniformBlocks + + c->Program[MESA_SHADER_COMPUTE].MaxUniformBlocks; assert(c->MaxCombinedUniformBlocks <= MAX_COMBINED_UNIFORM_BUFFERS); } @@ -363,6 +382,21 @@ void st_init_limits(struct pipe_screen *screen, c->MaxShaderStorageBlockSize = 1 << 27; extensions->ARB_shader_storage_buffer_object = GL_TRUE; } + + c->MaxCombinedImageUniforms = + c->Program[MESA_SHADER_VERTEX].MaxImageUniforms + + c->Program[MESA_SHADER_TESS_CTRL].MaxImageUniforms + + c->Program[MESA_SHADER_TESS_EVAL].MaxImageUniforms + + c->Program[MESA_SHADER_GEOMETRY].MaxImageUniforms + + c->Program[MESA_SHADER_FRAGMENT].MaxImageUniforms + + c->Program[MESA_SHADER_COMPUTE].MaxImageUniforms; + c->MaxCombinedShaderOutputResources += c->MaxCombinedImageUniforms; + c->MaxImageUnits = MAX_IMAGE_UNITS; + c->MaxImageSamples = 0; /* XXX */ + if (c->MaxCombinedImageUniforms) { + extensions->ARB_shader_image_load_store = GL_TRUE; + extensions->ARB_shader_image_size = GL_TRUE; + } } @@ -1012,4 +1046,31 @@ void st_init_extensions(struct pipe_screen *screen, if ((ST_DEBUG & DEBUG_GREMEDY) && screen->get_param(screen, PIPE_CAP_STRING_MARKER)) extensions->GREMEDY_string_marker = GL_TRUE; + + if (screen->get_param(screen, PIPE_CAP_COMPUTE)) { + int compute_supported_irs = + screen->get_shader_param(screen, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_SUPPORTED_IRS); + if (compute_supported_irs & (1 << PIPE_SHADER_IR_TGSI)) { + uint64_t grid_size[3], block_size[3]; + + screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GRID_SIZE, + grid_size); + screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE, + block_size); + screen->get_compute_param(screen, + PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK, + &consts->MaxComputeWorkGroupInvocations); + screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE, + &consts->MaxComputeSharedMemorySize); + + for (i = 0; i < 3; i++) { + consts->MaxComputeWorkGroupCount[i] = grid_size[i]; + consts->MaxComputeWorkGroupSize[i] = block_size[i]; + } + /* XXX: ARB_compute_shader is not enabled by default because images + * support is still not implemented yet. */ + /* extensions->ARB_compute_shader = true; */ + } + } } diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index 2b92bade440..82bf3a185ad 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -1484,6 +1484,48 @@ static const struct format_mapping format_map[] = { { PIPE_FORMAT_ETC1_RGB8, 0 } }, + /* ETC2 */ + { + { GL_COMPRESSED_RGB8_ETC2, 0 }, + { PIPE_FORMAT_ETC2_RGB8, 0 } + }, + { + { GL_COMPRESSED_SRGB8_ETC2, 0 }, + { PIPE_FORMAT_ETC2_SRGB8, 0 } + }, + { + { GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2, 0 }, + { PIPE_FORMAT_ETC2_RGB8A1, 0 } + }, + { + { GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2, 0 }, + { PIPE_FORMAT_ETC2_SRGB8A1, 0 } + }, + { + { GL_COMPRESSED_RGBA8_ETC2_EAC, 0 }, + { PIPE_FORMAT_ETC2_RGBA8, 0 } + }, + { + { GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC, 0 }, + { PIPE_FORMAT_ETC2_SRGBA8, 0 } + }, + { + { GL_COMPRESSED_R11_EAC, 0 }, + { PIPE_FORMAT_ETC2_R11_UNORM, 0 } + }, + { + { GL_COMPRESSED_SIGNED_R11_EAC, 0 }, + { PIPE_FORMAT_ETC2_R11_SNORM, 0 } + }, + { + { GL_COMPRESSED_RG11_EAC, 0 }, + { PIPE_FORMAT_ETC2_RG11_UNORM, 0 } + }, + { + { GL_COMPRESSED_SIGNED_RG11_EAC, 0 }, + { PIPE_FORMAT_ETC2_RG11_SNORM, 0 } + }, + /* BPTC */ { { GL_COMPRESSED_RGBA_BPTC_UNORM, 0 }, diff --git a/src/mesa/state_tracker/st_format.h b/src/mesa/state_tracker/st_format.h index 90e00e8ebf6..3e10aa64bc6 100644 --- a/src/mesa/state_tracker/st_format.h +++ b/src/mesa/state_tracker/st_format.h @@ -36,6 +36,10 @@ #include "pipe/p_defines.h" #include "pipe/p_format.h" +#ifdef __cplusplus +extern "C" { +#endif + struct gl_context; struct pipe_screen; @@ -77,4 +81,8 @@ st_translate_color(const union gl_color_union *colorIn, union pipe_color_union *colorOut, GLenum baseFormat, GLboolean is_integer); +#ifdef __cplusplus +} +#endif + #endif /* ST_FORMAT_H */ diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 2ad91ecf4df..943582d447a 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -40,6 +40,7 @@ #include "main/shaderobj.h" #include "main/uniforms.h" #include "main/shaderapi.h" +#include "main/shaderimage.h" #include "program/prog_instruction.h" #include "pipe/p_context.h" @@ -50,6 +51,7 @@ #include "util/u_memory.h" #include "st_program.h" #include "st_mesa_to_tgsi.h" +#include "st_format.h" #define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \ @@ -262,6 +264,7 @@ public: int tex_target; /**< One of TEXTURE_*_INDEX */ glsl_base_type tex_type; GLboolean tex_shadow; + unsigned image_format; st_src_reg tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; unsigned tex_offset_num_offset; @@ -395,6 +398,9 @@ public: glsl_base_type sampler_types[PIPE_MAX_SAMPLERS]; int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */ int buffers_used; + int images_used; + int image_targets[PIPE_MAX_SHADER_IMAGES]; + unsigned image_formats[PIPE_MAX_SHADER_IMAGES]; bool indirect_addr_consts; int wpos_transform_const; @@ -402,6 +408,7 @@ public: bool native_integers; bool have_sqrt; bool have_fma; + bool use_shared_memory; variable_storage *find_variable_storage(ir_variable *var); @@ -451,6 +458,8 @@ public: void visit_atomic_counter_intrinsic(ir_call *); void visit_ssbo_intrinsic(ir_call *); void visit_membar_intrinsic(ir_call *); + void visit_shared_intrinsic(ir_call *); + void visit_image_intrinsic(ir_call *); st_src_reg result; @@ -1214,6 +1223,7 @@ attrib_type_size(const struct glsl_type *type, bool is_vs_input) case GLSL_TYPE_INTERFACE: case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: + case GLSL_TYPE_FUNCTION: assert(!"Invalid type in type_size"); break; } @@ -1969,6 +1979,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_u2i: /* Converting between signed and unsigned integers is a no-op. */ result_src = op[0]; + result_src.type = result_dst.type; break; case ir_unop_b2i: if (native_integers) { @@ -3341,6 +3352,239 @@ glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir) } void +glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir) +{ + const char *callee = ir->callee->function_name(); + exec_node *param = ir->actual_parameters.get_head(); + + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + st_src_reg buffer(PROGRAM_MEMORY, 0, GLSL_TYPE_UINT); + + /* Calculate the surface offset */ + offset->accept(this); + st_src_reg off = this->result; + + st_dst_reg dst = undef_dst; + if (ir->return_deref) { + ir->return_deref->accept(this); + dst = st_dst_reg(this->result); + dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; + } + + glsl_to_tgsi_instruction *inst; + + if (!strcmp("__intrinsic_load_shared", callee)) { + inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off); + inst->buffer = buffer; + } else if (!strcmp("__intrinsic_store_shared", callee)) { + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + val->accept(this); + + param = param->get_next(); + ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); + assert(write_mask); + dst.writemask = write_mask->value.u[0]; + + dst.type = this->result.type; + inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result); + inst->buffer = buffer; + } else { + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + val->accept(this); + + st_src_reg data = this->result, data2 = undef_src; + unsigned opcode; + if (!strcmp("__intrinsic_atomic_add_shared", callee)) + opcode = TGSI_OPCODE_ATOMUADD; + else if (!strcmp("__intrinsic_atomic_min_shared", callee)) + opcode = TGSI_OPCODE_ATOMIMIN; + else if (!strcmp("__intrinsic_atomic_max_shared", callee)) + opcode = TGSI_OPCODE_ATOMIMAX; + else if (!strcmp("__intrinsic_atomic_and_shared", callee)) + opcode = TGSI_OPCODE_ATOMAND; + else if (!strcmp("__intrinsic_atomic_or_shared", callee)) + opcode = TGSI_OPCODE_ATOMOR; + else if (!strcmp("__intrinsic_atomic_xor_shared", callee)) + opcode = TGSI_OPCODE_ATOMXOR; + else if (!strcmp("__intrinsic_atomic_exchange_shared", callee)) + opcode = TGSI_OPCODE_ATOMXCHG; + else if (!strcmp("__intrinsic_atomic_comp_swap_shared", callee)) { + opcode = TGSI_OPCODE_ATOMCAS; + param = param->get_next(); + val = ((ir_instruction *)param)->as_rvalue(); + val->accept(this); + data2 = this->result; + } else { + assert(!"Unexpected intrinsic"); + return; + } + + inst = emit_asm(ir, opcode, dst, off, data, data2); + inst->buffer = buffer; + } +} + +void +glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) +{ + const char *callee = ir->callee->function_name(); + exec_node *param = ir->actual_parameters.get_head(); + + ir_dereference *img = (ir_dereference *)param; + const ir_variable *imgvar = img->variable_referenced(); + const glsl_type *type = imgvar->type->without_array(); + unsigned sampler_array_size = 1, sampler_base = 0; + + st_src_reg reladdr; + st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT); + + get_deref_offsets(img, &sampler_array_size, &sampler_base, + (unsigned int *)&image.index, &reladdr); + if (reladdr.file != PROGRAM_UNDEFINED) { + emit_arl(ir, sampler_reladdr, reladdr); + image.reladdr = ralloc(mem_ctx, st_src_reg); + memcpy(image.reladdr, &sampler_reladdr, sizeof(reladdr)); + } + + st_dst_reg dst = undef_dst; + if (ir->return_deref) { + ir->return_deref->accept(this); + dst = st_dst_reg(this->result); + dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; + } + + glsl_to_tgsi_instruction *inst; + + if (!strcmp("__intrinsic_image_size", callee)) { + dst.writemask = WRITEMASK_XYZ; + inst = emit_asm(ir, TGSI_OPCODE_RESQ, dst); + } else if (!strcmp("__intrinsic_image_samples", callee)) { + st_src_reg res = get_temp(glsl_type::ivec4_type); + st_dst_reg dstres = st_dst_reg(res); + dstres.writemask = WRITEMASK_W; + emit_asm(ir, TGSI_OPCODE_RESQ, dstres); + res.swizzle = SWIZZLE_WWWW; + inst = emit_asm(ir, TGSI_OPCODE_MOV, dst, res); + } else { + st_src_reg arg1 = undef_src, arg2 = undef_src; + st_src_reg coord; + st_dst_reg coord_dst; + coord = get_temp(glsl_type::ivec4_type); + coord_dst = st_dst_reg(coord); + coord_dst.writemask = (1 << type->coordinate_components()) - 1; + param = param->get_next(); + ((ir_dereference *)param)->accept(this); + emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result); + coord.swizzle = SWIZZLE_XXXX; + switch (type->coordinate_components()) { + case 4: assert(!"unexpected coord count"); + /* fallthrough */ + case 3: coord.swizzle |= SWIZZLE_Z << 6; + /* fallthrough */ + case 2: coord.swizzle |= SWIZZLE_Y << 3; + } + + if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) { + param = param->get_next(); + ((ir_dereference *)param)->accept(this); + st_src_reg sample = this->result; + sample.swizzle = SWIZZLE_XXXX; + coord_dst.writemask = WRITEMASK_W; + emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample); + coord.swizzle |= SWIZZLE_W << 9; + } + + param = param->get_next(); + if (!param->is_tail_sentinel()) { + ((ir_dereference *)param)->accept(this); + arg1 = this->result; + param = param->get_next(); + } + + if (!param->is_tail_sentinel()) { + ((ir_dereference *)param)->accept(this); + arg2 = this->result; + param = param->get_next(); + } + + assert(param->is_tail_sentinel()); + + unsigned opcode; + if (!strcmp("__intrinsic_image_load", callee)) + opcode = TGSI_OPCODE_LOAD; + else if (!strcmp("__intrinsic_image_store", callee)) + opcode = TGSI_OPCODE_STORE; + else if (!strcmp("__intrinsic_image_atomic_add", callee)) + opcode = TGSI_OPCODE_ATOMUADD; + else if (!strcmp("__intrinsic_image_atomic_min", callee)) + opcode = TGSI_OPCODE_ATOMIMIN; + else if (!strcmp("__intrinsic_image_atomic_max", callee)) + opcode = TGSI_OPCODE_ATOMIMAX; + else if (!strcmp("__intrinsic_image_atomic_and", callee)) + opcode = TGSI_OPCODE_ATOMAND; + else if (!strcmp("__intrinsic_image_atomic_or", callee)) + opcode = TGSI_OPCODE_ATOMOR; + else if (!strcmp("__intrinsic_image_atomic_xor", callee)) + opcode = TGSI_OPCODE_ATOMXOR; + else if (!strcmp("__intrinsic_image_atomic_exchange", callee)) + opcode = TGSI_OPCODE_ATOMXCHG; + else if (!strcmp("__intrinsic_image_atomic_comp_swap", callee)) + opcode = TGSI_OPCODE_ATOMCAS; + else { + assert(!"Unexpected intrinsic"); + return; + } + + inst = emit_asm(ir, opcode, dst, coord, arg1, arg2); + if (opcode == TGSI_OPCODE_STORE) + inst->dst[0].writemask = WRITEMASK_XYZW; + } + + inst->buffer = image; + inst->sampler_array_size = sampler_array_size; + inst->sampler_base = sampler_base; + + switch (type->sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: + inst->tex_target = (type->sampler_array) + ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; + break; + case GLSL_SAMPLER_DIM_2D: + inst->tex_target = (type->sampler_array) + ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; + break; + case GLSL_SAMPLER_DIM_3D: + inst->tex_target = TEXTURE_3D_INDEX; + break; + case GLSL_SAMPLER_DIM_CUBE: + inst->tex_target = (type->sampler_array) + ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX; + break; + case GLSL_SAMPLER_DIM_RECT: + inst->tex_target = TEXTURE_RECT_INDEX; + break; + case GLSL_SAMPLER_DIM_BUF: + inst->tex_target = TEXTURE_BUFFER_INDEX; + break; + case GLSL_SAMPLER_DIM_EXTERNAL: + inst->tex_target = TEXTURE_EXTERNAL_INDEX; + break; + case GLSL_SAMPLER_DIM_MS: + inst->tex_target = (type->sampler_array) + ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX; + break; + default: + assert(!"Should not get here."); + } + + inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx), + _mesa_get_shader_image_format(imgvar->data.image_format)); +} + +void glsl_to_tgsi_visitor::visit(ir_call *ir) { glsl_to_tgsi_instruction *call_inst; @@ -3381,6 +3625,36 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) return; } + if (!strcmp("__intrinsic_load_shared", callee) || + !strcmp("__intrinsic_store_shared", callee) || + !strcmp("__intrinsic_atomic_add_shared", callee) || + !strcmp("__intrinsic_atomic_min_shared", callee) || + !strcmp("__intrinsic_atomic_max_shared", callee) || + !strcmp("__intrinsic_atomic_and_shared", callee) || + !strcmp("__intrinsic_atomic_or_shared", callee) || + !strcmp("__intrinsic_atomic_xor_shared", callee) || + !strcmp("__intrinsic_atomic_exchange_shared", callee) || + !strcmp("__intrinsic_atomic_comp_swap_shared", callee)) { + visit_shared_intrinsic(ir); + return; + } + + if (!strcmp("__intrinsic_image_load", callee) || + !strcmp("__intrinsic_image_store", callee) || + !strcmp("__intrinsic_image_atomic_add", callee) || + !strcmp("__intrinsic_image_atomic_min", callee) || + !strcmp("__intrinsic_image_atomic_max", callee) || + !strcmp("__intrinsic_image_atomic_and", callee) || + !strcmp("__intrinsic_image_atomic_or", callee) || + !strcmp("__intrinsic_image_atomic_xor", callee) || + !strcmp("__intrinsic_image_atomic_exchange", callee) || + !strcmp("__intrinsic_image_atomic_comp_swap", callee) || + !strcmp("__intrinsic_image_size", callee) || + !strcmp("__intrinsic_image_samples", callee)) { + visit_image_intrinsic(ir); + return; + } + entry = get_function_signature(sig); /* Process in parameters. */ foreach_two_lists(formal_node, &sig->parameters, @@ -3980,6 +4254,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() num_address_regs = 0; samplers_used = 0; buffers_used = 0; + images_used = 0; indirect_addr_consts = false; wpos_transform_const = -1; glsl_version = 0; @@ -3992,6 +4267,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() options = NULL; have_sqrt = false; have_fma = false; + use_shared_memory = false; } glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() @@ -4015,6 +4291,7 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) { v->samplers_used = 0; v->buffers_used = 0; + v->images_used = 0; foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) { if (inst->info->is_tex) { @@ -4035,8 +4312,20 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) if (inst->buffer.file != PROGRAM_UNDEFINED && ( is_resource_instruction(inst->op) || inst->op == TGSI_OPCODE_STORE)) { - if (inst->buffer.file == PROGRAM_BUFFER) + if (inst->buffer.file == PROGRAM_BUFFER) { v->buffers_used |= 1 << inst->buffer.index; + } else if (inst->buffer.file == PROGRAM_MEMORY) { + v->use_shared_memory = true; + } else { + assert(inst->buffer.file == PROGRAM_IMAGE); + for (int i = 0; i < inst->sampler_array_size; i++) { + unsigned idx = inst->sampler_base + i; + v->images_used |= 1 << idx; + v->image_targets[idx] = + st_translate_texture_target(inst->tex_target, false); + v->image_formats[idx] = inst->image_format; + } + } } } prog->SamplersUsed = v->samplers_used; @@ -4819,7 +5108,9 @@ struct st_translate { struct ureg_dst address[3]; struct ureg_src samplers[PIPE_MAX_SAMPLERS]; struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS]; + struct ureg_src images[PIPE_MAX_SHADER_IMAGES]; struct ureg_src systemValues[SYSTEM_VALUE_MAX]; + struct ureg_src shared_memory; struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET]; unsigned *array_sizes; struct array_decl *input_arrays; @@ -4880,6 +5171,12 @@ const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { TGSI_SEMANTIC_PRIMID, TGSI_SEMANTIC_TESSOUTER, TGSI_SEMANTIC_TESSINNER, + + /* Compute shaders + */ + TGSI_SEMANTIC_THREAD_ID, + TGSI_SEMANTIC_BLOCK_ID, + TGSI_SEMANTIC_GRID_SIZE, }; /** @@ -5308,7 +5605,12 @@ compile_tgsi_instruction(struct st_translate *t, for (i = num_src - 1; i >= 0; i--) src[i + 1] = src[i]; num_src++; - src[0] = t->buffers[inst->buffer.index]; + if (inst->buffer.file == PROGRAM_MEMORY) + src[0] = t->shared_memory; + else if (inst->buffer.file == PROGRAM_BUFFER) + src[0] = t->buffers[inst->buffer.index]; + else + src[0] = t->images[inst->buffer.index]; if (inst->buffer.reladdr) src[0] = ureg_src_indirect(src[0], ureg_src(t->address[2])); assert(src[0].File != TGSI_FILE_NULL); @@ -5317,7 +5619,13 @@ compile_tgsi_instruction(struct st_translate *t, break; case TGSI_OPCODE_STORE: - dst[0] = ureg_writemask(ureg_dst(t->buffers[inst->buffer.index]), inst->dst[0].writemask); + if (inst->buffer.file == PROGRAM_MEMORY) + dst[0] = ureg_dst(t->shared_memory); + else if (inst->buffer.file == PROGRAM_BUFFER) + dst[0] = ureg_dst(t->buffers[inst->buffer.index]); + else + dst[0] = ureg_dst(t->images[inst->buffer.index]); + dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask); if (inst->buffer.reladdr) dst[0] = ureg_dst_indirect(dst[0], ureg_src(t->address[2])); assert(dst[0].File != TGSI_FILE_NULL); @@ -5643,6 +5951,12 @@ st_translate_program( TGSI_SEMANTIC_TESSCOORD); assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_HELPER_INVOCATION] == TGSI_SEMANTIC_HELPER_INVOCATION); + assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_LOCAL_INVOCATION_ID] == + TGSI_SEMANTIC_THREAD_ID); + assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_WORK_GROUP_ID] == + TGSI_SEMANTIC_BLOCK_ID); + assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_NUM_WORK_GROUPS] == + TGSI_SEMANTIC_GRID_SIZE); t = CALLOC_STRUCT(st_translate); if (!t) { @@ -5710,6 +6024,8 @@ st_translate_program( t->inputs[i] = ureg_DECL_vs_input(ureg, i); } break; + case TGSI_PROCESSOR_COMPUTE: + break; default: assert(0); } @@ -5719,6 +6035,7 @@ st_translate_program( */ switch (procType) { case TGSI_PROCESSOR_FRAGMENT: + case TGSI_PROCESSOR_COMPUTE: break; case TGSI_PROCESSOR_GEOMETRY: case TGSI_PROCESSOR_TESS_EVAL: @@ -5969,7 +6286,17 @@ st_translate_program( } } + if (program->use_shared_memory) + t->shared_memory = ureg_DECL_shared_memory(ureg); + for (i = 0; i < program->shader->NumImages; i++) { + if (program->images_used & (1 << i)) { + t->images[i] = ureg_DECL_image(ureg, i, + program->image_targets[i], + program->image_formats[i], + true, false); + } + } /* Emit each instruction in turn: */ @@ -6188,6 +6515,7 @@ get_mesa_program(struct gl_context *ctx, struct st_geometry_program *stgp; struct st_tessctrl_program *sttcp; struct st_tesseval_program *sttep; + struct st_compute_program *stcp; switch (shader->Type) { case GL_VERTEX_SHADER: @@ -6210,6 +6538,10 @@ get_mesa_program(struct gl_context *ctx, sttep = (struct st_tesseval_program *)prog; sttep->glsl_to_tgsi = v; break; + case GL_COMPUTE_SHADER: + stcp = (struct st_compute_program *)prog; + stcp->glsl_to_tgsi = v; + break; default: assert(!"should not be reached"); return NULL; diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index 84b65369d80..a983d64b5d5 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -906,7 +906,6 @@ st_manager_add_color_renderbuffer(struct st_context *st, break; default: return FALSE; - break; } if (!st_framebuffer_add_renderbuffer(stfb, idx)) diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 5bd626f8610..2e21d02b8b5 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -158,6 +158,9 @@ delete_basic_variant(struct st_context *st, struct st_basic_variant *v, case GL_GEOMETRY_PROGRAM_NV: cso_delete_geometry_shader(st->cso_context, v->driver_shader); break; + case GL_COMPUTE_PROGRAM_NV: + cso_delete_compute_shader(st->cso_context, v->driver_shader); + break; default: assert(!"this shouldn't occur"); } @@ -193,6 +196,30 @@ st_release_basic_variants(struct st_context *st, GLenum target, /** + * Free all variants of a compute program. + */ +void +st_release_cp_variants(struct st_context *st, struct st_compute_program *stcp) +{ + struct st_basic_variant **variants = &stcp->variants; + struct st_basic_variant *v; + + for (v = *variants; v; ) { + struct st_basic_variant *next = v->next; + delete_basic_variant(st, v, stcp->Base.Base.Target); + v = next; + } + + *variants = NULL; + + if (stcp->tgsi.prog) { + ureg_free_tokens(stcp->tgsi.prog); + stcp->tgsi.prog = NULL; + } +} + + +/** * Translate a vertex program. */ bool @@ -1395,6 +1422,74 @@ st_translate_tesseval_program(struct st_context *st, /** + * Translate a compute program to create a new variant. + */ +bool +st_translate_compute_program(struct st_context *st, + struct st_compute_program *stcp) +{ + struct ureg_program *ureg; + struct pipe_shader_state prog; + + ureg = ureg_create_with_screen(TGSI_PROCESSOR_COMPUTE, st->pipe->screen); + if (ureg == NULL) + return false; + + st_translate_program_common(st, &stcp->Base.Base, stcp->glsl_to_tgsi, ureg, + TGSI_PROCESSOR_COMPUTE, &prog); + + stcp->tgsi.prog = prog.tokens; + stcp->tgsi.req_local_mem = stcp->Base.SharedSize; + stcp->tgsi.req_private_mem = 0; + stcp->tgsi.req_input_mem = 0; + + free_glsl_to_tgsi_visitor(stcp->glsl_to_tgsi); + stcp->glsl_to_tgsi = NULL; + return true; +} + + +/** + * Get/create compute program variant. + */ +struct st_basic_variant * +st_get_cp_variant(struct st_context *st, + struct pipe_compute_state *tgsi, + struct st_basic_variant **variants) +{ + struct pipe_context *pipe = st->pipe; + struct st_basic_variant *v; + struct st_basic_variant_key key; + + memset(&key, 0, sizeof(key)); + key.st = st->has_shareable_shaders ? NULL : st; + + /* Search for existing variant */ + for (v = *variants; v; v = v->next) { + if (memcmp(&v->key, &key, sizeof(key)) == 0) { + break; + } + } + + if (!v) { + /* create new */ + v = CALLOC_STRUCT(st_basic_variant); + if (v) { + /* fill in new variant */ + v->driver_shader = pipe->create_compute_state(pipe, tgsi); + v->key = key; + + /* insert into list */ + v->next = *variants; + *variants = v; + } + } + + return v; +} + + +/** * Vert/Geom/Frag programs have per-context variants. Free all the * variants attached to the given program which match the given context. */ @@ -1449,14 +1544,17 @@ destroy_program_variants(struct st_context *st, struct gl_program *target) case GL_GEOMETRY_PROGRAM_NV: case GL_TESS_CONTROL_PROGRAM_NV: case GL_TESS_EVALUATION_PROGRAM_NV: + case GL_COMPUTE_PROGRAM_NV: { struct st_geometry_program *gp = (struct st_geometry_program*)target; struct st_tessctrl_program *tcp = (struct st_tessctrl_program*)target; struct st_tesseval_program *tep = (struct st_tesseval_program*)target; + struct st_compute_program *cp = (struct st_compute_program*)target; struct st_basic_variant **variants = target->Target == GL_GEOMETRY_PROGRAM_NV ? &gp->variants : target->Target == GL_TESS_CONTROL_PROGRAM_NV ? &tcp->variants : target->Target == GL_TESS_EVALUATION_PROGRAM_NV ? &tep->variants : + target->Target == GL_COMPUTE_PROGRAM_NV ? &cp->variants : NULL; struct st_basic_variant *v, **prevPtr = variants; @@ -1513,6 +1611,7 @@ destroy_shader_program_variants_cb(GLuint key, void *data, void *userData) case GL_GEOMETRY_SHADER: case GL_TESS_CONTROL_SHADER: case GL_TESS_EVALUATION_SHADER: + case GL_COMPUTE_SHADER: { destroy_program_variants(st, shader->Program); } @@ -1629,6 +1728,12 @@ st_precompile_shader_variant(struct st_context *st, break; } + case GL_COMPUTE_PROGRAM_NV: { + struct st_compute_program *p = (struct st_compute_program *)prog; + st_get_cp_variant(st, &p->tgsi, &p->variants); + break; + } + default: assert(0); } diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index 74f3def6095..028fba99a74 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -231,6 +231,18 @@ struct st_tesseval_program }; +/** + * Derived from Mesa gl_compute_program: + */ +struct st_compute_program +{ + struct gl_compute_program Base; /**< The Mesa compute program */ + struct pipe_compute_state tgsi; + struct glsl_to_tgsi_visitor* glsl_to_tgsi; + + struct st_basic_variant *variants; +}; + static inline struct st_fragment_program * st_fragment_program( struct gl_fragment_program *fp ) @@ -263,6 +275,12 @@ st_tesseval_program( struct gl_tess_eval_program *tep ) return (struct st_tesseval_program *)tep; } +static inline struct st_compute_program * +st_compute_program( struct gl_compute_program *cp ) +{ + return (struct st_compute_program *)cp; +} + static inline void st_reference_vertprog(struct st_context *st, struct st_vertex_program **ptr, @@ -313,6 +331,16 @@ st_reference_tesseprog(struct st_context *st, (struct gl_program *) prog); } +static inline void +st_reference_compprog(struct st_context *st, + struct st_compute_program **ptr, + struct st_compute_program *prog) +{ + _mesa_reference_program(st->ctx, + (struct gl_program **) ptr, + (struct gl_program *) prog); +} + /** * This defines mapping from Mesa VARYING_SLOTs to TGSI GENERIC slots. */ @@ -351,6 +379,11 @@ st_get_fp_variant(struct st_context *st, const struct st_fp_variant_key *key); extern struct st_basic_variant * +st_get_cp_variant(struct st_context *st, + struct pipe_compute_state *tgsi, + struct st_basic_variant **variants); + +extern struct st_basic_variant * st_get_basic_variant(struct st_context *st, unsigned pipe_shader, struct pipe_shader_state *tgsi, @@ -365,6 +398,10 @@ st_release_fp_variants( struct st_context *st, struct st_fragment_program *stfp ); extern void +st_release_cp_variants(struct st_context *st, + struct st_compute_program *stcp); + +extern void st_release_basic_variants(struct st_context *st, GLenum target, struct st_basic_variant **variants, struct pipe_shader_state *tgsi); @@ -392,6 +429,10 @@ extern bool st_translate_tesseval_program(struct st_context *st, struct st_tesseval_program *sttep); +extern bool +st_translate_compute_program(struct st_context *st, + struct st_compute_program *stcp); + extern void st_print_current_vertex_program(void); |