diff options
Diffstat (limited to 'src/mesa')
42 files changed, 473 insertions, 340 deletions
diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am index 8dc44fda0f2..390381828e9 100644 --- a/src/mesa/Makefile.am +++ b/src/mesa/Makefile.am @@ -179,10 +179,8 @@ libmesagallium_la_LIBADD = \ $(ARCH_LIBS) libmesa_sse41_la_SOURCES = \ - main/streaming-load-memcpy.c \ - main/streaming-load-memcpy.h \ - main/sse_minmax.c \ - main/sse_minmax.h + $(X86_SSE41_FILES) + libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_CFLAGS) pkgconfigdir = $(libdir)/pkgconfig diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index 7425f01273d..2ffbb152e3c 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -396,6 +396,7 @@ VBO_FILES = \ STATETRACKER_FILES = \ state_tracker/st_atifs_to_tgsi.c \ + state_tracker/st_atifs_to_tgsi.h \ state_tracker/st_atom_array.c \ state_tracker/st_atom_atomicbuf.c \ state_tracker/st_atom_blend.c \ @@ -589,7 +590,9 @@ X86_64_FILES = \ X86_SSE41_FILES = \ main/streaming-load-memcpy.c \ - main/sse_minmax.c + main/streaming-load-memcpy.h \ + main/sse_minmax.c \ + main/sse_minmax.h SPARC_FILES = \ sparc/sparc.h \ diff --git a/src/mesa/drivers/dri/i965/.gitignore b/src/mesa/drivers/dri/i965/.gitignore index 8eb9f4e1598..70aae3f4d4c 100644 --- a/src/mesa/drivers/dri/i965/.gitignore +++ b/src/mesa/drivers/dri/i965/.gitignore @@ -1,3 +1,4 @@ +brw_nir_trig_workarounds.c i965_symbols_test test_eu_compact test_vec4_copy_propagation diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index 0db5a51e725..a41c8305a80 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -33,6 +33,7 @@ AM_CFLAGS = \ -I$(top_srcdir)/src/mesa/drivers/dri/common \ -I$(top_srcdir)/src/mesa/drivers/dri/intel/server \ -I$(top_srcdir)/src/gtest/include \ + -I$(top_srcdir)/src/compiler/nir \ -I$(top_builddir)/src/compiler/nir \ -I$(top_builddir)/src/mesa/drivers/dri/common \ $(DEFINES) \ @@ -41,6 +42,10 @@ AM_CFLAGS = \ AM_CXXFLAGS = $(AM_CFLAGS) +brw_nir_trig_workarounds.c: brw_nir_trig_workarounds.py $(top_srcdir)/src/compiler/nir/nir_algebraic.py + $(MKDIR_GEN) + $(AM_V_GEN) PYTHONPATH=$(top_srcdir)/src/compiler/nir $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_nir_trig_workarounds.py > $@ || ($(RM) $@; false) + noinst_LTLIBRARIES = libi965_dri.la libi965_compiler.la libi965_dri_la_SOURCES = $(i965_FILES) libi965_dri_la_LIBADD = libi965_compiler.la $(INTEL_LIBS) diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 2802ec9887c..c314d7470bb 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -44,6 +44,7 @@ i965_compiler_FILES = \ brw_nir.c \ brw_nir_analyze_boolean_resolves.c \ brw_nir_attribute_workarounds.c \ + brw_nir_trig_workarounds.c \ brw_nir_opt_peephole_ffma.c \ brw_nir_uniforms.cpp \ brw_packed_float.c \ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 2d480d02366..63ac3bc31ed 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -1151,10 +1151,9 @@ intel_gles3_srgb_workaround(struct brw_context *brw, */ fb->Visual.sRGBCapable = false; for (int i = 0; i < BUFFER_COUNT; i++) { - if (fb->Attachment[i].Renderbuffer && - fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) { - fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM; - } + struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer; + if (rb) + rb->Format = _mesa_get_srgb_format_linear(rb->Format); } } diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index 09eb2392836..88bd7a499a7 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -1505,19 +1505,33 @@ brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo, break; } - case BRW_SFID_URB: + case BRW_SFID_URB: { + unsigned opcode = brw_inst_urb_opcode(devinfo, inst); + format(file, " %ld", brw_inst_urb_global_offset(devinfo, inst)); space = 1; - if (devinfo->gen >= 7) { - err |= control(file, "urb opcode", gen7_urb_opcode, - brw_inst_urb_opcode(devinfo, inst), &space); - } else if (devinfo->gen >= 5) { - err |= control(file, "urb opcode", gen5_urb_opcode, - brw_inst_urb_opcode(devinfo, inst), &space); + + err |= control(file, "urb opcode", + devinfo->gen >= 7 ? gen7_urb_opcode + : gen5_urb_opcode, + opcode, &space); + + if (devinfo->gen >= 7 && + brw_inst_urb_per_slot_offset(devinfo, inst)) { + string(file, " per-slot"); + } + + if (opcode == GEN8_URB_OPCODE_SIMD8_WRITE || + opcode == GEN8_URB_OPCODE_SIMD8_READ) { + if (brw_inst_urb_channel_mask_present(devinfo, inst)) + string(file, " masked"); + } else { + err |= control(file, "urb swizzle", urb_swizzle, + brw_inst_urb_swizzle_control(devinfo, inst), + &space); } - err |= control(file, "urb swizzle", urb_swizzle, - brw_inst_urb_swizzle_control(devinfo, inst), &space); + if (devinfo->gen < 7) { err |= control(file, "urb allocate", urb_allocate, brw_inst_urb_allocate(devinfo, inst), &space); @@ -1529,6 +1543,7 @@ brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo, brw_inst_urb_complete(devinfo, inst), &space); } break; + } case BRW_SFID_THREAD_SPAWNER: break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index 4c2e360edf9..7ae7b2ecdf6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -289,6 +289,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_binop_gequal: case ir_binop_equal: case ir_binop_nequal: + case ir_binop_ldexp: for (i = 0; i < vector_elements; i++) { ir_rvalue *op0 = get_element(op_var[0], i); ir_rvalue *op1 = get_element(op_var[1], i); @@ -404,7 +405,6 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_unpack_unorm_2x16: case ir_unop_unpack_unorm_4x8: case ir_unop_unpack_half_2x16: - case ir_binop_ldexp: case ir_binop_vector_extract: case ir_triop_vector_insert: case ir_quadop_vector: diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index b27b170ebc3..ab564bbcb9e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -766,24 +766,12 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) break; case nir_op_fsin: - if (!compiler->precise_trig) { - inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]); - } else { - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F); - inst = bld.emit(SHADER_OPCODE_SIN, tmp, op[0]); - inst = bld.MUL(result, tmp, brw_imm_f(0.99997)); - } + inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fcos: - if (!compiler->precise_trig) { - inst = bld.emit(SHADER_OPCODE_COS, result, op[0]); - } else { - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F); - inst = bld.emit(SHADER_OPCODE_COS, tmp, op[0]); - inst = bld.MUL(result, tmp, brw_imm_f(0.99997)); - } + inst = bld.emit(SHADER_OPCODE_COS, result, op[0]); inst->saturate = instr->dest.saturate; break; @@ -876,6 +864,10 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) * When we XOR the sources, the top bit is 0 if they are the same and 1 * if they are different. We can then use a conditional modifier to * turn that into a predicate. This leads us to an XOR.l instruction. + * + * Technically, according to the PRM, you're not allowed to use .l on a + * XOR instruction. However, emperical experiments and Curro's reading + * of the simulator source both indicate that it's safe. */ fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_D); inst = bld.XOR(tmp, op[0], op[1]); diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp index c7d6fb8c79b..bb7e1eb128c 100644 --- a/src/mesa/drivers/dri/i965/brw_link.cpp +++ b/src/mesa/drivers/dri/i965/brw_link.cpp @@ -110,7 +110,6 @@ process_glsl_ir(gl_shader_stage stage, SUB_TO_ADD_NEG | EXP_TO_EXP2 | LOG_TO_LOG2 | - LDEXP_TO_ARITH | CARRY_TO_ARITH | BORROW_TO_ARITH); diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 83921891d1c..fb7fa235861 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -437,14 +437,19 @@ nir_optimize(nir_shader *nir, bool is_scalar) * is_scalar = true to scalarize everything prior to code gen. */ nir_shader * -brw_preprocess_nir(nir_shader *nir, bool is_scalar) +brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir) { bool progress; /* Written by OPT and OPT_V */ (void)progress; + const bool is_scalar = compiler->scalar_stage[nir->stage]; + if (nir->stage == MESA_SHADER_GEOMETRY) OPT(nir_lower_gs_intrinsics); + if (compiler->precise_trig) + OPT(brw_nir_apply_trig_workarounds); + static const nir_lower_tex_options tex_options = { .lower_txp = ~0, }; @@ -568,7 +573,7 @@ brw_create_nir(struct brw_context *brw, (void)progress; - nir = brw_preprocess_nir(nir, is_scalar); + nir = brw_preprocess_nir(brw->intelScreen->compiler, nir); OPT(nir_lower_system_values); OPT_V(brw_nir_lower_uniforms, is_scalar); diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index 440b4ceb669..2711606511d 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -81,7 +81,8 @@ nir_shader *brw_create_nir(struct brw_context *brw, gl_shader_stage stage, bool is_scalar); -nir_shader *brw_preprocess_nir(nir_shader *nir, bool is_scalar); +nir_shader *brw_preprocess_nir(const struct brw_compiler *compiler, + nir_shader *nir); void brw_nir_lower_vs_inputs(nir_shader *nir, const struct brw_device_info *devinfo, @@ -105,6 +106,8 @@ bool brw_nir_apply_attribute_workarounds(nir_shader *nir, bool use_legacy_snorm_formula, const uint8_t *attrib_wa_flags); +bool brw_nir_apply_trig_workarounds(nir_shader *nir); + nir_shader *brw_nir_apply_sampler_key(nir_shader *nir, const struct brw_device_info *devinfo, const struct brw_sampler_prog_key_data *key, diff --git a/src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py b/src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py new file mode 100755 index 00000000000..67dab9ab326 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py @@ -0,0 +1,43 @@ +#! /usr/bin/env python +# +# Copyright (C) 2016 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +import nir_algebraic + +# The SIN and COS instructions on Intel hardware can produce values +# slightly outside of the [-1.0, 1.0] range for a small set of values. +# Obviously, this can break everyone's expectations about trig functions. +# +# According to an internal presentation, the COS instruction can produce +# a value up to 1.000027 for inputs in the range (0.08296, 0.09888). One +# suggested workaround is to multiply by 0.99997, scaling down the +# amplitude slightly. Apparently this also minimizes the error function, +# reducing the maximum error from 0.00006 to about 0.00003. + +trig_workarounds = [ + (('fsin', 'x'), ('fmul', ('fsin', 'x'), 0.99997)), + (('fcos', 'x'), ('fmul', ('fcos', 'x'), 0.99997)), +] + +print '#include "brw_nir.h"' +print nir_algebraic.AlgebraicPass("brw_nir_apply_trig_workarounds", + trig_workarounds).render() diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp index f3361d69786..636340add35 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp @@ -75,7 +75,7 @@ brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var, int namelen = strlen(var->name); /* The data for our (non-builtin) uniforms is stored in a series of - * gl_uniform_driver_storage structs for each subcomponent that + * gl_uniform_storage structs for each subcomponent that * glGetUniformLocation() could name. We know it's been set up in the same * order we'd walk the type, so walk the list of storage and find anything * with our name, or the prefix of a component that starts with our name. diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index b63e44a3bfb..032fdaa4d23 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1094,24 +1094,12 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_fsin: - if (!compiler->precise_trig) { - inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]); - } else { - src_reg tmp = src_reg(this, glsl_type::vec4_type); - inst = emit_math(SHADER_OPCODE_SIN, dst_reg(tmp), op[0]); - inst = emit(MUL(dst, tmp, brw_imm_f(0.99997))); - } + inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fcos: - if (!compiler->precise_trig) { - inst = emit_math(SHADER_OPCODE_COS, dst, op[0]); - } else { - src_reg tmp = src_reg(this, glsl_type::vec4_type); - inst = emit_math(SHADER_OPCODE_COS, dst_reg(tmp), op[0]); - inst = emit(MUL(dst, tmp, brw_imm_f(0.99997))); - } + inst = emit_math(SHADER_OPCODE_COS, dst, op[0]); inst->saturate = instr->dest.saturate; break; @@ -1141,6 +1129,10 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) * When we XOR the sources, the top bit is 0 if they are the same and 1 * if they are different. We can then use a conditional modifier to * turn that into a predicate. This leads us to an XOR.l instruction. + * + * Technically, according to the PRM, you're not allowed to use .l on a + * XOR instruction. However, emperical experiments and Curro's reading + * of the simulator source both indicate that it's safe. */ src_reg tmp = src_reg(this, glsl_type::ivec4_type); inst = emit(XOR(dst_reg(tmp), op[0], op[1])); diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 60ac124ecd0..6a20bd6d925 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -340,6 +340,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_texture_view = true; ctx->Extensions.ARB_shader_storage_buffer_object = true; ctx->Extensions.EXT_shader_samples_identical = true; + ctx->Extensions.OES_texture_buffer = true; if (brw->can_do_pipelined_register_writes) { ctx->Extensions.ARB_draw_indirect = true; diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c index 31030b1b4ea..a486d6e1ab9 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -141,8 +141,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, if (rb->_BaseFormat == GL_RGB) return false; - if (!intel_get_memcpy(rb->Format, format, type, &mem_copy, &cpp, - INTEL_DOWNLOAD)) + if (!intel_get_memcpy(rb->Format, format, type, &mem_copy, &cpp)) return false; if (!irb->mt || diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index c6eb50aaba8..dbec82fbd44 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1000,14 +1000,18 @@ intelCreateBuffer(__DRIscreen * driScrnPriv, fb->Visual.samples = num_samples; } - if (mesaVis->redBits == 5) - rgbFormat = MESA_FORMAT_B5G6R5_UNORM; - else if (mesaVis->sRGBCapable) - rgbFormat = MESA_FORMAT_B8G8R8A8_SRGB; - else if (mesaVis->alphaBits == 0) - rgbFormat = MESA_FORMAT_B8G8R8X8_UNORM; - else { - rgbFormat = MESA_FORMAT_B8G8R8A8_SRGB; + if (mesaVis->redBits == 5) { + rgbFormat = mesaVis->redMask == 0x1f ? MESA_FORMAT_R5G6B5_UNORM + : MESA_FORMAT_B5G6R5_UNORM; + } else if (mesaVis->sRGBCapable) { + rgbFormat = mesaVis->redMask == 0xff ? MESA_FORMAT_R8G8B8A8_SRGB + : MESA_FORMAT_B8G8R8A8_SRGB; + } else if (mesaVis->alphaBits == 0) { + rgbFormat = mesaVis->redMask == 0xff ? MESA_FORMAT_R8G8B8X8_UNORM + : MESA_FORMAT_B8G8R8X8_UNORM; + } else { + rgbFormat = mesaVis->redMask == 0xff ? MESA_FORMAT_R8G8B8A8_SRGB + : MESA_FORMAT_B8G8R8A8_SRGB; fb->Visual.sRGBCapable = true; } @@ -1078,11 +1082,45 @@ intelDestroyBuffer(__DRIdrawable * driDrawPriv) _mesa_reference_framebuffer(&fb, NULL); } +static void +intel_detect_sseu(struct intel_screen *intelScreen) +{ + assert(intelScreen->devinfo->gen >= 8); + int ret; + + intelScreen->subslice_total = -1; + intelScreen->eu_total = -1; + + ret = intel_get_param(intelScreen->driScrnPriv, I915_PARAM_SUBSLICE_TOTAL, + &intelScreen->subslice_total); + if (ret != -EINVAL) + goto err_out; + + ret = intel_get_param(intelScreen->driScrnPriv, + I915_PARAM_EU_TOTAL, &intelScreen->eu_total); + if (ret != -EINVAL) + goto err_out; + + /* Without this information, we cannot get the right Braswell brandstrings, + * and we have to use conservative numbers for GPGPU on many platforms, but + * otherwise, things will just work. + */ + if (intelScreen->subslice_total < 1 || intelScreen->eu_total < 1) + _mesa_warning(NULL, + "Kernel 4.1 required to properly query GPU properties.\n"); + + return; + +err_out: + intelScreen->subslice_total = -1; + intelScreen->eu_total = -1; + _mesa_warning(NULL, "Failed to query GPU properties.\n"); +} + static bool intel_init_bufmgr(struct intel_screen *intelScreen) { __DRIscreen *spriv = intelScreen->driScrnPriv; - bool devid_override = getenv("INTEL_DEVID_OVERRIDE") != NULL; intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL; @@ -1100,25 +1138,6 @@ intel_init_bufmgr(struct intel_screen *intelScreen) return false; } - intelScreen->subslice_total = -1; - intelScreen->eu_total = -1; - - /* Everything below this is for real hardware only */ - if (intelScreen->no_hw || devid_override) - return true; - - intel_get_param(spriv, I915_PARAM_SUBSLICE_TOTAL, - &intelScreen->subslice_total); - intel_get_param(spriv, I915_PARAM_EU_TOTAL, &intelScreen->eu_total); - - /* Without this information, we cannot get the right Braswell brandstrings, - * and we have to use conservative numbers for GPGPU on many platforms, but - * otherwise, things will just work. - */ - if (intelScreen->subslice_total == -1 || intelScreen->eu_total == -1) - _mesa_warning(NULL, - "Kernel 4.1 required to properly query GPU properties.\n"); - return true; } @@ -1473,6 +1492,10 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen); intelScreen->hw_has_timestamp = intel_detect_timestamp(intelScreen); + /* GENs prior to 8 do not support EU/Subslice info */ + if (intelScreen->devinfo->gen >= 8) + intel_detect_sseu(intelScreen); + const char *force_msaa = getenv("INTEL_FORCE_MSAA"); if (force_msaa) { intelScreen->winsys_msaa_samples_override = diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index 1601edddef6..bee8be1fd27 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -404,8 +404,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx, if (texImage->_BaseFormat == GL_RGB) return false; - if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp, - INTEL_DOWNLOAD)) + if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp)) return false; /* If this is a nontrivial texture view, let another path handle it instead. */ diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c index 4849a4151e2..9561968d2d6 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c @@ -119,8 +119,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, if (ctx->_ImageTransferState) return false; - if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp, - INTEL_UPLOAD)) + if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp)) return false; /* If this is a nontrivial texture view, let another path handle it instead. */ diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c index 31354582964..a549854dce6 100644 --- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c @@ -36,8 +36,10 @@ #include "brw_context.h" #include "intel_tiled_memcpy.h" -#ifdef __SSSE3__ +#if defined(__SSSE3__) #include <tmmintrin.h> +#elif defined(__SSE2__) +#include <emmintrin.h> #endif #define FILE_DEBUG_FLAG DEBUG_TEXTURE @@ -56,21 +58,86 @@ static const uint32_t ytile_width = 128; static const uint32_t ytile_height = 32; static const uint32_t ytile_span = 16; +static inline uint32_t +ror(uint32_t n, uint32_t d) +{ + return (n >> d) | (n << (32 - d)); +} + +/** + * Copy RGBA to BGRA - swap R and B. + */ +static inline void * +rgba8_copy(void *dst, const void *src, size_t bytes) +{ + uint32_t *d = dst; + uint32_t const *s = src; + + assert(bytes % 4 == 0); + + while (bytes >= 4) { + *d = ror(__builtin_bswap32(*s), 8); + d += 1; + s += 1; + bytes -= 4; + } + return dst; +} + #ifdef __SSSE3__ static const uint8_t rgba8_permutation[16] = { 2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15 }; -/* NOTE: dst must be 16-byte aligned. src may be unaligned. */ -#define rgba8_copy_16_aligned_dst(dst, src) \ - _mm_store_si128((__m128i *)(dst), \ - _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(src)), \ - *(__m128i *) rgba8_permutation)) - -/* NOTE: src must be 16-byte aligned. dst may be unaligned. */ -#define rgba8_copy_16_aligned_src(dst, src) \ - _mm_storeu_si128((__m128i *)(dst), \ - _mm_shuffle_epi8(_mm_load_si128((__m128i *)(src)), \ - *(__m128i *) rgba8_permutation)) +static inline void +rgba8_copy_16_aligned_dst(void *dst, const void *src) +{ + _mm_store_si128(dst, + _mm_shuffle_epi8(_mm_loadu_si128(src), + *(__m128i *)rgba8_permutation)); +} + +static inline void +rgba8_copy_16_aligned_src(void *dst, const void *src) +{ + _mm_storeu_si128(dst, + _mm_shuffle_epi8(_mm_load_si128(src), + *(__m128i *)rgba8_permutation)); +} + +#elif defined(__SSE2__) +static inline void +rgba8_copy_16_aligned_dst(void *dst, const void *src) +{ + __m128i srcreg, dstreg, agmask, ag, rb, br; + + agmask = _mm_set1_epi32(0xFF00FF00); + srcreg = _mm_loadu_si128((__m128i *)src); + + rb = _mm_andnot_si128(agmask, srcreg); + ag = _mm_and_si128(agmask, srcreg); + br = _mm_shufflehi_epi16(_mm_shufflelo_epi16(rb, _MM_SHUFFLE(2, 3, 0, 1)), + _MM_SHUFFLE(2, 3, 0, 1)); + dstreg = _mm_or_si128(ag, br); + + _mm_store_si128((__m128i *)dst, dstreg); +} + +static inline void +rgba8_copy_16_aligned_src(void *dst, const void *src) +{ + __m128i srcreg, dstreg, agmask, ag, rb, br; + + agmask = _mm_set1_epi32(0xFF00FF00); + srcreg = _mm_load_si128((__m128i *)src); + + rb = _mm_andnot_si128(agmask, srcreg); + ag = _mm_and_si128(agmask, srcreg); + br = _mm_shufflehi_epi16(_mm_shufflelo_epi16(rb, _MM_SHUFFLE(2, 3, 0, 1)), + _MM_SHUFFLE(2, 3, 0, 1)); + dstreg = _mm_or_si128(ag, br); + + _mm_storeu_si128((__m128i *)dst, dstreg); +} #endif /** @@ -79,35 +146,27 @@ static const uint8_t rgba8_permutation[16] = static inline void * rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes) { - uint8_t *d = dst; - uint8_t const *s = src; + assert(bytes == 0 || !(((uintptr_t)dst) & 0xf)); -#ifdef __SSSE3__ - if (bytes == 16) { - assert(!(((uintptr_t)dst) & 0xf)); - rgba8_copy_16_aligned_dst(d+ 0, s+ 0); +#if defined(__SSSE3__) || defined(__SSE2__) + if (bytes == 64) { + rgba8_copy_16_aligned_dst(dst + 0, src + 0); + rgba8_copy_16_aligned_dst(dst + 16, src + 16); + rgba8_copy_16_aligned_dst(dst + 32, src + 32); + rgba8_copy_16_aligned_dst(dst + 48, src + 48); return dst; } - if (bytes == 64) { - assert(!(((uintptr_t)dst) & 0xf)); - rgba8_copy_16_aligned_dst(d+ 0, s+ 0); - rgba8_copy_16_aligned_dst(d+16, s+16); - rgba8_copy_16_aligned_dst(d+32, s+32); - rgba8_copy_16_aligned_dst(d+48, s+48); - return dst; + while (bytes >= 16) { + rgba8_copy_16_aligned_dst(dst, src); + src += 16; + dst += 16; + bytes -= 16; } #endif - while (bytes >= 4) { - d[0] = s[2]; - d[1] = s[1]; - d[2] = s[0]; - d[3] = s[3]; - d += 4; - s += 4; - bytes -= 4; - } + rgba8_copy(dst, src, bytes); + return dst; } @@ -117,35 +176,27 @@ rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes) static inline void * rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes) { - uint8_t *d = dst; - uint8_t const *s = src; + assert(bytes == 0 || !(((uintptr_t)src) & 0xf)); -#ifdef __SSSE3__ - if (bytes == 16) { - assert(!(((uintptr_t)src) & 0xf)); - rgba8_copy_16_aligned_src(d+ 0, s+ 0); +#if defined(__SSSE3__) || defined(__SSE2__) + if (bytes == 64) { + rgba8_copy_16_aligned_src(dst + 0, src + 0); + rgba8_copy_16_aligned_src(dst + 16, src + 16); + rgba8_copy_16_aligned_src(dst + 32, src + 32); + rgba8_copy_16_aligned_src(dst + 48, src + 48); return dst; } - if (bytes == 64) { - assert(!(((uintptr_t)src) & 0xf)); - rgba8_copy_16_aligned_src(d+ 0, s+ 0); - rgba8_copy_16_aligned_src(d+16, s+16); - rgba8_copy_16_aligned_src(d+32, s+32); - rgba8_copy_16_aligned_src(d+48, s+48); - return dst; + while (bytes >= 16) { + rgba8_copy_16_aligned_src(dst, src); + src += 16; + dst += 16; + bytes -= 16; } #endif - while (bytes >= 4) { - d[0] = s[2]; - d[1] = s[1]; - d[2] = s[0]; - d[3] = s[3]; - d += 4; - s += 4; - bytes -= 4; - } + rgba8_copy(dst, src, bytes); + return dst; } @@ -172,6 +223,12 @@ typedef void (*tile_copy_fn)(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, * Copy texture data from linear to X tile layout. * * \copydoc tile_copy_fn + * + * The mem_copy parameters allow the user to specify an alternative mem_copy + * function that, for instance, may do RGBA -> BGRA swizzling. The first + * function must handle any memory alignment while the second function must + * only handle 16-byte alignment in whichever side (source or destination) is + * tiled. */ static inline void linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, @@ -179,7 +236,8 @@ linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, char *dst, const char *src, int32_t src_pitch, uint32_t swizzle_bit, - mem_copy_fn mem_copy) + mem_copy_fn mem_copy, + mem_copy_fn mem_copy_align16) { /* The copy destination offset for each range copied is the sum of * an X offset 'x0' or 'xo' and a Y offset 'yo.' @@ -200,10 +258,10 @@ linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, mem_copy(dst + ((x0 + yo) ^ swizzle), src + x0, x1 - x0); for (xo = x1; xo < x2; xo += xtile_span) { - mem_copy(dst + ((xo + yo) ^ swizzle), src + xo, xtile_span); + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + xo, xtile_span); } - mem_copy(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); src += src_pitch; } @@ -220,7 +278,8 @@ linear_to_ytiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, char *dst, const char *src, int32_t src_pitch, uint32_t swizzle_bit, - mem_copy_fn mem_copy) + mem_copy_fn mem_copy, + mem_copy_fn mem_copy_align16) { /* Y tiles consist of columns that are 'ytile_span' wide (and the same height * as the tile). Thus the destination offset for (x,y) is the sum of: @@ -259,12 +318,12 @@ linear_to_ytiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, * at each step so we don't need to calculate it explicitly. */ for (x = x1; x < x2; x += ytile_span) { - mem_copy(dst + ((xo + yo) ^ swizzle), src + x, ytile_span); + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x, ytile_span); xo += bytes_per_column; swizzle ^= swizzle_bit; } - mem_copy(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); src += src_pitch; } @@ -281,7 +340,8 @@ xtiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, char *dst, const char *src, int32_t dst_pitch, uint32_t swizzle_bit, - mem_copy_fn mem_copy) + mem_copy_fn mem_copy, + mem_copy_fn mem_copy_align16) { /* The copy destination offset for each range copied is the sum of * an X offset 'x0' or 'xo' and a Y offset 'yo.' @@ -302,10 +362,10 @@ xtiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, mem_copy(dst + x0, src + ((x0 + yo) ^ swizzle), x1 - x0); for (xo = x1; xo < x2; xo += xtile_span) { - mem_copy(dst + xo, src + ((xo + yo) ^ swizzle), xtile_span); + mem_copy_align16(dst + xo, src + ((xo + yo) ^ swizzle), xtile_span); } - mem_copy(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); + mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); dst += dst_pitch; } @@ -322,7 +382,8 @@ ytiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, char *dst, const char *src, int32_t dst_pitch, uint32_t swizzle_bit, - mem_copy_fn mem_copy) + mem_copy_fn mem_copy, + mem_copy_fn mem_copy_align16) { /* Y tiles consist of columns that are 'ytile_span' wide (and the same height * as the tile). Thus the destination offset for (x,y) is the sum of: @@ -361,12 +422,12 @@ ytiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, * at each step so we don't need to calculate it explicitly. */ for (x = x1; x < x2; x += ytile_span) { - mem_copy(dst + x, src + ((xo + yo) ^ swizzle), ytile_span); + mem_copy_align16(dst + x, src + ((xo + yo) ^ swizzle), ytile_span); xo += bytes_per_column; swizzle ^= swizzle_bit; } - mem_copy(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); + mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); dst += dst_pitch; } @@ -393,26 +454,27 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) { if (mem_copy == memcpy) return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height, - dst, src, src_pitch, swizzle_bit, memcpy); - else if (mem_copy == rgba8_copy_aligned_dst) + dst, src, src_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height, dst, src, src_pitch, swizzle_bit, - rgba8_copy_aligned_dst); + rgba8_copy, rgba8_copy_aligned_dst); else unreachable("not reached"); } else { if (mem_copy == memcpy) return linear_to_xtiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, memcpy); - else if (mem_copy == rgba8_copy_aligned_dst) + dst, src, src_pitch, swizzle_bit, + memcpy, memcpy); + else if (mem_copy == rgba8_copy) return linear_to_xtiled(x0, x1, x2, x3, y0, y1, dst, src, src_pitch, swizzle_bit, - rgba8_copy_aligned_dst); + rgba8_copy, rgba8_copy_aligned_dst); else unreachable("not reached"); } linear_to_xtiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, mem_copy); + dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy); } /** @@ -435,26 +497,26 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) { if (mem_copy == memcpy) return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height, - dst, src, src_pitch, swizzle_bit, memcpy); - else if (mem_copy == rgba8_copy_aligned_dst) + dst, src, src_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height, dst, src, src_pitch, swizzle_bit, - rgba8_copy_aligned_dst); + rgba8_copy, rgba8_copy_aligned_dst); else unreachable("not reached"); } else { if (mem_copy == memcpy) return linear_to_ytiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, memcpy); - else if (mem_copy == rgba8_copy_aligned_dst) + dst, src, src_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) return linear_to_ytiled(x0, x1, x2, x3, y0, y1, dst, src, src_pitch, swizzle_bit, - rgba8_copy_aligned_dst); + rgba8_copy, rgba8_copy_aligned_dst); else unreachable("not reached"); } linear_to_ytiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, mem_copy); + dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy); } /** @@ -477,26 +539,26 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) { if (mem_copy == memcpy) return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, - dst, src, dst_pitch, swizzle_bit, memcpy); - else if (mem_copy == rgba8_copy_aligned_src) + dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, dst, src, dst_pitch, swizzle_bit, - rgba8_copy_aligned_src); + rgba8_copy, rgba8_copy_aligned_src); else unreachable("not reached"); } else { if (mem_copy == memcpy) return xtiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, memcpy); - else if (mem_copy == rgba8_copy_aligned_src) + dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) return xtiled_to_linear(x0, x1, x2, x3, y0, y1, dst, src, dst_pitch, swizzle_bit, - rgba8_copy_aligned_src); + rgba8_copy, rgba8_copy_aligned_src); else unreachable("not reached"); } xtiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, mem_copy); + dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy); } /** @@ -519,26 +581,26 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) { if (mem_copy == memcpy) return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, - dst, src, dst_pitch, swizzle_bit, memcpy); - else if (mem_copy == rgba8_copy_aligned_src) + dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, dst, src, dst_pitch, swizzle_bit, - rgba8_copy_aligned_src); + rgba8_copy, rgba8_copy_aligned_src); else unreachable("not reached"); } else { if (mem_copy == memcpy) return ytiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, memcpy); - else if (mem_copy == rgba8_copy_aligned_src) + dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); + else if (mem_copy == rgba8_copy) return ytiled_to_linear(x0, x1, x2, x3, y0, y1, dst, src, dst_pitch, swizzle_bit, - rgba8_copy_aligned_src); + rgba8_copy, rgba8_copy_aligned_src); else unreachable("not reached"); } ytiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, mem_copy); + dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy); } /** @@ -745,8 +807,7 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2, * \return true if the format and type combination are valid */ bool intel_get_memcpy(mesa_format tiledFormat, GLenum format, - GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp, - enum intel_memcpy_direction direction) + GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp) { if (type == GL_UNSIGNED_INT_8_8_8_8_REV && !(format == GL_RGBA || format == GL_BGRA)) @@ -764,8 +825,7 @@ bool intel_get_memcpy(mesa_format tiledFormat, GLenum format, if (format == GL_BGRA) { *mem_copy = memcpy; } else if (format == GL_RGBA) { - *mem_copy = direction == INTEL_UPLOAD ? rgba8_copy_aligned_dst - : rgba8_copy_aligned_src; + *mem_copy = rgba8_copy; } } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) || (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM) || @@ -776,8 +836,7 @@ bool intel_get_memcpy(mesa_format tiledFormat, GLenum format, /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can * use the same function. */ - *mem_copy = direction == INTEL_UPLOAD ? rgba8_copy_aligned_dst - : rgba8_copy_aligned_src; + *mem_copy = rgba8_copy; } else if (format == GL_RGBA) { *mem_copy = memcpy; } diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h index 01543bf298d..d9148bb6239 100644 --- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h @@ -55,20 +55,7 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2, uint32_t tiling, mem_copy_fn mem_copy); -/* Tells intel_get_memcpy() whether the memcpy() is - * - * - an upload to the GPU with an aligned destination and a potentially - * unaligned source; or - * - a download from the GPU with an aligned source and a potentially - * unaligned destination. - */ -enum intel_memcpy_direction { - INTEL_UPLOAD, - INTEL_DOWNLOAD -}; - bool intel_get_memcpy(mesa_format tiledFormat, GLenum format, - GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp, - enum intel_memcpy_direction direction); + GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp); #endif /* INTEL_TILED_MEMCPY */ diff --git a/src/mesa/drivers/x11/SConscript b/src/mesa/drivers/x11/SConscript index 45419973d39..59c8df4b3c2 100644 --- a/src/mesa/drivers/x11/SConscript +++ b/src/mesa/drivers/x11/SConscript @@ -34,9 +34,13 @@ sources = [ 'xm_tri.c', ] -# Disallow undefined symbols if env['platform'] != 'darwin': - env.Append(SHLINKFLAGS = ['-Wl,-z,defs']) + # Disallow undefined symbols, except with Address Sanitizer, since libasan + # is not linked on shared libs, as it should be LD_PRELOAD'ed instead + if not env['asan']: + env.Append(SHLINKFLAGS = [ + '-Wl,-z,defs', + ]) # libGL.so.1.6 libgl_1_6 = env.SharedLibrary( diff --git a/src/mesa/drivers/x11/fakeglx.c b/src/mesa/drivers/x11/fakeglx.c index 80b71765e6c..2f4d966973e 100644 --- a/src/mesa/drivers/x11/fakeglx.c +++ b/src/mesa/drivers/x11/fakeglx.c @@ -794,7 +794,7 @@ destroy_visuals_on_display(Display *dpy) if (VisualTable[i]->display == dpy) { /* remove this visual */ int j; - free(VisualTable[i]); + XMesaDestroyVisual(VisualTable[i]); for (j = i; j < NumVisuals - 1; j++) VisualTable[j] = VisualTable[j + 1]; NumVisuals--; diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c index 65e7ca89d32..82c4d188d5a 100644 --- a/src/mesa/drivers/x11/xm_api.c +++ b/src/mesa/drivers/x11/xm_api.c @@ -856,6 +856,7 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display, accum_red_size, accum_green_size, accum_blue_size, accum_alpha_size, 0)) { + free(v->visinfo); free(v); return NULL; } diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index dbba136f526..6af02d1c3dc 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -1525,10 +1525,6 @@ _mesa_copy_context( const struct gl_context *src, struct gl_context *dst, * Check if the given context can render into the given framebuffer * by checking visual attributes. * - * Most of these tests could go away because Mesa is now pretty flexible - * in terms of mixing rendering contexts with framebuffers. As long - * as RGB vs. CI mode agree, we're probably good. - * * \return GL_TRUE if compatible, GL_FALSE otherwise. */ static GLboolean @@ -1541,32 +1537,18 @@ check_compatible(const struct gl_context *ctx, if (buffer == _mesa_get_incomplete_framebuffer()) return GL_TRUE; -#if 0 - /* disabling this fixes the fgl_glxgears pbuffer demo */ - if (ctxvis->doubleBufferMode && !bufvis->doubleBufferMode) - return GL_FALSE; -#endif - if (ctxvis->stereoMode && !bufvis->stereoMode) - return GL_FALSE; - if (ctxvis->haveAccumBuffer && !bufvis->haveAccumBuffer) - return GL_FALSE; - if (ctxvis->haveDepthBuffer && !bufvis->haveDepthBuffer) - return GL_FALSE; - if (ctxvis->haveStencilBuffer && !bufvis->haveStencilBuffer) - return GL_FALSE; - if (ctxvis->redMask && ctxvis->redMask != bufvis->redMask) - return GL_FALSE; - if (ctxvis->greenMask && ctxvis->greenMask != bufvis->greenMask) - return GL_FALSE; - if (ctxvis->blueMask && ctxvis->blueMask != bufvis->blueMask) - return GL_FALSE; -#if 0 - /* disabled (see bug 11161) */ - if (ctxvis->depthBits && ctxvis->depthBits != bufvis->depthBits) - return GL_FALSE; -#endif - if (ctxvis->stencilBits && ctxvis->stencilBits != bufvis->stencilBits) - return GL_FALSE; +#define check_component(foo) \ + if (ctxvis->foo && bufvis->foo && \ + ctxvis->foo != bufvis->foo) \ + return GL_FALSE + + check_component(redMask); + check_component(greenMask); + check_component(blueMask); + check_component(depthBits); + check_component(stencilBits); + +#undef check_component return GL_TRUE; } diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index ddc25d812c7..78899ecccad 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -91,6 +91,7 @@ EXT(ARB_point_sprite , ARB_point_sprite EXT(ARB_program_interface_query , dummy_true , GLL, GLC, x , x , 2012) EXT(ARB_provoking_vertex , EXT_provoking_vertex , GLL, GLC, x , x , 2009) EXT(ARB_query_buffer_object , ARB_query_buffer_object , GLL, GLC, x , x , 2013) +EXT(ARB_robust_buffer_access_behavior , ARB_robust_buffer_access_behavior , GLL, GLC, x , x , 2012) EXT(ARB_robustness , dummy_true , GLL, GLC, x , x , 2010) EXT(ARB_sample_shading , ARB_sample_shading , GLL, GLC, x , x , 2009) EXT(ARB_sampler_objects , dummy_true , GLL, GLC, x , x , 2009) diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index 14cd58870f7..fe54109322d 100644 --- a/src/mesa/main/imports.c +++ b/src/mesa/main/imports.c @@ -262,7 +262,7 @@ ffsll(long long int val) { int bit; - assert(sizeof(val) == 8); + STATIC_ASSERT(sizeof(val) == 8); bit = ffs((int) val); if (bit != 0) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 6c09948af04..eec057e0137 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3771,6 +3771,7 @@ struct gl_extensions GLboolean ARB_pipeline_statistics_query; GLboolean ARB_point_sprite; GLboolean ARB_query_buffer_object; + GLboolean ARB_robust_buffer_access_behavior; GLboolean ARB_sample_shading; GLboolean ARB_seamless_cube_map; GLboolean ARB_shader_atomic_counter_ops; diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c index ca366d967ab..74761953044 100644 --- a/src/mesa/main/samplerobj.c +++ b/src/mesa/main/samplerobj.c @@ -1171,8 +1171,9 @@ _mesa_SamplerParameterIiv(GLuint sampler, GLenum pname, const GLint *params) sampObj = _mesa_lookup_samplerobj(ctx, sampler); if (!sampObj) { - _mesa_error(ctx, GL_INVALID_VALUE, "glSamplerParameterIiv(sampler %u)", - sampler); + _mesa_error(ctx, (_mesa_is_gles(ctx) ? + GL_INVALID_OPERATION : GL_INVALID_VALUE), + "glSamplerParameterIiv(sampler %u)", sampler); return; } @@ -1257,8 +1258,9 @@ _mesa_SamplerParameterIuiv(GLuint sampler, GLenum pname, const GLuint *params) sampObj = _mesa_lookup_samplerobj(ctx, sampler); if (!sampObj) { - _mesa_error(ctx, GL_INVALID_VALUE, "glSamplerParameterIuiv(sampler %u)", - sampler); + _mesa_error(ctx, (_mesa_is_gles(ctx) ? + GL_INVALID_OPERATION : GL_INVALID_VALUE), + "glSamplerParameterIuiv(sampler %u)", sampler); return; } diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c index 2af3653f7bb..b9c1bcbbc6e 100644 --- a/src/mesa/main/version.c +++ b/src/mesa/main/version.c @@ -361,7 +361,7 @@ compute_version(const struct gl_extensions *extensions, extensions->ARB_fragment_layer_viewport && extensions->ARB_framebuffer_no_attachments && extensions->ARB_internalformat_query2 && - /* extensions->ARB_robust_buffer_access_behavior */ 0 && + extensions->ARB_robust_buffer_access_behavior && extensions->ARB_shader_image_size && extensions->ARB_shader_storage_buffer_object && extensions->ARB_stencil_texturing && diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c index 202b4eeeefa..8bbc2f0af4b 100644 --- a/src/mesa/state_tracker/st_cb_bufferobjects.c +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c @@ -98,7 +98,7 @@ static void st_bufferobj_subdata(struct gl_context *ctx, GLintptrARB offset, GLsizeiptrARB size, - const GLvoid * data, struct gl_buffer_object *obj) + const void * data, struct gl_buffer_object *obj) { struct st_buffer_object *st_obj = st_buffer_object(obj); @@ -142,7 +142,7 @@ static void st_bufferobj_get_subdata(struct gl_context *ctx, GLintptrARB offset, GLsizeiptrARB size, - GLvoid * data, struct gl_buffer_object *obj) + void * data, struct gl_buffer_object *obj) { struct st_buffer_object *st_obj = st_buffer_object(obj); @@ -175,7 +175,7 @@ static GLboolean st_bufferobj_data(struct gl_context *ctx, GLenum target, GLsizeiptrARB size, - const GLvoid * data, + const void * data, GLenum usage, GLbitfield storageFlags, struct gl_buffer_object *obj) @@ -513,7 +513,7 @@ st_copy_buffer_subdata(struct gl_context *ctx, static void st_clear_buffer_subdata(struct gl_context *ctx, GLintptr offset, GLsizeiptr size, - const GLvoid *clearValue, + const void *clearValue, GLsizeiptr clearValueSize, struct gl_buffer_object *bufObj) { diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index 55801469f23..362cef46286 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -206,6 +206,7 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers) CSO_BIT_STREAM_OUTPUTS | CSO_BIT_VERTEX_ELEMENTS | CSO_BIT_AUX_VERTEX_BUFFER_SLOT | + CSO_BIT_PAUSE_QUERIES | CSO_BITS_ALL_SHADERS)); /* blend state: RGBA masking */ diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 01ed5441d11..c3e05bbb7ce 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -379,12 +379,12 @@ static struct pipe_resource * make_texture(struct st_context *st, GLsizei width, GLsizei height, GLenum format, GLenum type, const struct gl_pixelstore_attrib *unpack, - const GLvoid *pixels) + const void *pixels) { struct gl_context *ctx = st->ctx; struct pipe_context *pipe = st->pipe; mesa_format mformat; - struct pipe_resource *pt; + struct pipe_resource *pt = NULL; enum pipe_format pipeFormat; GLenum baseInternalFormat; @@ -403,10 +403,18 @@ make_texture(struct st_context *st, unpack->SkipRows == 0 && unpack->SwapBytes == GL_FALSE && st->drawpix_cache.image) { + assert(st->drawpix_cache.texture); + /* check if the pixel data is the same */ if (memcmp(pixels, st->drawpix_cache.image, width * height * bpp) == 0) { /* OK, re-use the cached texture */ - return st->drawpix_cache.texture; + pipe_resource_reference(&pt, st->drawpix_cache.texture); + /* refcount of returned texture should be at least two here. One + * reference for the cache to hold on to, one for the caller (which + * it will release), and possibly more held by the driver. + */ + assert(pt->reference.count >= 2); + return pt; } } @@ -525,8 +533,14 @@ make_texture(struct st_context *st, st->drawpix_cache.image = malloc(width * height * bpp); if (st->drawpix_cache.image) { memcpy(st->drawpix_cache.image, pixels, width * height * bpp); + pipe_resource_reference(&st->drawpix_cache.texture, pt); + } + else { + /* out of memory, free/disable cached texture */ + st->drawpix_cache.width = 0; + st->drawpix_cache.height = 0; + pipe_resource_reference(&st->drawpix_cache.texture, NULL); } - st->drawpix_cache.texture = pt; } #endif @@ -744,7 +758,7 @@ static void draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, const struct gl_pixelstore_attrib *unpack, - const GLvoid *pixels) + const void *pixels) { struct st_context *st = st_context(ctx); struct pipe_context *pipe = st->pipe; @@ -798,7 +812,7 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, for (row = 0; row < height; row++) { GLfloat *zValuesFloat = (GLfloat*)zValues; GLenum destType = GL_UNSIGNED_BYTE; - const GLvoid *source = _mesa_image_address2d(&clippedUnpack, pixels, + const void *source = _mesa_image_address2d(&clippedUnpack, pixels, width, height, format, type, row, 0); @@ -1041,7 +1055,7 @@ static void st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, - const struct gl_pixelstore_attrib *unpack, const GLvoid *pixels) + const struct gl_pixelstore_attrib *unpack, const void *pixels) { void *driver_vp, *driver_fp; struct st_context *st = st_context(ctx); @@ -1160,9 +1174,8 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, if (num_sampler_view > 1) pipe_sampler_view_reference(&sv[1], NULL); -#if !USE_DRAWPIXELS_CACHE + /* free the texture (but may persist in the cache) */ pipe_resource_reference(&pt, NULL); -#endif } diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index ff570e0e444..456ad83818b 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -40,6 +40,7 @@ #include "main/glformats.h" #include "main/macros.h" #include "main/renderbuffer.h" +#include "main/state.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" @@ -729,6 +730,7 @@ st_ReadBuffer(struct gl_context *ctx, GLenum buffer) fb->Attachment[fb->_ColorReadBufferIndex].Type == GL_NONE) { /* add the buffer */ st_manager_add_color_renderbuffer(st, fb, fb->_ColorReadBufferIndex); + _mesa_update_state(ctx); st_validate_state(st, ST_PIPELINE_RENDER); } } diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 5153c4bbba1..393b881ea4c 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -85,7 +85,7 @@ st_ReadPixels(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, const struct gl_pixelstore_attrib *pack, - GLvoid *pixels) + void *pixels) { struct st_context *st = st_context(ctx); struct gl_renderbuffer *rb = @@ -238,7 +238,7 @@ st_ReadPixels(struct gl_context *ctx, GLint x, GLint y, GLuint row; for (row = 0; row < (unsigned) height; row++) { - GLvoid *dest = _mesa_image_address2d(pack, pixels, + void *dest = _mesa_image_address2d(pack, pixels, width, height, format, type, row, 0); memcpy(dest, map, bytesPerRow); diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 3980f5d2f51..a18b08b3226 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -1345,6 +1345,7 @@ try_pbo_upload_common(struct gl_context *ctx, CSO_BIT_DEPTH_STENCIL_ALPHA | CSO_BIT_RASTERIZER | CSO_BIT_STREAM_OUTPUTS | + CSO_BIT_PAUSE_QUERIES | CSO_BITS_ALL_SHADERS)); cso_save_constant_buffer_slot0(cso, PIPE_SHADER_FRAGMENT); @@ -1845,7 +1846,7 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims, /* 1D array textures. * We need to convert gallium coords to GL coords. */ - GLvoid *src = _mesa_image_address2d(unpack, pixels, + void *src = _mesa_image_address2d(unpack, pixels, width, depth, format, type, slice, 0); memcpy(map, src, bytesPerRow); @@ -1854,7 +1855,7 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims, ubyte *slice_map = map; for (row = 0; row < (unsigned) height; row++) { - GLvoid *src = _mesa_image_address(dims, unpack, pixels, + void *src = _mesa_image_address(dims, unpack, pixels, width, height, format, type, slice, row, 0); memcpy(slice_map, src, bytesPerRow); @@ -1928,7 +1929,7 @@ st_CompressedTexSubImage(struct gl_context *ctx, GLuint dims, struct gl_texture_image *texImage, GLint x, GLint y, GLint z, GLsizei w, GLsizei h, GLsizei d, - GLenum format, GLsizei imageSize, const GLvoid *data) + GLenum format, GLsizei imageSize, const void *data) { struct st_context *st = st_context(ctx); struct st_texture_image *stImage = st_texture_image(texImage); @@ -2053,7 +2054,7 @@ fallback: static void st_CompressedTexImage(struct gl_context *ctx, GLuint dims, struct gl_texture_image *texImage, - GLsizei imageSize, const GLvoid *data) + GLsizei imageSize, const void *data) { prep_teximage(ctx, texImage, GL_NONE, GL_NONE); @@ -2106,7 +2107,7 @@ static void st_GetTexSubImage(struct gl_context * ctx, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLint depth, - GLenum format, GLenum type, GLvoid * pixels, + GLenum format, GLenum type, void * pixels, struct gl_texture_image *texImage) { struct st_context *st = st_context(ctx); @@ -2319,7 +2320,7 @@ st_GetTexSubImage(struct gl_context * ctx, /* 1D array textures. * We need to convert gallium coords to GL coords. */ - GLvoid *dest = _mesa_image_address3d(&ctx->Pack, pixels, + void *dest = _mesa_image_address3d(&ctx->Pack, pixels, width, depth, format, type, 0, slice, 0); memcpy(dest, map, bytesPerRow); @@ -2328,7 +2329,7 @@ st_GetTexSubImage(struct gl_context * ctx, ubyte *slice_map = map; for (row = 0; row < height; row++) { - GLvoid *dest = _mesa_image_address3d(&ctx->Pack, pixels, + void *dest = _mesa_image_address3d(&ctx->Pack, pixels, width, height, format, type, slice, row, 0); memcpy(dest, slice_map, bytesPerRow); @@ -2363,7 +2364,7 @@ st_GetTexSubImage(struct gl_context * ctx, /* 1D array textures. * We need to convert gallium coords to GL coords. */ - GLvoid *dest = _mesa_image_address3d(&ctx->Pack, pixels, + void *dest = _mesa_image_address3d(&ctx->Pack, pixels, width, depth, format, type, 0, slice, 0); @@ -2377,7 +2378,7 @@ st_GetTexSubImage(struct gl_context * ctx, } else { for (row = 0; row < height; row++) { - GLvoid *dest = _mesa_image_address3d(&ctx->Pack, pixels, + void *dest = _mesa_image_address3d(&ctx->Pack, pixels, width, height, format, type, slice, row, 0); @@ -3085,7 +3086,7 @@ st_ClearTexSubImage(struct gl_context *ctx, struct gl_texture_image *texImage, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, - const GLvoid *clearValue) + const void *clearValue) { static const char zeros[16] = {0}; struct st_texture_image *stImage = st_texture_image(texImage); diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 287894317df..6d407d33eff 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -589,6 +589,7 @@ void st_init_extensions(struct pipe_screen *screen, { o(ARB_pipeline_statistics_query), PIPE_CAP_QUERY_PIPELINE_STATISTICS }, { o(ARB_point_sprite), PIPE_CAP_POINT_SPRITE }, { o(ARB_query_buffer_object), PIPE_CAP_QUERY_BUFFER_OBJECT }, + { o(ARB_robust_buffer_access_behavior), PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR }, { o(ARB_sample_shading), PIPE_CAP_SAMPLE_SHADING }, { o(ARB_seamless_cube_map), PIPE_CAP_SEAMLESS_CUBE_MAP }, { o(ARB_shader_draw_parameters), PIPE_CAP_DRAW_PARAMETERS }, diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index b9ab7ae9919..5f037daea76 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -5192,43 +5192,72 @@ struct st_translate { }; /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ -const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = { - /* Vertex shader - */ - TGSI_SEMANTIC_VERTEXID, - TGSI_SEMANTIC_INSTANCEID, - TGSI_SEMANTIC_VERTEXID_NOBASE, - TGSI_SEMANTIC_BASEVERTEX, - TGSI_SEMANTIC_BASEINSTANCE, - TGSI_SEMANTIC_DRAWID, - - /* Geometry shader - */ - TGSI_SEMANTIC_INVOCATIONID, - - /* Fragment shader - */ - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_FACE, - TGSI_SEMANTIC_SAMPLEID, - TGSI_SEMANTIC_SAMPLEPOS, - TGSI_SEMANTIC_SAMPLEMASK, - TGSI_SEMANTIC_HELPER_INVOCATION, - - /* Tessellation shaders - */ - TGSI_SEMANTIC_TESSCOORD, - TGSI_SEMANTIC_VERTICESIN, - TGSI_SEMANTIC_PRIMID, - TGSI_SEMANTIC_TESSOUTER, - TGSI_SEMANTIC_TESSINNER, +unsigned +_mesa_sysval_to_semantic(unsigned sysval) +{ + switch (sysval) { + /* Vertex shader */ + case SYSTEM_VALUE_VERTEX_ID: + return TGSI_SEMANTIC_VERTEXID; + case SYSTEM_VALUE_INSTANCE_ID: + return TGSI_SEMANTIC_INSTANCEID; + case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: + return TGSI_SEMANTIC_VERTEXID_NOBASE; + case SYSTEM_VALUE_BASE_VERTEX: + return TGSI_SEMANTIC_BASEVERTEX; + case SYSTEM_VALUE_BASE_INSTANCE: + return TGSI_SEMANTIC_BASEINSTANCE; + case SYSTEM_VALUE_DRAW_ID: + return TGSI_SEMANTIC_DRAWID; + + /* Geometry shader */ + case SYSTEM_VALUE_INVOCATION_ID: + return TGSI_SEMANTIC_INVOCATIONID; + + /* Fragment shader */ + case SYSTEM_VALUE_FRAG_COORD: + return TGSI_SEMANTIC_POSITION; + case SYSTEM_VALUE_FRONT_FACE: + return TGSI_SEMANTIC_FACE; + case SYSTEM_VALUE_SAMPLE_ID: + return TGSI_SEMANTIC_SAMPLEID; + case SYSTEM_VALUE_SAMPLE_POS: + return TGSI_SEMANTIC_SAMPLEPOS; + case SYSTEM_VALUE_SAMPLE_MASK_IN: + return TGSI_SEMANTIC_SAMPLEMASK; + case SYSTEM_VALUE_HELPER_INVOCATION: + return TGSI_SEMANTIC_HELPER_INVOCATION; + + /* Tessellation shader */ + case SYSTEM_VALUE_TESS_COORD: + return TGSI_SEMANTIC_TESSCOORD; + case SYSTEM_VALUE_VERTICES_IN: + return TGSI_SEMANTIC_VERTICESIN; + case SYSTEM_VALUE_PRIMITIVE_ID: + return TGSI_SEMANTIC_PRIMID; + case SYSTEM_VALUE_TESS_LEVEL_OUTER: + return TGSI_SEMANTIC_TESSOUTER; + case SYSTEM_VALUE_TESS_LEVEL_INNER: + return TGSI_SEMANTIC_TESSINNER; + + /* Compute shader */ + case SYSTEM_VALUE_LOCAL_INVOCATION_ID: + return TGSI_SEMANTIC_THREAD_ID; + case SYSTEM_VALUE_WORK_GROUP_ID: + return TGSI_SEMANTIC_BLOCK_ID; + case SYSTEM_VALUE_NUM_WORK_GROUPS: + return TGSI_SEMANTIC_GRID_SIZE; + + /* Unhandled */ + case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: + case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: + case SYSTEM_VALUE_VERTEX_CNT: + default: + assert(!"Unexpected SYSTEM_VALUE_ enum"); + return TGSI_SEMANTIC_COUNT; + } +} - /* Compute shaders - */ - TGSI_SEMANTIC_THREAD_ID, - TGSI_SEMANTIC_BLOCK_ID, - TGSI_SEMANTIC_GRID_SIZE, -}; /** * Make note of a branch to a label in the TGSI code. @@ -6000,35 +6029,6 @@ st_translate_program( assert(numInputs <= ARRAY_SIZE(t->inputs)); assert(numOutputs <= ARRAY_SIZE(t->outputs)); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_FRONT_FACE] == - TGSI_SEMANTIC_FACE); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_VERTEX_ID] == - TGSI_SEMANTIC_VERTEXID); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_INSTANCE_ID] == - TGSI_SEMANTIC_INSTANCEID); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_ID] == - TGSI_SEMANTIC_SAMPLEID); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_POS] == - TGSI_SEMANTIC_SAMPLEPOS); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_MASK_IN] == - TGSI_SEMANTIC_SAMPLEMASK); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_INVOCATION_ID] == - TGSI_SEMANTIC_INVOCATIONID); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE] == - TGSI_SEMANTIC_VERTEXID_NOBASE); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_BASE_VERTEX] == - TGSI_SEMANTIC_BASEVERTEX); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_TESS_COORD] == - TGSI_SEMANTIC_TESSCOORD); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_HELPER_INVOCATION] == - TGSI_SEMANTIC_HELPER_INVOCATION); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_LOCAL_INVOCATION_ID] == - TGSI_SEMANTIC_THREAD_ID); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_WORK_GROUP_ID] == - TGSI_SEMANTIC_BLOCK_ID); - assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_NUM_WORK_GROUPS] == - TGSI_SEMANTIC_GRID_SIZE); - t = CALLOC_STRUCT(st_translate); if (!t) { ret = PIPE_ERROR_OUT_OF_MEMORY; @@ -6215,7 +6215,7 @@ st_translate_program( for (i = 0; sysInputs; i++) { if (sysInputs & (1 << i)) { - unsigned semName = _mesa_sysval_to_semantic[i]; + unsigned semName = _mesa_sysval_to_semantic(i); t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0); diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h index 729295bcb52..774588a111b 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.h +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h @@ -63,7 +63,8 @@ st_translate_stream_output_info(struct glsl_to_tgsi_visitor *glsl_to_tgsi, const GLuint outputMapping[], struct pipe_stream_output_info *so); -extern const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX]; +unsigned +_mesa_sysval_to_semantic(unsigned sysval); #ifdef __cplusplus } diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 7a686b199d5..e1c79a57b0a 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -1074,7 +1074,7 @@ st_translate_mesa_program( for (i = 0; sysInputs; i++) { if (sysInputs & (1 << i)) { - unsigned semName = _mesa_sysval_to_semantic[i]; + unsigned semName = _mesa_sysval_to_semantic(i); t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0); diff --git a/src/mesa/state_tracker/st_vdpau.c b/src/mesa/state_tracker/st_vdpau.c index b9abebfc7bf..08f25535ae1 100644 --- a/src/mesa/state_tracker/st_vdpau.c +++ b/src/mesa/state_tracker/st_vdpau.c @@ -55,7 +55,7 @@ #include "state_tracker/drm_driver.h" static struct pipe_resource * -st_vdpau_video_surface_gallium(struct gl_context *ctx, const GLvoid *vdpSurface, +st_vdpau_video_surface_gallium(struct gl_context *ctx, const void *vdpSurface, GLuint index) { int (*getProcAddr)(uint32_t device, uint32_t id, void **ptr); @@ -86,7 +86,7 @@ st_vdpau_video_surface_gallium(struct gl_context *ctx, const GLvoid *vdpSurface, } static struct pipe_resource * -st_vdpau_output_surface_gallium(struct gl_context *ctx, const GLvoid *vdpSurface) +st_vdpau_output_surface_gallium(struct gl_context *ctx, const void *vdpSurface) { int (*getProcAddr)(uint32_t device, uint32_t id, void **ptr); uint32_t device = (uintptr_t)ctx->vdpDevice; @@ -135,7 +135,7 @@ st_vdpau_resource_from_description(struct gl_context *ctx, } static struct pipe_resource * -st_vdpau_output_surface_dma_buf(struct gl_context *ctx, const GLvoid *vdpSurface) +st_vdpau_output_surface_dma_buf(struct gl_context *ctx, const void *vdpSurface) { int (*getProcAddr)(uint32_t device, uint32_t id, void **ptr); uint32_t device = (uintptr_t)ctx->vdpDevice; @@ -154,7 +154,7 @@ st_vdpau_output_surface_dma_buf(struct gl_context *ctx, const GLvoid *vdpSurface } static struct pipe_resource * -st_vdpau_video_surface_dma_buf(struct gl_context *ctx, const GLvoid *vdpSurface, +st_vdpau_video_surface_dma_buf(struct gl_context *ctx, const void *vdpSurface, GLuint index) { int (*getProcAddr)(uint32_t device, uint32_t id, void **ptr); @@ -177,7 +177,7 @@ static void st_vdpau_map_surface(struct gl_context *ctx, GLenum target, GLenum access, GLboolean output, struct gl_texture_object *texObj, struct gl_texture_image *texImage, - const GLvoid *vdpSurface, GLuint index) + const void *vdpSurface, GLuint index) { struct st_context *st = st_context(ctx); struct st_texture_object *stObj = st_texture_object(texObj); @@ -250,7 +250,7 @@ static void st_vdpau_unmap_surface(struct gl_context *ctx, GLenum target, GLenum access, GLboolean output, struct gl_texture_object *texObj, struct gl_texture_image *texImage, - const GLvoid *vdpSurface, GLuint index) + const void *vdpSurface, GLuint index) { struct st_context *st = st_context(ctx); struct st_texture_object *stObj = st_texture_object(texObj); |