diff options
author | Kristian Høgsberg Kristensen <[email protected]> | 2016-01-08 22:16:43 -0800 |
---|---|---|
committer | Kristian Høgsberg Kristensen <[email protected]> | 2016-01-08 22:16:43 -0800 |
commit | f0993f81c7fab4ceb99b010d8ee2facbf4fdc737 (patch) | |
tree | 2da8aeaa95a0337d9177f4d808837f7c5cbdbc25 /src/mesa/drivers | |
parent | cfdc955fd5e1e965c458e50f4dc877653a463684 (diff) | |
parent | da5d4583e53fc9cdc86aba7d2ac770e01baa158d (diff) |
Merge ../mesa into vulkan
Diffstat (limited to 'src/mesa/drivers')
27 files changed, 174 insertions, 117 deletions
diff --git a/src/mesa/drivers/dri/common/drirc b/src/mesa/drivers/dri/common/drirc index bb840eaba94..e1874c3f1dc 100644 --- a/src/mesa/drivers/dri/common/drirc +++ b/src/mesa/drivers/dri/common/drirc @@ -37,18 +37,26 @@ TODO: document the other workarounds. <application name="Unigine Heaven (32-bit)" executable="heaven_x86"> <option name="allow_glsl_extension_directive_midshader" value="true" /> + <!-- remove disable_blend_func_extended if 4.1 ever comes out --> + <option name="disable_blend_func_extended" value="true" /> </application> <application name="Unigine Heaven (64-bit)" executable="heaven_x64"> <option name="allow_glsl_extension_directive_midshader" value="true" /> + <!-- remove disable_blend_func_extended if 4.1 ever comes out --> + <option name="disable_blend_func_extended" value="true" /> </application> <application name="Unigine Valley (32-bit)" executable="valley_x86"> <option name="allow_glsl_extension_directive_midshader" value="true" /> + <!-- remove disable_blend_func_extended if 1.1 ever comes out --> + <option name="disable_blend_func_extended" value="true" /> </application> <application name="Unigine Valley (64-bit)" executable="valley_x64"> <option name="allow_glsl_extension_directive_midshader" value="true" /> + <!-- remove disable_blend_func_extended if 1.1 ever comes out --> + <option name="disable_blend_func_extended" value="true" /> </application> <application name="Unigine OilRush (32-bit)" executable="OilRush_x86"> diff --git a/src/mesa/drivers/dri/i915/intel_buffer_objects.c b/src/mesa/drivers/dri/i915/intel_buffer_objects.c index ef06743ed49..e6760964909 100644 --- a/src/mesa/drivers/dri/i915/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/i915/intel_buffer_objects.c @@ -99,7 +99,7 @@ intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj) _mesa_align_free(intel_obj->sys_buffer); drm_intel_bo_unreference(intel_obj->buffer); - free(intel_obj); + _mesa_delete_buffer_object(ctx, obj); } diff --git a/src/mesa/drivers/dri/i915/intel_pixel_copy.c b/src/mesa/drivers/dri/i915/intel_pixel_copy.c index a7185564e47..213cdbd0f53 100644 --- a/src/mesa/drivers/dri/i915/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/i915/intel_pixel_copy.c @@ -138,7 +138,7 @@ do_blit_copypixels(struct gl_context * ctx, } if (ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) { - perf_debug("glCopyPixles(): Unsupported pixel zoom\n"); + perf_debug("glCopyPixels(): Unsupported pixel zoom\n"); return false; } diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 3d5150d2f53..86777430a2e 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -2,7 +2,6 @@ i965_compiler_FILES = \ brw_cfg.cpp \ brw_cfg.h \ brw_compiler.h \ - brw_cubemap_normalize.cpp \ brw_dead_control_flow.cpp \ brw_dead_control_flow.h \ brw_defines.h \ @@ -16,7 +15,6 @@ i965_compiler_FILES = \ brw_eu_util.c \ brw_eu_validate.c \ brw_fs_builder.h \ - brw_fs_channel_expressions.cpp \ brw_fs_cmod_propagation.cpp \ brw_fs_combine_constants.cpp \ brw_fs_copy_propagation.cpp \ @@ -35,15 +33,12 @@ i965_compiler_FILES = \ brw_fs_surface_builder.cpp \ brw_fs_surface_builder.h \ brw_fs_validate.cpp \ - brw_fs_vector_splitting.cpp \ brw_fs_visitor.cpp \ brw_inst.h \ brw_interpolation_map.c \ brw_ir_allocator.h \ brw_ir_fs.h \ brw_ir_vec4.h \ - brw_lower_texture_gradients.cpp \ - brw_lower_unnormalized_offset.cpp \ brw_nir.h \ brw_nir.c \ brw_nir_analyze_boolean_resolves.c \ @@ -115,6 +110,7 @@ i965_FILES = \ brw_context.h \ brw_cs.c \ brw_cs.h \ + brw_cubemap_normalize.cpp \ brw_curbe.c \ brw_draw.c \ brw_draw.h \ @@ -122,11 +118,15 @@ i965_FILES = \ brw_ff_gs.c \ brw_ff_gs_emit.c \ brw_ff_gs.h \ + brw_fs_channel_expressions.cpp \ + brw_fs_vector_splitting.cpp \ brw_gs.c \ brw_gs.h \ brw_gs_state.c \ brw_gs_surface_state.c \ brw_link.cpp \ + brw_lower_texture_gradients.cpp \ + brw_lower_unnormalized_offset.cpp \ brw_meta_fast_clear.c \ brw_meta_stencil_blit.c \ brw_meta_updownsample.c \ diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c index db9acd64f4e..4eeca5cab95 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.c +++ b/src/mesa/drivers/dri/i965/brw_device_info.c @@ -402,6 +402,66 @@ static const struct brw_device_info brw_device_info_bxt = { } }; +/* + * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+. + * There's no KBL entry. Using the default SKL (GEN9) GS entries value. + */ + +/* + * Both SKL and KBL support a maximum of 64 threads per + * Pixel Shader Dispatch (PSD) unit. + */ +#define KBL_MAX_THREADS_PER_PSD 64 + +static const struct brw_device_info brw_device_info_kbl_gt1 = { + GEN9_FEATURES, + .gt = 1, + + .max_cs_threads = 7 * 6, + .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 2, + .urb.size = 192, +}; + +static const struct brw_device_info brw_device_info_kbl_gt1_5 = { + GEN9_FEATURES, + .gt = 1, + + .max_cs_threads = 7 * 6, + .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 3, +}; + +static const struct brw_device_info brw_device_info_kbl_gt2 = { + GEN9_FEATURES, + .gt = 2, + + .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 3, +}; + +static const struct brw_device_info brw_device_info_kbl_gt3 = { + GEN9_FEATURES, + .gt = 3, + + .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 6, +}; + +static const struct brw_device_info brw_device_info_kbl_gt4 = { + GEN9_FEATURES, + .gt = 4, + + .max_wm_threads = KBL_MAX_THREADS_PER_PSD * 9, + /* + * From the "L3 Allocation and Programming" documentation: + * + * "URB is limited to 1008KB due to programming restrictions. This + * is not a restriction of the L3 implementation, but of the FF and + * other clients. Therefore, in a GT4 implementation it is + * possible for the programmed allocation of the L3 data array to + * provide 3*384KB=1152KB for URB, but only 1008KB of this + * will be used." + */ + .urb.size = 1008 / 3, +}; + const struct brw_device_info * brw_get_device_info(int devid) { diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 5fb96626649..35d8039ed13 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -847,12 +847,11 @@ brw_alu2(struct brw_codegen *p, unsigned opcode, static int get_3src_subreg_nr(struct brw_reg reg) { - if (reg.vstride == BRW_VERTICAL_STRIDE_0) { - assert(brw_is_single_value_swizzle(reg.swizzle)); - return reg.subnr / 4 + BRW_GET_SWZ(reg.swizzle, 0); - } else { - return reg.subnr / 4; - } + /* Normally, SubRegNum is in bytes (0..31). However, 3-src instructions + * use 32-bit units (components 0..7). Since they only support F/D/UD + * types, this doesn't lose any flexibility, but uses fewer bits. + */ + return reg.subnr / 4; } static brw_inst * diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 7675e9d299a..489461c6d95 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3455,8 +3455,7 @@ fs_visitor::lower_integer_multiplication() */ assert(mul->src[1].type == BRW_REGISTER_TYPE_D || mul->src[1].type == BRW_REGISTER_TYPE_UD); - mul->src[1].type = (type_is_signed(mul->src[1].type) ? - BRW_REGISTER_TYPE_W : BRW_REGISTER_TYPE_UW); + mul->src[1].type = BRW_REGISTER_TYPE_UW; mul->src[1].stride *= 2; } else if (devinfo->gen == 7 && !devinfo->is_haswell && diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index c7bbae325a1..62133784dcf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1849,12 +1849,33 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld, fs_inst *inst; if (indirect_offset.file == BAD_FILE) { - /* Replicate the patch handle to all enabled channels */ - fs_reg patch_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); - bld.MOV(patch_handle, retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)); - - inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dest, patch_handle); - inst->mlen = 1; + /* Arbitrarily only push up to 32 vec4 slots worth of data, + * which is 16 registers (since each holds 2 vec4 slots). + */ + const unsigned max_push_slots = 32; + if (imm_offset < max_push_slots) { + fs_reg src = fs_reg(ATTR, imm_offset / 2, dest.type); + for (int i = 0; i < instr->num_components; i++) { + bld.MOV(offset(dest, bld, i), + component(src, 4 * (imm_offset % 2) + i)); + } + tes_prog_data->base.urb_read_length = + MAX2(tes_prog_data->base.urb_read_length, + DIV_ROUND_UP(imm_offset + 1, 2)); + } else { + /* Replicate the patch handle to all enabled channels */ + const fs_reg srcs[] = { + retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD) + }; + fs_reg patch_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + bld.LOAD_PAYLOAD(patch_handle, srcs, ARRAY_SIZE(srcs), 0); + + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dest, patch_handle); + inst->mlen = 1; + inst->offset = imm_offset; + inst->base_mrf = -1; + inst->regs_written = instr->num_components; + } } else { /* Indirect indexing - use per-slot offsets as well. */ const fs_reg srcs[] = { @@ -1866,10 +1887,10 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld, inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dest, payload); inst->mlen = 2; + inst->offset = imm_offset; + inst->base_mrf = -1; + inst->regs_written = instr->num_components; } - inst->offset = imm_offset; - inst->base_mrf = -1; - inst->regs_written = instr->num_components; break; } default: diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp index 7cdc830f6b8..766c57ff60a 100644 --- a/src/mesa/drivers/dri/i965/brw_link.cpp +++ b/src/mesa/drivers/dri/i965/brw_link.cpp @@ -27,7 +27,6 @@ #include "brw_nir.h" #include "brw_program.h" #include "glsl/ir_optimization.h" -#include "glsl/glsl_parser_extras.h" #include "program/program.h" #include "main/shaderapi.h" #include "main/uniforms.h" diff --git a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp index d571ecd4394..c83b2728b98 100644 --- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp +++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp @@ -279,7 +279,7 @@ lower_texture_grad_visitor::visit_leave(ir_texture *ir) /* 2. quotient rule */ ir_variable *recip = temp(mem_ctx, glsl_type::float_type, "recip"); - EMIT(assign(recip, div(new(mem_ctx) ir_constant(1.0f), swizzle_z(Q)))); + EMIT(assign(recip, expr(ir_unop_rcp, swizzle_z(Q)))); ir_variable *dx = temp(mem_ctx, glsl_type::vec2_type, "dx"); ir_variable *dy = temp(mem_ctx, glsl_type::vec2_type, "dy"); diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index e031173036a..f8b258bf96c 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -405,42 +405,15 @@ brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar) } } -#include "util/debug.h" - -static bool -should_clone_nir() -{ - static int should_clone = -1; - if (should_clone < 1) - should_clone = env_var_as_boolean("NIR_TEST_CLONE", false); - - return should_clone; -} - -#define _OPT(do_pass) (({ \ - bool this_progress = true; \ - do_pass \ - nir_validate_shader(nir); \ - if (should_clone_nir()) { \ - nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \ - ralloc_free(nir); \ - nir = clone; \ - } \ - this_progress; \ -})) - -#define OPT(pass, ...) _OPT( \ - nir_metadata_set_validation_flag(nir); \ - this_progress = pass(nir ,##__VA_ARGS__); \ - if (this_progress) { \ - progress = true; \ - nir_metadata_check_validation_flag(nir); \ - } \ -) - -#define OPT_V(pass, ...) _OPT( \ - pass(nir, ##__VA_ARGS__); \ -) +#define OPT(pass, ...) ({ \ + bool this_progress = false; \ + NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ + if (this_progress) \ + progress = true; \ + this_progress; \ +}) + +#define OPT_V(pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__) static nir_shader * nir_optimize(nir_shader *nir, bool is_scalar) diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h index 9f2ff9ae5ad..a2a4a40f373 100644 --- a/src/mesa/drivers/dri/i965/brw_reg.h +++ b/src/mesa/drivers/dri/i965/brw_reg.h @@ -287,33 +287,6 @@ type_sz(unsigned type) } } -static inline bool -type_is_signed(unsigned type) -{ - switch(type) { - case BRW_REGISTER_TYPE_D: - case BRW_REGISTER_TYPE_W: - case BRW_REGISTER_TYPE_F: - case BRW_REGISTER_TYPE_B: - case BRW_REGISTER_TYPE_V: - case BRW_REGISTER_TYPE_VF: - case BRW_REGISTER_TYPE_DF: - case BRW_REGISTER_TYPE_HF: - case BRW_REGISTER_TYPE_Q: - return true; - - case BRW_REGISTER_TYPE_UD: - case BRW_REGISTER_TYPE_UW: - case BRW_REGISTER_TYPE_UB: - case BRW_REGISTER_TYPE_UV: - case BRW_REGISTER_TYPE_UQ: - return false; - - default: - unreachable("not reached"); - } -} - /** * Construct a brw_reg. * \param file one of the BRW_x_REGISTER_FILE values diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 4ae403c2baa..9c7f4a7619f 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -27,7 +27,6 @@ #include "brw_fs.h" #include "brw_nir.h" #include "brw_vec4_tes.h" -#include "glsl/glsl_parser_extras.h" #include "main/shaderobj.h" #include "main/uniforms.h" #include "util/debug.h" @@ -97,6 +96,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) nir_shader_compiler_options *nir_options = rzalloc(compiler, nir_shader_compiler_options); nir_options->native_integers = true; + nir_options->lower_fdiv = true; /* In order to help allow for better CSE at the NIR level we tell NIR * to split all ffma instructions during opt_algebraic and we then * re-combine them as a later step. diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c index 2c925e7f572..7e414260284 100644 --- a/src/mesa/drivers/dri/i965/brw_tcs.c +++ b/src/mesa/drivers/dri/i965/brw_tcs.c @@ -307,7 +307,9 @@ brw_tcs_precompile(struct gl_context *ctx, /* Guess that the input and output patches have the same dimensionality. */ key.input_vertices = shader_prog->TessCtrl.VerticesOut; - key.tes_primitive_mode = GL_TRIANGLES; + key.tes_primitive_mode = + shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL] ? + shader_prog->TessEval.PrimitiveMode : GL_TRIANGLES; key.outputs_written = prog->OutputsWritten; key.patch_outputs_written = prog->PatchOutputsWritten; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index dd223985d1c..c6a52c5d183 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1784,9 +1784,22 @@ vec4_visitor::convert_to_hw_regs() case ATTR: unreachable("not reached"); } + src = reg; } + if (inst->is_3src()) { + /* 3-src instructions with scalar sources support arbitrary subnr, + * but don't actually use swizzles. Convert swizzle into subnr. + */ + for (int i = 0; i < 3; i++) { + if (inst->src[i].vstride == BRW_VERTICAL_STRIDE_0) { + assert(brw_is_single_value_swizzle(inst->src[i].swizzle)); + inst->src[i].subnr += 4 * BRW_GET_SWZ(inst->src[i].swizzle, 0); + } + } + } + dst_reg &dst = inst->dst; struct brw_reg reg; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index c6fa837dc74..b3236672ef2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -20,7 +20,6 @@ * IN THE SOFTWARE. */ -#include "glsl/glsl_parser_extras.h" #include "brw_vec4.h" #include "brw_cfg.h" #include "brw_eu.h" diff --git a/src/mesa/drivers/dri/i965/brw_vue_map.c b/src/mesa/drivers/dri/i965/brw_vue_map.c index 09eadbcb54f..fea24368e8c 100644 --- a/src/mesa/drivers/dri/i965/brw_vue_map.c +++ b/src/mesa/drivers/dri/i965/brw_vue_map.c @@ -257,6 +257,7 @@ varying_name(brw_varying_slot slot) [BRW_VARYING_SLOT_PNTC - VARYING_SLOT_MAX] = "BRW_VARYING_SLOT_PNTC", }; + assert(slot < BRW_VARYING_SLOT_COUNT); return brw_names[slot - VARYING_SLOT_MAX]; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 76dc5775121..5ab2f7f09df 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -946,12 +946,15 @@ brw_upload_ubo_surfaces(struct brw_context *brw, } else { struct intel_buffer_object *intel_bo = intel_buffer_object(binding->BufferObject); + GLsizeiptr size = binding->BufferObject->Size - binding->Offset; + if (!binding->AutomaticSize) + size = MIN2(size, binding->Size); drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, binding->Offset, - binding->BufferObject->Size - binding->Offset); + size); brw_create_constant_surface(brw, bo, binding->Offset, - binding->BufferObject->Size - binding->Offset, + size, &ubo_surf_offsets[i]); } } @@ -968,12 +971,15 @@ brw_upload_ubo_surfaces(struct brw_context *brw, } else { struct intel_buffer_object *intel_bo = intel_buffer_object(binding->BufferObject); + GLsizeiptr size = binding->BufferObject->Size - binding->Offset; + if (!binding->AutomaticSize) + size = MIN2(size, binding->Size); drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, binding->Offset, - binding->BufferObject->Size - binding->Offset); + size); brw_create_buffer_surface(brw, bo, binding->Offset, - binding->BufferObject->Size - binding->Offset, + size, &ssbo_surf_offsets[i]); } } diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index da3b4cd90e8..4bc0a8598d6 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -32,7 +32,6 @@ #include "program/prog_parameter.h" #include "program/prog_statevars.h" #include "intel_batchbuffer.h" -#include "glsl/glsl_parser_extras.h" /** * Creates a streamed BO containing the push constants for the VS or GS on diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index bd204aa3ce8..6d29fbdde21 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -406,11 +406,6 @@ can_fast_copy_blit(struct brw_context *brw, if (brw->gen < 9) return false; - if (src_buffer->handle == dst_buffer->handle && - _mesa_regions_overlap(src_x, src_y, src_x + w, src_y + h, - dst_x, dst_y, dst_x + w, dst_y + h)) - return false; - /* Enable fast copy blit only if the surfaces are Yf/Ys tiled. * FIXME: Based on performance data, remove this condition later to * enable for all types of surfaces. @@ -427,8 +422,10 @@ can_fast_copy_blit(struct brw_context *brw, if ((dst_offset | src_offset) & 63) return false; - /* Color depth greater than 128 bits not supported. */ - if (cpp > 16) + /* Color depths which are not power of 2 or greater than 128 bits are + * not supported. + */ + if (!_mesa_is_pow_two(cpp) || cpp > 16) return false; /* For Fast Copy Blits the pitch cannot be a negative number. So, bit 15 @@ -567,9 +564,10 @@ intelEmitCopyBlit(struct brw_context *brw, dst_offset, dst_pitch, dst_tiling, dst_tr_mode, w, h, cpp); - assert(use_fast_copy_blit || - (src_tr_mode == INTEL_MIPTREE_TRMODE_NONE && - dst_tr_mode == INTEL_MIPTREE_TRMODE_NONE)); + if (!use_fast_copy_blit && + (src_tr_mode != INTEL_MIPTREE_TRMODE_NONE || + dst_tr_mode != INTEL_MIPTREE_TRMODE_NONE)) + return false; if (use_fast_copy_blit) { /* When two sequential fast copy blits have different source surfaces, diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c index 7a5b3fca595..56da2da08a8 100644 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c @@ -167,7 +167,7 @@ brw_delete_buffer(struct gl_context * ctx, struct gl_buffer_object *obj) _mesa_buffer_unmap_all_mappings(ctx, obj); drm_intel_bo_unreference(intel_obj->buffer); - free(intel_obj); + _mesa_delete_buffer_object(ctx, obj); } diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index e1338e92e15..889f7cbb5c1 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -346,6 +346,9 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_transform_feedback3 = true; ctx->Extensions.ARB_transform_feedback_instanced = true; + if (ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) + ctx->Extensions.ARB_compute_shader = true; + if (brw->intelScreen->cmd_parser_version >= 2) brw->predicate.supported = true; } @@ -357,8 +360,6 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_viewport_array = true; ctx->Extensions.AMD_vertex_shader_viewport_index = true; ctx->Extensions.ARB_shader_subroutine = true; - if (ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) - ctx->Extensions.ARB_compute_shader = true; } } diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 88c0a19bed6..108dd87dd8b 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -2697,13 +2697,17 @@ use_intel_mipree_map_blit(struct brw_context *brw, { if (brw->has_llc && /* It's probably not worth swapping to the blit ring because of - * all the overhead involved. + * all the overhead involved. But, we must use blitter for the + * surfaces with INTEL_MIPTREE_TRMODE_{YF,YS}. */ - !(mode & GL_MAP_WRITE_BIT) && + (!(mode & GL_MAP_WRITE_BIT) || + mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) && !mt->compressed && (mt->tiling == I915_TILING_X || /* Prior to Sandybridge, the blitter can't handle Y tiling */ - (brw->gen >= 6 && mt->tiling == I915_TILING_Y)) && + (brw->gen >= 6 && mt->tiling == I915_TILING_Y) || + /* Fast copy blit on skl+ supports all tiling formats. */ + brw->gen >= 9) && can_blit_slice(mt, level, slice)) return true; @@ -2772,6 +2776,8 @@ intel_miptree_map(struct brw_context *brw, intel_miptree_map_movntdqa(brw, mt, map, level, slice); #endif } else { + /* intel_miptree_map_gtt() doesn't support surfaces with Yf/Ys tiling. */ + assert(mt->tr_mode == INTEL_MIPTREE_TRMODE_NONE); intel_miptree_map_gtt(brw, mt, map, level, slice); } diff --git a/src/mesa/drivers/dri/i965/intel_pixel_copy.c b/src/mesa/drivers/dri/i965/intel_pixel_copy.c index 3b5bdb8f928..05c35bd61b3 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_copy.c @@ -142,7 +142,7 @@ do_blit_copypixels(struct gl_context * ctx, } if (ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) { - perf_debug("glCopyPixles(): Unsupported pixel zoom\n"); + perf_debug("glCopyPixels(): Unsupported pixel zoom\n"); return false; } diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.c b/src/mesa/drivers/dri/nouveau/nouveau_driver.c index 7f31b2851e4..998e751fc3c 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_driver.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.c @@ -35,7 +35,7 @@ #include "drivers/common/meta.h" -const char const *nouveau_vendor_string = "Nouveau"; +const char * const nouveau_vendor_string = "Nouveau"; const char * nouveau_get_renderer_string(unsigned chipset) diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.h b/src/mesa/drivers/dri/nouveau/nouveau_driver.h index a4273a554bd..237e9563246 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_driver.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.h @@ -69,7 +69,7 @@ struct nouveau_driver { #define nouveau_error(format, ...) \ fprintf(stderr, "%s: " format, __func__, ## __VA_ARGS__) -extern const char const *nouveau_vendor_string; +extern const char * const nouveau_vendor_string; const char * nouveau_get_renderer_string(unsigned chipset); diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c index d9d4f5ffc5e..2b76305dd45 100644 --- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c +++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c @@ -71,7 +71,7 @@ radeonDeleteBufferObject(struct gl_context * ctx, radeon_bo_unref(radeon_obj->bo); } - free(radeon_obj); + _mesa_delete_buffer_object(ctx, obj); } |