diff options
author | Jason Ekstrand <[email protected]> | 2016-04-14 17:14:28 -0700 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2016-04-14 17:14:28 -0700 |
commit | 5567ae0547d5f31e51e5f32e78065894e594fd1a (patch) | |
tree | 98ff61f6afbfd7d5517553e1511f0f88cd34feeb | |
parent | 48cc8c284aa28405eaa2335bb8f96324c8153ca7 (diff) | |
parent | f1d29099b4eedafb0302a21c0673d12a6610c369 (diff) |
Merge remote-tracking branch 'public/master' into vulkan
40 files changed, 501 insertions, 537 deletions
diff --git a/SConstruct b/SConstruct index ef71ab69c38..e2e49fcc6ca 100644 --- a/SConstruct +++ b/SConstruct @@ -84,6 +84,11 @@ env.Append(CPPPATH = [ #print env.Dump() +# Add a check target for running tests +check = env.Alias('check') +env.AlwaysBuild(check) + + ####################################################################### # Invoke host SConscripts # diff --git a/appveyor.yml b/appveyor.yml index bf7ac752857..2e9b9d650d7 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -65,6 +65,9 @@ install: build_script: - scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=12.0 llvm=1 +after_build: +- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=12.0 llvm=1 check + # It's possible to setup notification here, as described in # http://www.appveyor.com/docs/notifications#appveyor-yml-configuration , but diff --git a/configure.ac b/configure.ac index 0d3f0ff3c9d..2aa46dccdbb 100644 --- a/configure.ac +++ b/configure.ac @@ -2551,8 +2551,6 @@ AC_CONFIG_FILES([Makefile src/gallium/drivers/softpipe/Makefile src/gallium/drivers/svga/Makefile src/gallium/drivers/swr/Makefile - src/gallium/drivers/swr/avx/Makefile - src/gallium/drivers/swr/avx2/Makefile src/gallium/drivers/trace/Makefile src/gallium/drivers/vc4/Makefile src/gallium/drivers/virgl/Makefile diff --git a/doxygen/Makefile.orig b/doxygen/Makefile.orig deleted file mode 100644 index b1bc15d5a05..00000000000 --- a/doxygen/Makefile.orig +++ /dev/null @@ -1,35 +0,0 @@ - -default: full - -all: full subset - -%.tag: %.doxy - doxygen $< - -FULL = \ - main.doxy \ - math.doxy \ - vbo.doxy \ - glapi.doxy \ - glsl.doxy \ - swrast.doxy \ - swrast_setup.doxy \ - tnl.doxy \ - tnl_dd.doxy \ - gbm.doxy \ - i965.doxy - -full: $(FULL:.doxy=.tag) - $(foreach FILE,$(FULL),doxygen $(FILE);) - -SUBSET = \ - main.doxy \ - math.doxy - -subset: $(SUBSET:.doxy=.tag) - $(foreach FILE,$(SUBSET),doxygen $(FILE);) - -clean: - -rm -rf $(FULL:.doxy=) $(SUBSET:.doxy=) - -rm -rf *.tag - -rm -rf *.db diff --git a/doxygen/header.html.orig b/doxygen/header.html.orig deleted file mode 100644 index 034231cbdfd..00000000000 --- a/doxygen/header.html.orig +++ /dev/null @@ -1,19 +0,0 @@ -<html> -<head> -<title>Mesa Source Code Documentation</title> -<link href="doxygen.css" rel="stylesheet" type="text/css"> -</head> -<body> -<div class="qindex"> -<a class="qindex" href="../main/index.html">core</a> | -<a class="qindex" href="../glapi/index.html">glapi</a> | -<a class="qindex" href="../glsl/index.html">glsl</a> | -<a class="qindex" href="../vbo/index.html">vbo</a> | -<a class="qindex" href="../math/index.html">math</a> | -<a class="qindex" href="../swrast/index.html">swrast</a> | -<a class="qindex" href="../swrast_setup/index.html">swrast_setup</a> | -<a class="qindex" href="../tnl/index.html">tnl</a> | -<a class="qindex" href="../tnl_dd/index.html">tnl_dd</a> | -<a class="qindex" href="../gbm/index.html">gbm</a> | -<a class="qindex" href="../i965/index.html">i965</a> -</div> diff --git a/install-gallium-links.mk b/install-gallium-links.mk index 4010cad15c0..ac5a499c48f 100644 --- a/install-gallium-links.mk +++ b/install-gallium-links.mk @@ -12,9 +12,9 @@ all-local : .install-gallium-links link_dir=$(top_builddir)/$(LIB_DIR)/egl; \ fi; \ $(MKDIR_P) $$link_dir; \ - file_list=$(dri_LTLIBRARIES:%.la=.libs/%.so); \ - file_list+=$(egl_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*); \ - file_list+=$(lib_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*); \ + file_list="$(dri_LTLIBRARIES:%.la=.libs/%.so)"; \ + file_list+="$(egl_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)"; \ + file_list+="$(lib_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)"; \ for f in $$file_list; do \ if test -h .libs/$$f; then \ cp -d $$f $$link_dir; \ diff --git a/scons/gallium.py b/scons/gallium.py index f37042d9af1..94321b2e847 100755 --- a/scons/gallium.py +++ b/scons/gallium.py @@ -82,11 +82,6 @@ def install_shared_library(env, sources, version = ()): return targets -def createInstallMethods(env): - env.AddMethod(install_program, 'InstallProgram') - env.AddMethod(install_shared_library, 'InstallSharedLibrary') - - def msvc2013_compat(env): if env['gcc']: env.Append(CCFLAGS = [ @@ -94,8 +89,20 @@ def msvc2013_compat(env): '-Werror=pointer-arith', ]) -def createMSVCCompatMethods(env): - env.AddMethod(msvc2013_compat, 'MSVC2013Compat') + +def unit_test(env, test_name, program_target, args=None): + env.InstallProgram(program_target) + + cmd = [program_target[0].abspath] + if args is not None: + cmd += args + cmd = ' '.join(cmd) + + # http://www.scons.org/wiki/UnitTests + action = SCons.Action.Action(cmd, " Running %s ..." % test_name) + alias = env.Alias(test_name, program_target, action) + env.AlwaysBuild(alias) + env.Depends('check', alias) def num_jobs(): @@ -667,8 +674,10 @@ def generate(env): # Custom builders and methods env.Tool('custom') - createInstallMethods(env) - createMSVCCompatMethods(env) + env.AddMethod(install_program, 'InstallProgram') + env.AddMethod(install_shared_library, 'InstallSharedLibrary') + env.AddMethod(msvc2013_compat, 'MSVC2013Compat') + env.AddMethod(unit_test, 'UnitTest') env.PkgCheckModules('X11', ['x11', 'xext', 'xdamage', 'xfixes', 'glproto >= 1.4.13']) env.PkgCheckModules('XCB', ['x11-xcb', 'xcb-glx >= 1.8.1', 'xcb-dri2 >= 1.8']) diff --git a/src/compiler/SConscript b/src/compiler/SConscript index 8969d821984..10c79c44348 100644 --- a/src/compiler/SConscript +++ b/src/compiler/SConscript @@ -22,3 +22,4 @@ compiler = env.ConvenienceLibrary( Export('compiler') SConscript('SConscript.glsl') +SConscript('SConscript.nir') diff --git a/src/compiler/SConscript.nir b/src/compiler/SConscript.nir new file mode 100644 index 00000000000..51cc0d403fd --- /dev/null +++ b/src/compiler/SConscript.nir @@ -0,0 +1,73 @@ +import common + +Import('*') + +from sys import executable as python_cmd + +env = env.Clone() + +env.MSVC2013Compat() + +env.Prepend(CPPPATH = [ + '#include', + '#src', + '#src/mapi', + '#src/mesa', + '#src/gallium/include', + '#src/gallium/auxiliary', + '#src/compiler/nir', +]) + +# Make generated headers reachable from the include path. +env.Prepend(CPPPATH = [Dir('.').abspath, Dir('nir').abspath]) + +# nir generated sources + +nir_builder_opcodes_h = env.CodeGenerate( + target = 'nir/nir_builder_opcodes.h', + script = 'nir/nir_builder_opcodes_h.py', + source = [], + command = python_cmd + ' $SCRIPT > $TARGET' +) + +env.CodeGenerate( + target = 'nir/nir_constant_expressions.c', + script = 'nir/nir_constant_expressions.py', + source = [], + command = python_cmd + ' $SCRIPT > $TARGET' +) + +env.CodeGenerate( + target = 'nir/nir_opcodes.h', + script = 'nir/nir_opcodes_h.py', + source = [], + command = python_cmd + ' $SCRIPT > $TARGET' +) + +env.CodeGenerate( + target = 'nir/nir_opcodes.c', + script = 'nir/nir_opcodes_c.py', + source = [], + command = python_cmd + ' $SCRIPT > $TARGET' +) + +env.CodeGenerate( + target = 'nir/nir_opt_algebraic.c', + script = 'nir/nir_opt_algebraic.py', + source = [], + command = python_cmd + ' $SCRIPT > $TARGET' +) + +# parse Makefile.sources +source_lists = env.ParseSourceList('Makefile.sources') + +nir_sources = source_lists['NIR_FILES'] +nir_sources += source_lists['NIR_GENERATED_FILES'] + +nir = env.ConvenienceLibrary( + target = 'nir', + source = nir_sources, +) + +env.Alias('nir', nir) +Export('nir') diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index fede1954cf0..bbbc2089db3 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -507,7 +507,14 @@ typedef struct nir_src { bool is_ssa; } nir_src; -#define NIR_SRC_INIT (nir_src) { { NULL } } +static inline nir_src +nir_src_init(void) +{ + nir_src src = { { NULL } }; + return src; +} + +#define NIR_SRC_INIT nir_src_init() #define nir_foreach_use(reg_or_ssa_def, src) \ list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link) @@ -530,7 +537,14 @@ typedef struct { bool is_ssa; } nir_dest; -#define NIR_DEST_INIT (nir_dest) { { { NULL } } } +static inline nir_dest +nir_dest_init(void) +{ + nir_dest dest = { { { NULL } } }; + return dest; +} + +#define NIR_DEST_INIT nir_dest_init() #define nir_foreach_def(reg, dest) \ list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link) @@ -957,7 +971,7 @@ typedef enum { NIR_INTRINSIC_UCP_ID = 4, /** - * The ammount of data, starting from BASE, that this instruction may + * The amount of data, starting from BASE, that this instruction may * access. This is used to provide bounds if the offset is not constant. */ NIR_INTRINSIC_RANGE = 5, diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h index fa162f9d126..3cb4f95394c 100644 --- a/src/compiler/nir/nir_intrinsics.h +++ b/src/compiler/nir/nir_intrinsics.h @@ -42,9 +42,9 @@ #define ARR(...) { __VA_ARGS__ } -INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE) +INTRINSIC(load_var, 0, ARR(0), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE) INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, WRMASK, xx, xx, 0) -INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, xx, xx, xx, 0) +INTRINSIC(copy_var, 0, ARR(0), false, 0, 2, 0, xx, xx, xx, 0) /* * Interpolation of input. The interp_var_at* intrinsics are similar to the @@ -72,7 +72,7 @@ INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0, xx, xx, xx, * a barrier is an intrinsic with no inputs/outputs but which can't be moved * around/optimized in general */ -#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, xx, xx, xx, 0) +#define BARRIER(name) INTRINSIC(name, 0, ARR(0), false, 0, 0, 0, xx, xx, xx, 0) BARRIER(barrier) BARRIER(discard) @@ -89,7 +89,7 @@ BARRIER(memory_barrier) * The latter can be used as code motion barrier, which is currently not * feasible with NIR. */ -INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE) +INTRINSIC(shader_clock, 0, ARR(0), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE) /* * Memory barrier with semantics analogous to the compute shader @@ -113,8 +113,8 @@ INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0) * * end_primitive implements GLSL's EndPrimitive() built-in. */ -INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0) -INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0) +INTRINSIC(emit_vertex, 0, ARR(0), false, 0, 0, 1, STREAM_ID, xx, xx, 0) +INTRINSIC(end_primitive, 0, ARR(0), false, 0, 0, 1, STREAM_ID, xx, xx, 0) /** * Geometry Shader intrinsics with a vertex count. @@ -137,7 +137,7 @@ INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0) */ #define ATOMIC(name, flags) \ - INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, xx, xx, xx, flags) \ + INTRINSIC(atomic_counter_##name##_var, 0, ARR(0), true, 1, 1, 0, xx, xx, xx, flags) \ INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, BASE, xx, xx, flags) ATOMIC(inc, 0) @@ -170,9 +170,9 @@ INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0) INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0) INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0) INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, xx, xx, xx, 0) -INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0, xx, xx, xx, +INTRINSIC(image_size, 0, ARR(0), true, 4, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, xx, xx, xx, +INTRINSIC(image_samples, 0, ARR(0), true, 1, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* @@ -278,7 +278,7 @@ INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0) INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0) #define SYSTEM_VALUE(name, components, num_indices, idx0, idx1, idx2) \ - INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \ + INTRINSIC(load_##name, 0, ARR(0), true, components, 0, num_indices, \ idx0, idx1, idx2, \ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) @@ -313,8 +313,9 @@ SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx) * of the start of the variable being loaded and and the offset source is a * offset into that variable. * - * Uniform load operations have a second index that specifies the size of the - * variable being loaded. If const_index[1] == 0, then the size is unknown. + * Uniform load operations have a second "range" index that specifies the + * range (starting at base) of the data from which we are loading. If + * const_index[1] == 0, then the range is unknown. * * Some load operations such as UBO/SSBO load and per_vertex loads take an * additional source to specify which UBO/SSBO/vertex to load from. @@ -328,9 +329,8 @@ SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx) #define LOAD(name, srcs, num_indices, idx0, idx1, idx2, flags) \ INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, num_indices, idx0, idx1, idx2, flags) -/* src[] = { offset }. const_index[] = { base, range } */ -LOAD(uniform, 1, 2, BASE, RANGE, xx, - NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { offset }. const_index[] = { base } */ +LOAD(uniform, 1, 2, BASE, RANGE, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* src[] = { buffer_index, offset }. No const_index */ LOAD(ubo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* src[] = { offset }. const_index[] = { base } */ diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index 369a8ee537e..df1f7a5d765 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -278,8 +278,8 @@ nir_lower_io_block(nir_block *block, void *void_state) intrin->variables[0]->var->data.driver_location); if (load->intrinsic == nir_intrinsic_load_uniform) { - load->const_index[1] = - state->type_size(intrin->variables[0]->var->type); + nir_intrinsic_set_range(load, + state->type_size(intrin->variables[0]->var->type)); } if (per_vertex) diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 2793020953e..bbb4edf3260 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -31,6 +31,10 @@ #include <stdlib.h> #include <inttypes.h> /* for PRIx64 macro */ +#if defined(_WIN32) && !defined(snprintf) +#define snprintf _snprintf +#endif + static void print_tabs(unsigned num_tabs, FILE *fp) { @@ -514,8 +518,6 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) [NIR_INTRINSIC_STREAM_ID] = "stream-id", [NIR_INTRINSIC_UCP_ID] = "ucp-id", [NIR_INTRINSIC_RANGE] = "range", - [NIR_INTRINSIC_DESC_SET] = "desc-set", - [NIR_INTRINSIC_BINDING] = "binding", }; for (unsigned idx = 1; idx < NIR_INTRINSIC_NUM_INDEX_FLAGS; idx++) { if (!info->index_map[idx]) diff --git a/src/compiler/nir/nir_to_ssa.c b/src/compiler/nir/nir_to_ssa.c index 23d709a218a..1a772fff2d5 100644 --- a/src/compiler/nir/nir_to_ssa.c +++ b/src/compiler/nir/nir_to_ssa.c @@ -27,7 +27,6 @@ #include "nir.h" #include <stdlib.h> -#include <unistd.h> /* * Implements the classic to-SSA algorithm described by Cytron et. al. in diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am index 086e1701128..ef2bc1016d5 100644 --- a/src/gallium/Makefile.am +++ b/src/gallium/Makefile.am @@ -80,8 +80,6 @@ endif if HAVE_GALLIUM_SWR SUBDIRS += drivers/swr -SUBDIRS += drivers/swr/avx -SUBDIRS += drivers/swr/avx2 endif ## vc4/rpi diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 8188156afc2..fbbd22a5299 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -38,10 +38,6 @@ if not env['embedded']: target = testname, source = [testname + '.c', 'lp_test_main.c'], ) - env.InstallProgram(target) - - # http://www.scons.org/wiki/UnitTests - alias = env.Alias(testname, [target], target[0].abspath) - AlwaysBuild(alias) + env.UnitTest(testname, target) Export('llvmpipe') diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h index 3479c343261..fe4982aa9fd 100644 --- a/src/gallium/drivers/nouveau/nv_object.xml.h +++ b/src/gallium/drivers/nouveau/nv_object.xml.h @@ -202,6 +202,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVE4_COMPUTE_CLASS 0x0000a0c0 #define NVF0_COMPUTE_CLASS 0x0000a1c0 #define GM107_COMPUTE_CLASS 0x0000b0c0 +#define GM200_COMPUTE_CLASS 0x0000b1c0 #define NV84_CRYPT_CLASS 0x000074c1 #define BLOB_NVC0_PCOPY1_CLASS 0x000090b8 #define BLOB_NVC0_PCOPY0_CLASS 0x000090b5 diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 9a34007c6e5..3bf98ad6a3b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -644,9 +644,9 @@ nvc0_screen_init_compute(struct nvc0_screen *screen) case 0xf0: case 0x100: case 0x110: + case 0x120: if (debug_get_bool_option("NVF0_COMPUTE", false)) return nve4_screen_compute_setup(screen, screen->base.pushbuf); - case 0x120: return 0; default: return -1; diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index 4d069df983e..3d0190928e3 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -54,6 +54,9 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, case 0x110: obj_class = GM107_COMPUTE_CLASS; break; + case 0x120: + obj_class = GM200_COMPUTE_CLASS; + break; default: NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); return -1; diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index 47514e91d23..664dc5bfdce 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -376,6 +376,10 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx, 0, 0, resource, level, box); data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ); + if (!data) { + pipe_resource_reference((struct pipe_resource **)&staging, NULL); + return NULL; + } data += box->x % R600_MAP_BUFFER_ALIGNMENT; return r600_buffer_get_transfer(ctx, resource, level, usage, box, diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index b5557d800c7..7fc1461ec9b 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1192,7 +1192,9 @@ static void si_mark_shader_pointers_dirty(struct si_context *sctx, { sctx->const_buffers[shader].desc.pointer_dirty = true; sctx->rw_buffers[shader].desc.pointer_dirty = true; + sctx->shader_buffers[shader].desc.pointer_dirty = true; sctx->samplers[shader].views.desc.pointer_dirty = true; + sctx->images[shader].desc.pointer_dirty = true; if (shader == PIPE_SHADER_VERTEX) sctx->vertex_buffers.pointer_dirty = true; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c58467ddcb0..c26960b1ca3 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5839,6 +5839,10 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, radeon_llvm_dispose(&ctx.radeon_bld); + /* Add the scratch offset to input SGPRs. */ + if (shader->config.scratch_bytes_per_wave) + shader->info.num_input_sgprs += 1; /* scratch byte offset */ + /* Calculate the number of fragment input VGPRs. */ if (ctx.type == TGSI_PROCESSOR_FRAGMENT) { shader->info.num_input_vgprs = 0; @@ -6761,6 +6765,13 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, return true; } +static void si_fix_num_sgprs(struct si_shader *shader) +{ + unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* VCC */ + + shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs); +} + int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, struct si_shader *shader, struct pipe_debug_callback *debug) @@ -6850,6 +6861,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, } } + si_fix_num_sgprs(shader); si_shader_dump(sscreen, shader, debug, shader->selector->info.processor, stderr); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 82ae4c43245..af9ffdd381f 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1487,7 +1487,7 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen, } if (desc->layout == UTIL_FORMAT_LAYOUT_ETC && - sscreen->b.family >= CHIP_STONEY) { + sscreen->b.family == CHIP_STONEY) { switch (format) { case PIPE_FORMAT_ETC1_RGB8: case PIPE_FORMAT_ETC2_RGB8: diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index b7ebb48e6a9..d560aae1f7d 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -306,7 +306,6 @@ static void si_set_tesseval_regs(struct si_shader *shader, static void si_shader_ls(struct si_shader *shader) { struct si_pm4_state *pm4; - unsigned num_sgprs, num_user_sgprs; unsigned vgpr_comp_cnt; uint64_t va; @@ -321,30 +320,21 @@ static void si_shader_ls(struct si_shader *shader) * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */ vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1; - num_user_sgprs = SI_LS_NUM_USER_SGPR; - num_sgprs = shader->config.num_sgprs; - if (num_user_sgprs > num_sgprs) { - /* Last 2 reserved SGPRs are used for VCC */ - num_sgprs = num_user_sgprs + 2; - } - assert(num_sgprs <= 104); - si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40); shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) | - S_00B528_SGPRS((num_sgprs - 1) / 8) | + S_00B528_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) | S_00B528_DX10_CLAMP(1) | S_00B528_FLOAT_MODE(shader->config.float_mode); - shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) | + shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_LS_NUM_USER_SGPR) | S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0); } static void si_shader_hs(struct si_shader *shader) { struct si_pm4_state *pm4; - unsigned num_sgprs, num_user_sgprs; uint64_t va; pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state); @@ -354,32 +344,22 @@ static void si_shader_hs(struct si_shader *shader) va = shader->bo->gpu_address; si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); - num_user_sgprs = SI_TCS_NUM_USER_SGPR; - num_sgprs = shader->config.num_sgprs; - /* One SGPR after user SGPRs is pre-loaded with tessellation factor - * buffer offset. */ - if ((num_user_sgprs + 1) > num_sgprs) { - /* Last 2 reserved SGPRs are used for VCC */ - num_sgprs = num_user_sgprs + 1 + 2; - } - assert(num_sgprs <= 104); - si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8); si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40); si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS, S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) | - S_00B428_SGPRS((num_sgprs - 1) / 8) | + S_00B428_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B428_DX10_CLAMP(1) | S_00B428_FLOAT_MODE(shader->config.float_mode)); si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, - S_00B42C_USER_SGPR(num_user_sgprs) | + S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) | S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); } static void si_shader_es(struct si_shader *shader) { struct si_pm4_state *pm4; - unsigned num_sgprs, num_user_sgprs; + unsigned num_user_sgprs; unsigned vgpr_comp_cnt; uint64_t va; @@ -400,21 +380,13 @@ static void si_shader_es(struct si_shader *shader) } else unreachable("invalid shader selector type"); - num_sgprs = shader->config.num_sgprs; - /* One SGPR after user SGPRs is pre-loaded with es2gs_offset */ - if ((num_user_sgprs + 1) > num_sgprs) { - /* Last 2 reserved SGPRs are used for VCC */ - num_sgprs = num_user_sgprs + 1 + 2; - } - assert(num_sgprs <= 104); - si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE, shader->selector->esgs_itemsize / 4); si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40); si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES, S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) | - S_00B328_SGPRS((num_sgprs - 1) / 8) | + S_00B328_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) | S_00B328_DX10_CLAMP(1) | S_00B328_FLOAT_MODE(shader->config.float_mode)); @@ -458,7 +430,6 @@ static void si_shader_gs(struct si_shader *shader) unsigned gsvs_itemsize = shader->selector->max_gsvs_emit_size >> 2; unsigned gs_num_invocations = shader->selector->gs_num_invocations; struct si_pm4_state *pm4; - unsigned num_sgprs, num_user_sgprs; uint64_t va; unsigned max_stream = shader->selector->max_gs_stream; @@ -494,22 +465,13 @@ static void si_shader_gs(struct si_shader *shader) si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8); si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40); - num_user_sgprs = SI_GS_NUM_USER_SGPR; - num_sgprs = shader->config.num_sgprs; - /* Two SGPRs after user SGPRs are pre-loaded with gs2vs_offset, gs_wave_id */ - if ((num_user_sgprs + 2) > num_sgprs) { - /* Last 2 reserved SGPRs are used for VCC */ - num_sgprs = num_user_sgprs + 2 + 2; - } - assert(num_sgprs <= 104); - si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) | - S_00B228_SGPRS((num_sgprs - 1) / 8) | + S_00B228_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B228_DX10_CLAMP(1) | S_00B228_FLOAT_MODE(shader->config.float_mode)); si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, - S_00B22C_USER_SGPR(num_user_sgprs) | + S_00B22C_USER_SGPR(SI_GS_NUM_USER_SGPR) | S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); } @@ -523,7 +485,7 @@ static void si_shader_gs(struct si_shader *shader) static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) { struct si_pm4_state *pm4; - unsigned num_sgprs, num_user_sgprs; + unsigned num_user_sgprs; unsigned nparams, vgpr_comp_cnt; uint64_t va; unsigned window_space = @@ -566,13 +528,6 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) } else unreachable("invalid shader selector type"); - num_sgprs = shader->config.num_sgprs; - if (num_user_sgprs > num_sgprs) { - /* Last 2 reserved SGPRs are used for VCC */ - num_sgprs = num_user_sgprs + 2; - } - assert(num_sgprs <= 104); - /* VS is required to export at least one param. */ nparams = MAX2(shader->info.nr_param_exports, 1); si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG, @@ -594,7 +549,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40); si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS, S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) | - S_00B128_SGPRS((num_sgprs - 1) / 8) | + S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) | S_00B128_DX10_CLAMP(1) | S_00B128_FLOAT_MODE(shader->config.float_mode)); @@ -684,7 +639,6 @@ static void si_shader_ps(struct si_shader *shader) struct tgsi_shader_info *info = &shader->selector->info; struct si_pm4_state *pm4; unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask; - unsigned num_sgprs, num_user_sgprs; unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1); uint64_t va; bool has_centroid; @@ -771,23 +725,14 @@ static void si_shader_ps(struct si_shader *shader) si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8); si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40); - num_user_sgprs = SI_PS_NUM_USER_SGPR; - num_sgprs = shader->config.num_sgprs; - /* One SGPR after user SGPRs is pre-loaded with {prim_mask, lds_offset} */ - if ((num_user_sgprs + 1) > num_sgprs) { - /* Last 2 reserved SGPRs are used for VCC */ - num_sgprs = num_user_sgprs + 1 + 2; - } - assert(num_sgprs <= 104); - si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS, S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) | - S_00B028_SGPRS((num_sgprs - 1) / 8) | + S_00B028_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B028_DX10_CLAMP(1) | S_00B028_FLOAT_MODE(shader->config.float_mode)); si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS, S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) | - S_00B02C_USER_SGPR(num_user_sgprs) | + S_00B02C_USER_SGPR(SI_PS_NUM_USER_SGPR) | S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); /* Prefer RE_Z if the shader is complex enough. The requirement is either: diff --git a/src/gallium/drivers/swr/Makefile.am b/src/gallium/drivers/swr/Makefile.am index f08806aaf77..d6d6e7dc611 100644 --- a/src/gallium/drivers/swr/Makefile.am +++ b/src/gallium/drivers/swr/Makefile.am @@ -28,4 +28,96 @@ noinst_LTLIBRARIES = libmesaswr.la libmesaswr_la_SOURCES = $(LOADER_SOURCES) -EXTRA_DIST = Makefile.sources-arch +COMMON_CXXFLAGS = \ + $(GALLIUM_DRIVER_CFLAGS) \ + $(LLVM_CFLAGS) \ + -I$(builddir)/rasterizer/scripts \ + -I$(builddir)/rasterizer/jitter \ + -I$(srcdir)/rasterizer \ + -I$(srcdir)/rasterizer/core \ + -I$(srcdir)/rasterizer/jitter + +COMMON_SOURCES = \ + $(CXX_SOURCES) \ + $(COMMON_CXX_SOURCES) \ + $(CORE_CXX_SOURCES) \ + $(JITTER_CXX_SOURCES) \ + $(MEMORY_CXX_SOURCES) \ + $(BUILT_SOURCES) + +BUILT_SOURCES = \ + rasterizer/scripts/gen_knobs.cpp \ + rasterizer/scripts/gen_knobs.h \ + rasterizer/jitter/state_llvm.h \ + rasterizer/jitter/builder_gen.h \ + rasterizer/jitter/builder_gen.cpp \ + rasterizer/jitter/builder_x86.h \ + rasterizer/jitter/builder_x86.cpp + +rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py rasterizer/scripts/templates/knobs.template + $(PYTHON2) $(PYTHON_FLAGS) \ + $(srcdir)/rasterizer/scripts/gen_knobs.py \ + rasterizer/scripts + +rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py rasterizer/core/state.h + $(PYTHON2) $(PYTHON_FLAGS) \ + $(srcdir)/rasterizer/jitter/scripts/gen_llvm_types.py \ + --input $(srcdir)/rasterizer/core/state.h \ + --output rasterizer/jitter/state_llvm.h + +rasterizer/jitter/builder_gen.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h + $(PYTHON2) $(PYTHON_FLAGS) \ + $(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \ + --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \ + --output rasterizer/jitter/builder_gen.h \ + --gen_h + +rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h + $(PYTHON2) $(PYTHON_FLAGS) \ + $(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \ + --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \ + --output rasterizer/jitter/builder_gen.cpp \ + --gen_cpp + +rasterizer/jitter/builder_x86.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py + $(PYTHON2) $(PYTHON_FLAGS) \ + $(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \ + --output rasterizer/jitter/builder_x86.h \ + --gen_x86_h + +rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py + $(PYTHON2) $(PYTHON_FLAGS) \ + $(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \ + --output rasterizer/jitter/builder_x86.cpp \ + --gen_x86_cpp + + +COMMON_LIBADD = \ + $(top_builddir)/src/gallium/auxiliary/libgallium.la \ + $(top_builddir)/src/mesa/libmesagallium.la + +lib_LTLIBRARIES = libswrAVX.la libswrAVX2.la + +libswrAVX_la_CXXFLAGS = \ + -march=core-avx-i \ + -DKNOB_ARCH=KNOB_ARCH_AVX \ + $(COMMON_CXXFLAGS) + +libswrAVX_la_SOURCES = \ + $(COMMON_SOURCES) + +libswrAVX_la_LIBADD = \ + $(COMMON_LIBADD) + +libswrAVX2_la_CXXFLAGS = \ + -march=core-avx2 \ + -DKNOB_ARCH=KNOB_ARCH_AVX2 \ + $(COMMON_CXXFLAGS) + +libswrAVX2_la_SOURCES = \ + $(COMMON_SOURCES) + +libswrAVX2_la_LIBADD = \ + $(COMMON_LIBADD) + +include $(top_srcdir)/install-gallium-links.mk diff --git a/src/gallium/drivers/swr/Makefile.sources b/src/gallium/drivers/swr/Makefile.sources index 72247211184..be3d8f027f8 100644 --- a/src/gallium/drivers/swr/Makefile.sources +++ b/src/gallium/drivers/swr/Makefile.sources @@ -21,3 +21,94 @@ LOADER_SOURCES := \ swr_loader.cpp + +CXX_SOURCES := \ + swr_clear.cpp \ + swr_context.cpp \ + swr_context.h \ + swr_context_llvm.h \ + swr_draw.cpp \ + swr_public.h \ + swr_resource.h \ + swr_screen.cpp \ + swr_screen.h \ + swr_state.cpp \ + swr_state.h \ + swr_tex_sample.cpp \ + swr_tex_sample.h \ + swr_scratch.h \ + swr_scratch.cpp \ + swr_shader.cpp \ + swr_memory.h \ + swr_fence.h \ + swr_fence.cpp \ + swr_query.h \ + swr_query.cpp + +COMMON_CXX_SOURCES := \ + rasterizer/common/containers.hpp \ + rasterizer/common/formats.cpp \ + rasterizer/common/formats.h \ + rasterizer/common/isa.hpp \ + rasterizer/common/os.h \ + rasterizer/common/rdtsc_buckets.cpp \ + rasterizer/common/rdtsc_buckets.h \ + rasterizer/common/rdtsc_buckets_shared.h \ + rasterizer/common/rdtsc_buckets_shared.h \ + rasterizer/common/simdintrin.h \ + rasterizer/common/swr_assert.cpp \ + rasterizer/common/swr_assert.h + +CORE_CXX_SOURCES := \ + rasterizer/core/api.cpp \ + rasterizer/core/api.h \ + rasterizer/core/arena.h \ + rasterizer/core/backend.cpp \ + rasterizer/core/backend.h \ + rasterizer/core/blend.h \ + rasterizer/core/clip.cpp \ + rasterizer/core/clip.h \ + rasterizer/core/context.h \ + rasterizer/core/depthstencil.h \ + rasterizer/core/fifo.hpp \ + rasterizer/core/format_traits.h \ + rasterizer/core/format_types.h \ + rasterizer/core/frontend.cpp \ + rasterizer/core/frontend.h \ + rasterizer/core/knobs.h \ + rasterizer/core/knobs_init.h \ + rasterizer/core/multisample.cpp \ + rasterizer/core/multisample.h \ + rasterizer/core/pa_avx.cpp \ + rasterizer/core/pa.h \ + rasterizer/core/rasterizer.cpp \ + rasterizer/core/rasterizer.h \ + rasterizer/core/rdtsc_core.cpp \ + rasterizer/core/rdtsc_core.h \ + rasterizer/core/ringbuffer.h \ + rasterizer/core/state.h \ + rasterizer/core/threads.cpp \ + rasterizer/core/threads.h \ + rasterizer/core/tilemgr.cpp \ + rasterizer/core/tilemgr.h \ + rasterizer/core/utils.cpp \ + rasterizer/core/utils.h + +JITTER_CXX_SOURCES := \ + rasterizer/jitter/blend_jit.cpp \ + rasterizer/jitter/blend_jit.h \ + rasterizer/jitter/builder.cpp \ + rasterizer/jitter/builder.h \ + rasterizer/jitter/builder_misc.cpp \ + rasterizer/jitter/builder_misc.h \ + rasterizer/jitter/fetch_jit.cpp \ + rasterizer/jitter/fetch_jit.h \ + rasterizer/jitter/JitManager.cpp \ + rasterizer/jitter/JitManager.h \ + rasterizer/jitter/streamout_jit.cpp \ + rasterizer/jitter/streamout_jit.h + +MEMORY_CXX_SOURCES := \ + rasterizer/memory/ClearTile.cpp \ + rasterizer/memory/LoadTile.cpp \ + rasterizer/memory/StoreTile.cpp diff --git a/src/gallium/drivers/swr/Makefile.sources-arch b/src/gallium/drivers/swr/Makefile.sources-arch deleted file mode 100644 index a04b1203c7c..00000000000 --- a/src/gallium/drivers/swr/Makefile.sources-arch +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright (C) 2015 Intel Corporation. All Rights Reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. - -CXX_SOURCES := \ - swr_clear.cpp \ - swr_context.cpp \ - swr_context.h \ - swr_context_llvm.h \ - swr_draw.cpp \ - swr_public.h \ - swr_resource.h \ - swr_screen.cpp \ - swr_screen.h \ - swr_state.cpp \ - swr_state.h \ - swr_tex_sample.cpp \ - swr_tex_sample.h \ - swr_scratch.h \ - swr_scratch.cpp \ - swr_shader.cpp \ - swr_memory.h \ - swr_fence.h \ - swr_fence.cpp \ - swr_query.h \ - swr_query.cpp - -COMMON_CXX_SOURCES := \ - rasterizer/common/containers.hpp \ - rasterizer/common/formats.cpp \ - rasterizer/common/formats.h \ - rasterizer/common/isa.hpp \ - rasterizer/common/os.h \ - rasterizer/common/rdtsc_buckets.cpp \ - rasterizer/common/rdtsc_buckets.h \ - rasterizer/common/rdtsc_buckets_shared.h \ - rasterizer/common/rdtsc_buckets_shared.h \ - rasterizer/common/simdintrin.h \ - rasterizer/common/swr_assert.cpp \ - rasterizer/common/swr_assert.h - -CORE_CXX_SOURCES := \ - rasterizer/core/api.cpp \ - rasterizer/core/api.h \ - rasterizer/core/arena.h \ - rasterizer/core/backend.cpp \ - rasterizer/core/backend.h \ - rasterizer/core/blend.h \ - rasterizer/core/clip.cpp \ - rasterizer/core/clip.h \ - rasterizer/core/context.h \ - rasterizer/core/depthstencil.h \ - rasterizer/core/fifo.hpp \ - rasterizer/core/format_traits.h \ - rasterizer/core/format_types.h \ - rasterizer/core/frontend.cpp \ - rasterizer/core/frontend.h \ - rasterizer/core/knobs.h \ - rasterizer/core/knobs_init.h \ - rasterizer/core/multisample.cpp \ - rasterizer/core/multisample.h \ - rasterizer/core/pa_avx.cpp \ - rasterizer/core/pa.h \ - rasterizer/core/rasterizer.cpp \ - rasterizer/core/rasterizer.h \ - rasterizer/core/rdtsc_core.cpp \ - rasterizer/core/rdtsc_core.h \ - rasterizer/core/ringbuffer.h \ - rasterizer/core/state.h \ - rasterizer/core/threads.cpp \ - rasterizer/core/threads.h \ - rasterizer/core/tilemgr.cpp \ - rasterizer/core/tilemgr.h \ - rasterizer/core/utils.cpp \ - rasterizer/core/utils.h - -JITTER_CXX_SOURCES := \ - rasterizer/jitter/blend_jit.cpp \ - rasterizer/jitter/blend_jit.h \ - rasterizer/jitter/builder.cpp \ - rasterizer/jitter/builder.h \ - rasterizer/jitter/builder_misc.cpp \ - rasterizer/jitter/builder_misc.h \ - rasterizer/jitter/fetch_jit.cpp \ - rasterizer/jitter/fetch_jit.h \ - rasterizer/jitter/JitManager.cpp \ - rasterizer/jitter/JitManager.h \ - rasterizer/jitter/streamout_jit.cpp \ - rasterizer/jitter/streamout_jit.h - -MEMORY_CXX_SOURCES := \ - rasterizer/memory/ClearTile.cpp \ - rasterizer/memory/LoadTile.cpp \ - rasterizer/memory/StoreTile.cpp diff --git a/src/gallium/drivers/swr/avx/Makefile.am b/src/gallium/drivers/swr/avx/Makefile.am deleted file mode 100644 index 384f1a7eecf..00000000000 --- a/src/gallium/drivers/swr/avx/Makefile.am +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (C) 2015 Intel Corporation. All Rights Reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. - -include ../Makefile.sources-arch -include $(top_srcdir)/src/gallium/Automake.inc - -VPATH = $(srcdir) $(srcdir)/.. - -AM_CXXFLAGS = \ - -march=core-avx-i \ - -DKNOB_ARCH=KNOB_ARCH_AVX \ - $(GALLIUM_DRIVER_CFLAGS) \ - $(LLVM_CFLAGS) \ - -I$(builddir)/rasterizer/scripts \ - -I$(builddir)/rasterizer/jitter \ - -I$(srcdir)/../rasterizer \ - -I$(srcdir)/../rasterizer/core \ - -I$(srcdir)/../rasterizer/jitter - -lib_LTLIBRARIES = libswrAVX.la - -BUILT_SOURCES = \ - rasterizer/scripts/gen_knobs.cpp \ - rasterizer/scripts/gen_knobs.h \ - rasterizer/jitter/state_llvm.h \ - rasterizer/jitter/builder_gen.h \ - rasterizer/jitter/builder_gen.cpp \ - rasterizer/jitter/builder_x86.h \ - rasterizer/jitter/builder_x86.cpp - -libswrAVX_la_SOURCES = \ - $(CXX_SOURCES) \ - $(COMMON_CXX_SOURCES) \ - $(CORE_CXX_SOURCES) \ - $(JITTER_CXX_SOURCES) \ - $(MEMORY_CXX_SOURCES) \ - $(BUILT_SOURCES) - -rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py rasterizer/scripts/templates/knobs.template - $(PYTHON2) $(PYTHON_FLAGS) \ - $(srcdir)/../rasterizer/scripts/gen_knobs.py \ - rasterizer/scripts - -rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py rasterizer/core/state.h - $(PYTHON2) $(PYTHON_FLAGS) \ - $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_types.py \ - --input $(srcdir)/../rasterizer/core/state.h \ - --output rasterizer/jitter/state_llvm.h - -rasterizer/jitter/builder_gen.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h - $(PYTHON2) $(PYTHON_FLAGS) \ - $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \ - --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \ - --output rasterizer/jitter/builder_gen.h \ - --gen_h - -rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h - $(PYTHON2) $(PYTHON_FLAGS) \ - $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \ - --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \ - --output rasterizer/jitter/builder_gen.cpp \ - --gen_cpp - -rasterizer/jitter/builder_x86.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py - $(PYTHON2) $(PYTHON_FLAGS) \ - $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \ - --output rasterizer/jitter/builder_x86.h \ - --gen_x86_h - -rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py - $(PYTHON2) $(PYTHON_FLAGS) \ - $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \ - --output rasterizer/jitter/builder_x86.cpp \ - --gen_x86_cpp - - -libswrAVX_la_LIBADD = \ - $(top_builddir)/src/gallium/auxiliary/libgallium.la \ - $(top_builddir)/src/mesa/libmesagallium.la - -include $(top_srcdir)/install-gallium-links.mk diff --git a/src/gallium/drivers/swr/avx2/Makefile.am b/src/gallium/drivers/swr/avx2/Makefile.am deleted file mode 100644 index a3968ecd95e..00000000000 --- a/src/gallium/drivers/swr/avx2/Makefile.am +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (C) 2015 Intel Corporation. All Rights Reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. - -include ../Makefile.sources-arch -include $(top_srcdir)/src/gallium/Automake.inc - -VPATH = $(srcdir) $(srcdir)/.. - -AM_CXXFLAGS = \ - -march=core-avx2 \ - -DKNOB_ARCH=KNOB_ARCH_AVX2 \ - $(GALLIUM_DRIVER_CFLAGS) \ - $(LLVM_CFLAGS) \ - -I$(builddir)/rasterizer/scripts \ - -I$(builddir)/rasterizer/jitter \ - -I$(srcdir)/../rasterizer \ - -I$(srcdir)/../rasterizer/core \ - -I$(srcdir)/../rasterizer/jitter - -lib_LTLIBRARIES = libswrAVX2.la - -BUILT_SOURCES = \ - rasterizer/scripts/gen_knobs.cpp \ - rasterizer/scripts/gen_knobs.h \ - rasterizer/jitter/state_llvm.h \ - rasterizer/jitter/builder_gen.h \ - rasterizer/jitter/builder_gen.cpp \ - rasterizer/jitter/builder_x86.h \ - rasterizer/jitter/builder_x86.cpp - -libswrAVX2_la_SOURCES = \ - $(CXX_SOURCES) \ - $(COMMON_CXX_SOURCES) \ - $(CORE_CXX_SOURCES) \ - $(JITTER_CXX_SOURCES) \ - $(MEMORY_CXX_SOURCES) \ - $(BUILT_SOURCES) - -rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py rasterizer/scripts/templates/knobs.template - $(PYTHON2) $(PYTHON_FLAGS) \ - $(srcdir)/../rasterizer/scripts/gen_knobs.py \ - rasterizer/scripts - -rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py rasterizer/core/state.h - $(PYTHON2) $(PYTHON_FLAGS) \ - $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_types.py \ - --input $(srcdir)/../rasterizer/core/state.h \ - --output rasterizer/jitter/state_llvm.h - -rasterizer/jitter/builder_gen.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h - $(PYTHON2) $(PYTHON_FLAGS) \ - $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \ - --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \ - --output rasterizer/jitter/builder_gen.h \ - --gen_h - -rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h - $(PYTHON2) $(PYTHON_FLAGS) \ - $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \ - --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \ - --output rasterizer/jitter/builder_gen.cpp \ - --gen_cpp - -rasterizer/jitter/builder_x86.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py - $(PYTHON2) $(PYTHON_FLAGS) \ - $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \ - --output rasterizer/jitter/builder_x86.h \ - --gen_x86_h - -rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py - $(PYTHON2) $(PYTHON_FLAGS) \ - $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \ - --output rasterizer/jitter/builder_x86.cpp \ - --gen_x86_cpp - - -libswrAVX2_la_LIBADD = \ - $(top_builddir)/src/gallium/auxiliary/libgallium.la \ - $(top_builddir)/src/mesa/libmesagallium.la - -include $(top_srcdir)/install-gallium-links.mk diff --git a/src/gallium/tests/unit/SConscript b/src/gallium/tests/unit/SConscript index a816e2fea41..0650804018b 100644 --- a/src/gallium/tests/unit/SConscript +++ b/src/gallium/tests/unit/SConscript @@ -24,10 +24,8 @@ for progname in progs: target = progname, source = progname + '.c', ) - - env.Alias(progname, env.InstallProgram(prog)) - - # http://www.scons.org/wiki/UnitTests - test_alias = env.Alias('unit', [prog], prog[0].abspath) - AlwaysBuild(test_alias) - + if progname not in [ + 'u_cache_test', # too long + 'translate_test', # unreliable + ]: + env.UnitTest(progname, prog) diff --git a/src/gallium/tests/unit/pipe_barrier_test.c b/src/gallium/tests/unit/pipe_barrier_test.c index bab6acadb80..bb7989a79fd 100644 --- a/src/gallium/tests/unit/pipe_barrier_test.c +++ b/src/gallium/tests/unit/pipe_barrier_test.c @@ -35,37 +35,78 @@ #include <stdio.h> +#include <stdlib.h> #include "os/os_thread.h" #include "os/os_time.h" +#include "util/u_atomic.h" #define NUM_THREADS 10 +static int verbosity = 0; + static pipe_thread threads[NUM_THREADS]; static pipe_barrier barrier; static int thread_ids[NUM_THREADS]; +static volatile int waiting = 0; +static volatile int proceeded = 0; + + +#define LOG(fmt, ...) \ + if (verbosity > 0) { \ + fprintf(stdout, fmt, ##__VA_ARGS__); \ + } + +#define CHECK(_cond) \ + if (!(_cond)) { \ + fprintf(stderr, "%s:%u: `%s` failed\n", __FILE__, __LINE__, #_cond); \ + _exit(EXIT_FAILURE); \ + } + static PIPE_THREAD_ROUTINE(thread_function, thread_data) { int thread_id = *((int *) thread_data); - printf("thread %d starting\n", thread_id); - os_time_sleep(thread_id * 1000 * 1000); - printf("thread %d before barrier\n", thread_id); + LOG("thread %d starting\n", thread_id); + os_time_sleep(thread_id * 100 * 1000); + LOG("thread %d before barrier\n", thread_id); + + CHECK(p_atomic_read(&proceeded) == 0); + p_atomic_inc(&waiting); + pipe_barrier_wait(&barrier); - printf("thread %d exiting\n", thread_id); + + CHECK(p_atomic_read(&waiting) == NUM_THREADS); + + p_atomic_inc(&proceeded); + + LOG("thread %d exiting\n", thread_id); return 0; } -int main() +int main(int argc, char *argv[]) { int i; - printf("pipe_barrier_test starting\n"); + for (i = 1; i < argc; ++i) { + const char *arg = argv[i]; + if (strcmp(arg, "-v") == 0) { + ++verbosity; + } else { + fprintf(stderr, "error: unrecognized option `%s`\n", arg); + exit(EXIT_FAILURE); + } + } + + // Disable buffering + setbuf(stdout, NULL); + + LOG("pipe_barrier_test starting\n"); pipe_barrier_init(&barrier, NUM_THREADS); @@ -78,9 +119,11 @@ int main() pipe_thread_wait(threads[i]); } + CHECK(p_atomic_read(&proceeded) == NUM_THREADS); + pipe_barrier_destroy(&barrier); - printf("pipe_barrier_test exiting\n"); + LOG("pipe_barrier_test exiting\n"); return 0; } diff --git a/src/gallium/tests/unit/translate_test.c b/src/gallium/tests/unit/translate_test.c index fc49862c2ca..7ca606750a9 100644 --- a/src/gallium/tests/unit/translate_test.c +++ b/src/gallium/tests/unit/translate_test.c @@ -70,8 +70,9 @@ int main(int argc, char** argv) util_cpu_detect(); - if(argc <= 1) - {} + if (argc <= 1 || + !strcmp(argv[1], "default") ) + create_fn = translate_create; else if (!strcmp(argv[1], "generic")) create_fn = translate_generic_create; else if (!strcmp(argv[1], "x86")) @@ -129,7 +130,7 @@ int main(int argc, char** argv) if (!create_fn) { - printf("Usage: ./translate_test [generic|x86|nosse|sse|sse2|sse3|sse4.1]\n"); + printf("Usage: ./translate_test [default|generic|x86|nosse|sse|sse2|sse3|sse4.1]\n"); return 2; } diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 3f307f4ef70..aedb5a23f02 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1988,10 +1988,11 @@ fs_visitor::assign_constant_locations() */ const unsigned int max_push_components = 16 * 8; - /* For vulkan we don't limit the max_chunk_size. We set it to 32 float = - * 128 bytes, which is the maximum vulkan push constant size. + /* We push small arrays, but no bigger than 16 floats. This is big enough + * for a vec4 but hopefully not large enough to push out other stuff. We + * should probably use a better heuristic at some point. */ - const unsigned int max_chunk_size = 32; + const unsigned int max_chunk_size = 16; unsigned int num_push_constants = 0; unsigned int num_pull_constants = 0; @@ -2018,8 +2019,14 @@ fs_visitor::assign_constant_locations() if (!contiguous[u]) { unsigned chunk_size = u - chunk_start + 1; - if (num_push_constants + chunk_size <= max_push_components && - chunk_size <= max_chunk_size) { + /* Decide whether we should push or pull this parameter. In the + * Vulkan driver, push constants are explicitly exposed via the API + * so we push everything. In GL, we only push small arrays. + */ + if (stage_prog_data->pull_param == NULL || + (num_push_constants + chunk_size <= max_push_components && + chunk_size <= max_chunk_size)) { + assert(num_push_constants + chunk_size <= max_push_components); for (unsigned j = chunk_start; j <= u; j++) push_constant_loc[j] = num_push_constants++; } else { @@ -4515,7 +4522,7 @@ get_lowered_simd_width(const struct brw_device_info *devinfo, case SHADER_OPCODE_MOV_INDIRECT: /* Prior to Broadwell, we only have 8 address subregisters */ - return devinfo->gen < 8 ? 8 : inst->exec_size; + return devinfo->gen < 8 ? 8 : MIN2(inst->exec_size, 16); default: return inst->exec_size; diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index ae80832544b..851cccf0f7c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -367,29 +367,53 @@ fs_generator::generate_mov_indirect(fs_inst *inst, /* The destination stride of an instruction (in bytes) must be greater * than or equal to the size of the rest of the instruction. Since the * address register is of type UW, we can't use a D-type instruction. - * In order to get around this, re re-type to UW and use a stride. + * In order to get around this, re retype to UW and use a stride. */ indirect_byte_offset = retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW); + struct brw_reg ind_src; if (devinfo->gen < 8) { - /* Prior to broadwell, we have a restriction that the bottom 5 bits - * of the base offset and the bottom 5 bits of the indirect must add - * to less than 32. In other words, the hardware needs to be able to - * add the bottom five bits of the two to get the subnumber and add - * the next 7 bits of each to get the actual register number. Since - * the indirect may cause us to cross a register boundary, this makes - * it almost useless. We could try and do something clever where we - * use a actual base offset if base_offset % 32 == 0 but that would - * mean we were generating different code depending on the base - * offset. Instead, for the sake of consistency, we'll just do the - * add ourselves. + /* From the Haswell PRM section "Register Region Restrictions": + * + * "The lower bits of the AddressImmediate must not overflow to + * change the register address. The lower 5 bits of Address + * Immediate when added to lower 5 bits of address register gives + * the sub-register offset. The upper bits of Address Immediate + * when added to upper bits of address register gives the register + * address. Any overflow from sub-register offset is dropped." + * + * This restriction is only listed in the Haswell PRM but emperical + * testing indicates that it applies on all older generations and is + * lifted on Broadwell. + * + * Since the indirect may cause us to cross a register boundary, this + * makes the base offset almost useless. We could try and do + * something clever where we use a actual base offset if + * base_offset % 32 == 0 but that would mean we were generating + * different code depending on the base offset. Instead, for the + * sake of consistency, we'll just do the add ourselves. */ brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset)); - brw_MOV(p, dst, retype(brw_VxH_indirect(0, 0), dst.type)); + ind_src = brw_VxH_indirect(0, 0); } else { brw_MOV(p, addr, indirect_byte_offset); - brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type)); + ind_src = brw_VxH_indirect(0, imm_byte_offset); + } + + brw_inst *mov = brw_MOV(p, dst, retype(ind_src, dst.type)); + + if (devinfo->gen == 6 && dst.file == BRW_MESSAGE_REGISTER_FILE && + !inst->get_next()->is_tail_sentinel() && + ((fs_inst *)inst->get_next())->mlen > 0) { + /* From the Sandybridge PRM: + * + * "[Errata: DevSNB(SNB)] If MRF register is updated by any + * instruction that “indexed/indirect” source AND is followed by a + * send, the instruction requires a “Switch”. This is to avoid + * race condition where send may dispatch before MRF is updated." + */ + brw_inst_set_thread_control(devinfo, mov, BRW_THREAD_SWITCH); } } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index ab564bbcb9e..c16f1ed5477 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -2743,7 +2743,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); if (const_offset == NULL) { fs_reg base_offset = retype(get_nir_src(instr->src[1]), - BRW_REGISTER_TYPE_D); + BRW_REGISTER_TYPE_UD); for (int i = 0; i < instr->num_components; i++) VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index, diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 6143f65efa1..11db159109f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -285,7 +285,7 @@ public: void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, dst_reg dst, src_reg orig_src, - int base_offset, + int base_offset, src_reg indirect); void emit_pull_constant_load_reg(dst_reg dst, src_reg surf_index, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 33c5f07cec9..4b12a72910e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -758,7 +758,7 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst, pull->mlen = 2; pull->header_size = 1; } else if (devinfo->gen >= 7) { - dst_reg grf_offset = dst_reg(this, glsl_type::int_type); + dst_reg grf_offset = dst_reg(this, glsl_type::uint_type); grf_offset.type = offset_reg.type; @@ -1587,21 +1587,21 @@ vec4_visitor::move_grf_array_access_to_scratch() void vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, dst_reg temp, src_reg orig_src, - int base_offset, src_reg indirect) + int base_offset, src_reg indirect) { int reg_offset = base_offset + orig_src.reg_offset; const unsigned index = prog_data->base.binding_table.pull_constants_start; src_reg offset; if (indirect.file != BAD_FILE) { - offset = src_reg(this, glsl_type::int_type); + offset = src_reg(this, glsl_type::uint_type); emit_before(block, inst, ADD(dst_reg(offset), indirect, - brw_imm_d(reg_offset * 16))); + brw_imm_ud(reg_offset * 16))); } else if (devinfo->gen >= 8) { /* Store the offset in a GRF so we can send-from-GRF. */ - offset = src_reg(this, glsl_type::int_type); - emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset * 16))); + offset = src_reg(this, glsl_type::uint_type); + emit_before(block, inst, MOV(dst_reg(offset), brw_imm_ud(reg_offset * 16))); } else { offset = brw_imm_d(reg_offset * 16); } @@ -1629,6 +1629,12 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, void vec4_visitor::move_uniform_array_access_to_pull_constants() { + /* The vulkan dirver doesn't support pull constants other than UBOs so + * everything has to be pushed regardless. + */ + if (stage_prog_data->pull_param == NULL) + return; + int pull_constant_loc[this->uniforms]; memset(pull_constant_loc, -1, sizeof(pull_constant_loc)); diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index dbec82fbd44..db9d94d3b34 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -932,7 +932,7 @@ static const __DRIextension *intelRobustScreenExtensions[] = { NULL }; -static bool +static int intel_get_param(__DRIscreen *psp, int param, int *value) { int ret; @@ -943,20 +943,17 @@ intel_get_param(__DRIscreen *psp, int param, int *value) gp.value = value; ret = drmCommandWriteRead(psp->fd, DRM_I915_GETPARAM, &gp, sizeof(gp)); - if (ret) { - if (ret != -EINVAL) + if (ret < 0 && ret != -EINVAL) _mesa_warning(NULL, "drm_i915_getparam: %d", ret); - return false; - } - return true; + return ret; } static bool intel_get_boolean(__DRIscreen *psp, int param) { int value = 0; - return intel_get_param(psp, param, &value) && value; + return (intel_get_param(psp, param, &value) == 0) && value; } static void @@ -1093,12 +1090,12 @@ intel_detect_sseu(struct intel_screen *intelScreen) ret = intel_get_param(intelScreen->driScrnPriv, I915_PARAM_SUBSLICE_TOTAL, &intelScreen->subslice_total); - if (ret != -EINVAL) + if (ret < 0 && ret != -EINVAL) goto err_out; ret = intel_get_param(intelScreen->driScrnPriv, I915_PARAM_EU_TOTAL, &intelScreen->eu_total); - if (ret != -EINVAL) + if (ret < 0 && ret != -EINVAL) goto err_out; /* Without this information, we cannot get the right Braswell brandstrings, @@ -1114,7 +1111,7 @@ intel_detect_sseu(struct intel_screen *intelScreen) err_out: intelScreen->subslice_total = -1; intelScreen->eu_total = -1; - _mesa_warning(NULL, "Failed to query GPU properties.\n"); + _mesa_warning(NULL, "Failed to query GPU properties (%s).\n", strerror(ret)); } static bool diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 4fd2dfef8cc..b4d04b4de5f 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -704,6 +704,10 @@ st_DrawAtlasBitmaps(struct gl_context *ctx, st_validate_state(st, ST_PIPELINE_RENDER); sv = st_create_texture_sampler_view(pipe, stObj->pt); + if (!sv) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCallLists(bitmap text)"); + return; + } setup_render_state(ctx, sv, color, true); @@ -793,6 +797,8 @@ st_DrawAtlasBitmaps(struct gl_context *ctx, pipe_resource_reference(&vb.buffer, NULL); + pipe_sampler_view_reference(&sv, NULL); + /* We uploaded modified constants, need to invalidate them. */ st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS; } diff --git a/src/util/SConscript b/src/util/SConscript index 5f3ecc1cdfc..73f34303976 100644 --- a/src/util/SConscript +++ b/src/util/SConscript @@ -47,17 +47,14 @@ env.Alias('mesautil', mesautil) Export('mesautil') -# http://www.scons.org/wiki/UnitTests u_atomic_test = env.Program( target = 'u_atomic_test', source = ['u_atomic_test.c'], ) -alias = env.Alias("u_atomic_test", u_atomic_test, u_atomic_test[0].abspath) -AlwaysBuild(alias) +env.UnitTest("u_atomic_test", u_atomic_test) roundeven_test = env.Program( target = 'roundeven_test', source = ['roundeven_test.c'], ) -alias = env.Alias("roundeven_test", roundeven_test, roundeven_test[0].abspath) -AlwaysBuild(alias) +env.UnitTest("roundeven_test", roundeven_test) |