summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jason Ekstrand <[email protected]> 2016-04-14 17:14:28 -0700
committer Jason Ekstrand <[email protected]> 2016-04-14 17:14:28 -0700
commit 5567ae0547d5f31e51e5f32e78065894e594fd1a (patch)
tree 98ff61f6afbfd7d5517553e1511f0f88cd34feeb
parent 48cc8c284aa28405eaa2335bb8f96324c8153ca7 (diff)
parent f1d29099b4eedafb0302a21c0673d12a6610c369 (diff)
Merge remote-tracking branch 'public/master' into vulkan
-rw-r--r-- SConstruct 5
-rw-r--r-- appveyor.yml 3
-rw-r--r-- configure.ac 2
-rw-r--r-- doxygen/Makefile.orig 35
-rw-r--r-- doxygen/header.html.orig 19
-rw-r--r-- install-gallium-links.mk 6
-rwxr-xr-x scons/gallium.py 27
-rw-r--r-- src/compiler/SConscript 1
-rw-r--r-- src/compiler/SConscript.nir 73
-rw-r--r-- src/compiler/nir/nir.h 20
-rw-r--r-- src/compiler/nir/nir_intrinsics.h 30
-rw-r--r-- src/compiler/nir/nir_lower_io.c 4
-rw-r--r-- src/compiler/nir/nir_print.c 6
-rw-r--r-- src/compiler/nir/nir_to_ssa.c 1
-rw-r--r-- src/gallium/Makefile.am 2
-rw-r--r-- src/gallium/drivers/llvmpipe/SConscript 6
-rw-r--r-- src/gallium/drivers/nouveau/nv_object.xml.h 1
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 2
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nve4_compute.c 3
-rw-r--r-- src/gallium/drivers/radeon/r600_buffer_common.c 4
-rw-r--r-- src/gallium/drivers/radeonsi/si_descriptors.c 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 12
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.c 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c 79
-rw-r--r-- src/gallium/drivers/swr/Makefile.am 94
-rw-r--r-- src/gallium/drivers/swr/Makefile.sources 91
-rw-r--r-- src/gallium/drivers/swr/Makefile.sources-arch 111
-rw-r--r-- src/gallium/drivers/swr/avx/Makefile.am 99
-rw-r--r-- src/gallium/drivers/swr/avx2/Makefile.am 99
-rw-r--r-- src/gallium/tests/unit/SConscript 12
-rw-r--r-- src/gallium/tests/unit/pipe_barrier_test.c 57
-rw-r--r-- src/gallium/tests/unit/translate_test.c 7
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp 19
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_generator.cpp 52
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_nir.cpp 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4.h 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 18
-rw-r--r-- src/mesa/drivers/dri/i965/intel_screen.c 17
-rw-r--r-- src/mesa/state_tracker/st_cb_bitmap.c 6
-rw-r--r-- src/util/SConscript 7
40 files changed, 501 insertions, 537 deletions
diff --git a/SConstruct b/SConstruct
index ef71ab69c38..e2e49fcc6ca 100644
--- a/SConstruct
+++ b/SConstruct
@@ -84,6 +84,11 @@ env.Append(CPPPATH = [
#print env.Dump()
+# Add a check target for running tests
+check = env.Alias('check')
+env.AlwaysBuild(check)
+
+
#######################################################################
# Invoke host SConscripts
#
diff --git a/appveyor.yml b/appveyor.yml
index bf7ac752857..2e9b9d650d7 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -65,6 +65,9 @@ install:
build_script:
- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=12.0 llvm=1
+after_build:
+- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=12.0 llvm=1 check
+
# It's possible to setup notification here, as described in
# http://www.appveyor.com/docs/notifications#appveyor-yml-configuration , but
diff --git a/configure.ac b/configure.ac
index 0d3f0ff3c9d..2aa46dccdbb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2551,8 +2551,6 @@ AC_CONFIG_FILES([Makefile
src/gallium/drivers/softpipe/Makefile
src/gallium/drivers/svga/Makefile
src/gallium/drivers/swr/Makefile
- src/gallium/drivers/swr/avx/Makefile
- src/gallium/drivers/swr/avx2/Makefile
src/gallium/drivers/trace/Makefile
src/gallium/drivers/vc4/Makefile
src/gallium/drivers/virgl/Makefile
diff --git a/doxygen/Makefile.orig b/doxygen/Makefile.orig
deleted file mode 100644
index b1bc15d5a05..00000000000
--- a/doxygen/Makefile.orig
+++ /dev/null
@@ -1,35 +0,0 @@
-
-default: full
-
-all: full subset
-
-%.tag: %.doxy
- doxygen $<
-
-FULL = \
- main.doxy \
- math.doxy \
- vbo.doxy \
- glapi.doxy \
- glsl.doxy \
- swrast.doxy \
- swrast_setup.doxy \
- tnl.doxy \
- tnl_dd.doxy \
- gbm.doxy \
- i965.doxy
-
-full: $(FULL:.doxy=.tag)
- $(foreach FILE,$(FULL),doxygen $(FILE);)
-
-SUBSET = \
- main.doxy \
- math.doxy
-
-subset: $(SUBSET:.doxy=.tag)
- $(foreach FILE,$(SUBSET),doxygen $(FILE);)
-
-clean:
- -rm -rf $(FULL:.doxy=) $(SUBSET:.doxy=)
- -rm -rf *.tag
- -rm -rf *.db
diff --git a/doxygen/header.html.orig b/doxygen/header.html.orig
deleted file mode 100644
index 034231cbdfd..00000000000
--- a/doxygen/header.html.orig
+++ /dev/null
@@ -1,19 +0,0 @@
-<html>
-<head>
-<title>Mesa Source Code Documentation</title>
-<link href="doxygen.css" rel="stylesheet" type="text/css">
-</head>
-<body>
-<div class="qindex">
-<a class="qindex" href="../main/index.html">core</a> |
-<a class="qindex" href="../glapi/index.html">glapi</a> |
-<a class="qindex" href="../glsl/index.html">glsl</a> |
-<a class="qindex" href="../vbo/index.html">vbo</a> |
-<a class="qindex" href="../math/index.html">math</a> |
-<a class="qindex" href="../swrast/index.html">swrast</a> |
-<a class="qindex" href="../swrast_setup/index.html">swrast_setup</a> |
-<a class="qindex" href="../tnl/index.html">tnl</a> |
-<a class="qindex" href="../tnl_dd/index.html">tnl_dd</a> |
-<a class="qindex" href="../gbm/index.html">gbm</a> |
-<a class="qindex" href="../i965/index.html">i965</a>
-</div>
diff --git a/install-gallium-links.mk b/install-gallium-links.mk
index 4010cad15c0..ac5a499c48f 100644
--- a/install-gallium-links.mk
+++ b/install-gallium-links.mk
@@ -12,9 +12,9 @@ all-local : .install-gallium-links
link_dir=$(top_builddir)/$(LIB_DIR)/egl; \
fi; \
$(MKDIR_P) $$link_dir; \
- file_list=$(dri_LTLIBRARIES:%.la=.libs/%.so); \
- file_list+=$(egl_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*); \
- file_list+=$(lib_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*); \
+ file_list="$(dri_LTLIBRARIES:%.la=.libs/%.so)"; \
+ file_list+="$(egl_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)"; \
+ file_list+="$(lib_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)"; \
for f in $$file_list; do \
if test -h .libs/$$f; then \
cp -d $$f $$link_dir; \
diff --git a/scons/gallium.py b/scons/gallium.py
index f37042d9af1..94321b2e847 100755
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -82,11 +82,6 @@ def install_shared_library(env, sources, version = ()):
return targets
-def createInstallMethods(env):
- env.AddMethod(install_program, 'InstallProgram')
- env.AddMethod(install_shared_library, 'InstallSharedLibrary')
-
-
def msvc2013_compat(env):
if env['gcc']:
env.Append(CCFLAGS = [
@@ -94,8 +89,20 @@ def msvc2013_compat(env):
'-Werror=pointer-arith',
])
-def createMSVCCompatMethods(env):
- env.AddMethod(msvc2013_compat, 'MSVC2013Compat')
+
+def unit_test(env, test_name, program_target, args=None):
+ env.InstallProgram(program_target)
+
+ cmd = [program_target[0].abspath]
+ if args is not None:
+ cmd += args
+ cmd = ' '.join(cmd)
+
+ # http://www.scons.org/wiki/UnitTests
+ action = SCons.Action.Action(cmd, " Running %s ..." % test_name)
+ alias = env.Alias(test_name, program_target, action)
+ env.AlwaysBuild(alias)
+ env.Depends('check', alias)
def num_jobs():
@@ -667,8 +674,10 @@ def generate(env):
# Custom builders and methods
env.Tool('custom')
- createInstallMethods(env)
- createMSVCCompatMethods(env)
+ env.AddMethod(install_program, 'InstallProgram')
+ env.AddMethod(install_shared_library, 'InstallSharedLibrary')
+ env.AddMethod(msvc2013_compat, 'MSVC2013Compat')
+ env.AddMethod(unit_test, 'UnitTest')
env.PkgCheckModules('X11', ['x11', 'xext', 'xdamage', 'xfixes', 'glproto >= 1.4.13'])
env.PkgCheckModules('XCB', ['x11-xcb', 'xcb-glx >= 1.8.1', 'xcb-dri2 >= 1.8'])
diff --git a/src/compiler/SConscript b/src/compiler/SConscript
index 8969d821984..10c79c44348 100644
--- a/src/compiler/SConscript
+++ b/src/compiler/SConscript
@@ -22,3 +22,4 @@ compiler = env.ConvenienceLibrary(
Export('compiler')
SConscript('SConscript.glsl')
+SConscript('SConscript.nir')
diff --git a/src/compiler/SConscript.nir b/src/compiler/SConscript.nir
new file mode 100644
index 00000000000..51cc0d403fd
--- /dev/null
+++ b/src/compiler/SConscript.nir
@@ -0,0 +1,73 @@
+import common
+
+Import('*')
+
+from sys import executable as python_cmd
+
+env = env.Clone()
+
+env.MSVC2013Compat()
+
+env.Prepend(CPPPATH = [
+ '#include',
+ '#src',
+ '#src/mapi',
+ '#src/mesa',
+ '#src/gallium/include',
+ '#src/gallium/auxiliary',
+ '#src/compiler/nir',
+])
+
+# Make generated headers reachable from the include path.
+env.Prepend(CPPPATH = [Dir('.').abspath, Dir('nir').abspath])
+
+# nir generated sources
+
+nir_builder_opcodes_h = env.CodeGenerate(
+ target = 'nir/nir_builder_opcodes.h',
+ script = 'nir/nir_builder_opcodes_h.py',
+ source = [],
+ command = python_cmd + ' $SCRIPT > $TARGET'
+)
+
+env.CodeGenerate(
+ target = 'nir/nir_constant_expressions.c',
+ script = 'nir/nir_constant_expressions.py',
+ source = [],
+ command = python_cmd + ' $SCRIPT > $TARGET'
+)
+
+env.CodeGenerate(
+ target = 'nir/nir_opcodes.h',
+ script = 'nir/nir_opcodes_h.py',
+ source = [],
+ command = python_cmd + ' $SCRIPT > $TARGET'
+)
+
+env.CodeGenerate(
+ target = 'nir/nir_opcodes.c',
+ script = 'nir/nir_opcodes_c.py',
+ source = [],
+ command = python_cmd + ' $SCRIPT > $TARGET'
+)
+
+env.CodeGenerate(
+ target = 'nir/nir_opt_algebraic.c',
+ script = 'nir/nir_opt_algebraic.py',
+ source = [],
+ command = python_cmd + ' $SCRIPT > $TARGET'
+)
+
+# parse Makefile.sources
+source_lists = env.ParseSourceList('Makefile.sources')
+
+nir_sources = source_lists['NIR_FILES']
+nir_sources += source_lists['NIR_GENERATED_FILES']
+
+nir = env.ConvenienceLibrary(
+ target = 'nir',
+ source = nir_sources,
+)
+
+env.Alias('nir', nir)
+Export('nir')
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index fede1954cf0..bbbc2089db3 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -507,7 +507,14 @@ typedef struct nir_src {
bool is_ssa;
} nir_src;
-#define NIR_SRC_INIT (nir_src) { { NULL } }
+static inline nir_src
+nir_src_init(void)
+{
+ nir_src src = { { NULL } };
+ return src;
+}
+
+#define NIR_SRC_INIT nir_src_init()
#define nir_foreach_use(reg_or_ssa_def, src) \
list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)
@@ -530,7 +537,14 @@ typedef struct {
bool is_ssa;
} nir_dest;
-#define NIR_DEST_INIT (nir_dest) { { { NULL } } }
+static inline nir_dest
+nir_dest_init(void)
+{
+ nir_dest dest = { { { NULL } } };
+ return dest;
+}
+
+#define NIR_DEST_INIT nir_dest_init()
#define nir_foreach_def(reg, dest) \
list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link)
@@ -957,7 +971,7 @@ typedef enum {
NIR_INTRINSIC_UCP_ID = 4,
/**
- * The ammount of data, starting from BASE, that this instruction may
+ * The amount of data, starting from BASE, that this instruction may
* access. This is used to provide bounds if the offset is not constant.
*/
NIR_INTRINSIC_RANGE = 5,
diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h
index fa162f9d126..3cb4f95394c 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -42,9 +42,9 @@
#define ARR(...) { __VA_ARGS__ }
-INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(load_var, 0, ARR(0), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, WRMASK, xx, xx, 0)
-INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, xx, xx, xx, 0)
+INTRINSIC(copy_var, 0, ARR(0), false, 0, 2, 0, xx, xx, xx, 0)
/*
* Interpolation of input. The interp_var_at* intrinsics are similar to the
@@ -72,7 +72,7 @@ INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0, xx, xx, xx,
* a barrier is an intrinsic with no inputs/outputs but which can't be moved
* around/optimized in general
*/
-#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, xx, xx, xx, 0)
+#define BARRIER(name) INTRINSIC(name, 0, ARR(0), false, 0, 0, 0, xx, xx, xx, 0)
BARRIER(barrier)
BARRIER(discard)
@@ -89,7 +89,7 @@ BARRIER(memory_barrier)
* The latter can be used as code motion barrier, which is currently not
* feasible with NIR.
*/
-INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(shader_clock, 0, ARR(0), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
/*
* Memory barrier with semantics analogous to the compute shader
@@ -113,8 +113,8 @@ INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)
*
* end_primitive implements GLSL's EndPrimitive() built-in.
*/
-INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
-INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
+INTRINSIC(emit_vertex, 0, ARR(0), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
+INTRINSIC(end_primitive, 0, ARR(0), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
/**
* Geometry Shader intrinsics with a vertex count.
@@ -137,7 +137,7 @@ INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)
*/
#define ATOMIC(name, flags) \
- INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, xx, xx, xx, flags) \
+ INTRINSIC(atomic_counter_##name##_var, 0, ARR(0), true, 1, 1, 0, xx, xx, xx, flags) \
INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, BASE, xx, xx, flags)
ATOMIC(inc, 0)
@@ -170,9 +170,9 @@ INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
-INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0, xx, xx, xx,
+INTRINSIC(image_size, 0, ARR(0), true, 4, 1, 0, xx, xx, xx,
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, xx, xx, xx,
+INTRINSIC(image_samples, 0, ARR(0), true, 1, 1, 0, xx, xx, xx,
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
/*
@@ -278,7 +278,7 @@ INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
#define SYSTEM_VALUE(name, components, num_indices, idx0, idx1, idx2) \
- INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
+ INTRINSIC(load_##name, 0, ARR(0), true, components, 0, num_indices, \
idx0, idx1, idx2, \
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
@@ -313,8 +313,9 @@ SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
* of the start of the variable being loaded and the offset source is an
* offset into that variable.
*
- * Uniform load operations have a second index that specifies the size of the
- * variable being loaded. If const_index[1] == 0, then the size is unknown.
+ * Uniform load operations have a second "range" index that specifies the
+ * range (starting at base) of the data from which we are loading. If
+ * const_index[1] == 0, then the range is unknown.
*
* Some load operations such as UBO/SSBO load and per_vertex loads take an
* additional source to specify which UBO/SSBO/vertex to load from.
@@ -328,9 +329,8 @@ SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
#define LOAD(name, srcs, num_indices, idx0, idx1, idx2, flags) \
INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, num_indices, idx0, idx1, idx2, flags)
-/* src[] = { offset }. const_index[] = { base, range } */
-LOAD(uniform, 1, 2, BASE, RANGE, xx,
- NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { offset }. const_index[] = { base, range } */
+LOAD(uniform, 1, 2, BASE, RANGE, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
/* src[] = { buffer_index, offset }. No const_index */
LOAD(ubo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
/* src[] = { offset }. const_index[] = { base } */
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index 369a8ee537e..df1f7a5d765 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -278,8 +278,8 @@ nir_lower_io_block(nir_block *block, void *void_state)
intrin->variables[0]->var->data.driver_location);
if (load->intrinsic == nir_intrinsic_load_uniform) {
- load->const_index[1] =
- state->type_size(intrin->variables[0]->var->type);
+ nir_intrinsic_set_range(load,
+ state->type_size(intrin->variables[0]->var->type));
}
if (per_vertex)
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 2793020953e..bbb4edf3260 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -31,6 +31,10 @@
#include <stdlib.h>
#include <inttypes.h> /* for PRIx64 macro */
+#if defined(_WIN32) && !defined(snprintf)
+#define snprintf _snprintf
+#endif
+
static void
print_tabs(unsigned num_tabs, FILE *fp)
{
@@ -514,8 +518,6 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
[NIR_INTRINSIC_STREAM_ID] = "stream-id",
[NIR_INTRINSIC_UCP_ID] = "ucp-id",
[NIR_INTRINSIC_RANGE] = "range",
- [NIR_INTRINSIC_DESC_SET] = "desc-set",
- [NIR_INTRINSIC_BINDING] = "binding",
};
for (unsigned idx = 1; idx < NIR_INTRINSIC_NUM_INDEX_FLAGS; idx++) {
if (!info->index_map[idx])
diff --git a/src/compiler/nir/nir_to_ssa.c b/src/compiler/nir/nir_to_ssa.c
index 23d709a218a..1a772fff2d5 100644
--- a/src/compiler/nir/nir_to_ssa.c
+++ b/src/compiler/nir/nir_to_ssa.c
@@ -27,7 +27,6 @@
#include "nir.h"
#include <stdlib.h>
-#include <unistd.h>
/*
* Implements the classic to-SSA algorithm described by Cytron et. al. in
diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am
index 086e1701128..ef2bc1016d5 100644
--- a/src/gallium/Makefile.am
+++ b/src/gallium/Makefile.am
@@ -80,8 +80,6 @@ endif
if HAVE_GALLIUM_SWR
SUBDIRS += drivers/swr
-SUBDIRS += drivers/swr/avx
-SUBDIRS += drivers/swr/avx2
endif
## vc4/rpi
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 8188156afc2..fbbd22a5299 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -38,10 +38,6 @@ if not env['embedded']:
target = testname,
source = [testname + '.c', 'lp_test_main.c'],
)
- env.InstallProgram(target)
-
- # http://www.scons.org/wiki/UnitTests
- alias = env.Alias(testname, [target], target[0].abspath)
- AlwaysBuild(alias)
+ env.UnitTest(testname, target)
Export('llvmpipe')
diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h
index 3479c343261..fe4982aa9fd 100644
--- a/src/gallium/drivers/nouveau/nv_object.xml.h
+++ b/src/gallium/drivers/nouveau/nv_object.xml.h
@@ -202,6 +202,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVE4_COMPUTE_CLASS 0x0000a0c0
#define NVF0_COMPUTE_CLASS 0x0000a1c0
#define GM107_COMPUTE_CLASS 0x0000b0c0
+#define GM200_COMPUTE_CLASS 0x0000b1c0
#define NV84_CRYPT_CLASS 0x000074c1
#define BLOB_NVC0_PCOPY1_CLASS 0x000090b8
#define BLOB_NVC0_PCOPY0_CLASS 0x000090b5
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 9a34007c6e5..3bf98ad6a3b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -644,9 +644,9 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
case 0xf0:
case 0x100:
case 0x110:
+ case 0x120:
if (debug_get_bool_option("NVF0_COMPUTE", false))
return nve4_screen_compute_setup(screen, screen->base.pushbuf);
- case 0x120:
return 0;
default:
return -1;
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index 4d069df983e..3d0190928e3 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -54,6 +54,9 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
case 0x110:
obj_class = GM107_COMPUTE_CLASS;
break;
+ case 0x120:
+ obj_class = GM200_COMPUTE_CLASS;
+ break;
default:
NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
return -1;
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index 47514e91d23..664dc5bfdce 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -376,6 +376,10 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
0, 0, resource, level, box);
data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
+ if (!data) {
+ pipe_resource_reference((struct pipe_resource **)&staging, NULL);
+ return NULL;
+ }
data += box->x % R600_MAP_BUFFER_ALIGNMENT;
return r600_buffer_get_transfer(ctx, resource, level, usage, box,
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index b5557d800c7..7fc1461ec9b 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1192,7 +1192,9 @@ static void si_mark_shader_pointers_dirty(struct si_context *sctx,
{
sctx->const_buffers[shader].desc.pointer_dirty = true;
sctx->rw_buffers[shader].desc.pointer_dirty = true;
+ sctx->shader_buffers[shader].desc.pointer_dirty = true;
sctx->samplers[shader].views.desc.pointer_dirty = true;
+ sctx->images[shader].desc.pointer_dirty = true;
if (shader == PIPE_SHADER_VERTEX)
sctx->vertex_buffers.pointer_dirty = true;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c58467ddcb0..c26960b1ca3 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5839,6 +5839,10 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
radeon_llvm_dispose(&ctx.radeon_bld);
+ /* Add the scratch offset to input SGPRs. */
+ if (shader->config.scratch_bytes_per_wave)
+ shader->info.num_input_sgprs += 1; /* scratch byte offset */
+
/* Calculate the number of fragment input VGPRs. */
if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
shader->info.num_input_vgprs = 0;
@@ -6761,6 +6765,13 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
return true;
}
+static void si_fix_num_sgprs(struct si_shader *shader)
+{
+ unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* VCC */
+
+ shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs);
+}
+
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
struct si_shader *shader,
struct pipe_debug_callback *debug)
@@ -6850,6 +6861,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
}
}
+ si_fix_num_sgprs(shader);
si_shader_dump(sscreen, shader, debug, shader->selector->info.processor,
stderr);
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 82ae4c43245..af9ffdd381f 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1487,7 +1487,7 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
}
if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
- sscreen->b.family >= CHIP_STONEY) {
+ sscreen->b.family == CHIP_STONEY) {
switch (format) {
case PIPE_FORMAT_ETC1_RGB8:
case PIPE_FORMAT_ETC2_RGB8:
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index b7ebb48e6a9..d560aae1f7d 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -306,7 +306,6 @@ static void si_set_tesseval_regs(struct si_shader *shader,
static void si_shader_ls(struct si_shader *shader)
{
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
unsigned vgpr_comp_cnt;
uint64_t va;
@@ -321,30 +320,21 @@ static void si_shader_ls(struct si_shader *shader)
* VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1;
- num_user_sgprs = SI_LS_NUM_USER_SGPR;
- num_sgprs = shader->config.num_sgprs;
- if (num_user_sgprs > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 2;
- }
- assert(num_sgprs <= 104);
-
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B528_SGPRS((num_sgprs - 1) / 8) |
+ S_00B528_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B528_DX10_CLAMP(1) |
S_00B528_FLOAT_MODE(shader->config.float_mode);
- shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
+ shader->config.rsrc2 = S_00B52C_USER_SGPR(SI_LS_NUM_USER_SGPR) |
S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
}
static void si_shader_hs(struct si_shader *shader)
{
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
uint64_t va;
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
@@ -354,32 +344,22 @@ static void si_shader_hs(struct si_shader *shader)
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
- num_user_sgprs = SI_TCS_NUM_USER_SGPR;
- num_sgprs = shader->config.num_sgprs;
- /* One SGPR after user SGPRs is pre-loaded with tessellation factor
- * buffer offset. */
- if ((num_user_sgprs + 1) > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 1 + 2;
- }
- assert(num_sgprs <= 104);
-
si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, va >> 40);
si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B428_SGPRS((num_sgprs - 1) / 8) |
+ S_00B428_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B428_DX10_CLAMP(1) |
S_00B428_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
- S_00B42C_USER_SGPR(num_user_sgprs) |
+ S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) |
S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
}
static void si_shader_es(struct si_shader *shader)
{
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
+ unsigned num_user_sgprs;
unsigned vgpr_comp_cnt;
uint64_t va;
@@ -400,21 +380,13 @@ static void si_shader_es(struct si_shader *shader)
} else
unreachable("invalid shader selector type");
- num_sgprs = shader->config.num_sgprs;
- /* One SGPR after user SGPRs is pre-loaded with es2gs_offset */
- if ((num_user_sgprs + 1) > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 1 + 2;
- }
- assert(num_sgprs <= 104);
-
si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
shader->selector->esgs_itemsize / 4);
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B328_SGPRS((num_sgprs - 1) / 8) |
+ S_00B328_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B328_DX10_CLAMP(1) |
S_00B328_FLOAT_MODE(shader->config.float_mode));
@@ -458,7 +430,6 @@ static void si_shader_gs(struct si_shader *shader)
unsigned gsvs_itemsize = shader->selector->max_gsvs_emit_size >> 2;
unsigned gs_num_invocations = shader->selector->gs_num_invocations;
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
uint64_t va;
unsigned max_stream = shader->selector->max_gs_stream;
@@ -494,22 +465,13 @@ static void si_shader_gs(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40);
- num_user_sgprs = SI_GS_NUM_USER_SGPR;
- num_sgprs = shader->config.num_sgprs;
- /* Two SGPRs after user SGPRs are pre-loaded with gs2vs_offset, gs_wave_id */
- if ((num_user_sgprs + 2) > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 2 + 2;
- }
- assert(num_sgprs <= 104);
-
si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B228_SGPRS((num_sgprs - 1) / 8) |
+ S_00B228_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B228_DX10_CLAMP(1) |
S_00B228_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
- S_00B22C_USER_SGPR(num_user_sgprs) |
+ S_00B22C_USER_SGPR(SI_GS_NUM_USER_SGPR) |
S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
}
@@ -523,7 +485,7 @@ static void si_shader_gs(struct si_shader *shader)
static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
{
struct si_pm4_state *pm4;
- unsigned num_sgprs, num_user_sgprs;
+ unsigned num_user_sgprs;
unsigned nparams, vgpr_comp_cnt;
uint64_t va;
unsigned window_space =
@@ -566,13 +528,6 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
} else
unreachable("invalid shader selector type");
- num_sgprs = shader->config.num_sgprs;
- if (num_user_sgprs > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 2;
- }
- assert(num_sgprs <= 104);
-
/* VS is required to export at least one param. */
nparams = MAX2(shader->info.nr_param_exports, 1);
si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
@@ -594,7 +549,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40);
si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B128_SGPRS((num_sgprs - 1) / 8) |
+ S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B128_DX10_CLAMP(1) |
S_00B128_FLOAT_MODE(shader->config.float_mode));
@@ -684,7 +639,6 @@ static void si_shader_ps(struct si_shader *shader)
struct tgsi_shader_info *info = &shader->selector->info;
struct si_pm4_state *pm4;
unsigned spi_ps_in_control, spi_shader_col_format, cb_shader_mask;
- unsigned num_sgprs, num_user_sgprs;
unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
uint64_t va;
bool has_centroid;
@@ -771,23 +725,14 @@ static void si_shader_ps(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);
- num_user_sgprs = SI_PS_NUM_USER_SGPR;
- num_sgprs = shader->config.num_sgprs;
- /* One SGPR after user SGPRs is pre-loaded with {prim_mask, lds_offset} */
- if ((num_user_sgprs + 1) > num_sgprs) {
- /* Last 2 reserved SGPRs are used for VCC */
- num_sgprs = num_user_sgprs + 1 + 2;
- }
- assert(num_sgprs <= 104);
-
si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B028_SGPRS((num_sgprs - 1) / 8) |
+ S_00B028_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B028_DX10_CLAMP(1) |
S_00B028_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
- S_00B02C_USER_SGPR(num_user_sgprs) |
+ S_00B02C_USER_SGPR(SI_PS_NUM_USER_SGPR) |
S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
/* Prefer RE_Z if the shader is complex enough. The requirement is either:
diff --git a/src/gallium/drivers/swr/Makefile.am b/src/gallium/drivers/swr/Makefile.am
index f08806aaf77..d6d6e7dc611 100644
--- a/src/gallium/drivers/swr/Makefile.am
+++ b/src/gallium/drivers/swr/Makefile.am
@@ -28,4 +28,96 @@ noinst_LTLIBRARIES = libmesaswr.la
libmesaswr_la_SOURCES = $(LOADER_SOURCES)
-EXTRA_DIST = Makefile.sources-arch
+COMMON_CXXFLAGS = \
+ $(GALLIUM_DRIVER_CFLAGS) \
+ $(LLVM_CFLAGS) \
+ -I$(builddir)/rasterizer/scripts \
+ -I$(builddir)/rasterizer/jitter \
+ -I$(srcdir)/rasterizer \
+ -I$(srcdir)/rasterizer/core \
+ -I$(srcdir)/rasterizer/jitter
+
+COMMON_SOURCES = \
+ $(CXX_SOURCES) \
+ $(COMMON_CXX_SOURCES) \
+ $(CORE_CXX_SOURCES) \
+ $(JITTER_CXX_SOURCES) \
+ $(MEMORY_CXX_SOURCES) \
+ $(BUILT_SOURCES)
+
+BUILT_SOURCES = \
+ rasterizer/scripts/gen_knobs.cpp \
+ rasterizer/scripts/gen_knobs.h \
+ rasterizer/jitter/state_llvm.h \
+ rasterizer/jitter/builder_gen.h \
+ rasterizer/jitter/builder_gen.cpp \
+ rasterizer/jitter/builder_x86.h \
+ rasterizer/jitter/builder_x86.cpp
+
+rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py rasterizer/scripts/templates/knobs.template
+ $(PYTHON2) $(PYTHON_FLAGS) \
+ $(srcdir)/rasterizer/scripts/gen_knobs.py \
+ rasterizer/scripts
+
+rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py rasterizer/core/state.h
+ $(PYTHON2) $(PYTHON_FLAGS) \
+ $(srcdir)/rasterizer/jitter/scripts/gen_llvm_types.py \
+ --input $(srcdir)/rasterizer/core/state.h \
+ --output rasterizer/jitter/state_llvm.h
+
+rasterizer/jitter/builder_gen.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
+ $(PYTHON2) $(PYTHON_FLAGS) \
+ $(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
+ --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
+ --output rasterizer/jitter/builder_gen.h \
+ --gen_h
+
+rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
+ $(PYTHON2) $(PYTHON_FLAGS) \
+ $(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
+ --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
+ --output rasterizer/jitter/builder_gen.cpp \
+ --gen_cpp
+
+rasterizer/jitter/builder_x86.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
+ $(PYTHON2) $(PYTHON_FLAGS) \
+ $(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
+ --output rasterizer/jitter/builder_x86.h \
+ --gen_x86_h
+
+rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
+ $(PYTHON2) $(PYTHON_FLAGS) \
+ $(srcdir)/rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
+ --output rasterizer/jitter/builder_x86.cpp \
+ --gen_x86_cpp
+
+
+COMMON_LIBADD = \
+ $(top_builddir)/src/gallium/auxiliary/libgallium.la \
+ $(top_builddir)/src/mesa/libmesagallium.la
+
+lib_LTLIBRARIES = libswrAVX.la libswrAVX2.la
+
+libswrAVX_la_CXXFLAGS = \
+ -march=core-avx-i \
+ -DKNOB_ARCH=KNOB_ARCH_AVX \
+ $(COMMON_CXXFLAGS)
+
+libswrAVX_la_SOURCES = \
+ $(COMMON_SOURCES)
+
+libswrAVX_la_LIBADD = \
+ $(COMMON_LIBADD)
+
+libswrAVX2_la_CXXFLAGS = \
+ -march=core-avx2 \
+ -DKNOB_ARCH=KNOB_ARCH_AVX2 \
+ $(COMMON_CXXFLAGS)
+
+libswrAVX2_la_SOURCES = \
+ $(COMMON_SOURCES)
+
+libswrAVX2_la_LIBADD = \
+ $(COMMON_LIBADD)
+
+include $(top_srcdir)/install-gallium-links.mk
diff --git a/src/gallium/drivers/swr/Makefile.sources b/src/gallium/drivers/swr/Makefile.sources
index 72247211184..be3d8f027f8 100644
--- a/src/gallium/drivers/swr/Makefile.sources
+++ b/src/gallium/drivers/swr/Makefile.sources
@@ -21,3 +21,94 @@
LOADER_SOURCES := \
swr_loader.cpp
+
+CXX_SOURCES := \
+ swr_clear.cpp \
+ swr_context.cpp \
+ swr_context.h \
+ swr_context_llvm.h \
+ swr_draw.cpp \
+ swr_public.h \
+ swr_resource.h \
+ swr_screen.cpp \
+ swr_screen.h \
+ swr_state.cpp \
+ swr_state.h \
+ swr_tex_sample.cpp \
+ swr_tex_sample.h \
+ swr_scratch.h \
+ swr_scratch.cpp \
+ swr_shader.cpp \
+ swr_memory.h \
+ swr_fence.h \
+ swr_fence.cpp \
+ swr_query.h \
+ swr_query.cpp
+
+COMMON_CXX_SOURCES := \
+ rasterizer/common/containers.hpp \
+ rasterizer/common/formats.cpp \
+ rasterizer/common/formats.h \
+ rasterizer/common/isa.hpp \
+ rasterizer/common/os.h \
+ rasterizer/common/rdtsc_buckets.cpp \
+ rasterizer/common/rdtsc_buckets.h \
+ rasterizer/common/rdtsc_buckets_shared.h \
+ rasterizer/common/rdtsc_buckets_shared.h \
+ rasterizer/common/simdintrin.h \
+ rasterizer/common/swr_assert.cpp \
+ rasterizer/common/swr_assert.h
+
+CORE_CXX_SOURCES := \
+ rasterizer/core/api.cpp \
+ rasterizer/core/api.h \
+ rasterizer/core/arena.h \
+ rasterizer/core/backend.cpp \
+ rasterizer/core/backend.h \
+ rasterizer/core/blend.h \
+ rasterizer/core/clip.cpp \
+ rasterizer/core/clip.h \
+ rasterizer/core/context.h \
+ rasterizer/core/depthstencil.h \
+ rasterizer/core/fifo.hpp \
+ rasterizer/core/format_traits.h \
+ rasterizer/core/format_types.h \
+ rasterizer/core/frontend.cpp \
+ rasterizer/core/frontend.h \
+ rasterizer/core/knobs.h \
+ rasterizer/core/knobs_init.h \
+ rasterizer/core/multisample.cpp \
+ rasterizer/core/multisample.h \
+ rasterizer/core/pa_avx.cpp \
+ rasterizer/core/pa.h \
+ rasterizer/core/rasterizer.cpp \
+ rasterizer/core/rasterizer.h \
+ rasterizer/core/rdtsc_core.cpp \
+ rasterizer/core/rdtsc_core.h \
+ rasterizer/core/ringbuffer.h \
+ rasterizer/core/state.h \
+ rasterizer/core/threads.cpp \
+ rasterizer/core/threads.h \
+ rasterizer/core/tilemgr.cpp \
+ rasterizer/core/tilemgr.h \
+ rasterizer/core/utils.cpp \
+ rasterizer/core/utils.h
+
+JITTER_CXX_SOURCES := \
+ rasterizer/jitter/blend_jit.cpp \
+ rasterizer/jitter/blend_jit.h \
+ rasterizer/jitter/builder.cpp \
+ rasterizer/jitter/builder.h \
+ rasterizer/jitter/builder_misc.cpp \
+ rasterizer/jitter/builder_misc.h \
+ rasterizer/jitter/fetch_jit.cpp \
+ rasterizer/jitter/fetch_jit.h \
+ rasterizer/jitter/JitManager.cpp \
+ rasterizer/jitter/JitManager.h \
+ rasterizer/jitter/streamout_jit.cpp \
+ rasterizer/jitter/streamout_jit.h
+
+MEMORY_CXX_SOURCES := \
+ rasterizer/memory/ClearTile.cpp \
+ rasterizer/memory/LoadTile.cpp \
+ rasterizer/memory/StoreTile.cpp
diff --git a/src/gallium/drivers/swr/Makefile.sources-arch b/src/gallium/drivers/swr/Makefile.sources-arch
deleted file mode 100644
index a04b1203c7c..00000000000
--- a/src/gallium/drivers/swr/Makefile.sources-arch
+++ /dev/null
@@ -1,111 +0,0 @@
-# Copyright (C) 2015 Intel Corporation. All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-CXX_SOURCES := \
- swr_clear.cpp \
- swr_context.cpp \
- swr_context.h \
- swr_context_llvm.h \
- swr_draw.cpp \
- swr_public.h \
- swr_resource.h \
- swr_screen.cpp \
- swr_screen.h \
- swr_state.cpp \
- swr_state.h \
- swr_tex_sample.cpp \
- swr_tex_sample.h \
- swr_scratch.h \
- swr_scratch.cpp \
- swr_shader.cpp \
- swr_memory.h \
- swr_fence.h \
- swr_fence.cpp \
- swr_query.h \
- swr_query.cpp
-
-COMMON_CXX_SOURCES := \
- rasterizer/common/containers.hpp \
- rasterizer/common/formats.cpp \
- rasterizer/common/formats.h \
- rasterizer/common/isa.hpp \
- rasterizer/common/os.h \
- rasterizer/common/rdtsc_buckets.cpp \
- rasterizer/common/rdtsc_buckets.h \
- rasterizer/common/rdtsc_buckets_shared.h \
- rasterizer/common/rdtsc_buckets_shared.h \
- rasterizer/common/simdintrin.h \
- rasterizer/common/swr_assert.cpp \
- rasterizer/common/swr_assert.h
-
-CORE_CXX_SOURCES := \
- rasterizer/core/api.cpp \
- rasterizer/core/api.h \
- rasterizer/core/arena.h \
- rasterizer/core/backend.cpp \
- rasterizer/core/backend.h \
- rasterizer/core/blend.h \
- rasterizer/core/clip.cpp \
- rasterizer/core/clip.h \
- rasterizer/core/context.h \
- rasterizer/core/depthstencil.h \
- rasterizer/core/fifo.hpp \
- rasterizer/core/format_traits.h \
- rasterizer/core/format_types.h \
- rasterizer/core/frontend.cpp \
- rasterizer/core/frontend.h \
- rasterizer/core/knobs.h \
- rasterizer/core/knobs_init.h \
- rasterizer/core/multisample.cpp \
- rasterizer/core/multisample.h \
- rasterizer/core/pa_avx.cpp \
- rasterizer/core/pa.h \
- rasterizer/core/rasterizer.cpp \
- rasterizer/core/rasterizer.h \
- rasterizer/core/rdtsc_core.cpp \
- rasterizer/core/rdtsc_core.h \
- rasterizer/core/ringbuffer.h \
- rasterizer/core/state.h \
- rasterizer/core/threads.cpp \
- rasterizer/core/threads.h \
- rasterizer/core/tilemgr.cpp \
- rasterizer/core/tilemgr.h \
- rasterizer/core/utils.cpp \
- rasterizer/core/utils.h
-
-JITTER_CXX_SOURCES := \
- rasterizer/jitter/blend_jit.cpp \
- rasterizer/jitter/blend_jit.h \
- rasterizer/jitter/builder.cpp \
- rasterizer/jitter/builder.h \
- rasterizer/jitter/builder_misc.cpp \
- rasterizer/jitter/builder_misc.h \
- rasterizer/jitter/fetch_jit.cpp \
- rasterizer/jitter/fetch_jit.h \
- rasterizer/jitter/JitManager.cpp \
- rasterizer/jitter/JitManager.h \
- rasterizer/jitter/streamout_jit.cpp \
- rasterizer/jitter/streamout_jit.h
-
-MEMORY_CXX_SOURCES := \
- rasterizer/memory/ClearTile.cpp \
- rasterizer/memory/LoadTile.cpp \
- rasterizer/memory/StoreTile.cpp
diff --git a/src/gallium/drivers/swr/avx/Makefile.am b/src/gallium/drivers/swr/avx/Makefile.am
deleted file mode 100644
index 384f1a7eecf..00000000000
--- a/src/gallium/drivers/swr/avx/Makefile.am
+++ /dev/null
@@ -1,99 +0,0 @@
-# Copyright (C) 2015 Intel Corporation. All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-include ../Makefile.sources-arch
-include $(top_srcdir)/src/gallium/Automake.inc
-
-VPATH = $(srcdir) $(srcdir)/..
-
-AM_CXXFLAGS = \
- -march=core-avx-i \
- -DKNOB_ARCH=KNOB_ARCH_AVX \
- $(GALLIUM_DRIVER_CFLAGS) \
- $(LLVM_CFLAGS) \
- -I$(builddir)/rasterizer/scripts \
- -I$(builddir)/rasterizer/jitter \
- -I$(srcdir)/../rasterizer \
- -I$(srcdir)/../rasterizer/core \
- -I$(srcdir)/../rasterizer/jitter
-
-lib_LTLIBRARIES = libswrAVX.la
-
-BUILT_SOURCES = \
- rasterizer/scripts/gen_knobs.cpp \
- rasterizer/scripts/gen_knobs.h \
- rasterizer/jitter/state_llvm.h \
- rasterizer/jitter/builder_gen.h \
- rasterizer/jitter/builder_gen.cpp \
- rasterizer/jitter/builder_x86.h \
- rasterizer/jitter/builder_x86.cpp
-
-libswrAVX_la_SOURCES = \
- $(CXX_SOURCES) \
- $(COMMON_CXX_SOURCES) \
- $(CORE_CXX_SOURCES) \
- $(JITTER_CXX_SOURCES) \
- $(MEMORY_CXX_SOURCES) \
- $(BUILT_SOURCES)
-
-rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py rasterizer/scripts/templates/knobs.template
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/scripts/gen_knobs.py \
- rasterizer/scripts
-
-rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py rasterizer/core/state.h
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_types.py \
- --input $(srcdir)/../rasterizer/core/state.h \
- --output rasterizer/jitter/state_llvm.h
-
-rasterizer/jitter/builder_gen.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
- --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
- --output rasterizer/jitter/builder_gen.h \
- --gen_h
-
-rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
- --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
- --output rasterizer/jitter/builder_gen.cpp \
- --gen_cpp
-
-rasterizer/jitter/builder_x86.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
- --output rasterizer/jitter/builder_x86.h \
- --gen_x86_h
-
-rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
- --output rasterizer/jitter/builder_x86.cpp \
- --gen_x86_cpp
-
-
-libswrAVX_la_LIBADD = \
- $(top_builddir)/src/gallium/auxiliary/libgallium.la \
- $(top_builddir)/src/mesa/libmesagallium.la
-
-include $(top_srcdir)/install-gallium-links.mk
diff --git a/src/gallium/drivers/swr/avx2/Makefile.am b/src/gallium/drivers/swr/avx2/Makefile.am
deleted file mode 100644
index a3968ecd95e..00000000000
--- a/src/gallium/drivers/swr/avx2/Makefile.am
+++ /dev/null
@@ -1,99 +0,0 @@
-# Copyright (C) 2015 Intel Corporation. All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-include ../Makefile.sources-arch
-include $(top_srcdir)/src/gallium/Automake.inc
-
-VPATH = $(srcdir) $(srcdir)/..
-
-AM_CXXFLAGS = \
- -march=core-avx2 \
- -DKNOB_ARCH=KNOB_ARCH_AVX2 \
- $(GALLIUM_DRIVER_CFLAGS) \
- $(LLVM_CFLAGS) \
- -I$(builddir)/rasterizer/scripts \
- -I$(builddir)/rasterizer/jitter \
- -I$(srcdir)/../rasterizer \
- -I$(srcdir)/../rasterizer/core \
- -I$(srcdir)/../rasterizer/jitter
-
-lib_LTLIBRARIES = libswrAVX2.la
-
-BUILT_SOURCES = \
- rasterizer/scripts/gen_knobs.cpp \
- rasterizer/scripts/gen_knobs.h \
- rasterizer/jitter/state_llvm.h \
- rasterizer/jitter/builder_gen.h \
- rasterizer/jitter/builder_gen.cpp \
- rasterizer/jitter/builder_x86.h \
- rasterizer/jitter/builder_x86.cpp
-
-libswrAVX2_la_SOURCES = \
- $(CXX_SOURCES) \
- $(COMMON_CXX_SOURCES) \
- $(CORE_CXX_SOURCES) \
- $(JITTER_CXX_SOURCES) \
- $(MEMORY_CXX_SOURCES) \
- $(BUILT_SOURCES)
-
-rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py rasterizer/scripts/templates/knobs.template
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/scripts/gen_knobs.py \
- rasterizer/scripts
-
-rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py rasterizer/core/state.h
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_types.py \
- --input $(srcdir)/../rasterizer/core/state.h \
- --output rasterizer/jitter/state_llvm.h
-
-rasterizer/jitter/builder_gen.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
- --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
- --output rasterizer/jitter/builder_gen.h \
- --gen_h
-
-rasterizer/jitter/builder_gen.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
- --input $(LLVM_INCLUDEDIR)/llvm/IR/IRBuilder.h \
- --output rasterizer/jitter/builder_gen.cpp \
- --gen_cpp
-
-rasterizer/jitter/builder_x86.h: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
- --output rasterizer/jitter/builder_x86.h \
- --gen_x86_h
-
-rasterizer/jitter/builder_x86.cpp: rasterizer/jitter/scripts/gen_llvm_ir_macros.py
- $(PYTHON2) $(PYTHON_FLAGS) \
- $(srcdir)/../rasterizer/jitter/scripts/gen_llvm_ir_macros.py \
- --output rasterizer/jitter/builder_x86.cpp \
- --gen_x86_cpp
-
-
-libswrAVX2_la_LIBADD = \
- $(top_builddir)/src/gallium/auxiliary/libgallium.la \
- $(top_builddir)/src/mesa/libmesagallium.la
-
-include $(top_srcdir)/install-gallium-links.mk
diff --git a/src/gallium/tests/unit/SConscript b/src/gallium/tests/unit/SConscript
index a816e2fea41..0650804018b 100644
--- a/src/gallium/tests/unit/SConscript
+++ b/src/gallium/tests/unit/SConscript
@@ -24,10 +24,8 @@ for progname in progs:
target = progname,
source = progname + '.c',
)
-
- env.Alias(progname, env.InstallProgram(prog))
-
- # http://www.scons.org/wiki/UnitTests
- test_alias = env.Alias('unit', [prog], prog[0].abspath)
- AlwaysBuild(test_alias)
-
+ if progname not in [
+ 'u_cache_test', # too long
+ 'translate_test', # unreliable
+ ]:
+ env.UnitTest(progname, prog)
diff --git a/src/gallium/tests/unit/pipe_barrier_test.c b/src/gallium/tests/unit/pipe_barrier_test.c
index bab6acadb80..bb7989a79fd 100644
--- a/src/gallium/tests/unit/pipe_barrier_test.c
+++ b/src/gallium/tests/unit/pipe_barrier_test.c
@@ -35,37 +35,78 @@
#include <stdio.h>
+#include <stdlib.h>
#include "os/os_thread.h"
#include "os/os_time.h"
+#include "util/u_atomic.h"
#define NUM_THREADS 10
+static int verbosity = 0;
+
static pipe_thread threads[NUM_THREADS];
static pipe_barrier barrier;
static int thread_ids[NUM_THREADS];
+static volatile int waiting = 0;
+static volatile int proceeded = 0;
+
+
+#define LOG(fmt, ...) \
+ if (verbosity > 0) { \
+ fprintf(stdout, fmt, ##__VA_ARGS__); \
+ }
+
+#define CHECK(_cond) \
+ if (!(_cond)) { \
+ fprintf(stderr, "%s:%u: `%s` failed\n", __FILE__, __LINE__, #_cond); \
+ _exit(EXIT_FAILURE); \
+ }
+
static PIPE_THREAD_ROUTINE(thread_function, thread_data)
{
int thread_id = *((int *) thread_data);
- printf("thread %d starting\n", thread_id);
- os_time_sleep(thread_id * 1000 * 1000);
- printf("thread %d before barrier\n", thread_id);
+ LOG("thread %d starting\n", thread_id);
+ os_time_sleep(thread_id * 100 * 1000);
+ LOG("thread %d before barrier\n", thread_id);
+
+ CHECK(p_atomic_read(&proceeded) == 0);
+ p_atomic_inc(&waiting);
+
pipe_barrier_wait(&barrier);
- printf("thread %d exiting\n", thread_id);
+
+ CHECK(p_atomic_read(&waiting) == NUM_THREADS);
+
+ p_atomic_inc(&proceeded);
+
+ LOG("thread %d exiting\n", thread_id);
return 0;
}
-int main()
+int main(int argc, char *argv[])
{
int i;
- printf("pipe_barrier_test starting\n");
+ for (i = 1; i < argc; ++i) {
+ const char *arg = argv[i];
+ if (strcmp(arg, "-v") == 0) {
+ ++verbosity;
+ } else {
+ fprintf(stderr, "error: unrecognized option `%s`\n", arg);
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ // Disable buffering
+ setbuf(stdout, NULL);
+
+ LOG("pipe_barrier_test starting\n");
pipe_barrier_init(&barrier, NUM_THREADS);
@@ -78,9 +119,11 @@ int main()
pipe_thread_wait(threads[i]);
}
+ CHECK(p_atomic_read(&proceeded) == NUM_THREADS);
+
pipe_barrier_destroy(&barrier);
- printf("pipe_barrier_test exiting\n");
+ LOG("pipe_barrier_test exiting\n");
return 0;
}
diff --git a/src/gallium/tests/unit/translate_test.c b/src/gallium/tests/unit/translate_test.c
index fc49862c2ca..7ca606750a9 100644
--- a/src/gallium/tests/unit/translate_test.c
+++ b/src/gallium/tests/unit/translate_test.c
@@ -70,8 +70,9 @@ int main(int argc, char** argv)
util_cpu_detect();
- if(argc <= 1)
- {}
+ if (argc <= 1 ||
+ !strcmp(argv[1], "default") )
+ create_fn = translate_create;
else if (!strcmp(argv[1], "generic"))
create_fn = translate_generic_create;
else if (!strcmp(argv[1], "x86"))
@@ -129,7 +130,7 @@ int main(int argc, char** argv)
if (!create_fn)
{
- printf("Usage: ./translate_test [generic|x86|nosse|sse|sse2|sse3|sse4.1]\n");
+ printf("Usage: ./translate_test [default|generic|x86|nosse|sse|sse2|sse3|sse4.1]\n");
return 2;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 3f307f4ef70..aedb5a23f02 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1988,10 +1988,11 @@ fs_visitor::assign_constant_locations()
*/
const unsigned int max_push_components = 16 * 8;
- /* For vulkan we don't limit the max_chunk_size. We set it to 32 float =
- * 128 bytes, which is the maximum vulkan push constant size.
+ /* We push small arrays, but no bigger than 16 floats. This is big enough
+ * for a vec4 but hopefully not large enough to push out other stuff. We
+ * should probably use a better heuristic at some point.
*/
- const unsigned int max_chunk_size = 32;
+ const unsigned int max_chunk_size = 16;
unsigned int num_push_constants = 0;
unsigned int num_pull_constants = 0;
@@ -2018,8 +2019,14 @@ fs_visitor::assign_constant_locations()
if (!contiguous[u]) {
unsigned chunk_size = u - chunk_start + 1;
- if (num_push_constants + chunk_size <= max_push_components &&
- chunk_size <= max_chunk_size) {
+ /* Decide whether we should push or pull this parameter. In the
+ * Vulkan driver, push constants are explicitly exposed via the API
+ * so we push everything. In GL, we only push small arrays.
+ */
+ if (stage_prog_data->pull_param == NULL ||
+ (num_push_constants + chunk_size <= max_push_components &&
+ chunk_size <= max_chunk_size)) {
+ assert(num_push_constants + chunk_size <= max_push_components);
for (unsigned j = chunk_start; j <= u; j++)
push_constant_loc[j] = num_push_constants++;
} else {
@@ -4515,7 +4522,7 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
case SHADER_OPCODE_MOV_INDIRECT:
/* Prior to Broadwell, we only have 8 address subregisters */
- return devinfo->gen < 8 ? 8 : inst->exec_size;
+ return devinfo->gen < 8 ? 8 : MIN2(inst->exec_size, 16);
default:
return inst->exec_size;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index ae80832544b..851cccf0f7c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -367,29 +367,53 @@ fs_generator::generate_mov_indirect(fs_inst *inst,
/* The destination stride of an instruction (in bytes) must be greater
* than or equal to the size of the rest of the instruction. Since the
* address register is of type UW, we can't use a D-type instruction.
- * In order to get around this, re re-type to UW and use a stride.
+ * In order to get around this, we retype to UW and use a stride.
*/
indirect_byte_offset =
retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
+ struct brw_reg ind_src;
if (devinfo->gen < 8) {
- /* Prior to broadwell, we have a restriction that the bottom 5 bits
- * of the base offset and the bottom 5 bits of the indirect must add
- * to less than 32. In other words, the hardware needs to be able to
- * add the bottom five bits of the two to get the subnumber and add
- * the next 7 bits of each to get the actual register number. Since
- * the indirect may cause us to cross a register boundary, this makes
- * it almost useless. We could try and do something clever where we
- * use a actual base offset if base_offset % 32 == 0 but that would
- * mean we were generating different code depending on the base
- * offset. Instead, for the sake of consistency, we'll just do the
- * add ourselves.
+ /* From the Haswell PRM section "Register Region Restrictions":
+ *
+ * "The lower bits of the AddressImmediate must not overflow to
+ * change the register address. The lower 5 bits of Address
+ * Immediate when added to lower 5 bits of address register gives
+ * the sub-register offset. The upper bits of Address Immediate
+ * when added to upper bits of address register gives the register
+ * address. Any overflow from sub-register offset is dropped."
+ *
+ * This restriction is only listed in the Haswell PRM but empirical
+ * testing indicates that it applies on all older generations and is
+ * lifted on Broadwell.
+ *
+ * Since the indirect may cause us to cross a register boundary, this
+ * makes the base offset almost useless. We could try and do
+ * something clever where we use an actual base offset if
+ * base_offset % 32 == 0 but that would mean we were generating
+ * different code depending on the base offset. Instead, for the
+ * sake of consistency, we'll just do the add ourselves.
*/
brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset));
- brw_MOV(p, dst, retype(brw_VxH_indirect(0, 0), dst.type));
+ ind_src = brw_VxH_indirect(0, 0);
} else {
brw_MOV(p, addr, indirect_byte_offset);
- brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
+ ind_src = brw_VxH_indirect(0, imm_byte_offset);
+ }
+
+ brw_inst *mov = brw_MOV(p, dst, retype(ind_src, dst.type));
+
+ if (devinfo->gen == 6 && dst.file == BRW_MESSAGE_REGISTER_FILE &&
+ !inst->get_next()->is_tail_sentinel() &&
+ ((fs_inst *)inst->get_next())->mlen > 0) {
+ /* From the Sandybridge PRM:
+ *
+ * "[Errata: DevSNB(SNB)] If MRF register is updated by any
+ * instruction that “indexed/indirect” source AND is followed by a
+ * send, the instruction requires a “Switch”. This is to avoid
+ * race condition where send may dispatch before MRF is updated."
+ */
+ brw_inst_set_thread_control(devinfo, mov, BRW_THREAD_SWITCH);
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index ab564bbcb9e..c16f1ed5477 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -2743,7 +2743,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
if (const_offset == NULL) {
fs_reg base_offset = retype(get_nir_src(instr->src[1]),
- BRW_REGISTER_TYPE_D);
+ BRW_REGISTER_TYPE_UD);
for (int i = 0; i < instr->num_components; i++)
VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 6143f65efa1..11db159109f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -285,7 +285,7 @@ public:
void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
dst_reg dst,
src_reg orig_src,
- int base_offset,
+ int base_offset,
src_reg indirect);
void emit_pull_constant_load_reg(dst_reg dst,
src_reg surf_index,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 33c5f07cec9..4b12a72910e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -758,7 +758,7 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
pull->mlen = 2;
pull->header_size = 1;
} else if (devinfo->gen >= 7) {
- dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
+ dst_reg grf_offset = dst_reg(this, glsl_type::uint_type);
grf_offset.type = offset_reg.type;
@@ -1587,21 +1587,21 @@ vec4_visitor::move_grf_array_access_to_scratch()
void
vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
dst_reg temp, src_reg orig_src,
- int base_offset, src_reg indirect)
+ int base_offset, src_reg indirect)
{
int reg_offset = base_offset + orig_src.reg_offset;
const unsigned index = prog_data->base.binding_table.pull_constants_start;
src_reg offset;
if (indirect.file != BAD_FILE) {
- offset = src_reg(this, glsl_type::int_type);
+ offset = src_reg(this, glsl_type::uint_type);
emit_before(block, inst, ADD(dst_reg(offset), indirect,
- brw_imm_d(reg_offset * 16)));
+ brw_imm_ud(reg_offset * 16)));
} else if (devinfo->gen >= 8) {
/* Store the offset in a GRF so we can send-from-GRF. */
- offset = src_reg(this, glsl_type::int_type);
- emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset * 16)));
+ offset = src_reg(this, glsl_type::uint_type);
+ emit_before(block, inst, MOV(dst_reg(offset), brw_imm_ud(reg_offset * 16)));
} else {
offset = brw_imm_d(reg_offset * 16);
}
@@ -1629,6 +1629,12 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
void
vec4_visitor::move_uniform_array_access_to_pull_constants()
{
+ /* The Vulkan driver doesn't support pull constants other than UBOs so
+ * everything has to be pushed regardless.
+ */
+ if (stage_prog_data->pull_param == NULL)
+ return;
+
int pull_constant_loc[this->uniforms];
memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index dbec82fbd44..db9d94d3b34 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -932,7 +932,7 @@ static const __DRIextension *intelRobustScreenExtensions[] = {
NULL
};
-static bool
+static int
intel_get_param(__DRIscreen *psp, int param, int *value)
{
int ret;
@@ -943,20 +943,17 @@ intel_get_param(__DRIscreen *psp, int param, int *value)
gp.value = value;
ret = drmCommandWriteRead(psp->fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
- if (ret) {
- if (ret != -EINVAL)
+ if (ret < 0 && ret != -EINVAL)
_mesa_warning(NULL, "drm_i915_getparam: %d", ret);
- return false;
- }
- return true;
+ return ret;
}
static bool
intel_get_boolean(__DRIscreen *psp, int param)
{
int value = 0;
- return intel_get_param(psp, param, &value) && value;
+ return (intel_get_param(psp, param, &value) == 0) && value;
}
static void
@@ -1093,12 +1090,12 @@ intel_detect_sseu(struct intel_screen *intelScreen)
ret = intel_get_param(intelScreen->driScrnPriv, I915_PARAM_SUBSLICE_TOTAL,
&intelScreen->subslice_total);
- if (ret != -EINVAL)
+ if (ret < 0 && ret != -EINVAL)
goto err_out;
ret = intel_get_param(intelScreen->driScrnPriv,
I915_PARAM_EU_TOTAL, &intelScreen->eu_total);
- if (ret != -EINVAL)
+ if (ret < 0 && ret != -EINVAL)
goto err_out;
/* Without this information, we cannot get the right Braswell brandstrings,
@@ -1114,7 +1111,7 @@ intel_detect_sseu(struct intel_screen *intelScreen)
err_out:
intelScreen->subslice_total = -1;
intelScreen->eu_total = -1;
- _mesa_warning(NULL, "Failed to query GPU properties.\n");
+ _mesa_warning(NULL, "Failed to query GPU properties (%s).\n", strerror(ret));
}
static bool
diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c
index 4fd2dfef8cc..b4d04b4de5f 100644
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -704,6 +704,10 @@ st_DrawAtlasBitmaps(struct gl_context *ctx,
st_validate_state(st, ST_PIPELINE_RENDER);
sv = st_create_texture_sampler_view(pipe, stObj->pt);
+ if (!sv) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCallLists(bitmap text)");
+ return;
+ }
setup_render_state(ctx, sv, color, true);
@@ -793,6 +797,8 @@ st_DrawAtlasBitmaps(struct gl_context *ctx,
pipe_resource_reference(&vb.buffer, NULL);
+ pipe_sampler_view_reference(&sv, NULL);
+
/* We uploaded modified constants, need to invalidate them. */
st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS;
}
diff --git a/src/util/SConscript b/src/util/SConscript
index 5f3ecc1cdfc..73f34303976 100644
--- a/src/util/SConscript
+++ b/src/util/SConscript
@@ -47,17 +47,14 @@ env.Alias('mesautil', mesautil)
Export('mesautil')
-# http://www.scons.org/wiki/UnitTests
u_atomic_test = env.Program(
target = 'u_atomic_test',
source = ['u_atomic_test.c'],
)
-alias = env.Alias("u_atomic_test", u_atomic_test, u_atomic_test[0].abspath)
-AlwaysBuild(alias)
+env.UnitTest("u_atomic_test", u_atomic_test)
roundeven_test = env.Program(
target = 'roundeven_test',
source = ['roundeven_test.c'],
)
-alias = env.Alias("roundeven_test", roundeven_test, roundeven_test[0].abspath)
-AlwaysBuild(alias)
+env.UnitTest("roundeven_test", roundeven_test)