summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.sources1
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.c5
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_device_info.c12
-rw-r--r--src/mesa/drivers/dri/i965/brw_device_info.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw.c29
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp213
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h6
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_generator.cpp52
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp251
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp35
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp18
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_visitor.cpp3
-rw-r--r--src/mesa/drivers/dri/i965/brw_ir_fs.h5
-rw-r--r--src/mesa/drivers/dri/i965/brw_nir.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_surface_formats.c47
-rw-r--r--src/mesa/drivers/dri/i965/brw_surface_formats.h41
-rw-r--r--src/mesa/drivers/dri/i965/brw_util.c28
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp10
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h7
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_generator.cpp45
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_nir.cpp56
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp130
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp1
-rw-r--r--src/mesa/main/mtypes.h5
28 files changed, 616 insertions, 396 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 46895882414..2802ec9887c 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -53,6 +53,7 @@ i965_compiler_FILES = \
brw_shader.cpp \
brw_shader.h \
brw_surface_formats.c \
+ brw_surface_formats.h \
brw_util.c \
brw_util.h \
brw_vec4_builder.h \
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c
index 6509267a52e..46d9a40c2cf 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.c
+++ b/src/mesa/drivers/dri/i965/brw_compiler.c
@@ -82,7 +82,8 @@ shader_perf_log_mesa(void *data, const char *fmt, ...)
.lower_uadd_carry = true, \
.lower_usub_borrow = true, \
.lower_fdiv = true, \
- .native_integers = true
+ .native_integers = true, \
+ .vertex_id_zero_based = true
static const struct nir_shader_compiler_options scalar_nir_options = {
COMMON_OPTIONS,
@@ -155,7 +156,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
compiler->scalar_stage[MESA_SHADER_TESS_EVAL] =
devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TES", true);
compiler->scalar_stage[MESA_SHADER_GEOMETRY] =
- devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false);
+ devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", true);
compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true;
compiler->scalar_stage[MESA_SHADER_COMPUTE] = true;
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index 231e0001d54..cf0b088613b 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -100,6 +100,9 @@ struct brw_compiler {
bool precise_trig;
};
+struct brw_compiler *
+brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo);
+
/**
* Program key structures.
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 60b696cfb98..8ef5afea149 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -63,6 +63,7 @@
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8)
+#ifndef _3DPRIM_POINTLIST /* FIXME: Avoid clashing with defines from bdw_pack.h */
#define _3DPRIM_POINTLIST 0x01
#define _3DPRIM_LINELIST 0x02
#define _3DPRIM_LINESTRIP 0x03
@@ -86,6 +87,7 @@
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x16
#define _3DPRIM_PATCHLIST(n) ({ assert(n > 0 && n <= 32); 0x20 + (n - 1); })
+#endif /* bdw_pack.h */
/* We use this offset to be able to pass native primitive types in struct
* _mesa_prim::mode. Native primitive types are BRW_PRIM_OFFSET +
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c
index c703fb5d4cf..3666190fc36 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -482,3 +482,15 @@ brw_get_device_info(int devid)
return devinfo;
}
+
+const char *
+brw_get_device_name(int devid)
+{
+ switch (devid) {
+#undef CHIPSET
+#define CHIPSET(id, family, name) case id: return name;
+#include "pci_ids/i965_pci_ids.h"
+ default:
+ return NULL;
+ }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h b/src/mesa/drivers/dri/i965/brw_device_info.h
index c641ffc281e..4e7f3135960 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.h
+++ b/src/mesa/drivers/dri/i965/brw_device_info.h
@@ -144,3 +144,4 @@ struct brw_device_info
};
const struct brw_device_info *brw_get_device_info(int devid);
+const char *brw_get_device_name(int devid);
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index c295d91223c..afa8a4e9eae 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -54,23 +54,6 @@
#define FILE_DEBUG_FLAG DEBUG_PRIMS
-static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = {
- [GL_POINTS] =_3DPRIM_POINTLIST,
- [GL_LINES] = _3DPRIM_LINELIST,
- [GL_LINE_LOOP] = _3DPRIM_LINELOOP,
- [GL_LINE_STRIP] = _3DPRIM_LINESTRIP,
- [GL_TRIANGLES] = _3DPRIM_TRILIST,
- [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
- [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
- [GL_QUADS] = _3DPRIM_QUADLIST,
- [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
- [GL_POLYGON] = _3DPRIM_POLYGON,
- [GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
- [GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
- [GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
- [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
-};
-
static const GLenum reduced_prim[GL_POLYGON+1] = {
[GL_POINTS] = GL_POINTS,
@@ -85,18 +68,6 @@ static const GLenum reduced_prim[GL_POLYGON+1] = {
[GL_POLYGON] = GL_TRIANGLES
};
-uint32_t
-get_hw_prim_for_gl_prim(int mode)
-{
- if (mode >= BRW_PRIM_OFFSET)
- return mode - BRW_PRIM_OFFSET;
- else {
- assert(mode < ARRAY_SIZE(prim_to_hw_prim));
- return prim_to_hw_prim[mode];
- }
-}
-
-
/* When the primitive changes, set a state bit and re-validate. Not
* the nicest and would rather deal with this by having all the
* programs be immune to the active primitive (ie. cope with all
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 1a6a229e444..33c4adc4705 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -174,7 +174,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
* CSE can later notice that those loads are all the same and eliminate
* the redundant ones.
*/
- fs_reg vec4_offset = vgrf(glsl_type::int_type);
+ fs_reg vec4_offset = vgrf(glsl_type::uint_type);
bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~0xf));
int scale = 1;
@@ -433,7 +433,6 @@ fs_reg::fs_reg(struct ::brw_reg reg) :
{
this->reg_offset = 0;
this->subreg_offset = 0;
- this->reladdr = NULL;
this->stride = 1;
if (this->file == IMM &&
(this->type != BRW_REGISTER_TYPE_V &&
@@ -448,7 +447,6 @@ fs_reg::equals(const fs_reg &r) const
{
return (this->backend_reg::equals(r) &&
subreg_offset == r.subreg_offset &&
- !reladdr && !r.reladdr &&
stride == r.stride);
}
@@ -853,7 +851,10 @@ fs_inst::regs_read(int arg) const
assert(src[2].file == IMM);
unsigned region_length = src[2].ud;
- if (src[0].file == FIXED_GRF) {
+ if (src[0].file == UNIFORM) {
+ assert(region_length % 4 == 0);
+ return region_length / 4;
+ } else if (src[0].file == FIXED_GRF) {
/* If the start of the region is not register aligned, then
* there's some portion of the register that's technically
* unread at the beginning.
@@ -867,7 +868,7 @@ fs_inst::regs_read(int arg) const
* unread portion at the beginning.
*/
if (src[0].subnr)
- region_length += src[0].subnr * type_sz(src[0].type);
+ region_length += src[0].subnr;
return DIV_ROUND_UP(region_length, REG_SIZE);
} else {
@@ -1023,7 +1024,6 @@ fs_visitor::import_uniforms(fs_visitor *v)
this->push_constant_loc = v->push_constant_loc;
this->pull_constant_loc = v->pull_constant_loc;
this->uniforms = v->uniforms;
- this->param_size = v->param_size;
}
fs_reg *
@@ -1926,9 +1926,7 @@ fs_visitor::compact_virtual_grfs()
* maximum number of fragment shader uniform components (64). If
* there are too many of these, they'd fill up all of register space.
* So, this will push some of them out to the pull constant buffer and
- * update the program to load them. We also use pull constants for all
- * indirect constant loads because we don't support indirect accesses in
- * registers yet.
+ * update the program to load them.
*/
void
fs_visitor::assign_constant_locations()
@@ -1937,20 +1935,21 @@ fs_visitor::assign_constant_locations()
if (dispatch_width != min_dispatch_width)
return;
- unsigned int num_pull_constants = 0;
-
- pull_constant_loc = ralloc_array(mem_ctx, int, uniforms);
- memset(pull_constant_loc, -1, sizeof(pull_constant_loc[0]) * uniforms);
-
bool is_live[uniforms];
memset(is_live, 0, sizeof(is_live));
+ /* For each uniform slot, a value of true indicates that the given slot and
+ * the next slot must remain contiguous. This is used to keep us from
+ * splitting arrays apart.
+ */
+ bool contiguous[uniforms];
+ memset(contiguous, 0, sizeof(contiguous));
+
/* First, we walk through the instructions and do two things:
*
* 1) Figure out which uniforms are live.
*
- * 2) Find all indirect access of uniform arrays and flag them as needing
- * to go into the pull constant buffer.
+ * 2) Mark any indirectly used ranges of registers as contiguous.
*
* Note that we don't move constant-indexed accesses to arrays. No
* testing has been done of the performance impact of this choice.
@@ -1960,20 +1959,19 @@ fs_visitor::assign_constant_locations()
if (inst->src[i].file != UNIFORM)
continue;
- if (inst->src[i].reladdr) {
- int uniform = inst->src[i].nr;
+ int constant_nr = inst->src[i].nr + inst->src[i].reg_offset;
- /* If this array isn't already present in the pull constant buffer,
- * add it.
- */
- if (pull_constant_loc[uniform] == -1) {
- assert(param_size[uniform]);
- for (int j = 0; j < param_size[uniform]; j++)
- pull_constant_loc[uniform + j] = num_pull_constants++;
+ if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) {
+ assert(inst->src[2].ud % 4 == 0);
+ unsigned last = constant_nr + (inst->src[2].ud / 4) - 1;
+ assert(last < uniforms);
+
+ for (unsigned j = constant_nr; j < last; j++) {
+ is_live[j] = true;
+ contiguous[j] = true;
}
+ is_live[last] = true;
} else {
- /* Mark the the one accessed uniform as live */
- int constant_nr = inst->src[i].nr + inst->src[i].reg_offset;
if (constant_nr >= 0 && constant_nr < (int) uniforms)
is_live[constant_nr] = true;
}
@@ -1988,29 +1986,48 @@ fs_visitor::assign_constant_locations()
* If changing this value, note the limitation about total_regs in
* brw_curbe.c.
*/
- unsigned int max_push_components = 16 * 8;
+ const unsigned int max_push_components = 16 * 8;
+
+ /* For vulkan we don't limit the max_chunk_size. We set it to 32 float =
+ * 128 bytes, which is the maximum vulkan push constant size.
+ */
+ const unsigned int max_chunk_size = 32;
+
unsigned int num_push_constants = 0;
+ unsigned int num_pull_constants = 0;
push_constant_loc = ralloc_array(mem_ctx, int, uniforms);
+ pull_constant_loc = ralloc_array(mem_ctx, int, uniforms);
- for (unsigned int i = 0; i < uniforms; i++) {
- if (!is_live[i] || pull_constant_loc[i] != -1) {
- /* This UNIFORM register is either dead, or has already been demoted
- * to a pull const. Mark it as no longer living in the param[] array.
- */
- push_constant_loc[i] = -1;
+ int chunk_start = -1;
+ for (unsigned u = 0; u < uniforms; u++) {
+ push_constant_loc[u] = -1;
+ pull_constant_loc[u] = -1;
+
+ if (!is_live[u])
continue;
- }
- if (num_push_constants < max_push_components) {
- /* Retain as a push constant. Record the location in the params[]
- * array.
- */
- push_constant_loc[i] = num_push_constants++;
- } else {
- /* Demote to a pull constant. */
- push_constant_loc[i] = -1;
- pull_constant_loc[i] = num_pull_constants++;
+ /* This is the first live uniform in the chunk */
+ if (chunk_start < 0)
+ chunk_start = u;
+
+ /* If this element does not need to be contiguous with the next, we
+ * split at this point and everthing between chunk_start and u forms a
+ * single chunk.
+ */
+ if (!contiguous[u]) {
+ unsigned chunk_size = u - chunk_start + 1;
+
+ if (num_push_constants + chunk_size <= max_push_components &&
+ chunk_size <= max_chunk_size) {
+ for (unsigned j = chunk_start; j <= u; j++)
+ push_constant_loc[j] = num_push_constants++;
+ } else {
+ for (unsigned j = chunk_start; j <= u; j++)
+ pull_constant_loc[j] = num_pull_constants++;
+ }
+
+ chunk_start = -1;
}
}
@@ -2041,51 +2058,67 @@ fs_visitor::assign_constant_locations()
* or VARYING_PULL_CONSTANT_LOAD instructions which load values into VGRFs.
*/
void
-fs_visitor::demote_pull_constants()
+fs_visitor::lower_constant_loads()
{
- foreach_block_and_inst (block, fs_inst, inst, cfg) {
+ const unsigned index = stage_prog_data->binding_table.pull_constants_start;
+
+ foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
+ /* Set up the annotation tracking for new generated instructions. */
+ const fs_builder ibld(this, block, inst);
+
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file != UNIFORM)
continue;
- int pull_index;
+ /* We'll handle this case later */
+ if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0)
+ continue;
+
unsigned location = inst->src[i].nr + inst->src[i].reg_offset;
- if (location >= uniforms) /* Out of bounds access */
- pull_index = -1;
- else
- pull_index = pull_constant_loc[location];
+ if (location >= uniforms)
+ continue; /* Out of bounds access */
+
+ int pull_index = pull_constant_loc[location];
if (pull_index == -1)
continue;
- /* Set up the annotation tracking for new generated instructions. */
- const fs_builder ibld(this, block, inst);
- const unsigned index = stage_prog_data->binding_table.pull_constants_start;
- fs_reg dst = vgrf(glsl_type::float_type);
-
assert(inst->src[i].stride == 0);
- /* Generate a pull load into dst. */
- if (inst->src[i].reladdr) {
- VARYING_PULL_CONSTANT_LOAD(ibld, dst,
- brw_imm_ud(index),
- *inst->src[i].reladdr,
- pull_index * 4);
- inst->src[i].reladdr = NULL;
- inst->src[i].stride = 1;
- } else {
- const fs_builder ubld = ibld.exec_all().group(8, 0);
- struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
- ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
- dst, brw_imm_ud(index), offset);
- inst->src[i].set_smear(pull_index & 3);
- }
- brw_mark_surface_used(prog_data, index);
+ fs_reg dst = vgrf(glsl_type::float_type);
+ const fs_builder ubld = ibld.exec_all().group(8, 0);
+ struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
+ ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+ dst, brw_imm_ud(index), offset);
/* Rewrite the instruction to use the temporary VGRF. */
inst->src[i].file = VGRF;
inst->src[i].nr = dst.nr;
inst->src[i].reg_offset = 0;
+ inst->src[i].set_smear(pull_index & 3);
+
+ brw_mark_surface_used(prog_data, index);
+ }
+
+ if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT &&
+ inst->src[0].file == UNIFORM) {
+
+ unsigned location = inst->src[0].nr + inst->src[0].reg_offset;
+ if (location >= uniforms)
+ continue; /* Out of bounds access */
+
+ int pull_index = pull_constant_loc[location];
+
+ if (pull_index == -1)
+ continue;
+
+ VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst,
+ brw_imm_ud(index),
+ inst->src[1],
+ pull_index * 4);
+ inst->remove(block);
+
+ brw_mark_surface_used(prog_data, index);
}
}
invalidate_live_intervals();
@@ -2787,10 +2820,23 @@ fs_visitor::emit_repclear_shader()
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
int base_mrf = 1;
int color_mrf = base_mrf + 2;
+ fs_inst *mov;
+
+ if (uniforms == 1) {
+ mov = bld.exec_all().group(4, 0)
+ .MOV(brw_message_reg(color_mrf),
+ fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
+ } else {
+ struct brw_reg reg =
+ brw_reg(BRW_GENERAL_REGISTER_FILE,
+ 2, 3, 0, 0, BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_8,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
- fs_inst *mov = bld.exec_all().group(4, 0)
- .MOV(brw_message_reg(color_mrf),
- fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
+ mov = bld.exec_all().group(4, 0)
+ .MOV(vec4(brw_message_reg(color_mrf)), fs_reg(reg));
+ }
fs_inst *write;
if (key->nr_color_regions == 1) {
@@ -2819,8 +2865,10 @@ fs_visitor::emit_repclear_shader()
assign_curb_setup();
/* Now that we have the uniform assigned, go ahead and force it to a vec4. */
- assert(mov->src[0].file == FIXED_GRF);
- mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0);
+ if (uniforms == 1) {
+ assert(mov->src[0].file == FIXED_GRF);
+ mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0);
+ }
}
/**
@@ -4467,6 +4515,10 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
return 8;
+ case SHADER_OPCODE_MOV_INDIRECT:
+ /* Prior to Broadwell, we only have 8 address subregisters */
+ return devinfo->gen < 8 ? 8 : inst->exec_size;
+
default:
return inst->exec_size;
}
@@ -4749,9 +4801,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
break;
case UNIFORM:
fprintf(file, "u%d", inst->src[i].nr + inst->src[i].reg_offset);
- if (inst->src[i].reladdr) {
- fprintf(file, "+reladdr");
- } else if (inst->src[i].subreg_offset) {
+ if (inst->src[i].subreg_offset) {
fprintf(file, "+%d.%d", inst->src[i].reg_offset,
inst->src[i].subreg_offset);
}
@@ -4862,7 +4912,6 @@ fs_visitor::get_instruction_generating_reg(fs_inst *start,
{
if (end == start ||
end->is_partial_write() ||
- reg.reladdr ||
!reg.equals(end->dst)) {
return NULL;
} else {
@@ -5080,7 +5129,7 @@ fs_visitor::optimize()
bld = fs_builder(this, 64);
assign_constant_locations();
- demote_pull_constants();
+ lower_constant_loads();
validate();
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index d4acc8798be..2b00129b4ba 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -139,7 +139,7 @@ public:
void split_virtual_grfs();
bool compact_virtual_grfs();
void assign_constant_locations();
- void demote_pull_constants();
+ void lower_constant_loads();
void invalidate_live_intervals();
void calculate_live_intervals();
void calculate_register_pressure();
@@ -225,7 +225,7 @@ public:
void emit_unspill(bblock_t *block, fs_inst *inst, fs_reg reg,
uint32_t spill_offset, int count);
void emit_spill(bblock_t *block, fs_inst *inst, fs_reg reg,
- uint32_t spill_offset, int count);
+ uint32_t spill_offset, int count, bool we_all);
void emit_nir_code();
void nir_setup_inputs();
@@ -326,8 +326,6 @@ public:
const struct brw_vue_map *input_vue_map;
- int *param_size;
-
int *virtual_grf_start;
int *virtual_grf_end;
brw::fs_live_variables *live_intervals;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index c883fe3f259..ae80832544b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -351,23 +351,47 @@ fs_generator::generate_mov_indirect(fs_inst *inst,
unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr;
- /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
- struct brw_reg addr = vec8(brw_address_reg(0));
+ if (indirect_byte_offset.file == BRW_IMMEDIATE_VALUE) {
+ imm_byte_offset += indirect_byte_offset.ud;
- /* The destination stride of an instruction (in bytes) must be greater
- * than or equal to the size of the rest of the instruction. Since the
- * address register is of type UW, we can't use a D-type instruction.
- * In order to get around this, re re-type to UW and use a stride.
- */
- indirect_byte_offset =
- retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
+ reg.nr = imm_byte_offset / REG_SIZE;
+ reg.subnr = imm_byte_offset % REG_SIZE;
+ brw_MOV(p, dst, reg);
+ } else {
+ /* Prior to Broadwell, there are only 8 address registers. */
+ assert(inst->exec_size == 8 || devinfo->gen >= 8);
- /* Prior to Broadwell, there are only 8 address registers. */
- assert(inst->exec_size == 8 || devinfo->gen >= 8);
+ /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
+ struct brw_reg addr = vec8(brw_address_reg(0));
- brw_MOV(p, addr, indirect_byte_offset);
- brw_inst_set_mask_control(devinfo, brw_last_inst, BRW_MASK_DISABLE);
- brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
+ /* The destination stride of an instruction (in bytes) must be greater
+ * than or equal to the size of the rest of the instruction. Since the
+ * address register is of type UW, we can't use a D-type instruction.
+ * In order to get around this, re re-type to UW and use a stride.
+ */
+ indirect_byte_offset =
+ retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
+
+ if (devinfo->gen < 8) {
+ /* Prior to broadwell, we have a restriction that the bottom 5 bits
+ * of the base offset and the bottom 5 bits of the indirect must add
+ * to less than 32. In other words, the hardware needs to be able to
+ * add the bottom five bits of the two to get the subnumber and add
+ * the next 7 bits of each to get the actual register number. Since
+ * the indirect may cause us to cross a register boundary, this makes
+ * it almost useless. We could try and do something clever where we
+ * use a actual base offset if base_offset % 32 == 0 but that would
+ * mean we were generating different code depending on the base
+ * offset. Instead, for the sake of consistency, we'll just do the
+ * add ourselves.
+ */
+ brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset));
+ brw_MOV(p, dst, retype(brw_VxH_indirect(0, 0), dst.type));
+ } else {
+ brw_MOV(p, addr, indirect_byte_offset);
+ brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
+ }
+ }
}
void
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 5cca91ec5b4..60d58b19ef2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -179,15 +179,6 @@ fs_visitor::nir_setup_uniforms()
return;
uniforms = nir->num_uniforms / 4;
-
- nir_foreach_variable(var, &nir->uniforms) {
- /* UBO's and atomics don't take up space in the uniform file */
- if (var->interface_type != NULL || var->type->contains_atomic())
- continue;
-
- if (type_size_scalar(var->type) > 0)
- param_size[var->data.driver_location / 4] = type_size_scalar(var->type);
- }
}
static bool
@@ -865,9 +856,41 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
unreachable("Should have been lowered by borrow_to_arith().");
case nir_op_umod:
+ case nir_op_irem:
+ /* According to the sign table for INT DIV in the Ivy Bridge PRM, it
+ * appears that our hardware just does the right thing for signed
+ * remainder.
+ */
bld.emit(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]);
break;
+ case nir_op_imod: {
+ /* Get a regular C-style remainder. If a % b == 0, set the predicate. */
+ bld.emit(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]);
+
+ /* Math instructions don't support conditional mod */
+ inst = bld.MOV(bld.null_reg_d(), result);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+ /* Now, we need to determine if signs of the sources are different.
+ * When we XOR the sources, the top bit is 0 if they are the same and 1
+ * if they are different. We can then use a conditional modifier to
+ * turn that into a predicate. This leads us to an XOR.l instruction.
+ */
+ fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_D);
+ inst = bld.XOR(tmp, op[0], op[1]);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->conditional_mod = BRW_CONDITIONAL_L;
+
+ /* If the result of the initial remainder operation is non-zero and the
+ * two sources have different signs, add in a copy of op[1] to get the
+ * final integer modulus value.
+ */
+ inst = bld.ADD(result, result, op[1]);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ break;
+ }
+
case nir_op_flt:
case nir_op_ilt:
case nir_op_ult:
@@ -1242,6 +1265,8 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
{
fs_reg image(UNIFORM, deref->var->data.driver_location / 4,
BRW_REGISTER_TYPE_UD);
+ fs_reg indirect;
+ unsigned indirect_max = 0;
for (const nir_deref *tail = &deref->deref; tail->child;
tail = tail->child) {
@@ -1253,7 +1278,7 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
image = offset(image, bld, base * element_size);
if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
- fs_reg tmp = vgrf(glsl_type::int_type);
+ fs_reg tmp = vgrf(glsl_type::uint_type);
if (devinfo->gen == 7 && !devinfo->is_haswell) {
/* IVB hangs when trying to access an invalid surface index with
@@ -1271,15 +1296,31 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
bld.MOV(tmp, get_nir_src(deref_array->indirect));
}
+ indirect_max += element_size * (tail->type->length - 1);
+
bld.MUL(tmp, tmp, brw_imm_ud(element_size * 4));
- if (image.reladdr)
- bld.ADD(*image.reladdr, *image.reladdr, tmp);
- else
- image.reladdr = new(mem_ctx) fs_reg(tmp);
+ if (indirect.file == BAD_FILE) {
+ indirect = tmp;
+ } else {
+ bld.ADD(indirect, indirect, tmp);
+ }
}
}
- return image;
+ if (indirect.file == BAD_FILE) {
+ return image;
+ } else {
+ /* Emit a pile of MOVs to load the uniform into a temporary. The
+ * dead-code elimination pass will get rid of what we don't use.
+ */
+ fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, BRW_IMAGE_PARAM_SIZE);
+ for (unsigned j = 0; j < BRW_IMAGE_PARAM_SIZE; j++) {
+ bld.emit(SHADER_OPCODE_MOV_INDIRECT,
+ offset(tmp, bld, j), offset(image, bld, j),
+ indirect, brw_imm_ud((indirect_max + 1) * 4));
+ }
+ return tmp;
+ }
}
void
@@ -2368,6 +2409,82 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
nir_emit_shared_atomic(bld, BRW_AOP_CMPWR, instr);
break;
+ case nir_intrinsic_load_shared: {
+ assert(devinfo->gen >= 7);
+
+ fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM);
+
+ /* Get the offset to read from */
+ fs_reg offset_reg;
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+ if (const_offset) {
+ offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]);
+ } else {
+ offset_reg = vgrf(glsl_type::uint_type);
+ bld.ADD(offset_reg,
+ retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(instr->const_index[0]));
+ }
+
+ /* Read the vector */
+ fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+ 1 /* dims */,
+ instr->num_components,
+ BRW_PREDICATE_NONE);
+ read_result.type = dest.type;
+ for (int i = 0; i < instr->num_components; i++)
+ bld.MOV(offset(dest, bld, i), offset(read_result, bld, i));
+
+ break;
+ }
+
+ case nir_intrinsic_store_shared: {
+ assert(devinfo->gen >= 7);
+
+ /* Block index */
+ fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM);
+
+ /* Value */
+ fs_reg val_reg = get_nir_src(instr->src[0]);
+
+ /* Writemask */
+ unsigned writemask = instr->const_index[1];
+
+ /* Combine groups of consecutive enabled channels in one write
+ * message. We use ffs to find the first enabled channel and then ffs on
+ * the bit-inverse, down-shifted writemask to determine the length of
+ * the block of enabled bits.
+ */
+ while (writemask) {
+ unsigned first_component = ffs(writemask) - 1;
+ unsigned length = ffs(~(writemask >> first_component)) - 1;
+ fs_reg offset_reg;
+
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
+ if (const_offset) {
+ offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0] +
+ 4 * first_component);
+ } else {
+ offset_reg = vgrf(glsl_type::uint_type);
+ bld.ADD(offset_reg,
+ retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(instr->const_index[0] + 4 * first_component));
+ }
+
+ emit_untyped_write(bld, surf_index, offset_reg,
+ offset(val_reg, bld, first_component),
+ 1 /* dims */, length,
+ BRW_PREDICATE_NONE);
+
+ /* Clear the bits in the writemask that we just wrote, then try
+ * again to see if more channels are left.
+ */
+ writemask &= (15 << (first_component + length));
+ }
+
+ break;
+ }
+
default:
nir_emit_intrinsic(bld, instr);
break;
@@ -2578,12 +2695,28 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
/* Offsets are in bytes but they should always be multiples of 4 */
assert(const_offset->u32[0] % 4 == 0);
src.reg_offset = const_offset->u32[0] / 4;
+
+ for (unsigned j = 0; j < instr->num_components; j++) {
+ bld.MOV(offset(dest, bld, j), offset(src, bld, j));
+ }
} else {
- src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
- }
+ fs_reg indirect = retype(get_nir_src(instr->src[0]),
+ BRW_REGISTER_TYPE_UD);
- for (unsigned j = 0; j < instr->num_components; j++) {
- bld.MOV(offset(dest, bld, j), offset(src, bld, j));
+ /* We need to pass a size to the MOV_INDIRECT but we don't want it to
+ * go past the end of the uniform. In order to keep the n'th
+ * component from running past, we subtract off the size of all but
+ * one component of the vector.
+ */
+ assert(instr->const_index[1] >= instr->num_components * 4);
+ unsigned read_size = instr->const_index[1] -
+ (instr->num_components - 1) * 4;
+
+ for (unsigned j = 0; j < instr->num_components; j++) {
+ bld.emit(SHADER_OPCODE_MOV_INDIRECT,
+ offset(dest, bld, j), offset(src, bld, j),
+ indirect, brw_imm_ud(read_size));
+ }
}
break;
}
@@ -2691,82 +2824,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
- case nir_intrinsic_load_shared: {
- assert(devinfo->gen >= 7);
-
- fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM);
-
- /* Get the offset to read from */
- fs_reg offset_reg;
- nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
- if (const_offset) {
- offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]);
- } else {
- offset_reg = vgrf(glsl_type::uint_type);
- bld.ADD(offset_reg,
- retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD),
- brw_imm_ud(instr->const_index[0]));
- }
-
- /* Read the vector */
- fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
- 1 /* dims */,
- instr->num_components,
- BRW_PREDICATE_NONE);
- read_result.type = dest.type;
- for (int i = 0; i < instr->num_components; i++)
- bld.MOV(offset(dest, bld, i), offset(read_result, bld, i));
-
- break;
- }
-
- case nir_intrinsic_store_shared: {
- assert(devinfo->gen >= 7);
-
- /* Block index */
- fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM);
-
- /* Value */
- fs_reg val_reg = get_nir_src(instr->src[0]);
-
- /* Writemask */
- unsigned writemask = instr->const_index[1];
-
- /* Combine groups of consecutive enabled channels in one write
- * message. We use ffs to find the first enabled channel and then ffs on
- * the bit-inverse, down-shifted writemask to determine the length of
- * the block of enabled bits.
- */
- while (writemask) {
- unsigned first_component = ffs(writemask) - 1;
- unsigned length = ffs(~(writemask >> first_component)) - 1;
- fs_reg offset_reg;
-
- nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
- if (const_offset) {
- offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0] +
- 4 * first_component);
- } else {
- offset_reg = vgrf(glsl_type::uint_type);
- bld.ADD(offset_reg,
- retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD),
- brw_imm_ud(instr->const_index[0] + 4 * first_component));
- }
-
- emit_untyped_write(bld, surf_index, offset_reg,
- offset(val_reg, bld, first_component),
- 1 /* dims */, length,
- BRW_PREDICATE_NONE);
-
- /* Clear the bits in the writemask that we just wrote, then try
- * again to see if more channels are left.
- */
- writemask &= (15 << (first_component + length));
- }
-
- break;
- }
-
case nir_intrinsic_load_input: {
fs_reg src;
if (stage == MESA_SHADER_VERTEX) {
@@ -3022,6 +3079,10 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, tex_offset;
+ /* Our hardware requires a LOD for buffer textures */
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
+ lod = brw_imm_d(0);
+
for (unsigned i = 0; i < instr->num_srcs; i++) {
fs_reg src = get_nir_src(instr->src[i].src);
switch (instr->src[i].src_type) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 2347cd5d33f..8396854fcb1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -751,6 +751,7 @@ fs_visitor::emit_unspill(bblock_t *block, fs_inst *inst, fs_reg dst,
dst);
unspill_inst->offset = spill_offset;
unspill_inst->regs_written = reg_size;
+ unspill_inst->force_writemask_all = true;
if (!gen7_read) {
unspill_inst->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
@@ -764,11 +765,11 @@ fs_visitor::emit_unspill(bblock_t *block, fs_inst *inst, fs_reg dst,
void
fs_visitor::emit_spill(bblock_t *block, fs_inst *inst, fs_reg src,
- uint32_t spill_offset, int count)
+ uint32_t spill_offset, int count, bool we_all)
{
int reg_size = 1;
int spill_base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
- if (dispatch_width == 16 && count % 2 == 0) {
+ if (inst->exec_size == 16 && count % 2 == 0) {
spill_base_mrf = FIRST_SPILL_MRF(devinfo->gen);
reg_size = 2;
}
@@ -784,6 +785,8 @@ fs_visitor::emit_spill(bblock_t *block, fs_inst *inst, fs_reg src,
spill_inst->offset = spill_offset + i * reg_size * REG_SIZE;
spill_inst->mlen = 1 + reg_size; /* header, value */
spill_inst->base_mrf = spill_base_mrf;
+ spill_inst->force_writemask_all = we_all;
+ spill_inst->force_sechalf = inst->force_sechalf;
}
}
@@ -805,30 +808,13 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
*/
foreach_block_and_inst(block, fs_inst, inst, cfg) {
for (unsigned int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file == VGRF) {
+ if (inst->src[i].file == VGRF)
spill_costs[inst->src[i].nr] += loop_scale;
-
- /* Register spilling logic assumes full-width registers; smeared
- * registers have a width of 1 so if we try to spill them we'll
- * generate invalid assembly. This shouldn't be a problem because
- * smeared registers are only used as short-term temporaries when
- * loading pull constants, so spilling them is unlikely to reduce
- * register pressure anyhow.
- */
- if (!inst->src[i].is_contiguous()) {
- no_spill[inst->src[i].nr] = true;
- }
- }
}
- if (inst->dst.file == VGRF) {
+ if (inst->dst.file == VGRF)
spill_costs[inst->dst.nr] += inst->regs_written * loop_scale;
- if (!inst->dst.is_contiguous()) {
- no_spill[inst->dst.nr] = true;
- }
- }
-
switch (inst->opcode) {
case BRW_OPCODE_DO:
@@ -938,12 +924,15 @@ fs_visitor::spill_reg(int spill_reg)
* inst->regs_written(), then we need to unspill the destination
* since we write back out all of the regs_written().
*/
- if (inst->is_partial_write())
+ bool need_unspill = inst->is_partial_write() ||
+ type_sz(inst->dst.type) != 4;
+ if (need_unspill)
emit_unspill(block, inst, spill_src, subset_spill_offset,
inst->regs_written);
emit_spill(block, inst, spill_src, subset_spill_offset,
- inst->regs_written);
+ inst->regs_written,
+ need_unspill || inst->force_writemask_all);
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
index 75734d2cfa0..4adffdd75fb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
@@ -717,6 +717,15 @@ namespace {
bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c),
brw_imm_d(-(int)scale(widths[c] - s) - 1),
BRW_CONDITIONAL_GE);
+
+ /* Mask off all but the bits we actually want. Otherwise, if
+ * we pass a negative number into the hardware when it's
+ * expecting something like UINT8, it will happily clamp it to
+ * +255 for us.
+ */
+ if (is_signed && widths[c] < 32)
+ bld.AND(offset(dst, bld, c), offset(dst, bld, c),
+ brw_imm_d((1 << widths[c]) - 1));
}
}
@@ -787,6 +796,15 @@ namespace {
/* Convert to integer. */
bld.RNDE(offset(fdst, bld, c), offset(fdst, bld, c));
bld.MOV(offset(dst, bld, c), offset(fdst, bld, c));
+
+ /* Mask off all but the bits we actually want. Otherwise, if
+ * we pass a negative number into the hardware when it's
+ * expecting something like UINT8, it will happily clamp it to
+ * +255 for us.
+ */
+ if (is_signed && widths[c] < 32)
+ bld.AND(offset(dst, bld, c), offset(dst, bld, c),
+ brw_imm_d((1 << widths[c]) - 1));
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 4fbcf2bd105..daabf708b06 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1069,9 +1069,6 @@ fs_visitor::init()
this->spilled_any_registers = false;
this->do_dual_src = false;
-
- if (dispatch_width == 8)
- this->param_size = rzalloc_array(mem_ctx, int, stage_prog_data->nr_params);
}
fs_visitor::~fs_visitor()
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index c3eec2efb42..e4f20f4ffc9 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -58,8 +58,6 @@ public:
*/
int subreg_offset;
- fs_reg *reladdr;
-
/** Register region horizontal stride */
uint8_t stride;
};
@@ -136,8 +134,7 @@ component(fs_reg reg, unsigned idx)
static inline bool
is_uniform(const fs_reg &reg)
{
- return (reg.stride == 0 || reg.is_null()) &&
- (!reg.reladdr || is_uniform(*reg.reladdr));
+ return (reg.stride == 0 || reg.is_null());
}
/**
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index c62840a6e15..83921891d1c 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -468,7 +468,7 @@ brw_preprocess_nir(nir_shader *nir, bool is_scalar)
/* Get rid of split copies */
nir = nir_optimize(nir, is_scalar);
- OPT(nir_remove_dead_variables);
+ OPT(nir_remove_dead_variables, nir_var_local);
return nir;
}
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 3112c0c4014..b093a87bb82 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -279,7 +279,7 @@ brw_get_scratch_bo(struct brw_context *brw,
void brwInitFragProgFuncs( struct dd_function_table *functions )
{
- assert(functions->ProgramStringNotify == _tnl_program_string);
+ /* assert(functions->ProgramStringNotify == _tnl_program_string); */
functions->NewProgram = brwNewProgram;
functions->DeleteProgram = brwDeleteProgram;
diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c
index ba9b5b90b63..fa7878dec82 100644
--- a/src/mesa/drivers/dri/i965/brw_surface_formats.c
+++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c
@@ -25,21 +25,8 @@
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
-
-struct surface_format_info {
- bool exists;
- int sampling;
- int filtering;
- int shadow_compare;
- int chroma_key;
- int render_target;
- int alpha_blend;
- int input_vb;
- int streamed_output_vb;
- int color_processing;
- int lossless_compression;
- const char *name;
-};
+#include "brw_wm.h"
+#include "brw_surface_formats.h"
/* This macro allows us to write the table almost as it appears in the PRM,
* while restructuring it to turn it into the C code we want.
@@ -86,7 +73,7 @@ struct surface_format_info {
* - VOL4_Part1 section 3.9.11 Render Target Write.
* - Render Target Surface Types [SKL+]
*/
-const struct surface_format_info surface_formats[] = {
+const struct brw_surface_format_info surface_formats[] = {
/* smpl filt shad CK RT AB VB SO color ccs_e */
SF( Y, 50, x, x, Y, Y, Y, Y, x, 90, R32G32B32A32_FLOAT)
SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32B32A32_SINT)
@@ -218,7 +205,7 @@ const struct surface_format_info surface_formats[] = {
SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE0)
SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1)
SF( x, x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM)
- SF( x, x, x, x, x, x, x, x, x, x, A4B4G4R4_UNORM)
+ SF( Y, Y, x, Y, 90, x, x, x, x, x, A4B4G4R4_UNORM)
SF( x, x, x, x, x, x, x, x, x, x, L8A8_UINT)
SF( x, x, x, x, x, x, x, x, x, x, L8A8_SINT)
SF( Y, Y, x, 45, Y, Y, Y, x, x, x, R8_UNORM)
@@ -281,13 +268,13 @@ const struct surface_format_info surface_formats[] = {
SF(70, 70, x, x, x, x, x, x, x, x, BC6H_UF16)
SF( x, x, x, x, x, x, x, x, x, x, PLANAR_420_8)
SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB)
- SF( x, x, x, x, x, x, x, x, x, x, ETC1_RGB8)
- SF( x, x, x, x, x, x, x, x, x, x, ETC2_RGB8)
- SF( x, x, x, x, x, x, x, x, x, x, EAC_R11)
- SF( x, x, x, x, x, x, x, x, x, x, EAC_RG11)
- SF( x, x, x, x, x, x, x, x, x, x, EAC_SIGNED_R11)
- SF( x, x, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11)
- SF( x, x, x, x, x, x, x, x, x, x, ETC2_SRGB8)
+ SF(80, 80, x, x, x, x, x, x, x, x, ETC1_RGB8)
+ SF(80, 80, x, x, x, x, x, x, x, x, ETC2_RGB8)
+ SF(80, 80, x, x, x, x, x, x, x, x, EAC_R11)
+ SF(80, 80, x, x, x, x, x, x, x, x, EAC_RG11)
+ SF(80, 80, x, x, x, x, x, x, x, x, EAC_SIGNED_R11)
+ SF(80, 80, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11)
+ SF(80, 80, x, x, x, x, x, x, x, x, ETC2_SRGB8)
SF( x, x, x, x, x, x, x, x, x, x, R16G16B16_UINT)
SF( x, x, x, x, x, x, x, x, x, x, R16G16B16_SINT)
SF( x, x, x, x, x, x, x, x, x, x, R32_SFIXED)
@@ -302,10 +289,10 @@ const struct surface_format_info surface_formats[] = {
SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_SINT)
SF( x, x, x, x, x, x, x, x, x, x, R64G64B64A64_PASSTHRU)
SF( x, x, x, x, x, x, x, x, x, x, R64G64B64_PASSTHRU)
- SF( x, x, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA)
- SF( x, x, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA)
- SF( x, x, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8)
- SF( x, x, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8)
+ SF(80, 80, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA)
+ SF(80, 80, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA)
+ SF(80, 80, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8)
+ SF(80, 80, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8)
SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_UINT)
SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_SINT)
SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_FLT16)
@@ -618,7 +605,7 @@ brw_init_surface_formats(struct brw_context *brw)
for (format = MESA_FORMAT_NONE + 1; format < MESA_FORMAT_COUNT; format++) {
uint32_t texture, render;
- const struct surface_format_info *rinfo, *tinfo;
+ const struct brw_surface_format_info *rinfo, *tinfo;
bool is_integer = _mesa_is_format_integer_color(format);
render = texture = brw_format_for_mesa_format(format);
@@ -828,7 +815,7 @@ bool
brw_losslessly_compressible_format(const struct brw_context *brw,
uint32_t brw_format)
{
- const struct surface_format_info * const sinfo =
+ const struct brw_surface_format_info * const sinfo =
&surface_formats[brw_format];
const int gen = brw->gen * 10;
diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.h b/src/mesa/drivers/dri/i965/brw_surface_formats.h
new file mode 100644
index 00000000000..a5cd49f5260
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_surface_formats.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+
+struct brw_surface_format_info {
+ bool exists;
+ int sampling;
+ int filtering;
+ int shadow_compare;
+ int chroma_key;
+ int render_target;
+ int alpha_blend;
+ int input_vb;
+ int streamed_output_vb;
+ int color_processing;
+ int lossless_compression;
+ const char *name;
+};
+
+extern const struct brw_surface_format_info surface_formats[];
diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c
index bf7f9c61c84..934b6b8d627 100644
--- a/src/mesa/drivers/dri/i965/brw_util.c
+++ b/src/mesa/drivers/dri/i965/brw_util.c
@@ -98,3 +98,31 @@ GLuint brw_translate_blend_factor( GLenum factor )
unreachable("not reached");
}
}
+
+static const GLuint prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = {
+ [GL_POINTS] =_3DPRIM_POINTLIST,
+ [GL_LINES] = _3DPRIM_LINELIST,
+ [GL_LINE_LOOP] = _3DPRIM_LINELOOP,
+ [GL_LINE_STRIP] = _3DPRIM_LINESTRIP,
+ [GL_TRIANGLES] = _3DPRIM_TRILIST,
+ [GL_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
+ [GL_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
+ [GL_QUADS] = _3DPRIM_QUADLIST,
+ [GL_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
+ [GL_POLYGON] = _3DPRIM_POLYGON,
+ [GL_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
+ [GL_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
+ [GL_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
+ [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
+};
+
+uint32_t
+get_hw_prim_for_gl_prim(int mode)
+{
+ if (mode >= BRW_PRIM_OFFSET)
+ return mode - BRW_PRIM_OFFSET;
+ else {
+ assert(mode < ARRAY_SIZE(prim_to_hw_prim));
+ return prim_to_hw_prim[mode];
+ }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index b9cf3f657a1..00253438c44 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -496,11 +496,6 @@ vec4_visitor::split_uniform_registers()
inst->src[i].reg_offset = 0;
}
}
-
- /* Update that everything is now vector-sized. */
- for (int i = 0; i < this->uniforms; i++) {
- this->uniform_size[i] = 1;
- }
}
void
@@ -558,7 +553,6 @@ vec4_visitor::pack_uniform_registers()
* push constants.
*/
for (int src = 0; src < uniforms; src++) {
- assert(src < uniform_array_size);
int size = chans_used[src];
if (size == 0)
@@ -794,7 +788,7 @@ vec4_visitor::move_push_constants_to_pull_constants()
dst_reg temp = dst_reg(this, glsl_type::vec4_type);
emit_pull_constant_load(block, inst, temp, inst->src[i],
- pull_constant_loc[uniform]);
+ pull_constant_loc[uniform], src_reg());
inst->src[i].file = temp.file;
inst->src[i].nr = temp.nr;
@@ -1599,8 +1593,6 @@ vec4_visitor::setup_uniforms(int reg)
* matter what, or the GPU would hang.
*/
if (devinfo->gen < 6 && this->uniforms == 0) {
- assert(this->uniforms < this->uniform_array_size);
-
stage_prog_data->param =
reralloc(NULL, stage_prog_data->param, const gl_constant_value *, 4);
for (unsigned int i = 0; i < 4; i++) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index d43a5a82052..6143f65efa1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -115,8 +115,6 @@ public:
*/
dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
- int *uniform_size;
- int uniform_array_size; /*< Size of the uniform_size array */
int uniforms;
src_reg shader_start_time;
@@ -278,8 +276,6 @@ public:
src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
src_reg *reladdr, int reg_offset);
- src_reg get_pull_constant_offset(bblock_t *block, vec4_instruction *inst,
- src_reg *reladdr, int reg_offset);
void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
dst_reg dst,
src_reg orig_src,
@@ -289,7 +285,8 @@ public:
void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
dst_reg dst,
src_reg orig_src,
- int base_offset);
+ int base_offset,
+ src_reg indirect);
void emit_pull_constant_load_reg(dst_reg dst,
src_reg surf_index,
src_reg offset,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 8409e820f09..4b3b08903c9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1399,6 +1399,48 @@ generate_set_simd4x2_header_gen9(struct brw_codegen *p,
}
static void
+generate_mov_indirect(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst, struct brw_reg reg,
+ struct brw_reg indirect, struct brw_reg length)
+{
+ assert(indirect.type == BRW_REGISTER_TYPE_UD);
+
+ unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr * (REG_SIZE / 2);
+
+ /* This instruction acts in align1 mode */
+ assert(inst->force_writemask_all || reg.writemask == 0xf);
+
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+
+ struct brw_reg addr = vec2(brw_address_reg(0));
+
+ /* We need to move the indirect value into the address register. In order
+ * to make things make some sense, we want to respect at least the X
+ * component of the swizzle. In order to do that, we need to convert the
+ * subnr (probably 0) to an align1 subnr and add in the swizzle. We then
+ * use a region of <8,4,0>:uw to pick off the first 2 bytes of the indirect
+ * and splat it out to all four channels of the given half of a0.
+ */
+ assert(brw_is_single_value_swizzle(indirect.swizzle));
+ indirect.subnr = (indirect.subnr * 4 + BRW_GET_SWZ(indirect.swizzle, 0)) * 2;
+ indirect = stride(retype(indirect, BRW_REGISTER_TYPE_UW), 8, 4, 0);
+
+ brw_ADD(p, addr, indirect, brw_imm_uw(imm_byte_offset));
+
+ /* Use a <4,1> region Vx1 region*/
+ struct brw_reg src = brw_VxH_indirect(0, 0);
+ src.width = BRW_WIDTH_4;
+ src.hstride = BRW_HORIZONTAL_STRIDE_1;
+
+ brw_MOV(p, dst, retype(src, reg.type));
+
+ brw_pop_insn_state(p);
+}
+
+static void
generate_code(struct brw_codegen *p,
const struct brw_compiler *compiler,
void *log_data,
@@ -1946,6 +1988,9 @@ generate_code(struct brw_codegen *p,
brw_WAIT(p);
break;
+ case SHADER_OPCODE_MOV_INDIRECT:
+ generate_mov_indirect(p, inst, dst, src[0], src[1], src[2]);
+
default:
unreachable("Unsupported opcode");
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 6c8fd06fb5e..4cb03adb2bf 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -132,15 +132,6 @@ void
vec4_visitor::nir_setup_uniforms()
{
uniforms = nir->num_uniforms / 16;
-
- nir_foreach_variable(var, &nir->uniforms) {
- /* UBO's and atomics don't take up space in the uniform file */
- if (var->interface_type != NULL || var->type->contains_atomic())
- continue;
-
- if (type_size_vec4(var->type) > 0)
- uniform_size[var->data.driver_location / 16] = type_size_vec4(var->type);
- }
}
void
@@ -708,12 +699,14 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
/* Offsets are in bytes but they should always be multiples of 16 */
assert(const_offset->u32[0] % 16 == 0);
src.reg_offset = const_offset->u32[0] / 16;
+
+ emit(MOV(dest, src));
} else {
- src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1);
- src.reladdr = new(mem_ctx) src_reg(tmp);
- }
+ src_reg indirect = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);
- emit(MOV(dest, src));
+ emit(SHADER_OPCODE_MOV_INDIRECT, dest, src,
+ indirect, brw_imm_ud(instr->const_index[1]));
+ }
break;
}
@@ -1128,9 +1121,41 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
break;
case nir_op_umod:
+ case nir_op_irem:
+ /* According to the sign table for INT DIV in the Ivy Bridge PRM, it
+ * appears that our hardware just does the right thing for signed
+ * remainder.
+ */
emit_math(SHADER_OPCODE_INT_REMAINDER, dst, op[0], op[1]);
break;
+ case nir_op_imod: {
+ /* Get a regular C-style remainder. If a % b == 0, set the predicate. */
+ inst = emit_math(SHADER_OPCODE_INT_REMAINDER, dst, op[0], op[1]);
+
+ /* Math instructions don't support conditional mod */
+ inst = emit(MOV(dst_null_d(), src_reg(dst)));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+ /* Now, we need to determine if signs of the sources are different.
+ * When we XOR the sources, the top bit is 0 if they are the same and 1
+ * if they are different. We can then use a conditional modifier to
+ * turn that into a predicate. This leads us to an XOR.l instruction.
+ */
+ src_reg tmp = src_reg(this, glsl_type::ivec4_type);
+ inst = emit(XOR(dst_reg(tmp), op[0], op[1]));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->conditional_mod = BRW_CONDITIONAL_L;
+
+ /* If the result of the initial remainder operation is non-zero and the
+ * two sources have different signs, add in a copy of op[1] to get the
+ * final integer modulus value.
+ */
+ inst = emit(ADD(dst, src_reg(dst), op[1]));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ break;
+ }
+
case nir_op_ldexp:
unreachable("not reached: should be handled by ldexp_to_arith()");
@@ -1630,7 +1655,6 @@ vec4_visitor::nir_emit_jump(nir_jump_instr *instr)
break;
case nir_jump_return:
- /* fall through */
default:
unreachable("unknown jump");
}
@@ -1700,6 +1724,10 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
nir_tex_instr_dest_size(instr));
dst_reg dest = get_nir_dest(instr->dest, instr->dest_type);
+ /* Our hardware requires a LOD for buffer textures */
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
+ lod = brw_imm_d(0);
+
/* Load the texture operation sources */
uint32_t constant_offset = 0;
for (unsigned i = 0; i < instr->num_srcs; i++) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
index 84aa89a7865..28aaaebd0b3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
@@ -59,8 +59,6 @@ vec4_tcs_visitor::emit_nir_code()
* copies VS outputs to TES inputs.
*/
uniforms = 2;
- uniform_size[0] = 1;
- uniform_size[1] = 1;
uint64_t varyings = key->outputs_written;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 2ab141fdf21..33c5f07cec9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1410,27 +1410,6 @@ vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst,
}
}
-src_reg
-vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst,
- src_reg *reladdr, int reg_offset)
-{
- if (reladdr) {
- src_reg index = src_reg(this, glsl_type::int_type);
-
- emit_before(block, inst, ADD(dst_reg(index), *reladdr,
- brw_imm_d(reg_offset * 16)));
-
- return index;
- } else if (devinfo->gen >= 8) {
- /* Store the offset in a GRF so we can send-from-GRF. */
- src_reg offset = src_reg(this, glsl_type::int_type);
- emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset * 16)));
- return offset;
- } else {
- return brw_imm_d(reg_offset * 16);
- }
-}
-
/**
* Emits an instruction before @inst to load the value named by @orig_src
* from scratch space at @base_offset to @temp.
@@ -1608,12 +1587,24 @@ vec4_visitor::move_grf_array_access_to_scratch()
void
vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
dst_reg temp, src_reg orig_src,
- int base_offset)
+ int base_offset, src_reg indirect)
{
int reg_offset = base_offset + orig_src.reg_offset;
const unsigned index = prog_data->base.binding_table.pull_constants_start;
- src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr,
- reg_offset);
+
+ src_reg offset;
+ if (indirect.file != BAD_FILE) {
+ offset = src_reg(this, glsl_type::int_type);
+
+ emit_before(block, inst, ADD(dst_reg(offset), indirect,
+ brw_imm_d(reg_offset * 16)));
+ } else if (devinfo->gen >= 8) {
+ /* Store the offset in a GRF so we can send-from-GRF. */
+ offset = src_reg(this, glsl_type::int_type);
+ emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset * 16)));
+ } else {
+ offset = brw_imm_d(reg_offset * 16);
+ }
emit_pull_constant_load_reg(temp,
brw_imm_ud(index),
@@ -1640,59 +1631,55 @@ vec4_visitor::move_uniform_array_access_to_pull_constants()
{
int pull_constant_loc[this->uniforms];
memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
- bool nested_reladdr;
- /* Walk through and find array access of uniforms. Put a copy of that
- * uniform in the pull constant buffer.
- *
- * Note that we don't move constant-indexed accesses to arrays. No
- * testing has been done of the performance impact of this choice.
+ /* First, walk through the instructions and determine which things need to
+ * be pulled. We mark something as needing to be pulled by setting
+ * pull_constant_loc to 0.
*/
- do {
- nested_reladdr = false;
-
- foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
- for (int i = 0 ; i < 3; i++) {
- if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
- continue;
+ foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
+ /* We only care about MOV_INDIRECT of a uniform */
+ if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT ||
+ inst->src[0].file != UNIFORM)
+ continue;
- int uniform = inst->src[i].nr;
+ int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset;
- if (inst->src[i].reladdr->reladdr)
- nested_reladdr = true; /* will need another pass */
+ for (unsigned j = 0; j < DIV_ROUND_UP(inst->src[2].ud, 16); j++)
+ pull_constant_loc[uniform_nr + j] = 0;
+ }
- /* If this array isn't already present in the pull constant buffer,
- * add it.
- */
- if (pull_constant_loc[uniform] == -1) {
- const gl_constant_value **values =
- &stage_prog_data->param[uniform * 4];
+ /* Next, we walk the list of uniforms and assign real pull constant
+ * locations and set their corresponding entries in pull_param.
+ */
+ for (int j = 0; j < this->uniforms; j++) {
+ if (pull_constant_loc[j] < 0)
+ continue;
- pull_constant_loc[uniform] = stage_prog_data->nr_pull_params / 4;
+ pull_constant_loc[j] = stage_prog_data->nr_pull_params / 4;
- assert(uniform < uniform_array_size);
- for (int j = 0; j < uniform_size[uniform] * 4; j++) {
- stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
- = values[j];
- }
- }
+ for (int i = 0; i < 4; i++) {
+ stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
+ = stage_prog_data->param[j * 4 + i];
+ }
+ }
- /* Set up the annotation tracking for new generated instructions. */
- base_ir = inst->ir;
- current_annotation = inst->annotation;
+ /* Finally, we can walk through the instructions and lower MOV_INDIRECT
+ * instructions to actual uniform pulls.
+ */
+ foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
+ /* We only care about MOV_INDIRECT of a uniform */
+ if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT ||
+ inst->src[0].file != UNIFORM)
+ continue;
- dst_reg temp = dst_reg(this, glsl_type::vec4_type);
+ int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset;
- emit_pull_constant_load(block, inst, temp, inst->src[i],
- pull_constant_loc[uniform]);
+ assert(inst->src[0].swizzle == BRW_SWIZZLE_NOOP);
- inst->src[i].file = temp.file;
- inst->src[i].nr = temp.nr;
- inst->src[i].reg_offset = temp.reg_offset;
- inst->src[i].reladdr = NULL;
- }
- }
- } while (nested_reladdr);
+ emit_pull_constant_load(block, inst, inst->dst, inst->src[0],
+ pull_constant_loc[uniform_nr], inst->src[1]);
+ inst->remove(block);
+ }
/* Now there are no accesses of the UNIFORM file with a reladdr, so
* no need to track them as larger-than-vec4 objects. This will be
@@ -1745,17 +1732,6 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
this->uniforms = 0;
-
- /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires
- * at least one. See setup_uniforms() in brw_vec4.cpp.
- */
- this->uniform_array_size = 1;
- if (prog_data) {
- this->uniform_array_size =
- MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 4), 1);
- }
-
- this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
}
vec4_visitor::~vec4_visitor()
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
index f3cfc8892d3..39f0c0b932d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
@@ -161,7 +161,6 @@ void
vec4_vs_visitor::setup_uniform_clipplane_values()
{
for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
- assert(this->uniforms < uniform_array_size);
this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
this->userplane[i].type = BRW_REGISTER_TYPE_F;
for (int j = 0; j < 4; ++j) {
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index b2060c282f4..1fbda420401 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2519,6 +2519,11 @@ struct gl_uniform_block
GLuint Binding;
/**
+ * Vulkan descriptor set qualifier for this block.
+ */
+ GLuint Set;
+
+ /**
* Minimum size (in bytes) of a buffer object to back this uniform buffer
* (GL_UNIFORM_BLOCK_DATA_SIZE).
*/