summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.am7
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.sources1
-rw-r--r--src/mesa/drivers/dri/i965/brw_cfg.cpp4
-rw-r--r--src/mesa/drivers/dri/i965/brw_cfg.h4
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.h9
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h10
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h24
-rw-r--r--src/mesa/drivers/dri/i965/brw_device_info.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_device_info.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_disasm.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_compact.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c22
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp125
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h5
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_builder.h15
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp22
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_cse.cpp3
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_generator.cpp87
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp116
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp51
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_visitor.cpp20
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs_surface_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_ir_fs.h7
-rw-r--r--src/mesa/drivers/dri/i965/brw_ir_vec4.h24
-rw-r--r--src/mesa/drivers/dri/i965/brw_nir.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_reg.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp339
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.cpp20
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.h6
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h6
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_cache.c19
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp115
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h122
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp157
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp31
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_generator.cpp485
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp14
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp14
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_nir.cpp84
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp12
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_surface_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c37
-rw-r--r--src/mesa/drivers/dri/i965/gen6_sf_state.c44
-rw-r--r--src/mesa/drivers/dri/i965/gen7_cs_state.c31
-rw-r--r--src/mesa/drivers/dri/i965/gen7_sf_state.c5
-rw-r--r--src/mesa/drivers/dri/i965/gen8_ps_state.c5
-rw-r--r--src/mesa/drivers/dri/i965/gen8_sf_state.c6
-rw-r--r--src/mesa/drivers/dri/i965/gen8_surface_state.c33
-rw-r--r--src/mesa/drivers/dri/i965/intel_debug.c2
-rw-r--r--src/mesa/drivers/dri/i965/intel_debug.h2
-rw-r--r--src/mesa/drivers/dri/i965/intel_extensions.c3
-rw-r--r--src/mesa/drivers/dri/i965/intel_fbo.c10
-rw-r--r--src/mesa/drivers/dri/i965/intel_screen.c14
-rw-r--r--src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp822
-rw-r--r--src/mesa/drivers/dri/nouveau/nouveau_context.c2
59 files changed, 2199 insertions, 842 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am
index 04b3f9cc8ce..9d003e48bd8 100644
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -59,6 +59,7 @@ TESTS = \
test_fs_saturate_propagation \
test_eu_compact \
test_vf_float_conversions \
+ test_vec4_cmod_propagation \
test_vec4_copy_propagation \
test_vec4_register_coalesce
@@ -94,6 +95,12 @@ test_vec4_copy_propagation_LDADD = \
$(top_builddir)/src/gtest/libgtest.la \
$(TEST_LIBS)
+test_vec4_cmod_propagation_SOURCES = \
+ test_vec4_cmod_propagation.cpp
+test_vec4_cmod_propagation_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ $(TEST_LIBS)
+
test_eu_compact_SOURCES = \
test_eu_compact.c
nodist_EXTRA_test_eu_compact_SOURCES = dummy.cpp
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index ccd540dabca..ed2654ef329 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -58,6 +58,7 @@ i965_compiler_FILES = \
brw_util.c \
brw_util.h \
brw_vec4_builder.h \
+ brw_vec4_cmod_propagation.cpp \
brw_vec4_copy_propagation.cpp \
brw_vec4.cpp \
brw_vec4_cse.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index 10bcd4bafd4..5d46615bc7b 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -528,7 +528,9 @@ cfg_t::dump_domtree()
{
printf("digraph DominanceTree {\n");
foreach_block(block, this) {
- printf("\t%d -> %d\n", block->idom->num, block->num);
+ if (block->idom) {
+ printf("\t%d -> %d\n", block->idom->num, block->num);
+ }
}
printf("}\n");
}
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h
index a06b0aa1cd0..69e39e8964d 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.h
+++ b/src/mesa/drivers/dri/i965/brw_cfg.h
@@ -90,6 +90,8 @@ struct bblock_t {
struct exec_list parents;
struct exec_list children;
int num;
+
+ unsigned cycle_count;
};
static inline struct backend_instruction *
@@ -285,6 +287,8 @@ struct cfg_t {
int num_blocks;
bool idom_dirty;
+
+ unsigned cycle_count;
};
/* Note that this is implemented with a double for loop -- break will
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index d9967143d8a..e5133ef5a3d 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -338,6 +338,7 @@ struct brw_wm_prog_data {
} binding_table;
uint8_t computed_depth_mode;
+ bool computed_stencil;
bool early_fragment_tests;
bool no_8;
@@ -443,9 +444,7 @@ struct brw_vue_map {
* directly correspond to a gl_varying_slot, the value comes from
* brw_varying_slot.
*
- * For slots that are not in use, the value is BRW_VARYING_SLOT_COUNT (this
- * simplifies code that uses the value stored in slot_to_varying to
- * create a bit mask).
+ * For slots that are not in use, the value is BRW_VARYING_SLOT_PAD.
*/
signed char slot_to_varying[BRW_VARYING_SLOT_COUNT];
@@ -467,8 +466,8 @@ static inline GLuint brw_vue_slot_to_offset(GLuint slot)
* Convert a vertex output (brw_varying_slot) into a byte offset within the
* VUE.
*/
-static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map,
- GLuint varying)
+static inline
+GLuint brw_varying_to_offset(const struct brw_vue_map *vue_map, GLuint varying)
{
return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]);
}
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 4f503ae4869..c83f47bdff7 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -501,8 +501,6 @@ struct brw_cache_item {
};
-typedef void (*cache_aux_free_func)(const void *aux);
-
struct brw_cache {
struct brw_context *brw;
@@ -512,9 +510,6 @@ struct brw_cache {
uint32_t next_offset;
bool bo_used_by_gpu;
-
- /** Optional functions for freeing other pointers attached to a prog_data. */
- cache_aux_free_func aux_free[BRW_MAX_CACHE];
};
@@ -1177,7 +1172,7 @@ struct brw_context
int num_atoms[BRW_NUM_PIPELINES];
const struct brw_tracked_state render_atoms[60];
- const struct brw_tracked_state compute_atoms[8];
+ const struct brw_tracked_state compute_atoms[9];
/* If (INTEL_DEBUG & DEBUG_BATCH) */
struct {
@@ -1463,7 +1458,7 @@ void brw_upload_ubo_surfaces(struct brw_context *brw,
struct brw_stage_prog_data *prog_data,
bool dword_pitch);
void brw_upload_abo_surfaces(struct brw_context *brw,
- struct gl_shader_program *prog,
+ struct gl_shader *shader,
struct brw_stage_state *stage_state,
struct brw_stage_prog_data *prog_data);
void brw_upload_image_surfaces(struct brw_context *brw,
@@ -1680,6 +1675,7 @@ struct opcode_desc {
extern const struct opcode_desc opcode_descs[128];
extern const char * const conditional_modifier[16];
+extern const char *const pred_ctrl_align16[16];
void
brw_emit_depthbuffer(struct brw_context *brw);
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 169d092f90e..754da9fc3da 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -913,20 +913,15 @@ enum opcode {
/**
* Same as FS_OPCODE_FB_WRITE but expects its arguments separately as
- * individual sources instead of as a single payload blob:
- *
- * Source 0: [required] Color 0.
- * Source 1: [optional] Color 1 (for dual source blend messages).
- * Source 2: [optional] Src0 Alpha.
- * Source 3: [optional] Source Depth (gl_FragDepth)
- * Source 4: [optional (gen4-5)] Destination Depth passthrough from thread
- * Source 5: [optional] Sample Mask (gl_SampleMask).
- * Source 6: [required] Number of color components (as a UD immediate).
+ * individual sources instead of as a single payload blob. The
+ * position/ordering of the arguments are defined by the enum
+ * fb_write_logical_srcs.
*/
FS_OPCODE_FB_WRITE_LOGICAL,
FS_OPCODE_BLORP_FB_WRITE,
FS_OPCODE_REP_FB_WRITE,
+ FS_OPCODE_PACK_STENCIL_REF,
SHADER_OPCODE_RCP,
SHADER_OPCODE_RSQ,
SHADER_OPCODE_SQRT,
@@ -1332,6 +1327,17 @@ enum brw_urb_write_flags {
BRW_URB_WRITE_ALLOCATE | BRW_URB_WRITE_COMPLETE,
};
+enum fb_write_logical_srcs {
+ FB_WRITE_LOGICAL_SRC_COLOR0, /* REQUIRED */
+ FB_WRITE_LOGICAL_SRC_COLOR1, /* for dual source blend messages */
+ FB_WRITE_LOGICAL_SRC_SRC0_ALPHA,
+ FB_WRITE_LOGICAL_SRC_SRC_DEPTH, /* gl_FragDepth */
+ FB_WRITE_LOGICAL_SRC_DST_DEPTH, /* GEN4-5: passthrough from thread */
+ FB_WRITE_LOGICAL_SRC_SRC_STENCIL, /* gl_FragStencilRefARB */
+ FB_WRITE_LOGICAL_SRC_OMASK, /* Sample Mask (gl_SampleMask) */
+ FB_WRITE_LOGICAL_SRC_COMPONENTS, /* REQUIRED */
+};
+
#ifdef __cplusplus
/**
* Allow brw_urb_write_flags enums to be ORed together.
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c
index 65172490da3..6372fb5c55f 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -311,7 +311,7 @@ static const struct brw_device_info brw_device_info_chv = {
.max_gs_threads = 336, \
.max_hs_threads = 336, \
.max_ds_threads = 336, \
- .max_wm_threads = 64 * 6, \
+ .max_wm_threads = 64 * 9, \
.max_cs_threads = 56, \
.urb = { \
.size = 384, \
@@ -335,6 +335,10 @@ static const struct brw_device_info brw_device_info_skl_gt3 = {
GEN9_FEATURES, .gt = 3,
};
+static const struct brw_device_info brw_device_info_skl_gt4 = {
+ GEN9_FEATURES, .gt = 4,
+};
+
static const struct brw_device_info brw_device_info_bxt = {
GEN9_FEATURES,
.is_broxton = 1,
@@ -359,7 +363,7 @@ static const struct brw_device_info brw_device_info_bxt = {
};
const struct brw_device_info *
-brw_get_device_info(int devid, int revision)
+brw_get_device_info(int devid)
{
const struct brw_device_info *devinfo;
switch (devid) {
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h b/src/mesa/drivers/dri/i965/brw_device_info.h
index 7bab5716b43..6f4a250e874 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.h
+++ b/src/mesa/drivers/dri/i965/brw_device_info.h
@@ -86,5 +86,5 @@ struct brw_device_info
/** @} */
};
-const struct brw_device_info *brw_get_device_info(int devid, int revision);
+const struct brw_device_info *brw_get_device_info(int devid);
const char *brw_get_device_name(int devid);
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index db23a187a93..df747107188 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -252,7 +252,7 @@ static const char *const pred_inv[2] = {
[1] = "-"
};
-static const char *const pred_ctrl_align16[16] = {
+const char *const pred_ctrl_align16[16] = {
[1] = "",
[2] = ".x",
[3] = ".y",
@@ -726,7 +726,7 @@ reg(FILE *file, unsigned _reg_file, unsigned _reg_nr)
switch (_reg_nr & 0xf0) {
case BRW_ARF_NULL:
string(file, "null");
- return -1;
+ break;
case BRW_ARF_ADDRESS:
format(file, "a%d", _reg_nr & 0x0f);
break;
@@ -908,7 +908,6 @@ src_ia1(FILE *file,
unsigned _addr_subreg_nr,
unsigned _negate,
unsigned __abs,
- unsigned _addr_mode,
unsigned _horiz_stride, unsigned _width, unsigned _vert_stride)
{
int err = 0;
@@ -1143,7 +1142,6 @@ src0(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst)
brw_inst_src0_ia_subreg_nr(devinfo, inst),
brw_inst_src0_negate(devinfo, inst),
brw_inst_src0_abs(devinfo, inst),
- brw_inst_src0_address_mode(devinfo, inst),
brw_inst_src0_hstride(devinfo, inst),
brw_inst_src0_width(devinfo, inst),
brw_inst_src0_vstride(devinfo, inst));
@@ -1200,7 +1198,6 @@ src1(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst)
brw_inst_src1_ia_subreg_nr(devinfo, inst),
brw_inst_src1_negate(devinfo, inst),
brw_inst_src1_abs(devinfo, inst),
- brw_inst_src1_address_mode(devinfo, inst),
brw_inst_src1_hstride(devinfo, inst),
brw_inst_src1_width(devinfo, inst),
brw_inst_src1_vstride(devinfo, inst));
diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c
index 1f4a3516fa2..40ec87d38f0 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.c
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@@ -261,7 +261,7 @@ void
brw_disassemble(const struct brw_device_info *devinfo,
void *assembly, int start, int end, FILE *out)
{
- bool dump_hex = false;
+ bool dump_hex = (INTEL_DEBUG & DEBUG_HEX) != 0;
for (int offset = start; offset < end;) {
brw_inst *insn = assembly + offset;
diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c
index f787ea3d4f8..07ace6bfbcb 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -1407,6 +1407,9 @@ void
brw_compact_instructions(struct brw_codegen *p, int start_offset,
int num_annotations, struct annotation *annotation)
{
+ if (unlikely(INTEL_DEBUG & DEBUG_NO_COMPACTION))
+ return;
+
const struct brw_device_info *devinfo = p->devinfo;
void *store = p->store + start_offset / 16;
/* For an instruction at byte offset 16*i before compaction, this is the
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index bf2fee9ed48..a6fbb542919 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -410,7 +410,7 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.dw1.bits.indirect_offset);
} else {
- brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.dw1.bits.indirect_offset);
+ brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.dw1.bits.indirect_offset);
}
}
@@ -2511,12 +2511,20 @@ brw_send_indirect_message(struct brw_codegen *p,
struct brw_reg desc)
{
const struct brw_device_info *devinfo = p->devinfo;
- struct brw_inst *send, *setup;
+ struct brw_inst *send;
+ int setup;
assert(desc.type == BRW_REGISTER_TYPE_UD);
+ /* We hold on to the setup instruction (the SEND in the direct case, the OR
+ * in the indirect case) by its index in the instruction store. The
+ * pointer returned by next_insn() may become invalid if emitting the SEND
+ * in the indirect case reallocs the store.
+ */
+
if (desc.file == BRW_IMMEDIATE_VALUE) {
- setup = send = next_insn(p, BRW_OPCODE_SEND);
+ setup = p->nr_insn;
+ send = next_insn(p, BRW_OPCODE_SEND);
brw_set_src1(p, send, desc);
} else {
@@ -2531,7 +2539,8 @@ brw_send_indirect_message(struct brw_codegen *p,
* caller can specify additional descriptor bits with the usual
* brw_set_*_message() helper functions.
*/
- setup = brw_OR(p, addr, desc, brw_imm_ud(0));
+ setup = p->nr_insn;
+ brw_OR(p, addr, desc, brw_imm_ud(0));
brw_pop_insn_state(p);
@@ -2543,7 +2552,7 @@ brw_send_indirect_message(struct brw_codegen *p,
brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
brw_inst_set_sfid(devinfo, send, sfid);
- return setup;
+ return &p->store[setup];
}
static struct brw_inst *
@@ -2906,11 +2915,10 @@ brw_untyped_surface_read(struct brw_codegen *p,
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
HSW_SFID_DATAPORT_DATA_CACHE_1 :
GEN7_SFID_DATAPORT_DATA_CACHE);
- const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1);
struct brw_inst *insn = brw_send_indirect_surface_message(
p, sfid, dst, payload, surface, msg_length,
brw_surface_payload_size(p, num_channels, true, true),
- align1);
+ false);
brw_set_dp_untyped_surface_read_message(
p, insn, num_channels);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 8320cd77299..e218a85a363 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -88,8 +88,6 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
case IMM:
case UNIFORM:
unreachable("Invalid destination register file");
- default:
- unreachable("Invalid register file");
}
this->writes_accumulator = false;
@@ -538,18 +536,6 @@ fs_visitor::get_timestamp(const fs_builder &bld)
*/
bld.group(4, 0).exec_all().MOV(dst, ts);
- /* The caller wants the low 32 bits of the timestamp. Since it's running
- * at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds,
- * which is plenty of time for our purposes. It is identical across the
- * EUs, but since it's tracking GPU core speed it will increment at a
- * varying rate as render P-states change.
- *
- * The caller could also check if render P-states have changed (or anything
- * else that might disrupt timing) by setting smear to 2 and checking if
- * that field is != 0.
- */
- dst.set_smear(0);
-
return dst;
}
@@ -557,6 +543,14 @@ void
fs_visitor::emit_shader_time_begin()
{
shader_start_time = get_timestamp(bld.annotate("shader time start"));
+
+ /* We want only the low 32 bits of the timestamp. Since it's running
+ * at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds,
+ * which is plenty of time for our purposes. It is identical across the
+ * EUs, but since it's tracking GPU core speed it will increment at a
+ * varying rate as render P-states change.
+ */
+ shader_start_time.set_smear(0);
}
void
@@ -570,6 +564,15 @@ fs_visitor::emit_shader_time_end()
fs_reg shader_end_time = get_timestamp(ibld);
+ /* We only use the low 32 bits of the timestamp - see
+ * emit_shader_time_begin()).
+ *
+ * We could also check if render P-states have changed (or anything
+ * else that might disrupt timing) by setting smear to 2 and checking if
+ * that field is != 0.
+ */
+ shader_end_time.set_smear(0);
+
/* Check that there weren't any timestamp reset events (assuming these
* were the only two timestamp reads that happened).
*/
@@ -700,10 +703,10 @@ fs_inst::components_read(unsigned i) const
return 2;
case FS_OPCODE_FB_WRITE_LOGICAL:
- assert(src[6].file == IMM);
+ assert(src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM);
/* First/second FB write color. */
if (i < 2)
- return src[6].fixed_hw_reg.dw1.ud;
+ return src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud;
else
return 1;
@@ -841,9 +844,8 @@ fs_inst::regs_read(int arg) const
REG_SIZE);
case MRF:
unreachable("MRF registers are not allowed as sources");
- default:
- unreachable("Invalid register file");
}
+ return 0;
}
bool
@@ -1283,9 +1285,9 @@ fs_visitor::emit_sampleid_setup()
fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::int_type));
if (key->compute_sample_id) {
- fs_reg t1 = vgrf(glsl_type::int_type);
- fs_reg t2 = vgrf(glsl_type::int_type);
- t2.type = BRW_REGISTER_TYPE_UW;
+ fs_reg t1(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_D);
+ t1.set_smear(0);
+ fs_reg t2(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_W);
/* The PS will be run in MSDISPMODE_PERSAMPLE. For example with
* 8x multisampling, subspan 0 will represent sample N (where N
@@ -1306,13 +1308,13 @@ fs_visitor::emit_sampleid_setup()
* are sample 1 of subspan 0; the third group is sample 0 of
* subspan 1, and finally sample 1 of subspan 1.
*/
- abld.exec_all()
- .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)),
+ abld.exec_all().group(1, 0)
+ .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
fs_reg(0xc0));
- abld.exec_all().SHR(t1, t1, fs_reg(5));
+ abld.exec_all().group(1, 0).SHR(t1, t1, fs_reg(5));
/* This works for both SIMD8 and SIMD16 */
- abld.exec_all()
+ abld.exec_all().group(4, 0)
.MOV(t2, brw_imm_v(key->persample_2x ? 0x1010 : 0x3210));
/* This special instruction takes care of setting vstride=1,
@@ -1443,6 +1445,9 @@ fs_visitor::calculate_urb_setup()
}
}
} else {
+ bool include_vue_header =
+ nir->info.inputs_read & (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+
/* We have enough input varyings that the SF/SBE pipeline stage can't
* arbitrarily rearrange them to suit our whim; we have to put them
* in an order that matches the output of the previous pipeline stage
@@ -1452,15 +1457,14 @@ fs_visitor::calculate_urb_setup()
brw_compute_vue_map(devinfo, &prev_stage_vue_map,
key->input_slots_valid,
nir->info.separate_shader);
- int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
+ int first_slot =
+ include_vue_header ? 0 : 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
+
assert(prev_stage_vue_map.num_slots <= first_slot + 32);
for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
slot++) {
int varying = prev_stage_vue_map.slot_to_varying[slot];
- /* Note that varying == BRW_VARYING_SLOT_COUNT when a slot is
- * unused.
- */
- if (varying != BRW_VARYING_SLOT_COUNT &&
+ if (varying != BRW_VARYING_SLOT_PAD &&
(nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK &
BITFIELD64_BIT(varying))) {
prog_data->urb_setup[varying] = slot - first_slot;
@@ -2615,7 +2619,7 @@ fs_visitor::eliminate_find_live_channel()
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
if (depth == 0) {
inst->opcode = BRW_OPCODE_MOV;
- inst->src[0] = fs_reg(0);
+ inst->src[0] = fs_reg(0u);
inst->sources = 1;
inst->force_writemask_all = true;
progress = true;
@@ -2643,8 +2647,9 @@ fs_visitor::emit_repclear_shader()
fs_inst *mov;
if (uniforms == 1) {
- mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
- fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
+ mov = bld.exec_all().group(4, 0)
+ .MOV(brw_message_reg(color_mrf),
+ fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
} else {
struct brw_reg reg =
brw_reg(BRW_GENERAL_REGISTER_FILE,
@@ -2653,8 +2658,8 @@ fs_visitor::emit_repclear_shader()
BRW_WIDTH_2,
BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
- mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
- fs_reg(reg));
+ mov = bld.exec_all().group(4, 0)
+ .MOV(vec4(brw_message_reg(color_mrf)), fs_reg(reg));
}
fs_inst *write;
@@ -3366,15 +3371,17 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
const brw_wm_prog_key *key,
const fs_visitor::thread_payload &payload)
{
- assert(inst->src[6].file == IMM);
+ assert(inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM);
const brw_device_info *devinfo = bld.shader->devinfo;
- const fs_reg &color0 = inst->src[0];
- const fs_reg &color1 = inst->src[1];
- const fs_reg &src0_alpha = inst->src[2];
- const fs_reg &src_depth = inst->src[3];
- const fs_reg &dst_depth = inst->src[4];
- fs_reg sample_mask = inst->src[5];
- const unsigned components = inst->src[6].fixed_hw_reg.dw1.ud;
+ const fs_reg &color0 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR0];
+ const fs_reg &color1 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR1];
+ const fs_reg &src0_alpha = inst->src[FB_WRITE_LOGICAL_SRC_SRC0_ALPHA];
+ const fs_reg &src_depth = inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH];
+ const fs_reg &dst_depth = inst->src[FB_WRITE_LOGICAL_SRC_DST_DEPTH];
+ const fs_reg &src_stencil = inst->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL];
+ fs_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK];
+ const unsigned components =
+ inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud;
/* We can potentially have a message length of up to 15, so we have to set
* base_mrf to either 0 or 1 in order to fit in m0..m15.
@@ -3464,6 +3471,17 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
length++;
}
+ if (src_stencil.file != BAD_FILE) {
+ assert(devinfo->gen >= 9);
+ assert(bld.dispatch_width() != 16);
+
+ sources[length] = bld.vgrf(BRW_REGISTER_TYPE_UD);
+ bld.exec_all().annotate("FB write OS")
+ .emit(FS_OPCODE_PACK_STENCIL_REF, sources[length],
+ retype(src_stencil, BRW_REGISTER_TYPE_UB));
+ length++;
+ }
+
fs_inst *load;
if (devinfo->gen >= 7) {
/* Send from the GRF */
@@ -4073,7 +4091,7 @@ fs_visitor::lower_logical_sends()
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_UNTYPED_SURFACE_READ,
- fs_reg(0xffff));
+ fs_reg());
break;
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
@@ -4202,10 +4220,12 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
/* Gen6 doesn't support SIMD16 depth writes but we cannot handle them
* here.
*/
- assert(devinfo->gen != 6 || inst->src[3].file == BAD_FILE ||
+ assert(devinfo->gen != 6 ||
+ inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH].file == BAD_FILE ||
inst->exec_size == 8);
/* Dual-source FB writes are unsupported in SIMD16 mode. */
- return (inst->src[1].file != BAD_FILE ? 8 : inst->exec_size);
+ return (inst->src[FB_WRITE_LOGICAL_SRC_COLOR1].file != BAD_FILE ?
+ 8 : inst->exec_size);
case SHADER_OPCODE_TXD_LOGICAL:
/* TXD is unsupported in SIMD16 mode. */
@@ -4499,9 +4519,8 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
if (inst->dst.fixed_hw_reg.subnr)
fprintf(file, "+%d", inst->dst.fixed_hw_reg.subnr);
break;
- default:
- fprintf(file, "???");
- break;
+ case IMM:
+ unreachable("not reached");
}
fprintf(file, ":%s, ", brw_reg_type_letters(inst->dst.type));
@@ -4594,9 +4613,6 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
if (inst->src[i].fixed_hw_reg.abs)
fprintf(file, "|");
break;
- default:
- fprintf(file, "???");
- break;
}
if (inst->src[i].abs)
fprintf(file, "|");
@@ -4977,8 +4993,7 @@ fs_visitor::allocate_registers()
if (failed)
return;
- if (!allocated_without_spills)
- schedule_instructions(SCHEDULE_POST);
+ schedule_instructions(SCHEDULE_POST);
if (last_scratch > 0)
prog_data->total_scratch = brw_get_scratch_size(last_scratch);
@@ -5236,6 +5251,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
prog_data->uses_omask =
shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
prog_data->computed_depth_mode = computed_depth_mode(shader);
+ prog_data->computed_stencil =
+ shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 50e98becf03..8058b344b7a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -145,6 +145,8 @@ public:
void assign_vs_urb_setup();
bool assign_regs(bool allow_spilling);
void assign_regs_trivial();
+ void calculate_payload_ranges(int payload_node_count,
+ int *payload_last_use_ip);
void setup_payload_interference(struct ra_graph *g, int payload_reg_count,
int first_payload_node);
int choose_spill_reg(struct ra_graph *g);
@@ -337,6 +339,7 @@ public:
int *push_constant_loc;
fs_reg frag_depth;
+ fs_reg frag_stencil;
fs_reg sample_mask;
fs_reg outputs[VARYING_SLOT_MAX];
unsigned output_components[VARYING_SLOT_MAX];
@@ -427,6 +430,8 @@ private:
void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload);
void generate_urb_write(fs_inst *inst, struct brw_reg payload);
void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
+ void generate_stencil_ref_packing(fs_inst *inst, struct brw_reg dst,
+ struct brw_reg src);
void generate_barrier(fs_inst *inst, struct brw_reg src);
void generate_blorp_fb_write(fs_inst *inst);
void generate_linterp(fs_inst *inst, struct brw_reg dst,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index df10a9de293..f121f3463d3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -390,14 +390,21 @@ namespace brw {
src_reg
emit_uniformize(const src_reg &src) const
{
+ /* FIXME: We use a vector chan_index and dst to allow constant and
+ * copy propagration to move result all the way into the consuming
+ * instruction (typically a surface index or sampler index for a
+ * send). This uses 1 or 3 extra hw registers in 16 or 32 wide
+ * dispatch. Once we teach const/copy propagation about scalars we
+ * should go back to scalar destinations here.
+ */
const fs_builder ubld = exec_all();
- const dst_reg chan_index = component(vgrf(BRW_REGISTER_TYPE_UD), 0);
- const dst_reg dst = component(vgrf(src.type), 0);
+ const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD);
+ const dst_reg dst = vgrf(src.type);
ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
- ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index);
+ ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0));
- return src_reg(dst);
+ return src_reg(component(dst, 0));
}
/**
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 5589716239a..26204827156 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -416,9 +416,10 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
inst->src[arg].subreg_offset = offset % 32;
}
break;
- default:
- unreachable("Invalid register file");
- break;
+
+ case MRF:
+ case IMM:
+ unreachable("not reached");
}
if (has_source_modifiers) {
@@ -612,6 +613,21 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
}
break;
+ case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+ case SHADER_OPCODE_TYPED_ATOMIC:
+ case SHADER_OPCODE_TYPED_SURFACE_READ:
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE:
+ /* We only propagate into the surface argument of the
+ * instruction. Everything else goes through LOAD_PAYLOAD.
+ */
+ if (i == 1) {
+ inst->src[i] = val;
+ progress = true;
+ }
+ break;
+
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
case SHADER_OPCODE_BROADCAST:
inst->src[i] = val;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index c7628dcc2f4..3a28c8d591d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -93,7 +93,8 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
case SHADER_OPCODE_LOAD_PAYLOAD:
return !inst->is_copy_payload(v->alloc);
default:
- return inst->is_send_from_grf() && !inst->has_side_effects();
+ return inst->is_send_from_grf() && !inst->has_side_effects() &&
+ !inst->is_volatile();
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index bb7e792044f..e207a77fdc1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -42,9 +42,13 @@ static uint32_t brw_file_from_reg(fs_reg *reg)
return BRW_MESSAGE_REGISTER_FILE;
case IMM:
return BRW_IMMEDIATE_VALUE;
- default:
+ case BAD_FILE:
+ case HW_REG:
+ case ATTR:
+ case UNIFORM:
unreachable("not reached");
}
+ return 0;
}
static struct brw_reg
@@ -116,7 +120,8 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
/* Probably unused. */
brw_reg = brw_null_reg();
break;
- default:
+ case ATTR:
+ case UNIFORM:
unreachable("not reached");
}
if (reg->abs)
@@ -317,6 +322,14 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
brw_imm_ud(inst->target));
}
+ /* Set computes stencil to render target */
+ if (prog_data->computed_stencil) {
+ brw_OR(p,
+ vec1(retype(payload, BRW_REGISTER_TYPE_UD)),
+ vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
+ brw_imm_ud(0x1 << 14));
+ }
+
implied_header = brw_null_reg();
} else {
implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
@@ -437,6 +450,47 @@ fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload)
}
void
+fs_generator::generate_stencil_ref_packing(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ assert(dispatch_width == 8);
+ assert(devinfo->gen >= 9);
+
+ /* Stencil value updates are provided in 8 slots of 1 byte per slot.
+ * Presumably, in order to save memory bandwidth, the stencil reference
+ * values written from the FS need to be packed into 2 dwords (this makes
+ * sense because the stencil values are limited to 1 byte each and a SIMD8
+ * send, so stencil slots 0-3 in dw0, and 4-7 in dw1.)
+ *
+ * The spec is confusing here because in the payload definition of MDP_RTW_S8
+ * (Message Data Payload for Render Target Writes with Stencil 8b) the
+ * stencil value seems to be dw4.0-dw4.7. However, if you look at the type of
+ * dw4 it is type MDPR_STENCIL (Message Data Payload Register) which is the
+ * packed values specified above and diagrammed below:
+ *
+ * 31 0
+ * --------------------------------
+ * DW | |
+ * 2-7 | IGNORED |
+ * | |
+ * --------------------------------
+ * DW1 | STC | STC | STC | STC |
+ * | slot7 | slot6 | slot5 | slot4|
+ * --------------------------------
+ * DW0 | STC | STC | STC | STC |
+ * | slot3 | slot2 | slot1 | slot0|
+ * --------------------------------
+ */
+
+ src.vstride = BRW_VERTICAL_STRIDE_4;
+ src.width = BRW_WIDTH_1;
+ src.hstride = BRW_HORIZONTAL_STRIDE_0;
+ assert(src.type == BRW_REGISTER_TYPE_UB);
+ brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UB), src);
+}
+
+void
fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src)
{
brw_barrier(p, src);
@@ -1455,18 +1509,18 @@ fs_generator::generate_set_sample_id(fs_inst *inst,
assert(src0.type == BRW_REGISTER_TYPE_D ||
src0.type == BRW_REGISTER_TYPE_UD);
- brw_push_insn_state(p);
- brw_set_default_exec_size(p, BRW_EXECUTE_8);
- brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
- brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- struct brw_reg reg = retype(stride(src1, 1, 4, 0), BRW_REGISTER_TYPE_UW);
- if (dispatch_width == 8) {
+ struct brw_reg reg = stride(src1, 1, 4, 0);
+ if (devinfo->gen >= 8 || dispatch_width == 8) {
brw_ADD(p, dst, src0, reg);
} else if (dispatch_width == 16) {
+ brw_push_insn_state(p);
+ brw_set_default_exec_size(p, BRW_EXECUTE_8);
+ brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
brw_ADD(p, firsthalf(dst), firsthalf(src0), reg);
+ brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_ADD(p, sechalf(dst), sechalf(src0), suboffset(reg, 2));
+ brw_pop_insn_state(p);
}
- brw_pop_insn_state(p);
}
void
@@ -2182,6 +2236,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
generate_barrier(inst, src[0]);
break;
+ case FS_OPCODE_PACK_STENCIL_REF:
+ generate_stencil_ref_packing(inst, dst, src[0]);
+ break;
+
default:
unreachable("Unsupported opcode");
@@ -2216,9 +2274,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
if (unlikely(debug_flag)) {
fprintf(stderr, "Native code for %s\n"
- "SIMD%d shader: %d instructions. %d loops. %d:%d spills:fills. Promoted %u constants. Compacted %d to %d"
+ "SIMD%d shader: %d instructions. %d loops. %u cycles. %d:%d spills:fills. Promoted %u constants. Compacted %d to %d"
" bytes (%.0f%%)\n",
- shader_name, dispatch_width, before_size / 16, loop_count,
+ shader_name, dispatch_width, before_size / 16, loop_count, cfg->cycle_count,
spill_count, fill_count, promoted_constants, before_size, after_size,
100.0f * (before_size - after_size) / before_size);
@@ -2228,12 +2286,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
}
compiler->shader_debug_log(log_data,
- "%s SIMD%d shader: %d inst, %d loops, "
+ "%s SIMD%d shader: %d inst, %d loops, %u cycles, "
"%d:%d spills:fills, Promoted %u constants, "
"compacted %d to %d bytes.\n",
stage_abbrev, dispatch_width, before_size / 16,
- loop_count, spill_count, fill_count,
- promoted_constants, before_size, after_size);
+ loop_count, cfg->cycle_count, spill_count,
+ fill_count, promoted_constants, before_size,
+ after_size);
return start_offset;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 7b5a0482519..486741bea31 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -71,6 +71,14 @@ fs_visitor::nir_setup_inputs()
var->data.origin_upper_left);
emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
input, reg), 0xF);
+ } else if (var->data.location == VARYING_SLOT_LAYER) {
+ struct brw_reg reg = suboffset(interp_reg(VARYING_SLOT_LAYER, 1), 3);
+ reg.type = BRW_REGISTER_TYPE_D;
+ bld.emit(FS_OPCODE_CINTERP, retype(input, BRW_REGISTER_TYPE_D), reg);
+ } else if (var->data.location == VARYING_SLOT_VIEWPORT) {
+ struct brw_reg reg = suboffset(interp_reg(VARYING_SLOT_VIEWPORT, 2), 3);
+ reg.type = BRW_REGISTER_TYPE_D;
+ bld.emit(FS_OPCODE_CINTERP, retype(input, BRW_REGISTER_TYPE_D), reg);
} else {
emit_general_interpolation(input, var->name, var->type,
(glsl_interp_qualifier) var->data.interpolation,
@@ -114,6 +122,8 @@ fs_visitor::nir_setup_outputs()
}
} else if (var->data.location == FRAG_RESULT_DEPTH) {
this->frag_depth = reg;
+ } else if (var->data.location == FRAG_RESULT_STENCIL) {
+ this->frag_stencil = reg;
} else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
this->sample_mask = reg;
} else {
@@ -896,12 +906,11 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
* from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
* subtract the result from 31 to convert the MSB count into an LSB count.
*/
-
bld.CMP(bld.null_reg_d(), result, fs_reg(-1), BRW_CONDITIONAL_NZ);
- fs_reg neg_result(result);
- neg_result.negate = true;
- inst = bld.ADD(result, neg_result, fs_reg(31));
+
+ inst = bld.ADD(result, result, fs_reg(31));
inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->src[0].negate = true;
break;
}
@@ -1322,6 +1331,15 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
+ case nir_intrinsic_shader_clock: {
+ /* We cannot do anything if there is an event, so ignore it for now */
+ fs_reg shader_clock = get_timestamp(bld);
+ const fs_reg srcs[] = { shader_clock.set_smear(0), shader_clock.set_smear(1) };
+
+ bld.LOAD_PAYLOAD(dest, srcs, ARRAY_SIZE(srcs), 0);
+ break;
+ }
+
case nir_intrinsic_image_size: {
/* Get the referenced image variable and type. */
const nir_variable *var = instr->variables[0]->var;
@@ -1509,7 +1527,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
surf_index = vgrf(glsl_type::uint_type);
bld.ADD(surf_index, get_nir_src(instr->src[0]),
fs_reg(stage_prog_data->binding_table.ssbo_start));
- surf_index = bld.emit_uniformize(surf_index);
/* Assume this may touch any UBO. It would be nice to provide
* a tighter bound, but the array information is already lowered away.
@@ -1520,34 +1537,21 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
}
/* Get the offset to read from */
- fs_reg offset_reg = vgrf(glsl_type::uint_type);
- unsigned const_offset_bytes = 0;
+ fs_reg offset_reg;
if (has_indirect) {
- bld.MOV(offset_reg, get_nir_src(instr->src[1]));
+ offset_reg = get_nir_src(instr->src[1]);
} else {
- const_offset_bytes = instr->const_index[0];
- bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+ offset_reg = fs_reg(instr->const_index[0]);
}
/* Read the vector */
- for (int i = 0; i < instr->num_components; i++) {
- fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
- 1 /* dims */, 1 /* size */,
- BRW_PREDICATE_NONE);
- read_result.type = dest.type;
- bld.MOV(dest, read_result);
- dest = offset(dest, bld, 1);
-
- /* Vector components are stored contiguous in memory */
- if (i < instr->num_components) {
- if (!has_indirect) {
- const_offset_bytes += 4;
- bld.MOV(offset_reg, fs_reg(const_offset_bytes));
- } else {
- bld.ADD(offset_reg, offset_reg, brw_imm_ud(4));
- }
- }
- }
+ fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+ 1 /* dims */,
+ instr->num_components,
+ BRW_PREDICATE_NONE);
+ read_result.type = dest.type;
+ for (int i = 0; i < instr->num_components; i++)
+ bld.MOV(offset(dest, bld, i), offset(read_result, bld, i));
break;
}
@@ -1765,52 +1769,46 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
surf_index = vgrf(glsl_type::uint_type);
bld.ADD(surf_index, get_nir_src(instr->src[1]),
fs_reg(stage_prog_data->binding_table.ssbo_start));
- surf_index = bld.emit_uniformize(surf_index);
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ssbo_start +
nir->info.num_ssbos - 1);
}
- /* Offset */
- fs_reg offset_reg = vgrf(glsl_type::uint_type);
- unsigned const_offset_bytes = 0;
- if (has_indirect) {
- bld.MOV(offset_reg, get_nir_src(instr->src[2]));
- } else {
- const_offset_bytes = instr->const_index[0];
- bld.MOV(offset_reg, fs_reg(const_offset_bytes));
- }
-
/* Value */
fs_reg val_reg = get_nir_src(instr->src[0]);
/* Writemask */
unsigned writemask = instr->const_index[1];
- /* Write each component present in the writemask */
- unsigned skipped_channels = 0;
- for (int i = 0; i < instr->num_components; i++) {
- int component_mask = 1 << i;
- if (writemask & component_mask) {
- if (skipped_channels) {
- if (!has_indirect) {
- const_offset_bytes += 4 * skipped_channels;
- bld.MOV(offset_reg, fs_reg(const_offset_bytes));
- } else {
- bld.ADD(offset_reg, offset_reg,
- brw_imm_ud(4 * skipped_channels));
- }
- skipped_channels = 0;
- }
+ /* Combine groups of consecutive enabled channels in one write
+ * message. We use ffs to find the first enabled channel and then ffs on
+ * the bit-inverse, down-shifted writemask to determine the length of
+ * the block of enabled bits.
+ */
+ while (writemask) {
+ unsigned first_component = ffs(writemask) - 1;
+ unsigned length = ffs(~(writemask >> first_component)) - 1;
+ fs_reg offset_reg;
- emit_untyped_write(bld, surf_index, offset_reg,
- offset(val_reg, bld, i),
- 1 /* dims */, 1 /* size */,
- BRW_PREDICATE_NONE);
+ if (!has_indirect) {
+ offset_reg = fs_reg(instr->const_index[0] + 4 * first_component);
+ } else {
+ offset_reg = vgrf(glsl_type::uint_type);
+ bld.ADD(offset_reg,
+ retype(get_nir_src(instr->src[2]), BRW_REGISTER_TYPE_UD),
+ fs_reg(4 * first_component));
}
- skipped_channels++;
+ emit_untyped_write(bld, surf_index, offset_reg,
+ offset(val_reg, bld, first_component),
+ 1 /* dims */, length,
+ BRW_PREDICATE_NONE);
+
+ /* Clear the bits in the writemask that we just wrote, then try
+ * again to see if more channels are left.
+ */
+ writemask &= (15 << (first_component + length));
}
break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 36388fad98d..9251d9552a5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -330,32 +330,12 @@ count_to_loop_end(const bblock_t *block)
unreachable("not reached");
}
-/**
- * Sets up interference between thread payload registers and the virtual GRFs
- * to be allocated for program temporaries.
- *
- * We want to be able to reallocate the payload for our virtual GRFs, notably
- * because the setup coefficients for a full set of 16 FS inputs takes up 8 of
- * our 128 registers.
- *
- * The layout of the payload registers is:
- *
- * 0..payload.num_regs-1: fixed function setup (including bary coordinates).
- * payload.num_regs..payload.num_regs+curb_read_lengh-1: uniform data
- * payload.num_regs+curb_read_lengh..first_non_payload_grf-1: setup coefficients.
- *
- * And we have payload_node_count nodes covering these registers in order
- * (note that in SIMD16, a node is two registers).
- */
-void
-fs_visitor::setup_payload_interference(struct ra_graph *g,
- int payload_node_count,
- int first_payload_node)
+void fs_visitor::calculate_payload_ranges(int payload_node_count,
+ int *payload_last_use_ip)
{
int loop_depth = 0;
int loop_end_ip = 0;
- int payload_last_use_ip[payload_node_count];
for (int i = 0; i < payload_node_count; i++)
payload_last_use_ip[i] = -1;
@@ -426,6 +406,33 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
ip++;
}
+}
+
+
+/**
+ * Sets up interference between thread payload registers and the virtual GRFs
+ * to be allocated for program temporaries.
+ *
+ * We want to be able to reallocate the payload for our virtual GRFs, notably
+ * because the setup coefficients for a full set of 16 FS inputs takes up 8 of
+ * our 128 registers.
+ *
+ * The layout of the payload registers is:
+ *
+ * 0..payload.num_regs-1: fixed function setup (including bary coordinates).
+ * payload.num_regs..payload.num_regs+curb_read_lengh-1: uniform data
+ * payload.num_regs+curb_read_lengh..first_non_payload_grf-1: setup coefficients.
+ *
+ * And we have payload_node_count nodes covering these registers in order
+ * (note that in SIMD16, a node is two registers).
+ */
+void
+fs_visitor::setup_payload_interference(struct ra_graph *g,
+ int payload_node_count,
+ int first_payload_node)
+{
+ int payload_last_use_ip[payload_node_count];
+ calculate_payload_ranges(payload_node_count, payload_last_use_ip);
for (int i = 0; i < payload_node_count; i++) {
if (payload_last_use_ip[i] == -1)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 7cc4f3c927a..5c57944ca39 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -697,7 +697,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
const fs_reg dst_depth = (payload.dest_depth_reg ?
fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)) :
fs_reg());
- fs_reg src_depth;
+ fs_reg src_depth, src_stencil;
if (source_depth_to_render_target) {
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
@@ -706,10 +706,14 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
src_depth = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0));
}
+ if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
+ src_stencil = frag_stencil;
+
const fs_reg sources[] = {
- color0, color1, src0_alpha, src_depth, dst_depth, sample_mask,
- fs_reg(components)
+ color0, color1, src0_alpha, src_depth, dst_depth, src_stencil,
+ sample_mask, fs_reg(components)
};
+ assert(ARRAY_SIZE(sources) - 1 == FB_WRITE_LOGICAL_SRC_COMPONENTS);
fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(),
sources, ARRAY_SIZE(sources));
@@ -740,6 +744,16 @@ fs_visitor::emit_fb_writes()
no16("Missing support for simd16 depth writes on gen6\n");
}
+ if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
+ /* From the 'Render Target Write message' section of the docs:
+ * "Output Stencil is not supported with SIMD16 Render Target Write
+ * Messages."
+ *
+ * FINISHME: split 16 into 2 8s
+ */
+ no16("FINISHME: support 2 simd8 writes for gl_FragStencilRefARB\n");
+ }
+
if (do_dual_src) {
const fs_builder abld = bld.annotate("FB dual-source write");
diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
index 00125c0f405..76ed237d88a 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
@@ -105,8 +105,8 @@ brw_upload_gs_abo_surfaces(struct brw_context *brw)
if (prog) {
/* BRW_NEW_GS_PROG_DATA */
- brw_upload_abo_surfaces(brw, prog, &brw->gs.base,
- &brw->gs.prog_data->base.base);
+ brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY],
+ &brw->gs.base, &brw->gs.prog_data->base.base);
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index 7726e4b78a0..4417555f18e 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -97,7 +97,9 @@ byte_offset(fs_reg reg, unsigned delta)
case MRF:
reg.reg += delta / 32;
break;
- default:
+ case IMM:
+ case HW_REG:
+ case UNIFORM:
assert(delta == 0);
}
reg.subreg_offset += delta % 32;
@@ -119,7 +121,7 @@ horiz_offset(fs_reg reg, unsigned delta)
case MRF:
case ATTR:
return byte_offset(reg, delta * reg.stride * type_sz(reg.type));
- default:
+ case HW_REG:
assert(delta == 0);
}
return reg;
@@ -163,7 +165,6 @@ half(fs_reg reg, unsigned idx)
case ATTR:
case HW_REG:
- default:
unreachable("Cannot take half of this register type");
}
return reg;
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 1b57b65db27..29642c6d2a4 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -161,9 +161,6 @@ public:
const src_reg &src1 = src_reg(),
const src_reg &src2 = src_reg());
- struct brw_reg get_dst(unsigned gen);
- struct brw_reg get_src(const struct brw_vue_prog_data *prog_data, int i);
-
dst_reg dst;
src_reg src[3];
@@ -186,6 +183,27 @@ public:
return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2;
}
+ bool reads_flag(unsigned c)
+ {
+ if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2)
+ return true;
+
+ switch (predicate) {
+ case BRW_PREDICATE_NONE:
+ return false;
+ case BRW_PREDICATE_ALIGN16_REPLICATE_X:
+ return c == 0;
+ case BRW_PREDICATE_ALIGN16_REPLICATE_Y:
+ return c == 1;
+ case BRW_PREDICATE_ALIGN16_REPLICATE_Z:
+ return c == 2;
+ case BRW_PREDICATE_ALIGN16_REPLICATE_W:
+ return c == 3;
+ default:
+ return true;
+ }
+ }
+
bool writes_flag()
{
return (conditional_mod && (opcode != BRW_OPCODE_SEL &&
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 9a33188cb5c..8c1a34ee17a 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -205,6 +205,9 @@ brw_create_nir(struct brw_context *brw,
if (shader_prog) {
nir_lower_samplers(nir, shader_prog);
nir_validate_shader(nir);
+
+ nir_lower_atomics(nir, shader_prog);
+ nir_validate_shader(nir);
}
brw_postprocess_nir(nir, brw->intelScreen->devinfo, is_scalar);
@@ -278,9 +281,6 @@ brw_postprocess_nir(nir_shader *nir,
nir_lower_system_values(nir);
nir_validate_shader(nir);
- nir_lower_atomics(nir);
- nir_validate_shader(nir);
-
nir_optimize(nir, is_scalar);
if (devinfo->gen >= 6) {
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index 87e7e011541..083c46a3726 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -205,7 +205,7 @@ enum PACKED brw_reg_type {
/** @} */
/** Immediates only: @{ */
- BRW_REGISTER_TYPE_UV,
+ BRW_REGISTER_TYPE_UV, /* Gen6+ */
BRW_REGISTER_TYPE_V,
BRW_REGISTER_TYPE_VF,
/** @} */
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index b710c60148c..88c45f74333 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -26,6 +26,7 @@
*/
#include "brw_fs.h"
+#include "brw_fs_live_variables.h"
#include "brw_vec4.h"
#include "brw_cfg.h"
#include "brw_shader.h"
@@ -400,22 +401,49 @@ schedule_node::set_latency_gen7(bool is_haswell)
class instruction_scheduler {
public:
instruction_scheduler(backend_shader *s, int grf_count,
+ int hw_reg_count, int block_count,
instruction_scheduler_mode mode)
{
this->bs = s;
this->mem_ctx = ralloc_context(NULL);
this->grf_count = grf_count;
+ this->hw_reg_count = hw_reg_count;
this->instructions.make_empty();
this->instructions_to_schedule = 0;
this->post_reg_alloc = (mode == SCHEDULE_POST);
this->mode = mode;
this->time = 0;
if (!post_reg_alloc) {
- this->remaining_grf_uses = rzalloc_array(mem_ctx, int, grf_count);
- this->grf_active = rzalloc_array(mem_ctx, bool, grf_count);
+ this->reg_pressure_in = rzalloc_array(mem_ctx, int, block_count);
+
+ this->livein = ralloc_array(mem_ctx, BITSET_WORD *, block_count);
+ for (int i = 0; i < block_count; i++)
+ this->livein[i] = rzalloc_array(mem_ctx, BITSET_WORD,
+ BITSET_WORDS(grf_count));
+
+ this->liveout = ralloc_array(mem_ctx, BITSET_WORD *, block_count);
+ for (int i = 0; i < block_count; i++)
+ this->liveout[i] = rzalloc_array(mem_ctx, BITSET_WORD,
+ BITSET_WORDS(grf_count));
+
+ this->hw_liveout = ralloc_array(mem_ctx, BITSET_WORD *, block_count);
+ for (int i = 0; i < block_count; i++)
+ this->hw_liveout[i] = rzalloc_array(mem_ctx, BITSET_WORD,
+ BITSET_WORDS(hw_reg_count));
+
+ this->written = rzalloc_array(mem_ctx, bool, grf_count);
+
+ this->reads_remaining = rzalloc_array(mem_ctx, int, grf_count);
+
+ this->hw_reads_remaining = rzalloc_array(mem_ctx, int, hw_reg_count);
} else {
- this->remaining_grf_uses = NULL;
- this->grf_active = NULL;
+ this->reg_pressure_in = NULL;
+ this->livein = NULL;
+ this->liveout = NULL;
+ this->hw_liveout = NULL;
+ this->written = NULL;
+ this->reads_remaining = NULL;
+ this->hw_reads_remaining = NULL;
}
}
@@ -442,7 +470,8 @@ public:
*/
virtual int issue_time(backend_instruction *inst) = 0;
- virtual void count_remaining_grf_uses(backend_instruction *inst) = 0;
+ virtual void count_reads_remaining(backend_instruction *inst) = 0;
+ virtual void setup_liveness(cfg_t *cfg) = 0;
virtual void update_register_pressure(backend_instruction *inst) = 0;
virtual int get_register_pressure_benefit(backend_instruction *inst) = 0;
@@ -453,33 +482,63 @@ public:
bool post_reg_alloc;
int instructions_to_schedule;
int grf_count;
+ int hw_reg_count;
int time;
+ int reg_pressure;
+ int block_idx;
exec_list instructions;
backend_shader *bs;
instruction_scheduler_mode mode;
- /**
- * Number of instructions left to schedule that reference each vgrf.
- *
- * Used so that we can prefer scheduling instructions that will end the
- * live intervals of multiple variables, to reduce register pressure.
+ /*
+ * The register pressure at the beginning of each basic block.
*/
- int *remaining_grf_uses;
- /**
- * Tracks whether each VGRF has had an instruction scheduled that uses it.
- *
- * This is used to estimate whether scheduling a new instruction will
- * increase register pressure.
+ int *reg_pressure_in;
+
+ /*
+ * The virtual GRF's whose range overlaps the beginning of each basic block.
+ */
+
+ BITSET_WORD **livein;
+
+ /*
+ * The virtual GRF's whose range overlaps the end of each basic block.
+ */
+
+ BITSET_WORD **liveout;
+
+ /*
+ * The hardware GRF's whose range overlaps the end of each basic block.
+ */
+
+ BITSET_WORD **hw_liveout;
+
+ /*
+ * Whether we've scheduled a write for this virtual GRF yet.
+ */
+
+ bool *written;
+
+ /*
+ * How many reads we haven't scheduled for this virtual GRF yet.
+ */
+
+ int *reads_remaining;
+
+ /*
+ * How many reads we haven't scheduled for this hardware GRF yet.
*/
- bool *grf_active;
+
+ int *hw_reads_remaining;
};
class fs_instruction_scheduler : public instruction_scheduler
{
public:
- fs_instruction_scheduler(fs_visitor *v, int grf_count,
+ fs_instruction_scheduler(fs_visitor *v, int grf_count, int hw_reg_count,
+ int block_count,
instruction_scheduler_mode mode);
void calculate_deps();
bool is_compressed(fs_inst *inst);
@@ -487,35 +546,109 @@ public:
int issue_time(backend_instruction *inst);
fs_visitor *v;
- void count_remaining_grf_uses(backend_instruction *inst);
+ void count_reads_remaining(backend_instruction *inst);
+ void setup_liveness(cfg_t *cfg);
void update_register_pressure(backend_instruction *inst);
int get_register_pressure_benefit(backend_instruction *inst);
};
fs_instruction_scheduler::fs_instruction_scheduler(fs_visitor *v,
- int grf_count,
+ int grf_count, int hw_reg_count,
+ int block_count,
instruction_scheduler_mode mode)
- : instruction_scheduler(v, grf_count, mode),
+ : instruction_scheduler(v, grf_count, hw_reg_count, block_count, mode),
v(v)
{
}
+static bool
+is_src_duplicate(fs_inst *inst, int src)
+{
+ for (int i = 0; i < src; i++)
+ if (inst->src[i].equals(inst->src[src]))
+ return true;
+
+ return false;
+}
+
void
-fs_instruction_scheduler::count_remaining_grf_uses(backend_instruction *be)
+fs_instruction_scheduler::count_reads_remaining(backend_instruction *be)
{
fs_inst *inst = (fs_inst *)be;
- if (!remaining_grf_uses)
+ if (!reads_remaining)
return;
- if (inst->dst.file == GRF)
- remaining_grf_uses[inst->dst.reg]++;
-
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file != GRF)
+ if (is_src_duplicate(inst, i))
continue;
- remaining_grf_uses[inst->src[i].reg]++;
+ if (inst->src[i].file == GRF) {
+ reads_remaining[inst->src[i].reg]++;
+ } else if (inst->src[i].file == HW_REG &&
+ inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+ if (inst->src[i].fixed_hw_reg.nr >= hw_reg_count)
+ continue;
+
+ for (int j = 0; j < inst->regs_read(i); j++)
+ hw_reads_remaining[inst->src[i].fixed_hw_reg.nr + j]++;
+ }
+ }
+}
+
+void
+fs_instruction_scheduler::setup_liveness(cfg_t *cfg)
+{
+ /* First, compute liveness on a per-GRF level using the in/out sets from
+ * liveness calculation.
+ */
+ for (int block = 0; block < cfg->num_blocks; block++) {
+ for (int i = 0; i < v->live_intervals->num_vars; i++) {
+ if (BITSET_TEST(v->live_intervals->block_data[block].livein, i)) {
+ int vgrf = v->live_intervals->vgrf_from_var[i];
+ if (!BITSET_TEST(livein[block], vgrf)) {
+ reg_pressure_in[block] += v->alloc.sizes[vgrf];
+ BITSET_SET(livein[block], vgrf);
+ }
+ }
+
+ if (BITSET_TEST(v->live_intervals->block_data[block].liveout, i))
+ BITSET_SET(liveout[block], v->live_intervals->vgrf_from_var[i]);
+ }
+ }
+
+ /* Now, extend the live in/live out sets for when a range crosses a block
+ * boundary, which matches what our register allocator/interference code
+ * does to account for force_writemask_all and incompatible exec_mask's.
+ */
+ for (int block = 0; block < cfg->num_blocks - 1; block++) {
+ for (int i = 0; i < grf_count; i++) {
+ if (v->virtual_grf_start[i] <= cfg->blocks[block]->end_ip &&
+ v->virtual_grf_end[i] >= cfg->blocks[block + 1]->start_ip) {
+ if (!BITSET_TEST(livein[block + 1], i)) {
+ reg_pressure_in[block + 1] += v->alloc.sizes[i];
+ BITSET_SET(livein[block + 1], i);
+ }
+
+ BITSET_SET(liveout[block], i);
+ }
+ }
+ }
+
+ int payload_last_use_ip[hw_reg_count];
+ v->calculate_payload_ranges(hw_reg_count, payload_last_use_ip);
+
+ for (int i = 0; i < hw_reg_count; i++) {
+ if (payload_last_use_ip[i] == -1)
+ continue;
+
+ for (int block = 0; block < cfg->num_blocks; block++) {
+ if (cfg->blocks[block]->start_ip <= payload_last_use_ip[i])
+ reg_pressure_in[block]++;
+
+ if (cfg->blocks[block]->end_ip <= payload_last_use_ip[i])
+ BITSET_SET(hw_liveout[block], i);
+ }
}
}
@@ -524,18 +657,24 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be)
{
fs_inst *inst = (fs_inst *)be;
- if (!remaining_grf_uses)
+ if (!reads_remaining)
return;
if (inst->dst.file == GRF) {
- remaining_grf_uses[inst->dst.reg]--;
- grf_active[inst->dst.reg] = true;
+ written[inst->dst.reg] = true;
}
for (int i = 0; i < inst->sources; i++) {
+ if (is_src_duplicate(inst, i))
+ continue;
+
if (inst->src[i].file == GRF) {
- remaining_grf_uses[inst->src[i].reg]--;
- grf_active[inst->src[i].reg] = true;
+ reads_remaining[inst->src[i].reg]--;
+ } else if (inst->src[i].file == HW_REG &&
+ inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE &&
+ inst->src[i].fixed_hw_reg.nr < hw_reg_count) {
+ for (int off = 0; off < inst->regs_read(i); off++)
+ hw_reads_remaining[inst->src[i].fixed_hw_reg.nr + off]--;
}
}
}
@@ -547,20 +686,31 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
int benefit = 0;
if (inst->dst.file == GRF) {
- if (remaining_grf_uses[inst->dst.reg] == 1)
- benefit += v->alloc.sizes[inst->dst.reg];
- if (!grf_active[inst->dst.reg])
+ if (!BITSET_TEST(livein[block_idx], inst->dst.reg) &&
+ !written[inst->dst.reg])
benefit -= v->alloc.sizes[inst->dst.reg];
}
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file != GRF)
+ if (is_src_duplicate(inst, i))
continue;
- if (remaining_grf_uses[inst->src[i].reg] == 1)
+ if (inst->src[i].file == GRF &&
+ !BITSET_TEST(liveout[block_idx], inst->src[i].reg) &&
+ reads_remaining[inst->src[i].reg] == 1)
benefit += v->alloc.sizes[inst->src[i].reg];
- if (!grf_active[inst->src[i].reg])
- benefit -= v->alloc.sizes[inst->src[i].reg];
+
+ if (inst->src[i].file == HW_REG &&
+ inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE &&
+ inst->src[i].fixed_hw_reg.nr < hw_reg_count) {
+ for (int off = 0; off < inst->regs_read(i); off++) {
+ int reg = inst->src[i].fixed_hw_reg.nr + off;
+ if (!BITSET_TEST(hw_liveout[block_idx], reg) &&
+ hw_reads_remaining[reg] == 1) {
+ benefit++;
+ }
+ }
+ }
}
return benefit;
@@ -575,20 +725,26 @@ public:
int issue_time(backend_instruction *inst);
vec4_visitor *v;
- void count_remaining_grf_uses(backend_instruction *inst);
+ void count_reads_remaining(backend_instruction *inst);
+ void setup_liveness(cfg_t *cfg);
void update_register_pressure(backend_instruction *inst);
int get_register_pressure_benefit(backend_instruction *inst);
};
vec4_instruction_scheduler::vec4_instruction_scheduler(vec4_visitor *v,
int grf_count)
- : instruction_scheduler(v, grf_count, SCHEDULE_POST),
+ : instruction_scheduler(v, grf_count, 0, 0, SCHEDULE_POST),
v(v)
{
}
void
-vec4_instruction_scheduler::count_remaining_grf_uses(backend_instruction *be)
+vec4_instruction_scheduler::count_reads_remaining(backend_instruction *be)
+{
+}
+
+void
+vec4_instruction_scheduler::setup_liveness(cfg_t *cfg)
{
}
@@ -822,7 +978,7 @@ fs_instruction_scheduler::calculate_deps()
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM &&
(inst->src[i].file != HW_REG ||
- inst->src[i].fixed_hw_reg.file != IMM)) {
+ inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
assert(inst->src[i].file != MRF);
add_barrier_deps(n);
}
@@ -927,10 +1083,10 @@ fs_instruction_scheduler::calculate_deps()
if (inst->src[i].file == GRF) {
if (post_reg_alloc) {
for (int r = 0; r < inst->regs_read(i); r++)
- add_dep(n, last_grf_write[inst->src[i].reg + r]);
+ add_dep(n, last_grf_write[inst->src[i].reg + r], 0);
} else {
for (int r = 0; r < inst->regs_read(i); r++) {
- add_dep(n, last_grf_write[inst->src[i].reg * 16 + inst->src[i].reg_offset + r]);
+ add_dep(n, last_grf_write[inst->src[i].reg * 16 + inst->src[i].reg_offset + r], 0);
}
}
} else if (inst->src[i].file == HW_REG &&
@@ -941,17 +1097,17 @@ fs_instruction_scheduler::calculate_deps()
if (inst->src[i].fixed_hw_reg.vstride == BRW_VERTICAL_STRIDE_0)
size = 1;
for (int r = 0; r < size; r++)
- add_dep(n, last_grf_write[inst->src[i].fixed_hw_reg.nr + r]);
+ add_dep(n, last_grf_write[inst->src[i].fixed_hw_reg.nr + r], 0);
} else {
- add_dep(n, last_fixed_grf_write);
+ add_dep(n, last_fixed_grf_write, 0);
}
} else if (inst->src[i].is_accumulator()) {
- add_dep(n, last_accumulator_write);
+ add_dep(n, last_accumulator_write, 0);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM &&
(inst->src[i].file != HW_REG ||
- inst->src[i].fixed_hw_reg.file != IMM)) {
+ inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
assert(inst->src[i].file != MRF);
add_barrier_deps(n);
}
@@ -1080,7 +1236,7 @@ vec4_instruction_scheduler::calculate_deps()
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM &&
(inst->src[i].file != HW_REG ||
- inst->src[i].fixed_hw_reg.file != IMM)) {
+ inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
/* No reads from MRF, and ATTR is already translated away */
assert(inst->src[i].file != MRF &&
inst->src[i].file != ATTR);
@@ -1177,7 +1333,7 @@ vec4_instruction_scheduler::calculate_deps()
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM &&
(inst->src[i].file != HW_REG ||
- inst->src[i].fixed_hw_reg.file != IMM)) {
+ inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
assert(inst->src[i].file != MRF &&
inst->src[i].file != ATTR);
add_barrier_deps(n);
@@ -1387,6 +1543,9 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
const struct brw_device_info *devinfo = bs->devinfo;
backend_instruction *inst = block->end();
time = 0;
+ if (!post_reg_alloc)
+ reg_pressure = reg_pressure_in[block->num];
+ block_idx = block->num;
/* Remove non-DAG heads from the list. */
foreach_in_list_safe(schedule_node, n, &instructions) {
@@ -1403,23 +1562,30 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
chosen->remove();
inst->insert_before(block, chosen->inst);
instructions_to_schedule--;
- update_register_pressure(chosen->inst);
- /* Update the clock for how soon an instruction could start after the
- * chosen one.
- */
- time += issue_time(chosen->inst);
+ if (!post_reg_alloc) {
+ reg_pressure -= get_register_pressure_benefit(chosen->inst);
+ update_register_pressure(chosen->inst);
+ }
/* If we expected a delay for scheduling, then bump the clock to reflect
- * that as well. In reality, the hardware will switch to another
- * hyperthread and may not return to dispatching our thread for a while
- * even after we're unblocked.
+ * that. In reality, the hardware will switch to another hyperthread
+ * and may not return to dispatching our thread for a while even after
+ * we're unblocked. After this, we have the time when the chosen
+ * instruction will start executing.
*/
time = MAX2(time, chosen->unblocked_time);
+ /* Update the clock for how soon an instruction could start after the
+ * chosen one.
+ */
+ time += issue_time(chosen->inst);
+
if (debug) {
fprintf(stderr, "clock %4d, scheduled: ", time);
bs->dump_instruction(chosen->inst);
+ if (!post_reg_alloc)
+ fprintf(stderr, "(register pressure %d)\n", reg_pressure);
}
/* Now that we've scheduled a new instruction, some of its
@@ -1466,30 +1632,53 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
if (block->end()->opcode == BRW_OPCODE_NOP)
block->end()->remove(block);
assert(instructions_to_schedule == 0);
+
+ block->cycle_count = time;
+}
+
+static unsigned get_cycle_count(cfg_t *cfg)
+{
+ unsigned count = 0, multiplier = 1;
+ foreach_block(block, cfg) {
+ if (block->start()->opcode == BRW_OPCODE_DO)
+ multiplier *= 10; /* assume that loops execute ~10 times */
+
+ count += block->cycle_count * multiplier;
+
+ if (block->end()->opcode == BRW_OPCODE_WHILE)
+ multiplier /= 10;
+ }
+
+ return count;
}
void
instruction_scheduler::run(cfg_t *cfg)
{
- if (debug) {
+ if (debug && !post_reg_alloc) {
fprintf(stderr, "\nInstructions before scheduling (reg_alloc %d)\n",
post_reg_alloc);
- bs->dump_instructions();
+ bs->dump_instructions();
}
- /* Populate the remaining GRF uses array to improve the pre-regalloc
- * scheduling.
- */
- if (remaining_grf_uses) {
- foreach_block_and_inst(block, backend_instruction, inst, cfg) {
- count_remaining_grf_uses(inst);
- }
- }
+ if (!post_reg_alloc)
+ setup_liveness(cfg);
foreach_block(block, cfg) {
if (block->end_ip - block->start_ip <= 1)
continue;
+ if (reads_remaining) {
+ memset(reads_remaining, 0,
+ grf_count * sizeof(*reads_remaining));
+ memset(hw_reads_remaining, 0,
+ hw_reg_count * sizeof(*hw_reads_remaining));
+ memset(written, 0, grf_count * sizeof(*written));
+
+ foreach_inst_in_block(fs_inst, inst, block)
+ count_reads_remaining(inst);
+ }
+
add_insts_from_block(block);
calculate_deps();
@@ -1501,23 +1690,29 @@ instruction_scheduler::run(cfg_t *cfg)
schedule_instructions(block);
}
- if (debug) {
+ if (debug && !post_reg_alloc) {
fprintf(stderr, "\nInstructions after scheduling (reg_alloc %d)\n",
post_reg_alloc);
bs->dump_instructions();
}
+
+ cfg->cycle_count = get_cycle_count(cfg);
}
void
fs_visitor::schedule_instructions(instruction_scheduler_mode mode)
{
+ if (mode != SCHEDULE_POST)
+ calculate_live_intervals();
+
int grf_count;
if (mode == SCHEDULE_POST)
grf_count = grf_used;
else
grf_count = alloc.count;
- fs_instruction_scheduler sched(this, grf_count, mode);
+ fs_instruction_scheduler sched(this, grf_count, first_non_payload_grf,
+ cfg->num_blocks, mode);
sched.run(cfg);
if (unlikely(debug_enabled) && mode == SCHEDULE_POST) {
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index e48f559afa7..063cb84a958 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -298,6 +298,8 @@ brw_instruction_name(enum opcode op)
return "fb_write";
case FS_OPCODE_FB_WRITE_LOGICAL:
return "fb_write_logical";
+ case FS_OPCODE_PACK_STENCIL_REF:
+ return "pack_stencil_ref";
case FS_OPCODE_BLORP_FB_WRITE:
return "blorp_fb_write";
case FS_OPCODE_REP_FB_WRITE:
@@ -988,6 +990,20 @@ backend_instruction::has_side_effects() const
}
}
+bool
+backend_instruction::is_volatile() const
+{
+ switch (opcode) {
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
+ case SHADER_OPCODE_TYPED_SURFACE_READ:
+ case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
+ return true;
+ default:
+ return false;
+ }
+}
+
#ifndef NDEBUG
static bool
inst_is_in_block(const bblock_t *block, const backend_instruction *inst)
@@ -1178,9 +1194,9 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage,
stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
}
- if (shader_prog && shader_prog->NumAtomicBuffers) {
+ if (shader && shader->NumAtomicBuffers) {
stage_prog_data->binding_table.abo_start = next_binding_table_offset;
- next_binding_table_offset += shader_prog->NumAtomicBuffers;
+ next_binding_table_offset += shader->NumAtomicBuffers;
} else {
stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 8899b30c1ae..f4647cca4f9 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -115,6 +115,12 @@ struct backend_instruction : public exec_node {
* optimize these out unless you know what you are doing.
*/
bool has_side_effects() const;
+
+ /**
+ * True if the instruction might be affected by side effects of other
+ * instructions.
+ */
+ bool is_volatile() const;
#else
struct backend_instruction {
struct exec_node link;
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index dc2b9415673..2aa1248fea6 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -49,6 +49,7 @@ extern const struct brw_tracked_state brw_clip_unit;
extern const struct brw_tracked_state brw_vs_pull_constants;
extern const struct brw_tracked_state brw_gs_pull_constants;
extern const struct brw_tracked_state brw_wm_pull_constants;
+extern const struct brw_tracked_state brw_cs_pull_constants;
extern const struct brw_tracked_state brw_constant_buffer;
extern const struct brw_tracked_state brw_curbe_offsets;
extern const struct brw_tracked_state brw_invariant_state;
@@ -220,7 +221,7 @@ bool brw_search_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key,
GLuint key_size,
- uint32_t *inout_offset, void *out_aux);
+ uint32_t *inout_offset, void *inout_aux);
void brw_state_cache_check_size( struct brw_context *brw );
void brw_init_caches( struct brw_context *brw );
@@ -345,7 +346,8 @@ calculate_attr_overrides(const struct brw_context *brw,
uint16_t *attr_overrides,
uint32_t *point_sprite_enables,
uint32_t *flat_enables,
- uint32_t *urb_entry_read_length);
+ uint32_t *urb_entry_read_length,
+ uint32_t *urb_entry_read_offset);
/* gen6_surface_state.c */
void gen6_init_vtable_surface_functions(struct brw_context *brw);
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index 2fbcd146750..f7c0a2037d9 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -137,7 +137,7 @@ bool
brw_search_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key, GLuint key_size,
- uint32_t *inout_offset, void *out_aux)
+ uint32_t *inout_offset, void *inout_aux)
{
struct brw_context *brw = cache->brw;
struct brw_cache_item *item;
@@ -155,11 +155,12 @@ brw_search_cache(struct brw_cache *cache,
if (item == NULL)
return false;
- *(void **)out_aux = ((char *)item->key + item->key_size);
+ void *aux = ((char *) item->key) + item->key_size;
- if (item->offset != *inout_offset) {
+ if (item->offset != *inout_offset || aux != *((void **) inout_aux)) {
brw->ctx.NewDriverState |= (1 << cache_id);
*inout_offset = item->offset;
+ *((void **) inout_aux) = aux;
}
return true;
@@ -349,11 +350,6 @@ brw_init_caches(struct brw_context *brw)
4096, 64);
if (brw->has_llc)
drm_intel_gem_bo_map_unsynchronized(cache->bo);
-
- cache->aux_free[BRW_CACHE_VS_PROG] = brw_stage_prog_data_free;
- cache->aux_free[BRW_CACHE_GS_PROG] = brw_stage_prog_data_free;
- cache->aux_free[BRW_CACHE_FS_PROG] = brw_stage_prog_data_free;
- cache->aux_free[BRW_CACHE_CS_PROG] = brw_stage_prog_data_free;
}
static void
@@ -367,9 +363,12 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
for (i = 0; i < cache->size; i++) {
for (c = cache->items[i]; c; c = next) {
next = c->next;
- if (cache->aux_free[c->cache_id]) {
+ if (c->cache_id == BRW_CACHE_VS_PROG ||
+ c->cache_id == BRW_CACHE_GS_PROG ||
+ c->cache_id == BRW_CACHE_FS_PROG ||
+ c->cache_id == BRW_CACHE_CS_PROG) {
const void *item_aux = c->key + c->key_size;
- cache->aux_free[c->cache_id](item_aux);
+ brw_stage_prog_data_free(item_aux);
}
free((void *)c->key);
free(c);
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 79b8301954e..0344b8a7fb0 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -259,6 +259,7 @@ static const struct brw_tracked_state *gen7_compute_atoms[] =
&brw_state_base_address,
&brw_cs_image_surfaces,
&gen7_cs_push_constants,
+ &brw_cs_pull_constants,
&brw_cs_ubo_surfaces,
&brw_cs_abo_surfaces,
&brw_texture_surfaces,
@@ -353,6 +354,7 @@ static const struct brw_tracked_state *gen8_compute_atoms[] =
&gen8_state_base_address,
&brw_cs_image_surfaces,
&gen7_cs_push_constants,
+ &brw_cs_pull_constants,
&brw_cs_ubo_surfaces,
&brw_cs_abo_surfaces,
&brw_texture_surfaces,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 3e7078d0b32..01eb1580953 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1370,9 +1370,10 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
vec4_instruction *inst = (vec4_instruction *)be_inst;
if (inst->predicate) {
- fprintf(file, "(%cf0.%d) ",
+ fprintf(file, "(%cf0.%d%s) ",
inst->predicate_inverse ? '-' : '+',
- inst->flag_subreg);
+ inst->flag_subreg,
+ pred_ctrl_align16[inst->predicate]);
}
fprintf(file, "%s", brw_instruction_name(inst->opcode));
@@ -1426,9 +1427,10 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
case BAD_FILE:
fprintf(file, "(null)");
break;
- default:
- fprintf(file, "???");
- break;
+ case IMM:
+ case ATTR:
+ case UNIFORM:
+ unreachable("not reached");
}
if (inst->dst.writemask != WRITEMASK_XYZW) {
fprintf(file, ".");
@@ -1520,9 +1522,8 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
case BAD_FILE:
fprintf(file, "(null)");
break;
- default:
- fprintf(file, "???");
- break;
+ case MRF:
+ unreachable("not reached");
}
/* Don't print .0; and only VGRFs have reg_offsets and sizes */
@@ -1787,13 +1788,100 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
emit(MOV(offset, src_reg(index * SHADER_TIME_STRIDE)));
time.type = BRW_REGISTER_TYPE_UD;
- emit(MOV(time, src_reg(value)));
+ emit(MOV(time, value));
vec4_instruction *inst =
emit(SHADER_OPCODE_SHADER_TIME_ADD, dst_reg(), src_reg(dst));
inst->mlen = 2;
}
+void
+vec4_visitor::convert_to_hw_regs()
+{
+ foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
+ for (int i = 0; i < 3; i++) {
+ struct src_reg &src = inst->src[i];
+ struct brw_reg reg;
+ switch (src.file) {
+ case GRF:
+ reg = brw_vec8_grf(src.reg + src.reg_offset, 0);
+ reg.type = src.type;
+ reg.dw1.bits.swizzle = src.swizzle;
+ reg.abs = src.abs;
+ reg.negate = src.negate;
+ break;
+
+ case IMM:
+ reg = brw_imm_reg(src.type);
+ reg.dw1.ud = src.fixed_hw_reg.dw1.ud;
+ break;
+
+ case UNIFORM:
+ reg = stride(brw_vec4_grf(prog_data->base.dispatch_grf_start_reg +
+ (src.reg + src.reg_offset) / 2,
+ ((src.reg + src.reg_offset) % 2) * 4),
+ 0, 4, 1);
+ reg.type = src.type;
+ reg.dw1.bits.swizzle = src.swizzle;
+ reg.abs = src.abs;
+ reg.negate = src.negate;
+
+ /* This should have been moved to pull constants. */
+ assert(!src.reladdr);
+ break;
+
+ case HW_REG:
+ assert(src.type == src.fixed_hw_reg.type);
+ continue;
+
+ case BAD_FILE:
+ /* Probably unused. */
+ reg = brw_null_reg();
+ break;
+
+ case MRF:
+ case ATTR:
+ unreachable("not reached");
+ }
+ src.fixed_hw_reg = reg;
+ }
+
+ dst_reg &dst = inst->dst;
+ struct brw_reg reg;
+
+ switch (inst->dst.file) {
+ case GRF:
+ reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
+ reg.type = dst.type;
+ reg.dw1.bits.writemask = dst.writemask;
+ break;
+
+ case MRF:
+ assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
+ reg = brw_message_reg(dst.reg + dst.reg_offset);
+ reg.type = dst.type;
+ reg.dw1.bits.writemask = dst.writemask;
+ break;
+
+ case HW_REG:
+ assert(dst.type == dst.fixed_hw_reg.type);
+ reg = dst.fixed_hw_reg;
+ break;
+
+ case BAD_FILE:
+ reg = brw_null_reg();
+ break;
+
+ case IMM:
+ case ATTR:
+ case UNIFORM:
+ unreachable("not reached");
+ }
+
+ dst.fixed_hw_reg = reg;
+ }
+}
+
bool
vec4_visitor::run()
{
@@ -1862,6 +1950,7 @@ vec4_visitor::run()
OPT(dead_code_eliminate);
OPT(dead_control_flow_eliminate, this);
OPT(opt_copy_propagation);
+ OPT(opt_cmod_propagation);
OPT(opt_cse);
OPT(opt_algebraic);
OPT(opt_register_coalesce);
@@ -1914,6 +2003,8 @@ vec4_visitor::run()
opt_set_dependency_control();
+ convert_to_hw_regs();
+
if (last_scratch > 0) {
prog_data->base.total_scratch =
brw_get_scratch_size(last_scratch * REG_SIZE);
@@ -2020,9 +2111,9 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
return NULL;
}
- vec4_generator g(compiler, log_data, &prog_data->base,
- mem_ctx, INTEL_DEBUG & DEBUG_VS, "vertex", "VS");
- assembly = g.generate_assembly(v.cfg, final_assembly_size, shader);
+ assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
+ shader, &prog_data->base, v.cfg,
+ final_assembly_size);
}
return assembly;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index d861b2e85df..ec8abf49cd8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -52,6 +52,15 @@ extern "C" {
extern "C" {
#endif
+const unsigned *
+brw_vec4_generate_assembly(const struct brw_compiler *compiler,
+ void *log_data,
+ void *mem_ctx,
+ const nir_shader *nir,
+ struct brw_vue_prog_data *prog_data,
+ const struct cfg_t *cfg,
+ unsigned *out_assembly_size);
+
#ifdef __cplusplus
} /* extern "C" */
@@ -149,6 +158,7 @@ public:
int var_range_start(unsigned v, unsigned n) const;
int var_range_end(unsigned v, unsigned n) const;
bool virtual_grf_interferes(int a, int b);
+ bool opt_cmod_propagation();
bool opt_copy_propagation(bool do_constant_prop = true);
bool opt_cse_local(bblock_t *block);
bool opt_cse();
@@ -158,6 +168,7 @@ public:
bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
void opt_set_dependency_control();
void opt_schedule_instructions();
+ void convert_to_hw_regs();
vec4_instruction *emit(vec4_instruction *inst);
@@ -381,117 +392,6 @@ private:
unsigned last_scratch; /**< measured in 32-byte (register size) units */
};
-
-/**
- * The vertex shader code generator.
- *
- * Translates VS IR to actual i965 assembly code.
- */
-class vec4_generator
-{
-public:
- vec4_generator(const struct brw_compiler *compiler, void *log_data,
- struct brw_vue_prog_data *prog_data,
- void *mem_ctx,
- bool debug_flag,
- const char *stage_name,
- const char *stage_abbrev);
- ~vec4_generator();
-
- const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size,
- const nir_shader *nir);
-
-private:
- void generate_code(const cfg_t *cfg, const nir_shader *nir);
-
- void generate_math1_gen4(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src);
- void generate_math2_gen4(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_math_gen6(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
-
- void generate_tex(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg sampler_index);
-
- void generate_vs_urb_write(vec4_instruction *inst);
- void generate_gs_urb_write(vec4_instruction *inst);
- void generate_gs_urb_write_allocate(vec4_instruction *inst);
- void generate_gs_thread_end(vec4_instruction *inst);
- void generate_gs_set_write_offset(struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_gs_set_vertex_count(struct brw_reg dst,
- struct brw_reg src);
- void generate_gs_svb_write(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_gs_svb_set_destination_index(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src);
- void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src);
- void generate_gs_prepare_channel_masks(struct brw_reg dst);
- void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
- void generate_gs_get_instance_id(struct brw_reg dst);
- void generate_gs_ff_sync_set_primitives(struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1,
- struct brw_reg src2);
- void generate_gs_ff_sync(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_gs_set_primitive_id(struct brw_reg dst);
- void generate_oword_dual_block_offsets(struct brw_reg m1,
- struct brw_reg index);
- void generate_scratch_write(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg index);
- void generate_scratch_read(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg index);
- void generate_pull_constant_load(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg index,
- struct brw_reg offset);
- void generate_pull_constant_load_gen7(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg surf_index,
- struct brw_reg offset);
- void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
- struct brw_reg dst);
-
- void generate_get_buffer_size(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg index);
-
- void generate_unpack_flags(struct brw_reg dst);
-
- const struct brw_compiler *compiler;
- void *log_data; /* Passed to compiler->*_log functions */
-
- const struct brw_device_info *devinfo;
-
- struct brw_codegen *p;
-
- struct brw_vue_prog_data *prog_data;
-
- void *mem_ctx;
- const char *stage_name;
- const char *stage_abbrev;
- const bool debug_flag;
-};
-
} /* namespace brw */
#endif /* __cplusplus */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
new file mode 100644
index 00000000000..329f24269ce
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
@@ -0,0 +1,157 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/** @file brw_vec4_cmod_propagation.cpp
+ *
+ * Really similar to brw_fs_cmod_propagation but adapted to vec4 needs. Check
+ * brw_fs_cmod_propagation for further details on the rationale behind this
+ * optimization.
+ */
+
+#include "brw_vec4.h"
+#include "brw_cfg.h"
+
+namespace brw {
+
+static bool
+opt_cmod_propagation_local(bblock_t *block)
+{
+ bool progress = false;
+ int ip = block->end_ip + 1;
+
+ foreach_inst_in_block_reverse_safe(vec4_instruction, inst, block) {
+ ip--;
+
+ if ((inst->opcode != BRW_OPCODE_AND &&
+ inst->opcode != BRW_OPCODE_CMP &&
+ inst->opcode != BRW_OPCODE_MOV) ||
+ inst->predicate != BRW_PREDICATE_NONE ||
+ !inst->dst.is_null() ||
+ inst->src[0].file != GRF ||
+ inst->src[0].abs)
+ continue;
+
+ if (inst->opcode == BRW_OPCODE_AND &&
+ !(inst->src[1].is_one() &&
+ inst->conditional_mod == BRW_CONDITIONAL_NZ &&
+ !inst->src[0].negate))
+ continue;
+
+ if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero())
+ continue;
+
+ if (inst->opcode == BRW_OPCODE_MOV &&
+ inst->conditional_mod != BRW_CONDITIONAL_NZ)
+ continue;
+
+ bool read_flag = false;
+ foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst, inst) {
+ if (inst->src[0].in_range(scan_inst->dst,
+ scan_inst->regs_written)) {
+ if ((scan_inst->predicate && scan_inst->opcode != BRW_OPCODE_SEL) ||
+ scan_inst->dst.reg_offset != inst->src[0].reg_offset ||
+ (scan_inst->dst.writemask != WRITEMASK_X &&
+ scan_inst->dst.writemask != WRITEMASK_XYZW) ||
+ (scan_inst->dst.writemask == WRITEMASK_XYZW &&
+ inst->src[0].swizzle != BRW_SWIZZLE_XYZW) ||
+ (inst->dst.writemask & ~scan_inst->dst.writemask) != 0) {
+ break;
+ }
+
+ /* CMP's result is the same regardless of dest type. */
+ if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
+ scan_inst->opcode == BRW_OPCODE_CMP &&
+ (inst->dst.type == BRW_REGISTER_TYPE_D ||
+ inst->dst.type == BRW_REGISTER_TYPE_UD)) {
+ inst->remove(block);
+ progress = true;
+ break;
+ }
+
+ /* If the AND wasn't handled by the previous case, it isn't safe
+ * to remove it.
+ */
+ if (inst->opcode == BRW_OPCODE_AND)
+ break;
+
+ /* Comparisons operate differently for ints and floats */
+ if (scan_inst->dst.type != inst->dst.type &&
+ (scan_inst->dst.type == BRW_REGISTER_TYPE_F ||
+ inst->dst.type == BRW_REGISTER_TYPE_F))
+ break;
+
+ /* If the instruction generating inst's source also wrote the
+ * flag, and inst is doing a simple .nz comparison, then inst
+ * is redundant - the appropriate value is already in the flag
+ * register. Delete inst.
+ */
+ if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
+ !inst->src[0].negate &&
+ scan_inst->writes_flag()) {
+ inst->remove(block);
+ progress = true;
+ break;
+ }
+
+ /* Otherwise, try propagating the conditional. */
+ enum brw_conditional_mod cond =
+ inst->src[0].negate ? brw_swap_cmod(inst->conditional_mod)
+ : inst->conditional_mod;
+
+ if (scan_inst->can_do_cmod() &&
+ ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
+ scan_inst->conditional_mod == cond)) {
+ scan_inst->conditional_mod = cond;
+ inst->remove(block);
+ progress = true;
+ }
+ break;
+ }
+
+ if (scan_inst->writes_flag())
+ break;
+
+ read_flag = read_flag || scan_inst->reads_flag();
+ }
+ }
+
+ return progress;
+}
+
+bool
+vec4_visitor::opt_cmod_propagation()
+{
+ bool progress = false;
+
+ foreach_block_reverse(block, cfg) {
+ progress = opt_cmod_propagation_local(block) || progress;
+ }
+
+ if (progress)
+ invalidate_live_intervals();
+
+ return progress;
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
index 8fc7a365bfc..284e0a8d0a5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
@@ -78,13 +78,19 @@ vec4_visitor::dead_code_eliminate()
sizeof(BITSET_WORD));
foreach_inst_in_block_reverse(vec4_instruction, inst, block) {
- if (inst->dst.file == GRF && !inst->has_side_effects()) {
+ if ((inst->dst.file == GRF && !inst->has_side_effects()) ||
+ (inst->dst.is_null() && inst->writes_flag())){
bool result_live[4] = { false };
- for (unsigned i = 0; i < inst->regs_written; i++) {
- for (int c = 0; c < 4; c++)
- result_live[c] |= BITSET_TEST(
- live, var_from_reg(alloc, offset(inst->dst, i), c));
+ if (inst->dst.file == GRF) {
+ for (unsigned i = 0; i < inst->regs_written; i++) {
+ for (int c = 0; c < 4; c++)
+ result_live[c] |= BITSET_TEST(
+ live, var_from_reg(alloc, offset(inst->dst, i), c));
+ }
+ } else {
+ for (unsigned c = 0; c < 4; c++)
+ result_live[c] = BITSET_TEST(flag_live, c);
}
/* If the instruction can't do writemasking, then it's all or
@@ -117,7 +123,11 @@ vec4_visitor::dead_code_eliminate()
}
if (inst->dst.is_null() && inst->writes_flag()) {
- if (!BITSET_TEST(flag_live, 0)) {
+ bool combined_live = false;
+ for (unsigned c = 0; c < 4; c++)
+ combined_live |= BITSET_TEST(flag_live, c);
+
+ if (!combined_live) {
inst->opcode = BRW_OPCODE_NOP;
progress = true;
continue;
@@ -136,7 +146,8 @@ vec4_visitor::dead_code_eliminate()
}
if (inst->writes_flag()) {
- BITSET_CLEAR(flag_live, 0);
+ for (unsigned c = 0; c < 4; c++)
+ BITSET_CLEAR(flag_live, c);
}
for (int i = 0; i < 3; i++) {
@@ -150,8 +161,10 @@ vec4_visitor::dead_code_eliminate()
}
}
- if (inst->reads_flag()) {
- BITSET_SET(flag_live, 0);
+ for (unsigned c = 0; c < 4; c++) {
+ if (inst->reads_flag(c)) {
+ BITSET_SET(flag_live, c);
+ }
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index a84f6c47471..8bc21df5ffc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -20,146 +20,17 @@
* IN THE SOFTWARE.
*/
-#include <ctype.h>
#include "glsl/glsl_parser_extras.h"
#include "brw_vec4.h"
#include "brw_cfg.h"
-extern "C" {
-#include "brw_eu.h"
-#include "main/macros.h"
-#include "program/prog_print.h"
-#include "program/prog_parameter.h"
-};
+using namespace brw;
-namespace brw {
-
-struct brw_reg
-vec4_instruction::get_dst(unsigned gen)
-{
- struct brw_reg brw_reg;
-
- switch (dst.file) {
- case GRF:
- brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
- brw_reg = retype(brw_reg, dst.type);
- brw_reg.dw1.bits.writemask = dst.writemask;
- break;
-
- case MRF:
- assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(gen));
- brw_reg = brw_message_reg(dst.reg + dst.reg_offset);
- brw_reg = retype(brw_reg, dst.type);
- brw_reg.dw1.bits.writemask = dst.writemask;
- break;
-
- case HW_REG:
- assert(dst.type == dst.fixed_hw_reg.type);
- brw_reg = dst.fixed_hw_reg;
- break;
-
- case BAD_FILE:
- brw_reg = brw_null_reg();
- break;
-
- default:
- unreachable("not reached");
- }
- return brw_reg;
-}
-
-struct brw_reg
-vec4_instruction::get_src(const struct brw_vue_prog_data *prog_data, int i)
-{
- struct brw_reg brw_reg;
-
- switch (src[i].file) {
- case GRF:
- brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0);
- brw_reg = retype(brw_reg, src[i].type);
- brw_reg.dw1.bits.swizzle = src[i].swizzle;
- if (src[i].abs)
- brw_reg = brw_abs(brw_reg);
- if (src[i].negate)
- brw_reg = negate(brw_reg);
- break;
-
- case IMM:
- switch (src[i].type) {
- case BRW_REGISTER_TYPE_F:
- brw_reg = brw_imm_f(src[i].fixed_hw_reg.dw1.f);
- break;
- case BRW_REGISTER_TYPE_D:
- brw_reg = brw_imm_d(src[i].fixed_hw_reg.dw1.d);
- break;
- case BRW_REGISTER_TYPE_UD:
- brw_reg = brw_imm_ud(src[i].fixed_hw_reg.dw1.ud);
- break;
- case BRW_REGISTER_TYPE_VF:
- brw_reg = brw_imm_vf(src[i].fixed_hw_reg.dw1.ud);
- break;
- default:
- unreachable("not reached");
- }
- break;
-
- case UNIFORM:
- brw_reg = stride(brw_vec4_grf(prog_data->base.dispatch_grf_start_reg +
- (src[i].reg + src[i].reg_offset) / 2,
- ((src[i].reg + src[i].reg_offset) % 2) * 4),
- 0, 4, 1);
- brw_reg = retype(brw_reg, src[i].type);
- brw_reg.dw1.bits.swizzle = src[i].swizzle;
- if (src[i].abs)
- brw_reg = brw_abs(brw_reg);
- if (src[i].negate)
- brw_reg = negate(brw_reg);
-
- /* This should have been moved to pull constants. */
- assert(!src[i].reladdr);
- break;
-
- case HW_REG:
- assert(src[i].type == src[i].fixed_hw_reg.type);
- brw_reg = src[i].fixed_hw_reg;
- break;
-
- case BAD_FILE:
- /* Probably unused. */
- brw_reg = brw_null_reg();
- break;
- case ATTR:
- default:
- unreachable("not reached");
- }
-
- return brw_reg;
-}
-
-vec4_generator::vec4_generator(const struct brw_compiler *compiler,
- void *log_data,
- struct brw_vue_prog_data *prog_data,
- void *mem_ctx,
- bool debug_flag,
- const char *stage_name,
- const char *stage_abbrev)
- : compiler(compiler), log_data(log_data), devinfo(compiler->devinfo),
- prog_data(prog_data),
- mem_ctx(mem_ctx), stage_name(stage_name), stage_abbrev(stage_abbrev),
- debug_flag(debug_flag)
-{
- p = rzalloc(mem_ctx, struct brw_codegen);
- brw_init_codegen(devinfo, p, mem_ctx);
-}
-
-vec4_generator::~vec4_generator()
-{
-}
-
-void
-vec4_generator::generate_math1_gen4(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src)
+static void
+generate_math1_gen4(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
{
gen4_math(p,
dst,
@@ -178,11 +49,12 @@ check_gen6_math_src_arg(struct brw_reg src)
assert(src.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
}
-void
-vec4_generator::generate_math_gen6(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
+static void
+generate_math_gen6(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
/* Can't do writemask because math can't be align16. */
assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
@@ -196,11 +68,12 @@ vec4_generator::generate_math_gen6(vec4_instruction *inst,
brw_set_default_access_mode(p, BRW_ALIGN_16);
}
-void
-vec4_generator::generate_math2_gen4(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
+static void
+generate_math2_gen4(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
/* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
* "Message Payload":
@@ -229,12 +102,15 @@ vec4_generator::generate_math2_gen4(vec4_instruction *inst,
BRW_MATH_PRECISION_FULL);
}
-void
-vec4_generator::generate_tex(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg sampler_index)
+static void
+generate_tex(struct brw_codegen *p,
+ struct brw_vue_prog_data *prog_data,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg sampler_index)
{
+ const struct brw_device_info *devinfo = p->devinfo;
int msg_type = -1;
if (devinfo->gen >= 5) {
@@ -440,8 +316,8 @@ vec4_generator::generate_tex(vec4_instruction *inst,
}
}
-void
-vec4_generator::generate_vs_urb_write(vec4_instruction *inst)
+static void
+generate_vs_urb_write(struct brw_codegen *p, vec4_instruction *inst)
{
brw_urb_WRITE(p,
brw_null_reg(), /* dest */
@@ -454,8 +330,8 @@ vec4_generator::generate_vs_urb_write(vec4_instruction *inst)
BRW_URB_SWIZZLE_INTERLEAVE);
}
-void
-vec4_generator::generate_gs_urb_write(vec4_instruction *inst)
+static void
+generate_gs_urb_write(struct brw_codegen *p, vec4_instruction *inst)
{
struct brw_reg src = brw_message_reg(inst->base_mrf);
brw_urb_WRITE(p,
@@ -469,14 +345,14 @@ vec4_generator::generate_gs_urb_write(vec4_instruction *inst)
BRW_URB_SWIZZLE_INTERLEAVE);
}
-void
-vec4_generator::generate_gs_urb_write_allocate(vec4_instruction *inst)
+static void
+generate_gs_urb_write_allocate(struct brw_codegen *p, vec4_instruction *inst)
{
struct brw_reg src = brw_message_reg(inst->base_mrf);
/* We pass the temporary passed in src0 as the writeback register */
brw_urb_WRITE(p,
- inst->get_src(this->prog_data, 0), /* dest */
+ inst->src[0].fixed_hw_reg, /* dest */
inst->base_mrf, /* starting mrf reg nr */
src,
BRW_URB_WRITE_ALLOCATE_COMPLETE,
@@ -489,14 +365,13 @@ vec4_generator::generate_gs_urb_write_allocate(vec4_instruction *inst)
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- brw_MOV(p, get_element_ud(inst->get_dst(devinfo->gen), 0),
- get_element_ud(inst->get_src(this->prog_data, 0), 0));
- brw_set_default_access_mode(p, BRW_ALIGN_16);
+ brw_MOV(p, get_element_ud(inst->dst.fixed_hw_reg, 0),
+ get_element_ud(inst->src[0].fixed_hw_reg, 0));
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_thread_end(vec4_instruction *inst)
+static void
+generate_gs_thread_end(struct brw_codegen *p, vec4_instruction *inst)
{
struct brw_reg src = brw_message_reg(inst->base_mrf);
brw_urb_WRITE(p,
@@ -510,10 +385,11 @@ vec4_generator::generate_gs_thread_end(vec4_instruction *inst)
BRW_URB_SWIZZLE_INTERLEAVE);
}
-void
-vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
+static void
+generate_gs_set_write_offset(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
/* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
* Header: M0.3):
@@ -536,29 +412,29 @@ vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- assert(devinfo->gen >= 7 &&
+ assert(p->devinfo->gen >= 7 &&
src1.file == BRW_IMMEDIATE_VALUE &&
src1.type == BRW_REGISTER_TYPE_UD &&
src1.dw1.ud <= USHRT_MAX);
- if (src0.file == IMM) {
+ if (src0.file == BRW_IMMEDIATE_VALUE) {
brw_MOV(p, suboffset(stride(dst, 2, 2, 1), 3),
brw_imm_ud(src0.dw1.ud * src1.dw1.ud));
} else {
brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
retype(src1, BRW_REGISTER_TYPE_UW));
}
- brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_set_vertex_count(struct brw_reg dst,
- struct brw_reg src)
+static void
+generate_gs_set_vertex_count(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg src)
{
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- if (devinfo->gen >= 8) {
+ if (p->devinfo->gen >= 8) {
/* Move the vertex count into the second MRF for the EOT write. */
brw_MOV(p, retype(brw_message_reg(dst.nr + 1), BRW_REGISTER_TYPE_UD),
src);
@@ -580,16 +456,17 @@ vec4_generator::generate_gs_set_vertex_count(struct brw_reg dst,
brw_MOV(p,
suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4),
stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0));
- brw_set_default_access_mode(p, BRW_ALIGN_16);
}
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_svb_write(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
+static void
+generate_gs_svb_write(struct brw_codegen *p,
+ struct brw_vue_prog_data *prog_data,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
int binding = inst->sol_binding;
bool final_write = inst->sol_final_write;
@@ -623,12 +500,12 @@ vec4_generator::generate_gs_svb_write(vec4_instruction *inst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_svb_set_destination_index(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src)
+static void
+generate_gs_svb_set_destination_index(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
{
-
int vertex = inst->sol_vertex;
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
@@ -637,8 +514,10 @@ vec4_generator::generate_gs_svb_set_destination_index(vec4_instruction *inst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src)
+static void
+generate_gs_set_dword_2(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg src)
{
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
@@ -647,8 +526,9 @@ vec4_generator::generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src)
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
+static void
+generate_gs_prepare_channel_masks(struct brw_codegen *p,
+ struct brw_reg dst)
{
/* We want to left shift just DWORD 4 (the x component belonging to the
* second geometry shader invocation) by 4 bits. So generate the
@@ -664,9 +544,10 @@ vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
- struct brw_reg src)
+static void
+generate_gs_set_channel_masks(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg src)
{
/* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
* Header: M0.5):
@@ -727,8 +608,9 @@ vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_get_instance_id(struct brw_reg dst)
+static void
+generate_gs_get_instance_id(struct brw_codegen *p,
+ struct brw_reg dst)
{
/* We want to right shift R0.0 & R0.1 by GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT
* and store into dst.0 & dst.4. So generate the instruction:
@@ -744,11 +626,12 @@ vec4_generator::generate_gs_get_instance_id(struct brw_reg dst)
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1,
- struct brw_reg src2)
+static void
+generate_gs_ff_sync_set_primitives(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1,
+ struct brw_reg src2)
{
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
@@ -765,11 +648,12 @@ vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_ff_sync(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
+static void
+generate_gs_ff_sync(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
/* This opcode uses an implied MRF register for:
* - the header of the ff_sync message. And as such it is expected to be
@@ -811,8 +695,8 @@ vec4_generator::generate_gs_ff_sync(vec4_instruction *inst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_set_primitive_id(struct brw_reg dst)
+static void
+generate_gs_set_primitive_id(struct brw_codegen *p, struct brw_reg dst)
{
/* In gen6, PrimitiveID is delivered in R0.1 of the payload */
struct brw_reg src = brw_vec8_grf(0, 0);
@@ -823,13 +707,14 @@ vec4_generator::generate_gs_set_primitive_id(struct brw_reg dst)
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
- struct brw_reg index)
+static void
+generate_oword_dual_block_offsets(struct brw_codegen *p,
+ struct brw_reg m1,
+ struct brw_reg index)
{
int second_vertex_offset;
- if (devinfo->gen >= 6)
+ if (p->devinfo->gen >= 6)
second_vertex_offset = 1;
else
second_vertex_offset = 16;
@@ -860,8 +745,9 @@ vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_unpack_flags(struct brw_reg dst)
+static void
+generate_unpack_flags(struct brw_codegen *p,
+ struct brw_reg dst)
{
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
@@ -878,16 +764,18 @@ vec4_generator::generate_unpack_flags(struct brw_reg dst)
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_scratch_read(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg index)
+static void
+generate_scratch_read(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg index)
{
+ const struct brw_device_info *devinfo = p->devinfo;
struct brw_reg header = brw_vec8_grf(0, 0);
gen6_resolve_implied_move(p, &header, inst->base_mrf);
- generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
+ generate_oword_dual_block_offsets(p, brw_message_reg(inst->base_mrf + 1),
index);
uint32_t msg_type;
@@ -906,7 +794,7 @@ vec4_generator::generate_scratch_read(vec4_instruction *inst,
brw_set_dest(p, send, dst);
brw_set_src0(p, send, header);
if (devinfo->gen < 6)
- brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf);
+ brw_inst_set_cond_modifier(devinfo, send, inst->base_mrf);
brw_set_dp_read_message(p, send,
255, /* binding table index: stateless access */
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
@@ -917,12 +805,14 @@ vec4_generator::generate_scratch_read(vec4_instruction *inst,
1 /* rlen */);
}
-void
-vec4_generator::generate_scratch_write(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg index)
+static void
+generate_scratch_write(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg index)
{
+ const struct brw_device_info *devinfo = p->devinfo;
struct brw_reg header = brw_vec8_grf(0, 0);
bool write_commit;
@@ -933,7 +823,7 @@ vec4_generator::generate_scratch_write(vec4_instruction *inst,
gen6_resolve_implied_move(p, &header, inst->base_mrf);
- generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
+ generate_oword_dual_block_offsets(p, brw_message_reg(inst->base_mrf + 1),
index);
brw_MOV(p,
@@ -990,12 +880,15 @@ vec4_generator::generate_scratch_write(vec4_instruction *inst,
write_commit);
}
-void
-vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg index,
- struct brw_reg offset)
+static void
+generate_pull_constant_load(struct brw_codegen *p,
+ struct brw_vue_prog_data *prog_data,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg index,
+ struct brw_reg offset)
{
+ const struct brw_device_info *devinfo = p->devinfo;
assert(index.file == BRW_IMMEDIATE_VALUE &&
index.type == BRW_REGISTER_TYPE_UD);
uint32_t surf_index = index.dw1.ud;
@@ -1036,13 +929,15 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
brw_mark_surface_used(&prog_data->base, surf_index);
}
-void
-vec4_generator::generate_get_buffer_size(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg surf_index)
+static void
+generate_get_buffer_size(struct brw_codegen *p,
+ struct brw_vue_prog_data *prog_data,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg surf_index)
{
- assert(devinfo->gen >= 7);
+ assert(p->devinfo->gen >= 7);
assert(surf_index.type == BRW_REGISTER_TYPE_UD &&
surf_index.file == BRW_IMMEDIATE_VALUE);
@@ -1062,11 +957,13 @@ vec4_generator::generate_get_buffer_size(vec4_instruction *inst,
brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
}
-void
-vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg surf_index,
- struct brw_reg offset)
+static void
+generate_pull_constant_load_gen7(struct brw_codegen *p,
+ struct brw_vue_prog_data *prog_data,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg surf_index,
+ struct brw_reg offset)
{
assert(surf_index.type == BRW_REGISTER_TYPE_UD);
@@ -1123,9 +1020,10 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
}
}
-void
-vec4_generator::generate_set_simd4x2_header_gen9(vec4_instruction *inst,
- struct brw_reg dst)
+static void
+generate_set_simd4x2_header_gen9(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst)
{
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
@@ -1140,9 +1038,18 @@ vec4_generator::generate_set_simd4x2_header_gen9(vec4_instruction *inst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
+static void
+generate_code(struct brw_codegen *p,
+ const struct brw_compiler *compiler,
+ void *log_data,
+ const nir_shader *nir,
+ struct brw_vue_prog_data *prog_data,
+ const struct cfg_t *cfg)
{
+ const struct brw_device_info *devinfo = p->devinfo;
+ const char *stage_abbrev = _mesa_shader_stage_to_abbrev(nir->stage);
+ bool debug_flag = INTEL_DEBUG &
+ intel_debug_flag_for_shader_stage(nir->stage);
struct annotation_info annotation;
memset(&annotation, 0, sizeof(annotation));
int loop_count = 0;
@@ -1154,9 +1061,9 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset);
for (unsigned int i = 0; i < 3; i++) {
- src[i] = inst->get_src(this->prog_data, i);
+ src[i] = inst->src[i].fixed_hw_reg;
}
- dst = inst->get_dst(devinfo->gen);
+ dst = inst->dst.fixed_hw_reg;
brw_set_default_predicate_control(p, inst->predicate);
brw_set_default_predicate_inverse(p, inst->predicate_inverse);
@@ -1383,9 +1290,9 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
gen6_math(p, dst, brw_math_function(inst->opcode), src[0],
brw_null_reg());
} else if (devinfo->gen == 6) {
- generate_math_gen6(inst, dst, src[0], brw_null_reg());
+ generate_math_gen6(p, inst, dst, src[0], brw_null_reg());
} else {
- generate_math1_gen4(inst, dst, src[0]);
+ generate_math1_gen4(p, inst, dst, src[0]);
}
break;
@@ -1396,9 +1303,9 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
if (devinfo->gen >= 7) {
gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
} else if (devinfo->gen == 6) {
- generate_math_gen6(inst, dst, src[0], src[1]);
+ generate_math_gen6(p, inst, dst, src[0], src[1]);
} else {
- generate_math2_gen4(inst, dst, src[0], src[1]);
+ generate_math2_gen4(p, inst, dst, src[0], src[1]);
}
break;
@@ -1412,92 +1319,92 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
case SHADER_OPCODE_SAMPLEINFO:
- generate_tex(inst, dst, src[0], src[1]);
+ generate_tex(p, prog_data, inst, dst, src[0], src[1]);
break;
case VS_OPCODE_URB_WRITE:
- generate_vs_urb_write(inst);
+ generate_vs_urb_write(p, inst);
break;
case SHADER_OPCODE_GEN4_SCRATCH_READ:
- generate_scratch_read(inst, dst, src[0]);
+ generate_scratch_read(p, inst, dst, src[0]);
break;
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
- generate_scratch_write(inst, dst, src[0], src[1]);
+ generate_scratch_write(p, inst, dst, src[0], src[1]);
break;
case VS_OPCODE_PULL_CONSTANT_LOAD:
- generate_pull_constant_load(inst, dst, src[0], src[1]);
+ generate_pull_constant_load(p, prog_data, inst, dst, src[0], src[1]);
break;
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
- generate_pull_constant_load_gen7(inst, dst, src[0], src[1]);
+ generate_pull_constant_load_gen7(p, prog_data, inst, dst, src[0], src[1]);
break;
case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
- generate_set_simd4x2_header_gen9(inst, dst);
+ generate_set_simd4x2_header_gen9(p, inst, dst);
break;
case VS_OPCODE_GET_BUFFER_SIZE:
- generate_get_buffer_size(inst, dst, src[0], src[1]);
+ generate_get_buffer_size(p, prog_data, inst, dst, src[0], src[1]);
break;
case GS_OPCODE_URB_WRITE:
- generate_gs_urb_write(inst);
+ generate_gs_urb_write(p, inst);
break;
case GS_OPCODE_URB_WRITE_ALLOCATE:
- generate_gs_urb_write_allocate(inst);
+ generate_gs_urb_write_allocate(p, inst);
break;
case GS_OPCODE_SVB_WRITE:
- generate_gs_svb_write(inst, dst, src[0], src[1]);
+ generate_gs_svb_write(p, prog_data, inst, dst, src[0], src[1]);
break;
case GS_OPCODE_SVB_SET_DST_INDEX:
- generate_gs_svb_set_destination_index(inst, dst, src[0]);
+ generate_gs_svb_set_destination_index(p, inst, dst, src[0]);
break;
case GS_OPCODE_THREAD_END:
- generate_gs_thread_end(inst);
+ generate_gs_thread_end(p, inst);
break;
case GS_OPCODE_SET_WRITE_OFFSET:
- generate_gs_set_write_offset(dst, src[0], src[1]);
+ generate_gs_set_write_offset(p, dst, src[0], src[1]);
break;
case GS_OPCODE_SET_VERTEX_COUNT:
- generate_gs_set_vertex_count(dst, src[0]);
+ generate_gs_set_vertex_count(p, dst, src[0]);
break;
case GS_OPCODE_FF_SYNC:
- generate_gs_ff_sync(inst, dst, src[0], src[1]);
+ generate_gs_ff_sync(p, inst, dst, src[0], src[1]);
break;
case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
- generate_gs_ff_sync_set_primitives(dst, src[0], src[1], src[2]);
+ generate_gs_ff_sync_set_primitives(p, dst, src[0], src[1], src[2]);
break;
case GS_OPCODE_SET_PRIMITIVE_ID:
- generate_gs_set_primitive_id(dst);
+ generate_gs_set_primitive_id(p, dst);
break;
case GS_OPCODE_SET_DWORD_2:
- generate_gs_set_dword_2(dst, src[0]);
+ generate_gs_set_dword_2(p, dst, src[0]);
break;
case GS_OPCODE_PREPARE_CHANNEL_MASKS:
- generate_gs_prepare_channel_masks(dst);
+ generate_gs_prepare_channel_masks(p, dst);
break;
case GS_OPCODE_SET_CHANNEL_MASKS:
- generate_gs_set_channel_masks(dst, src[0]);
+ generate_gs_set_channel_masks(p, dst, src[0]);
break;
case GS_OPCODE_GET_INSTANCE_ID:
- generate_gs_get_instance_id(dst);
+ generate_gs_get_instance_id(p, dst);
break;
case SHADER_OPCODE_SHADER_TIME_ADD:
@@ -1556,7 +1463,7 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
break;
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
- generate_unpack_flags(dst);
+ generate_unpack_flags(p, dst);
break;
case VEC4_OPCODE_MOV_BYTES: {
@@ -1651,10 +1558,10 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
nir->info.label ? nir->info.label : "unnamed",
_mesa_shader_stage_to_string(nir->stage), nir->info.name);
- fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. Compacted %d to %d"
- " bytes (%.0f%%)\n",
+ fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles."
+ "Compacted %d to %d bytes (%.0f%%)\n",
stage_abbrev,
- before_size / 16, loop_count, before_size, after_size,
+ before_size / 16, loop_count, cfg->cycle_count, before_size, after_size,
100.0f * (before_size - after_size) / before_size);
dump_assembly(p->store, annotation.ann_count, annotation.ann,
@@ -1663,21 +1570,27 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
}
compiler->shader_debug_log(log_data,
- "%s vec4 shader: %d inst, %d loops, "
+ "%s vec4 shader: %d inst, %d loops, %u cycles, "
"compacted %d to %d bytes.\n",
- stage_abbrev, before_size / 16, loop_count,
+ stage_abbrev, before_size / 16,
+ loop_count, cfg->cycle_count,
before_size, after_size);
}
-const unsigned *
-vec4_generator::generate_assembly(const cfg_t *cfg,
- unsigned *assembly_size,
- const nir_shader *nir)
+extern "C" const unsigned *
+brw_vec4_generate_assembly(const struct brw_compiler *compiler,
+ void *log_data,
+ void *mem_ctx,
+ const nir_shader *nir,
+ struct brw_vue_prog_data *prog_data,
+ const struct cfg_t *cfg,
+ unsigned *out_assembly_size)
{
+ struct brw_codegen *p = rzalloc(mem_ctx, struct brw_codegen);
+ brw_init_codegen(compiler->devinfo, p, mem_ctx);
brw_set_default_access_mode(p, BRW_ALIGN_16);
- generate_code(cfg, nir);
- return brw_get_program(p, assembly_size);
-}
+ generate_code(p, compiler, log_data, nir, prog_data, cfg);
-} /* namespace brw */
+ return brw_get_program(p, out_assembly_size);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 9402489e628..cfb5cd95cb1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -768,7 +768,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
output_size_bytes += 32;
assert(output_size_bytes >= 1);
- int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
+ unsigned max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
if (compiler->devinfo->gen == 6)
max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
if (output_size_bytes > max_output_size_bytes)
@@ -824,9 +824,9 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader,
mem_ctx, true /* no_spills */, shader_time_index);
if (v.run()) {
- vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx,
- INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
- return g.generate_assembly(v.cfg, final_assembly_size, shader);
+ return brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
+ shader, &prog_data->base, v.cfg,
+ final_assembly_size);
}
}
}
@@ -875,9 +875,9 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
if (error_str)
*error_str = ralloc_strdup(mem_ctx, gs->fail_msg);
} else {
- vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx,
- INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
- ret = g.generate_assembly(gs->cfg, final_assembly_size, shader);
+ ret = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, shader,
+ &prog_data->base, gs->cfg,
+ final_assembly_size);
}
delete gs;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
index 678237901f2..aa9a6572eee 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
@@ -86,9 +86,10 @@ vec4_live_variables::setup_def_use()
}
}
}
- if (inst->reads_flag()) {
- if (!BITSET_TEST(bd->flag_def, 0)) {
- BITSET_SET(bd->flag_use, 0);
+ for (unsigned c = 0; c < 4; c++) {
+ if (inst->reads_flag(c) &&
+ !BITSET_TEST(bd->flag_def, c)) {
+ BITSET_SET(bd->flag_use, c);
}
}
@@ -110,8 +111,11 @@ vec4_live_variables::setup_def_use()
}
}
if (inst->writes_flag()) {
- if (!BITSET_TEST(bd->flag_use, 0)) {
- BITSET_SET(bd->flag_def, 0);
+ for (unsigned c = 0; c < 4; c++) {
+ if ((inst->dst.writemask & (1 << c)) &&
+ !BITSET_TEST(bd->flag_use, c)) {
+ BITSET_SET(bd->flag_def, c);
+ }
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index e79a9f3b5b9..1fb1773f856 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -193,7 +193,9 @@ vec4_visitor::nir_emit_if(nir_if *if_stmt)
vec4_instruction *inst = emit(MOV(dst_null_d(), condition));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
- emit(IF(BRW_PREDICATE_NORMAL));
+ /* We can just predicate based on the X channel, as the condition only
+ * goes on its own line */
+ emit(IF(BRW_PREDICATE_ALIGN16_REPLICATE_X));
nir_emit_cf_list(&if_stmt->then_list);
@@ -806,6 +808,16 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
+ case nir_intrinsic_shader_clock: {
+ /* We cannot do anything if there is an event, so ignore it for now */
+ const src_reg shader_clock = get_timestamp();
+ const enum brw_reg_type type = brw_type_for_base_type(glsl_type::uvec2_type);
+
+ dest = get_nir_dest(instr->dest, type);
+ emit(MOV(dest, shader_clock));
+ break;
+ }
+
default:
unreachable("Unknown intrinsic");
}
@@ -1144,26 +1156,10 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_ball_iequal3:
case nir_op_ball_fequal4:
case nir_op_ball_iequal4: {
- dst_reg tmp = dst_reg(this, glsl_type::bool_type);
-
- switch (instr->op) {
- case nir_op_ball_fequal2:
- case nir_op_ball_iequal2:
- tmp.writemask = WRITEMASK_XY;
- break;
- case nir_op_ball_fequal3:
- case nir_op_ball_iequal3:
- tmp.writemask = WRITEMASK_XYZ;
- break;
- case nir_op_ball_fequal4:
- case nir_op_ball_iequal4:
- tmp.writemask = WRITEMASK_XYZW;
- break;
- default:
- unreachable("not reached");
- }
+ unsigned swiz =
+ brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]);
- emit(CMP(tmp, op[0], op[1],
+ emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz),
brw_conditional_for_nir_comparison(instr->op)));
emit(MOV(dst, src_reg(0)));
inst = emit(MOV(dst, src_reg(~0)));
@@ -1177,26 +1173,10 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_bany_inequal3:
case nir_op_bany_fnequal4:
case nir_op_bany_inequal4: {
- dst_reg tmp = dst_reg(this, glsl_type::bool_type);
+ unsigned swiz =
+ brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]);
- switch (instr->op) {
- case nir_op_bany_fnequal2:
- case nir_op_bany_inequal2:
- tmp.writemask = WRITEMASK_XY;
- break;
- case nir_op_bany_fnequal3:
- case nir_op_bany_inequal3:
- tmp.writemask = WRITEMASK_XYZ;
- break;
- case nir_op_bany_fnequal4:
- case nir_op_bany_inequal4:
- tmp.writemask = WRITEMASK_XYZW;
- break;
- default:
- unreachable("not reached");
- }
-
- emit(CMP(tmp, op[0], op[1],
+ emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz),
brw_conditional_for_nir_comparison(instr->op)));
emit(MOV(dst, src_reg(0)));
@@ -1321,26 +1301,18 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_ufind_msb:
case nir_op_ifind_msb: {
- src_reg temp = src_reg(this, glsl_type::uint_type);
-
- inst = emit(FBH(dst_reg(temp), op[0]));
- inst->dst.writemask = WRITEMASK_XYZW;
+ emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));
/* FBH counts from the MSB side, while GLSL's findMSB() wants the count
* from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
* subtract the result from 31 to convert the MSB count into an LSB count.
*/
+ src_reg src(dst);
+ emit(CMP(dst_null_d(), src, src_reg(-1), BRW_CONDITIONAL_NZ));
- /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
- temp.swizzle = BRW_SWIZZLE_NOOP;
- emit(MOV(dst, temp));
-
- src_reg src_tmp = src_reg(dst);
- emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ));
-
- src_tmp.negate = true;
- inst = emit(ADD(dst, src_tmp, src_reg(31)));
+ inst = emit(ADD(dst, src, src_reg(31)));
inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->src[0].negate = true;
break;
}
@@ -1461,11 +1433,11 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_bany2:
case nir_op_bany3:
case nir_op_bany4: {
- dst_reg tmp = dst_reg(this, glsl_type::bool_type);
- tmp.writemask = brw_writemask_for_size(nir_op_infos[instr->op].input_sizes[0]);
-
- emit(CMP(tmp, op[0], src_reg(0), BRW_CONDITIONAL_NZ));
+ unsigned swiz =
+ brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]);
+ emit(CMP(dst_null_d(), swizzle(op[0], swiz), src_reg(0),
+ BRW_CONDITIONAL_NZ));
emit(MOV(dst, src_reg(0)));
inst = emit(MOV(dst, src_reg(~0)));
inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 6d155285820..92b089d7ff6 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -883,6 +883,18 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
uint32_t sampler,
src_reg sampler_reg)
{
+ /* The sampler can only meaningfully compute LOD for fragment shader
+ * messages. For all other stages, we change the opcode to TXL and hardcode
+ * the LOD to 0.
+ *
+ * textureQueryLevels() is implemented in terms of TXS so we need to pass a
+ * valid LOD argument.
+ */
+ if (op == ir_tex || op == ir_query_levels) {
+ assert(lod.file == BAD_FILE);
+ lod = src_reg(0.0f);
+ }
+
enum opcode opcode;
switch (op) {
case ir_tex: opcode = SHADER_OPCODE_TXL; break;
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 5db4b3a86af..0b805b1c0c4 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -311,7 +311,8 @@ brw_vs_populate_key(struct brw_context *brw,
key->program_string_id = vp->id;
if (ctx->Transform.ClipPlanesEnabled != 0 &&
- ctx->API == API_OPENGL_COMPAT &&
+ (ctx->API == API_OPENGL_COMPAT ||
+ ctx->API == API_OPENGLES) &&
vp->program.Base.ClipDistanceArraySize == 0) {
key->nr_userclip_plane_consts =
_mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index f65258a52a5..d7473845c72 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -177,8 +177,8 @@ brw_upload_vs_abo_surfaces(struct brw_context *brw)
if (prog) {
/* BRW_NEW_VS_PROG_DATA */
- brw_upload_abo_surfaces(brw, prog, &brw->vs.base,
- &brw->vs.prog_data->base.base);
+ brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX],
+ &brw->vs.base, &brw->vs.prog_data->base.base);
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 6ebe6481c32..f88f8d59196 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1029,7 +1029,7 @@ const struct brw_tracked_state brw_cs_ubo_surfaces = {
void
brw_upload_abo_surfaces(struct brw_context *brw,
- struct gl_shader_program *prog,
+ struct gl_shader *shader,
struct brw_stage_state *stage_state,
struct brw_stage_prog_data *prog_data)
{
@@ -1037,21 +1037,22 @@ brw_upload_abo_surfaces(struct brw_context *brw,
uint32_t *surf_offsets =
&stage_state->surf_offset[prog_data->binding_table.abo_start];
- for (unsigned i = 0; i < prog->NumAtomicBuffers; i++) {
- struct gl_atomic_buffer_binding *binding =
- &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
- struct intel_buffer_object *intel_bo =
- intel_buffer_object(binding->BufferObject);
- drm_intel_bo *bo = intel_bufferobj_buffer(
- brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
-
- brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
- binding->Offset, BRW_SURFACEFORMAT_RAW,
- bo->size - binding->Offset, 1, true);
- }
+ if (shader && shader->NumAtomicBuffers) {
+ for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
+ struct gl_atomic_buffer_binding *binding =
+ &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
+ struct intel_buffer_object *intel_bo =
+ intel_buffer_object(binding->BufferObject);
+ drm_intel_bo *bo = intel_bufferobj_buffer(
+ brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
+
+ brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
+ binding->Offset, BRW_SURFACEFORMAT_RAW,
+ bo->size - binding->Offset, 1, true);
+ }
- if (prog->NumAtomicBuffers)
brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
+ }
}
static void
@@ -1063,8 +1064,8 @@ brw_upload_wm_abo_surfaces(struct brw_context *brw)
if (prog) {
/* BRW_NEW_FS_PROG_DATA */
- brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
- &brw->wm.prog_data->base);
+ brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
+ &brw->wm.base, &brw->wm.prog_data->base);
}
}
@@ -1088,8 +1089,8 @@ brw_upload_cs_abo_surfaces(struct brw_context *brw)
if (prog) {
/* BRW_NEW_CS_PROG_DATA */
- brw_upload_abo_surfaces(brw, prog, &brw->cs.base,
- &brw->cs.prog_data->base);
+ brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
+ &brw->cs.base, &brw->cs.prog_data->base);
}
}
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index 4068f2844a2..2634e6ba6fd 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -60,6 +60,23 @@ get_attr_override(const struct brw_vue_map *vue_map, int urb_entry_read_offset,
/* Find the VUE slot for this attribute. */
int slot = vue_map->varying_to_slot[fs_attr];
+ /* Viewport and Layer are stored in the VUE header. We need to override
+ * them to zero if earlier stages didn't write them, as GL requires that
+ * they read back as zero when not explicitly set.
+ */
+ if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) {
+ unsigned override =
+ ATTRIBUTE_0_OVERRIDE_X | ATTRIBUTE_0_OVERRIDE_W |
+ ATTRIBUTE_CONST_0000 << ATTRIBUTE_0_CONST_SOURCE_SHIFT;
+
+ if (!(vue_map->slots_valid & VARYING_BIT_LAYER))
+ override |= ATTRIBUTE_0_OVERRIDE_Y;
+ if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT))
+ override |= ATTRIBUTE_0_OVERRIDE_Z;
+
+ return override;
+ }
+
/* If there was only a back color written but not front, use back
* as the color instead of undefined
*/
@@ -159,14 +176,30 @@ calculate_attr_overrides(const struct brw_context *brw,
uint16_t *attr_overrides,
uint32_t *point_sprite_enables,
uint32_t *flat_enables,
- uint32_t *urb_entry_read_length)
+ uint32_t *urb_entry_read_length,
+ uint32_t *urb_entry_read_offset)
{
- const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
uint32_t max_source_attr = 0;
*point_sprite_enables = 0;
*flat_enables = 0;
+ *urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
+
+ /* BRW_NEW_FRAGMENT_PROGRAM
+ *
+ * If the fragment shader reads VARYING_SLOT_LAYER, then we need to pass in
+ * the full vertex header. Otherwise, we can program the SF to start
+ * reading at an offset of 1 (2 varying slots) to skip unnecessary data:
+ * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
+ * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+
+ */
+
+ bool fs_needs_vue_header = brw->fragment_program->Base.InputsRead &
+ (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+
+ *urb_entry_read_offset = fs_needs_vue_header ? 0 : 1;
+
/* _NEW_LIGHT */
bool shade_model_flat = brw->ctx.Light.ShadeModel == GL_FLAT;
@@ -228,7 +261,7 @@ calculate_attr_overrides(const struct brw_context *brw,
/* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
uint16_t attr_override = point_sprite ? 0 :
get_attr_override(&brw->vue_map_geom_out,
- urb_entry_read_offset, attr,
+ *urb_entry_read_offset, attr,
brw->ctx.VertexProgram._TwoSideEnabled,
&max_source_attr);
@@ -276,7 +309,6 @@ upload_sf_state(struct brw_context *brw)
bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
- const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
float point_size;
uint16_t attr_overrides[16];
uint32_t point_sprite_origin;
@@ -411,8 +443,10 @@ upload_sf_state(struct brw_context *brw)
* _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
*/
uint32_t urb_entry_read_length;
+ uint32_t urb_entry_read_offset;
calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables,
- &flat_enables, &urb_entry_read_length);
+ &flat_enables, &urb_entry_read_length,
+ &urb_entry_read_offset);
dw1 |= (urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c
index 6aeb0cb243f..2d7c04f4ad2 100644
--- a/src/mesa/drivers/dri/i965/gen7_cs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c
@@ -285,3 +285,34 @@ const struct brw_tracked_state gen7_cs_push_constants = {
},
.emit = gen7_upload_cs_push_constants,
};
+
+/**
+ * Creates a new CS constant buffer reflecting the current CS program's
+ * constants, if needed by the CS program.
+ */
+static void
+brw_upload_cs_pull_constants(struct brw_context *brw)
+{
+ struct brw_stage_state *stage_state = &brw->cs.base;
+
+ /* BRW_NEW_COMPUTE_PROGRAM */
+ struct brw_compute_program *cp =
+ (struct brw_compute_program *) brw->compute_program;
+
+ /* BRW_NEW_CS_PROG_DATA */
+ const struct brw_stage_prog_data *prog_data = &brw->cs.prog_data->base;
+
+ /* _NEW_PROGRAM_CONSTANTS */
+ brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &cp->program.Base,
+ stage_state, prog_data, true);
+}
+
+const struct brw_tracked_state brw_cs_pull_constants = {
+ .dirty = {
+ .mesa = _NEW_PROGRAM_CONSTANTS,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_COMPUTE_PROGRAM |
+ BRW_NEW_CS_PROG_DATA,
+ },
+ .emit = brw_upload_cs_pull_constants,
+};
diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c
index 698b3d491bc..b1f13aceba4 100644
--- a/src/mesa/drivers/dri/i965/gen7_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c
@@ -40,7 +40,6 @@ upload_sbe_state(struct brw_context *brw)
uint32_t point_sprite_enables;
uint32_t flat_enables;
int i;
- const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
uint16_t attr_overrides[16];
/* _NEW_BUFFERS */
bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
@@ -65,8 +64,10 @@ upload_sbe_state(struct brw_context *brw)
* _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
*/
uint32_t urb_entry_read_length;
+ uint32_t urb_entry_read_offset;
calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables,
- &flat_enables, &urb_entry_read_length);
+ &flat_enables, &urb_entry_read_length,
+ &urb_entry_read_offset);
dw1 |= urb_entry_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
urb_entry_read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index 8f0507413a7..10e433b1d59 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -95,6 +95,11 @@ gen8_upload_ps_extra(struct brw_context *brw,
!brw_color_buffer_write_enabled(brw))
dw1 |= GEN8_PSX_SHADER_HAS_UAV;
+ if (prog_data->computed_stencil) {
+ assert(brw->gen >= 9);
+ dw1 |= GEN9_PSX_SHADER_COMPUTES_STENCIL;
+ }
+
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2));
OUT_BATCH(dw1);
diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c
index 6b655ee493e..8b6f31f3be6 100644
--- a/src/mesa/drivers/dri/i965/gen8_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c
@@ -37,6 +37,7 @@ upload_sbe(struct brw_context *brw)
uint32_t num_outputs = brw->wm.prog_data->num_varying_inputs;
uint16_t attr_overrides[VARYING_SLOT_MAX];
uint32_t urb_entry_read_length;
+ uint32_t urb_entry_read_offset;
uint32_t point_sprite_enables;
uint32_t flat_enables;
int sbe_cmd_length;
@@ -66,7 +67,8 @@ upload_sbe(struct brw_context *brw)
calculate_attr_overrides(brw, attr_overrides,
&point_sprite_enables,
&flat_enables,
- &urb_entry_read_length);
+ &urb_entry_read_length,
+ &urb_entry_read_offset);
/* Typically, the URB entry read length and offset should be programmed in
* 3DSTATE_VS and 3DSTATE_GS; SBE inherits it from the last active stage
@@ -78,7 +80,7 @@ upload_sbe(struct brw_context *brw)
*/
dw1 |=
urb_entry_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
- BRW_SF_URB_ENTRY_READ_OFFSET << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT |
+ urb_entry_read_offset << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT |
GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH |
GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET;
diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index 18b86652fd2..140a6544983 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -183,6 +183,14 @@ gen8_emit_buffer_surface_state(struct brw_context *brw,
}
static void
+gen8_emit_fast_clear_color(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ uint32_t *surf)
+{
+ surf[7] |= mt->fast_clear_color_value;
+}
+
+static void
gen8_emit_texture_surface_state(struct brw_context *brw,
struct intel_mipmap_tree *mt,
GLenum target,
@@ -284,11 +292,10 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
SET_FIELD((aux_mt->pitch / tile_w) - 1,
GEN8_SURFACE_AUX_PITCH) |
aux_mode;
- } else {
- surf[6] = 0;
}
- surf[7] = mt->fast_clear_color_value |
+ gen8_emit_fast_clear_color(brw, mt, surf);
+ surf[7] |=
SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0)), GEN7_SURFACE_SCS_R) |
SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1)), GEN7_SURFACE_SCS_G) |
SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2)), GEN7_SURFACE_SCS_B) |
@@ -302,11 +309,7 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
aux_mt->bo, 0,
I915_GEM_DOMAIN_SAMPLER,
(rw ? I915_GEM_DOMAIN_SAMPLER : 0));
- } else {
- surf[10] = 0;
- surf[11] = 0;
}
- surf[12] = 0;
/* Emit relocation to surface contents */
drm_intel_bo_emit_reloc(brw->batch.bo,
@@ -514,15 +517,13 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
SET_FIELD((aux_mt->pitch / tile_w) - 1,
GEN8_SURFACE_AUX_PITCH) |
aux_mode;
- } else {
- surf[6] = 0;
}
- surf[7] = mt->fast_clear_color_value |
- SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
- SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
- SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
- SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
+ gen8_emit_fast_clear_color(brw, mt, surf);
+ surf[7] |= SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
+ SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
+ SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
+ SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
assert(mt->offset % mt->cpp == 0);
*((uint64_t *) &surf[8]) = mt->bo->offset64 + mt->offset; /* reloc */
@@ -533,11 +534,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
offset + 10 * 4,
aux_mt->bo, 0,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
- } else {
- surf[10] = 0;
- surf[11] = 0;
}
- surf[12] = 0;
drm_intel_bo_emit_reloc(brw->batch.bo,
offset + 8 * 4,
diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c
index f7c02c8a38d..c00d2e786f3 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.c
+++ b/src/mesa/drivers/dri/i965/intel_debug.c
@@ -73,6 +73,8 @@ static const struct debug_control debug_control[] = {
{ "spill_fs", DEBUG_SPILL_FS },
{ "spill_vec4", DEBUG_SPILL_VEC4 },
{ "cs", DEBUG_CS },
+ { "hex", DEBUG_HEX },
+ { "nocompact", DEBUG_NO_COMPACTION },
{ NULL, 0 }
};
diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h
index 0a6e1b90b98..98bd7e93956 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.h
+++ b/src/mesa/drivers/dri/i965/intel_debug.h
@@ -67,6 +67,8 @@ extern uint64_t INTEL_DEBUG;
#define DEBUG_SPILL_FS (1ull << 31)
#define DEBUG_SPILL_VEC4 (1ull << 32)
#define DEBUG_CS (1ull << 33)
+#define DEBUG_HEX (1ull << 34)
+#define DEBUG_NO_COMPACTION (1ull << 35)
#ifdef HAVE_ANDROID_PLATFORM
#define LOG_TAG "INTEL-MESA"
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index 3f9afd16c71..4643ea3e87b 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -287,6 +287,7 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.ARB_conditional_render_inverted = true;
ctx->Extensions.ARB_draw_buffers_blend = true;
ctx->Extensions.ARB_ES3_compatibility = true;
+ ctx->Extensions.ARB_fragment_layer_viewport = true;
ctx->Extensions.ARB_sample_shading = true;
ctx->Extensions.ARB_shading_language_420pack = true;
ctx->Extensions.ARB_shading_language_packing = true;
@@ -324,6 +325,7 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.ARB_framebuffer_no_attachments = true;
ctx->Extensions.ARB_gpu_shader5 = true;
ctx->Extensions.ARB_shader_atomic_counters = true;
+ ctx->Extensions.ARB_shader_clock = true;
ctx->Extensions.ARB_shader_image_load_store = true;
ctx->Extensions.ARB_shader_image_size = true;
ctx->Extensions.ARB_shader_texture_image_samples = true;
@@ -358,6 +360,7 @@ intelInitExtensions(struct gl_context *ctx)
if (brw->gen >= 9) {
ctx->Extensions.KHR_texture_compression_astc_ldr = true;
ctx->Extensions.KHR_texture_compression_astc_hdr = true;
+ ctx->Extensions.ARB_shader_stencil_export = true;
}
if (ctx->API == API_OPENGL_CORE)
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index 5a6b0dd1ec5..3a4a53a07e6 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -343,19 +343,15 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx,
if (image->planar_format && image->planar_format->nplanes > 1) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glEGLImageTargetRenderbufferStorage(planar buffers are not "
- "supported as render targets.");
+ "supported as render targets.)");
return;
}
/* __DRIimage is opaque to the core so it has to be checked here */
- switch (image->format) {
- case MESA_FORMAT_R8G8B8A8_UNORM:
+ if (!brw->format_supported_as_render_target[image->format]) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glEGLImageTargetRenderbufferStorage(unsupported image format");
+ "glEGLImageTargetRenderbufferStorage(unsupported image format)");
return;
- break;
- default:
- break;
}
irb = intel_renderbuffer(rb);
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index 590c45d93ea..fb95fb629ad 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1357,7 +1357,16 @@ set_max_gl_versions(struct intel_screen *screen)
}
}
-static int
+/**
+ * Return the revision (generally the revid field of the PCI header) of the
+ * graphics device.
+ *
+ * XXX: This function is useful to keep around even if it is not currently in
+ * use. It is necessary for new platforms and revision specific workarounds or
+ * features. Please don't remove it so that we know it at least continues to
+ * build.
+ */
+static __attribute__((__unused__)) int
brw_get_revision(int fd)
{
struct drm_i915_getparam gp;
@@ -1416,8 +1425,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
return false;
intelScreen->deviceID = drm_intel_bufmgr_gem_get_devid(intelScreen->bufmgr);
- intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID,
- brw_get_revision(psp->fd));
+ intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID);
if (!intelScreen->devinfo)
return false;
diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
index 5f80f90a91d..62d39f70ec4 100644
--- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
@@ -84,7 +84,7 @@ instruction(bblock_t *block, int num)
static bool
cmod_propagation(fs_visitor *v)
{
- const bool print = false;
+ const bool print = getenv("TEST_DEBUG");
if (print) {
fprintf(stderr, "= Before =\n");
diff --git a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
new file mode 100644
index 00000000000..9aa2fcc7907
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
@@ -0,0 +1,822 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Based on test_fs_cmod_propagation.cpp
+ */
+
+#include <gtest/gtest.h>
+#include "brw_vec4.h"
+#include "brw_vec4_builder.h"
+#include "brw_cfg.h"
+#include "program/program.h"
+
+using namespace brw;
+
+class cmod_propagation_test : public ::testing::Test {
+ virtual void SetUp();
+
+public:
+ struct brw_compiler *compiler;
+ struct brw_device_info *devinfo;
+ struct gl_context *ctx;
+ struct gl_shader_program *shader_prog;
+ struct brw_vertex_program *vp;
+ vec4_visitor *v;
+};
+
+class cmod_propagation_vec4_visitor : public vec4_visitor
+{
+public:
+ cmod_propagation_vec4_visitor(struct brw_compiler *compiler,
+ nir_shader *shader)
+ : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL,
+ false, -1) {}
+
+protected:
+ /* Dummy implementation for pure virtual methods */
+ virtual dst_reg *make_reg_for_system_value(int location,
+ const glsl_type *type)
+ {
+ unreachable("Not reached");
+ }
+
+ virtual void setup_payload()
+ {
+ unreachable("Not reached");
+ }
+
+ virtual void emit_prolog()
+ {
+ unreachable("Not reached");
+ }
+
+ virtual void emit_program_code()
+ {
+ unreachable("Not reached");
+ }
+
+ virtual void emit_thread_end()
+ {
+ unreachable("Not reached");
+ }
+
+ virtual void emit_urb_write_header(int mrf)
+ {
+ unreachable("Not reached");
+ }
+
+ virtual vec4_instruction *emit_urb_write_opcode(bool complete)
+ {
+ unreachable("Not reached");
+ }
+};
+
+
+void cmod_propagation_test::SetUp()
+{
+ ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
+ compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
+ devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
+ compiler->devinfo = devinfo;
+
+ vp = ralloc(NULL, struct brw_vertex_program);
+
+ nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL);
+
+ v = new cmod_propagation_vec4_visitor(compiler, shader);
+
+ _mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0);
+
+ devinfo->gen = 4;
+}
+
+static vec4_instruction *
+instruction(bblock_t *block, int num)
+{
+ vec4_instruction *inst = (vec4_instruction *)block->start();
+ for (int i = 0; i < num; i++) {
+ inst = (vec4_instruction *)inst->next;
+ }
+ return inst;
+}
+
+static bool
+cmod_propagation(vec4_visitor *v)
+{
+ const bool print = getenv("TEST_DEBUG");
+
+ if (print) {
+ fprintf(stderr, "= Before =\n");
+ v->dump_instructions();
+ }
+
+ bool ret = v->opt_cmod_propagation();
+
+ if (print) {
+ fprintf(stderr, "\n= After =\n");
+ v->dump_instructions();
+ }
+
+ return ret;
+}
+
+TEST_F(cmod_propagation_test, basic)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ dst_reg dest_null = bld.null_reg_f();
+ dest_null.writemask = WRITEMASK_X;
+
+ bld.ADD(dest, src0, src1);
+ bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_GE);
+
+ /* = Before =
+ *
+ * 0: add dest.x src0.xxxx src1.xxxx
+ * 1: cmp.ge.f0 null.x dest.xxxx 0.0f
+ *
+ * = After =
+ * 0: add.ge.f0 dest.x src0.xxxx src1.xxxx
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, basic_different_dst_writemask)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ dst_reg dest_null = bld.null_reg_f();
+
+ bld.ADD(dest, src0, src1);
+ bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_GE);
+
+ /* = Before =
+ *
+ * 0: add dest.x src0 src1
+ * 1: cmp.ge.f0 null.xyzw dest 0.0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, andz_one)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::int_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ src_reg one(1);
+
+ bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
+ set_condmod(BRW_CONDITIONAL_Z,
+ bld.AND(bld.null_reg_d(), src_reg(dest), one));
+
+ /* = Before =
+ * 0: cmp.l.f0 dest:F src0:F 0F
+ * 1: and.z.f0 null:D dest:D 1D
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_AND, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_EQ, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, non_cmod_instruction)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::uint_type);
+ src_reg src0 = src_reg(v, glsl_type::uint_type);
+ src_reg zero(0u);
+ bld.FBL(dest, src0);
+ bld.CMP(bld.null_reg_ud(), src_reg(dest), zero, BRW_CONDITIONAL_GE);
+
+ /* = Before =
+ *
+ * 0: fbl dest src0
+ * 1: cmp.ge.f0 null dest 0u
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_FBL, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, intervening_flag_write)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg src2 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ bld.ADD(dest, src0, src1);
+ bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE);
+ bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_GE);
+
+ /* = Before =
+ *
+ * 0: add dest src0 src1
+ * 1: cmp.ge.f0 null src2 0.0f
+ * 2: cmp.ge.f0 null dest 0.0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(2, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(2, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, intervening_flag_read)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest0 = dst_reg(v, glsl_type::float_type);
+ dst_reg dest1 = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg src2 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ bld.ADD(dest0, src0, src1);
+ set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
+ bld.CMP(bld.null_reg_f(), src_reg(dest0), zero, BRW_CONDITIONAL_GE);
+
+ /* = Before =
+ *
+ * 0: add dest0 src0 src1
+ * 1: (+f0) sel dest1 src2 0.0f
+ * 2: cmp.ge.f0 null dest0 0.0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(2, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(2, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, intervening_dest_write)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::vec4_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg src2 = src_reg(v, glsl_type::vec2_type);
+ src_reg zero(0.0f);
+ bld.ADD(offset(dest, 2), src0, src1);
+ bld.emit(SHADER_OPCODE_TEX, dest, src2)
+ ->regs_written = 4;
+ bld.CMP(bld.null_reg_f(), offset(src_reg(dest), 2), zero, BRW_CONDITIONAL_GE);
+
+ /* = Before =
+ *
+ * 0: add dest+2 src0 src1
+ * 1: tex rlen 4 dest+0 src2
+ * 2: cmp.ge.f0 null dest+2 0.0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(2, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(2, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(SHADER_OPCODE_TEX, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, intervening_flag_read_same_value)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest0 = dst_reg(v, glsl_type::float_type);
+ dst_reg dest1 = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg src2 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ dst_reg dest_null = bld.null_reg_f();
+ dest_null.writemask = WRITEMASK_X;
+
+ set_condmod(BRW_CONDITIONAL_GE, bld.ADD(dest0, src0, src1));
+ set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
+ bld.CMP(dest_null, src_reg(dest0), zero, BRW_CONDITIONAL_GE);
+
+ /* = Before =
+ *
+ * 0: add.ge.f0 dest0 src0 src1
+ * 1: (+f0) sel dest1 src2 0.0f
+ * 2: cmp.ge.f0 null.x dest0 0.0f
+ *
+ * = After =
+ * 0: add.ge.f0 dest0 src0 src1
+ * 1: (+f0) sel dest1 src2 0.0f
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(2, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
+}
+
+TEST_F(cmod_propagation_test, negate)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ bld.ADD(dest, src0, src1);
+ src_reg tmp_src = src_reg(dest);
+ tmp_src.negate = true;
+ dst_reg dest_null = bld.null_reg_f();
+ dest_null.writemask = WRITEMASK_X;
+ bld.CMP(dest_null, tmp_src, zero, BRW_CONDITIONAL_GE);
+
+ /* = Before =
+ *
+ * 0: add dest src0 src1
+ * 1: cmp.ge.f0 null.x -dest 0.0f
+ *
+ * = After =
+ * 0: add.le.f0 dest src0 src1
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_LE, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, movnz)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ dst_reg dest_null = bld.null_reg_f();
+ dest_null.writemask = WRITEMASK_X;
+
+ bld.CMP(dest, src0, src1, BRW_CONDITIONAL_L);
+ set_condmod(BRW_CONDITIONAL_NZ,
+ bld.MOV(dest_null, src_reg(dest)));
+
+ /* = Before =
+ *
+ * 0: cmp.l.f0 dest:F src0:F src1:F
+ * 1: mov.nz.f0 null.x dest:F
+ *
+ * = After =
+ * 0: cmp.l.f0 dest src0:F src1:F
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, different_types_cmod_with_zero)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::int_type);
+ src_reg src0 = src_reg(v, glsl_type::int_type);
+ src_reg src1 = src_reg(v, glsl_type::int_type);
+ src_reg zero(0.0f);
+ bld.ADD(dest, src0, src1);
+ bld.CMP(bld.null_reg_f(), retype(src_reg(dest), BRW_REGISTER_TYPE_F), zero,
+ BRW_CONDITIONAL_GE);
+
+ /* = Before =
+ *
+ * 0: add dest:D src0:D src1:D
+ * 1: cmp.ge.f0 null:F dest:F 0.0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, andnz_non_one)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::int_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ src_reg nonone(38);
+
+ bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
+ set_condmod(BRW_CONDITIONAL_NZ,
+ bld.AND(bld.null_reg_d(), src_reg(dest), nonone));
+
+ /* = Before =
+ * 0: cmp.l.f0 dest:F src0:F 0F
+ * 1: and.nz.f0 null:D dest:D 38D
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_AND, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod);
+}
+
+/* Note that basic is using glsl_type:float types, while this one is using
+ * glsl_type::vec4 */
+TEST_F(cmod_propagation_test, basic_vec4)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::vec4_type);
+ src_reg src0 = src_reg(v, glsl_type::vec4_type);
+ src_reg src1 = src_reg(v, glsl_type::vec4_type);
+ src_reg zero(0.0f);
+
+ bld.MUL(dest, src0, src1);
+ bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_NZ);
+
+ /* = Before =
+ * 0: mul dest.xyzw src0.xyzw src1.xyzw
+ * 1: cmp.nz.f0.0 null.xyzw dest.xyzw 0.0f
+ *
+ * = After =
+ * 0: mul.nz.f0.0 dest.xyzw src0.xyzw src1.xyzw
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, basic_vec4_different_dst_writemask)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::vec4_type);
+ dest.writemask = WRITEMASK_X;
+ src_reg src0 = src_reg(v, glsl_type::vec4_type);
+ src_reg src1 = src_reg(v, glsl_type::vec4_type);
+ src_reg zero(0.0f);
+ dst_reg dest_null = bld.null_reg_f();
+
+ bld.MUL(dest, src0, src1);
+ bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_NZ);
+
+ /* = Before =
+ * 0: mul dest.x src0 src1
+ * 1: cmp.nz.f0.0 null dest 0.0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, mad_one_component_vec4)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::vec4_type);
+ dest.writemask = WRITEMASK_X;
+ src_reg src0 = src_reg(v, glsl_type::vec4_type);
+ src_reg src1 = src_reg(v, glsl_type::vec4_type);
+ src_reg src2 = src_reg(v, glsl_type::vec4_type);
+ src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX;
+ src2.negate = true;
+ src_reg zero(0.0f);
+ src_reg tmp(dest);
+ tmp.swizzle = BRW_SWIZZLE_XXXX;
+ dst_reg dest_null = bld.null_reg_f();
+ dest_null.writemask = WRITEMASK_X;
+
+ bld.MAD(dest, src0, src1, src2);
+ bld.CMP(dest_null, tmp, zero, BRW_CONDITIONAL_L);
+
+ /* = Before =
+ *
+ * 0: mad dest.x:F src0.xxxx:F src10.xxxx:F -src2.xxxx:F
+ * 1: cmp.l.f0.0 null.x:F dest.xxxx:F 0.0f
+ *
+ * = After =
+ * 0: mad.l.f0 dest.x:F src0.xxxx:F src10.xxxx:F -src2.xxxx:F
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_MAD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, mad_more_one_component_vec4)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::vec4_type);
+ dest.writemask = WRITEMASK_XW;
+ src_reg src0 = src_reg(v, glsl_type::vec4_type);
+ src_reg src1 = src_reg(v, glsl_type::vec4_type);
+ src_reg src2 = src_reg(v, glsl_type::vec4_type);
+ src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX;
+ src2.negate = true;
+ src_reg zero(0.0f);
+ src_reg tmp(dest);
+ tmp.swizzle = BRW_SWIZZLE_XXXX;
+ dst_reg dest_null = bld.null_reg_f();
+
+ bld.MAD(dest, src0, src1, src2);
+ bld.CMP(dest_null, tmp, zero, BRW_CONDITIONAL_L);
+
+ /* = Before =
+ *
+ * 0: mad dest.xw:F src0.xxxx:F src10.xxxx:F -src2.xxxx:F
+ * 1: cmp.l.f0.0 null:F dest.xxxx:F zeroF
+ *
+ * = After =
+ * (No changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_MAD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, cmp_mov_vec4)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::ivec4_type);
+ dest.writemask = WRITEMASK_X;
+ src_reg src0 = src_reg(v, glsl_type::ivec4_type);
+ src0.swizzle = BRW_SWIZZLE_XXXX;
+ src0.file = UNIFORM;
+ src_reg nonone = retype(src_reg(16), BRW_REGISTER_TYPE_D);
+ src_reg mov_src = src_reg(dest);
+ mov_src.swizzle = BRW_SWIZZLE_XXXX;
+ dst_reg dest_null = bld.null_reg_d();
+ dest_null.writemask = WRITEMASK_X;
+
+ bld.CMP(dest, src0, nonone, BRW_CONDITIONAL_GE);
+ set_condmod(BRW_CONDITIONAL_NZ,
+ bld.MOV(dest_null, mov_src));
+
+ /* = Before =
+ *
+ * 0: cmp.ge.f0 dest.x:D u.xxxx:D 16D
+ * 1: mov.nz.f0 null.x:D dest.xxxx:D
+ *
+ * = After =
+ * 0: cmp.ge.f0 dest.x:D u.xxxx:D 16D
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, mul_cmp_different_channels_vec4)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::vec4_type);
+ src_reg src0 = src_reg(v, glsl_type::vec4_type);
+ src_reg src1 = src_reg(v, glsl_type::vec4_type);
+ src_reg zero(0.0f);
+ src_reg cmp_src = src_reg(dest);
+ cmp_src.swizzle = BRW_SWIZZLE4(0,1,3,2);
+
+ bld.MUL(dest, src0, src1);
+ bld.CMP(bld.null_reg_f(), cmp_src, zero, BRW_CONDITIONAL_NZ);
+
+ /* = Before =
+ * 0: mul dest src0 src1
+ * 1: cmp.nz.f0.0 null dest.xywz 0.0f
+ *
+ * = After =
+ * (No changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod);
+}
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c b/src/mesa/drivers/dri/nouveau/nouveau_context.c
index a049d9b8de7..cb854b81933 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_context.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c
@@ -188,7 +188,7 @@ nouveau_context_init(struct gl_context *ctx, gl_api api,
ctx->Extensions.EXT_blend_minmax = true;
ctx->Extensions.EXT_texture_filter_anisotropic = true;
ctx->Extensions.NV_texture_env_combine4 = true;
- ctx->Const.MaxColorAttachments = 1;
+ ctx->Const.MaxDrawBuffers = ctx->Const.MaxColorAttachments = 1;
/* This effectively disables 3D textures */
ctx->Const.Max3DTextureLevels = 1;