summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/Makefile.sources3
-rw-r--r--src/mesa/drivers/common/driverfuncs.c3
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.am7
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.sources1
-rw-r--r--src/mesa/drivers/dri/i965/brw_cfg.cpp4
-rw-r--r--src/mesa/drivers/dri/i965/brw_cfg.h4
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.h9
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h10
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h24
-rw-r--r--src/mesa/drivers/dri/i965/brw_device_info.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_device_info.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_disasm.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_compact.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c22
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp125
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h5
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_builder.h15
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp22
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_cse.cpp3
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_generator.cpp87
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp116
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp51
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_visitor.cpp20
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs_surface_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_ir_fs.h7
-rw-r--r--src/mesa/drivers/dri/i965/brw_ir_vec4.h24
-rw-r--r--src/mesa/drivers/dri/i965/brw_nir.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_reg.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp339
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.cpp20
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.h6
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h6
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_cache.c19
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp115
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h122
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp157
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp31
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_generator.cpp485
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp14
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp14
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_nir.cpp84
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp12
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_surface_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c37
-rw-r--r--src/mesa/drivers/dri/i965/gen6_sf_state.c44
-rw-r--r--src/mesa/drivers/dri/i965/gen7_cs_state.c31
-rw-r--r--src/mesa/drivers/dri/i965/gen7_sf_state.c5
-rw-r--r--src/mesa/drivers/dri/i965/gen8_ps_state.c5
-rw-r--r--src/mesa/drivers/dri/i965/gen8_sf_state.c6
-rw-r--r--src/mesa/drivers/dri/i965/gen8_surface_state.c33
-rw-r--r--src/mesa/drivers/dri/i965/intel_debug.c2
-rw-r--r--src/mesa/drivers/dri/i965/intel_debug.h2
-rw-r--r--src/mesa/drivers/dri/i965/intel_extensions.c3
-rw-r--r--src/mesa/drivers/dri/i965/intel_fbo.c10
-rw-r--r--src/mesa/drivers/dri/i965/intel_screen.c14
-rw-r--r--src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp822
-rw-r--r--src/mesa/drivers/dri/nouveau/nouveau_context.c2
-rw-r--r--src/mesa/main/api_validate.c78
-rw-r--r--src/mesa/main/extensions.c3
-rw-r--r--src/mesa/main/get_hash_params.py1
-rw-r--r--src/mesa/main/lines.c7
-rw-r--r--src/mesa/main/mtypes.h7
-rw-r--r--src/mesa/main/pipelineobj.c44
-rw-r--r--src/mesa/main/program_resource.c21
-rw-r--r--src/mesa/main/rastpos.c441
-rw-r--r--src/mesa/main/rastpos.h3
-rw-r--r--src/mesa/main/shader_query.cpp82
-rw-r--r--src/mesa/main/shaderapi.c14
-rw-r--r--src/mesa/main/texcompress.c88
-rw-r--r--src/mesa/main/texgetimage.c3
-rw-r--r--src/mesa/main/vdpau.c5
-rw-r--r--src/mesa/state_tracker/st_atom_constbuf.c3
-rw-r--r--src/mesa/state_tracker/st_cb_copyimage.c582
-rw-r--r--src/mesa/state_tracker/st_cb_copyimage.h33
-rw-r--r--src/mesa/state_tracker/st_cb_rasterpos.c10
-rw-r--r--src/mesa/state_tracker/st_cb_texture.c51
-rw-r--r--src/mesa/state_tracker/st_context.c2
-rw-r--r--src/mesa/state_tracker/st_extensions.c1
-rw-r--r--src/mesa/state_tracker/st_gen_mipmap.c68
-rw-r--r--src/mesa/tnl/t_rasterpos.c478
-rw-r--r--src/mesa/vbo/vbo_context.h3
-rw-r--r--src/mesa/vbo/vbo_exec_api.c16
-rw-r--r--src/mesa/vbo/vbo_exec_array.c13
-rw-r--r--src/mesa/vbo/vbo_save_api.c15
88 files changed, 3638 insertions, 1481 deletions
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 34fb4461985..de0e330b7d1 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -345,7 +345,6 @@ TNL_FILES = \
tnl/tnl.h \
tnl/t_pipeline.c \
tnl/t_pipeline.h \
- tnl/t_rasterpos.c \
tnl/t_vb_cliptmp.h \
tnl/t_vb_fog.c \
tnl/t_vb_light.c \
@@ -424,6 +423,8 @@ STATETRACKER_FILES = \
state_tracker/st_cb_clear.h \
state_tracker/st_cb_condrender.c \
state_tracker/st_cb_condrender.h \
+ state_tracker/st_cb_copyimage.c \
+ state_tracker/st_cb_copyimage.h \
state_tracker/st_cb_drawpixels.c \
state_tracker/st_cb_drawpixels.h \
state_tracker/st_cb_drawpixels_shader.c \
diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c
index 3d1fccb3ab4..752aaf6c006 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -33,6 +33,7 @@
#include "main/mipmap.h"
#include "main/queryobj.h"
#include "main/readpix.h"
+#include "main/rastpos.h"
#include "main/renderbuffer.h"
#include "main/shaderobj.h"
#include "main/texcompress.h"
@@ -81,7 +82,7 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
/* framebuffer/image functions */
driver->Clear = _swrast_Clear;
- driver->RasterPos = _tnl_RasterPos;
+ driver->RasterPos = _mesa_RasterPos;
driver->DrawPixels = _swrast_DrawPixels;
driver->ReadPixels = _mesa_readpixels;
driver->CopyPixels = _swrast_CopyPixels;
diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am
index 04b3f9cc8ce..9d003e48bd8 100644
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -59,6 +59,7 @@ TESTS = \
test_fs_saturate_propagation \
test_eu_compact \
test_vf_float_conversions \
+ test_vec4_cmod_propagation \
test_vec4_copy_propagation \
test_vec4_register_coalesce
@@ -94,6 +95,12 @@ test_vec4_copy_propagation_LDADD = \
$(top_builddir)/src/gtest/libgtest.la \
$(TEST_LIBS)
+test_vec4_cmod_propagation_SOURCES = \
+ test_vec4_cmod_propagation.cpp
+test_vec4_cmod_propagation_LDADD = \
+ $(top_builddir)/src/gtest/libgtest.la \
+ $(TEST_LIBS)
+
test_eu_compact_SOURCES = \
test_eu_compact.c
nodist_EXTRA_test_eu_compact_SOURCES = dummy.cpp
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index ccd540dabca..ed2654ef329 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -58,6 +58,7 @@ i965_compiler_FILES = \
brw_util.c \
brw_util.h \
brw_vec4_builder.h \
+ brw_vec4_cmod_propagation.cpp \
brw_vec4_copy_propagation.cpp \
brw_vec4.cpp \
brw_vec4_cse.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index 10bcd4bafd4..5d46615bc7b 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -528,7 +528,9 @@ cfg_t::dump_domtree()
{
printf("digraph DominanceTree {\n");
foreach_block(block, this) {
- printf("\t%d -> %d\n", block->idom->num, block->num);
+ if (block->idom) {
+ printf("\t%d -> %d\n", block->idom->num, block->num);
+ }
}
printf("}\n");
}
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h
index a06b0aa1cd0..69e39e8964d 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.h
+++ b/src/mesa/drivers/dri/i965/brw_cfg.h
@@ -90,6 +90,8 @@ struct bblock_t {
struct exec_list parents;
struct exec_list children;
int num;
+
+ unsigned cycle_count;
};
static inline struct backend_instruction *
@@ -285,6 +287,8 @@ struct cfg_t {
int num_blocks;
bool idom_dirty;
+
+ unsigned cycle_count;
};
/* Note that this is implemented with a double for loop -- break will
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index d9967143d8a..e5133ef5a3d 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -338,6 +338,7 @@ struct brw_wm_prog_data {
} binding_table;
uint8_t computed_depth_mode;
+ bool computed_stencil;
bool early_fragment_tests;
bool no_8;
@@ -443,9 +444,7 @@ struct brw_vue_map {
* directly correspond to a gl_varying_slot, the value comes from
* brw_varying_slot.
*
- * For slots that are not in use, the value is BRW_VARYING_SLOT_COUNT (this
- * simplifies code that uses the value stored in slot_to_varying to
- * create a bit mask).
+ * For slots that are not in use, the value is BRW_VARYING_SLOT_PAD.
*/
signed char slot_to_varying[BRW_VARYING_SLOT_COUNT];
@@ -467,8 +466,8 @@ static inline GLuint brw_vue_slot_to_offset(GLuint slot)
* Convert a vertex output (brw_varying_slot) into a byte offset within the
* VUE.
*/
-static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map,
- GLuint varying)
+static inline
+GLuint brw_varying_to_offset(const struct brw_vue_map *vue_map, GLuint varying)
{
return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]);
}
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 4f503ae4869..c83f47bdff7 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -501,8 +501,6 @@ struct brw_cache_item {
};
-typedef void (*cache_aux_free_func)(const void *aux);
-
struct brw_cache {
struct brw_context *brw;
@@ -512,9 +510,6 @@ struct brw_cache {
uint32_t next_offset;
bool bo_used_by_gpu;
-
- /** Optional functions for freeing other pointers attached to a prog_data. */
- cache_aux_free_func aux_free[BRW_MAX_CACHE];
};
@@ -1177,7 +1172,7 @@ struct brw_context
int num_atoms[BRW_NUM_PIPELINES];
const struct brw_tracked_state render_atoms[60];
- const struct brw_tracked_state compute_atoms[8];
+ const struct brw_tracked_state compute_atoms[9];
/* If (INTEL_DEBUG & DEBUG_BATCH) */
struct {
@@ -1463,7 +1458,7 @@ void brw_upload_ubo_surfaces(struct brw_context *brw,
struct brw_stage_prog_data *prog_data,
bool dword_pitch);
void brw_upload_abo_surfaces(struct brw_context *brw,
- struct gl_shader_program *prog,
+ struct gl_shader *shader,
struct brw_stage_state *stage_state,
struct brw_stage_prog_data *prog_data);
void brw_upload_image_surfaces(struct brw_context *brw,
@@ -1680,6 +1675,7 @@ struct opcode_desc {
extern const struct opcode_desc opcode_descs[128];
extern const char * const conditional_modifier[16];
+extern const char *const pred_ctrl_align16[16];
void
brw_emit_depthbuffer(struct brw_context *brw);
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 169d092f90e..754da9fc3da 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -913,20 +913,15 @@ enum opcode {
/**
* Same as FS_OPCODE_FB_WRITE but expects its arguments separately as
- * individual sources instead of as a single payload blob:
- *
- * Source 0: [required] Color 0.
- * Source 1: [optional] Color 1 (for dual source blend messages).
- * Source 2: [optional] Src0 Alpha.
- * Source 3: [optional] Source Depth (gl_FragDepth)
- * Source 4: [optional (gen4-5)] Destination Depth passthrough from thread
- * Source 5: [optional] Sample Mask (gl_SampleMask).
- * Source 6: [required] Number of color components (as a UD immediate).
+ * individual sources instead of as a single payload blob. The
+ * position/ordering of the arguments are defined by the enum
+ * fb_write_logical_srcs.
*/
FS_OPCODE_FB_WRITE_LOGICAL,
FS_OPCODE_BLORP_FB_WRITE,
FS_OPCODE_REP_FB_WRITE,
+ FS_OPCODE_PACK_STENCIL_REF,
SHADER_OPCODE_RCP,
SHADER_OPCODE_RSQ,
SHADER_OPCODE_SQRT,
@@ -1332,6 +1327,17 @@ enum brw_urb_write_flags {
BRW_URB_WRITE_ALLOCATE | BRW_URB_WRITE_COMPLETE,
};
+enum fb_write_logical_srcs {
+ FB_WRITE_LOGICAL_SRC_COLOR0, /* REQUIRED */
+ FB_WRITE_LOGICAL_SRC_COLOR1, /* for dual source blend messages */
+ FB_WRITE_LOGICAL_SRC_SRC0_ALPHA,
+ FB_WRITE_LOGICAL_SRC_SRC_DEPTH, /* gl_FragDepth */
+ FB_WRITE_LOGICAL_SRC_DST_DEPTH, /* GEN4-5: passthrough from thread */
+ FB_WRITE_LOGICAL_SRC_SRC_STENCIL, /* gl_FragStencilRefARB */
+ FB_WRITE_LOGICAL_SRC_OMASK, /* Sample Mask (gl_SampleMask) */
+ FB_WRITE_LOGICAL_SRC_COMPONENTS, /* REQUIRED */
+};
+
#ifdef __cplusplus
/**
* Allow brw_urb_write_flags enums to be ORed together.
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c
index 65172490da3..6372fb5c55f 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -311,7 +311,7 @@ static const struct brw_device_info brw_device_info_chv = {
.max_gs_threads = 336, \
.max_hs_threads = 336, \
.max_ds_threads = 336, \
- .max_wm_threads = 64 * 6, \
+ .max_wm_threads = 64 * 9, \
.max_cs_threads = 56, \
.urb = { \
.size = 384, \
@@ -335,6 +335,10 @@ static const struct brw_device_info brw_device_info_skl_gt3 = {
GEN9_FEATURES, .gt = 3,
};
+static const struct brw_device_info brw_device_info_skl_gt4 = {
+ GEN9_FEATURES, .gt = 4,
+};
+
static const struct brw_device_info brw_device_info_bxt = {
GEN9_FEATURES,
.is_broxton = 1,
@@ -359,7 +363,7 @@ static const struct brw_device_info brw_device_info_bxt = {
};
const struct brw_device_info *
-brw_get_device_info(int devid, int revision)
+brw_get_device_info(int devid)
{
const struct brw_device_info *devinfo;
switch (devid) {
diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h b/src/mesa/drivers/dri/i965/brw_device_info.h
index 7bab5716b43..6f4a250e874 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.h
+++ b/src/mesa/drivers/dri/i965/brw_device_info.h
@@ -86,5 +86,5 @@ struct brw_device_info
/** @} */
};
-const struct brw_device_info *brw_get_device_info(int devid, int revision);
+const struct brw_device_info *brw_get_device_info(int devid);
const char *brw_get_device_name(int devid);
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index db23a187a93..df747107188 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -252,7 +252,7 @@ static const char *const pred_inv[2] = {
[1] = "-"
};
-static const char *const pred_ctrl_align16[16] = {
+const char *const pred_ctrl_align16[16] = {
[1] = "",
[2] = ".x",
[3] = ".y",
@@ -726,7 +726,7 @@ reg(FILE *file, unsigned _reg_file, unsigned _reg_nr)
switch (_reg_nr & 0xf0) {
case BRW_ARF_NULL:
string(file, "null");
- return -1;
+ break;
case BRW_ARF_ADDRESS:
format(file, "a%d", _reg_nr & 0x0f);
break;
@@ -908,7 +908,6 @@ src_ia1(FILE *file,
unsigned _addr_subreg_nr,
unsigned _negate,
unsigned __abs,
- unsigned _addr_mode,
unsigned _horiz_stride, unsigned _width, unsigned _vert_stride)
{
int err = 0;
@@ -1143,7 +1142,6 @@ src0(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst)
brw_inst_src0_ia_subreg_nr(devinfo, inst),
brw_inst_src0_negate(devinfo, inst),
brw_inst_src0_abs(devinfo, inst),
- brw_inst_src0_address_mode(devinfo, inst),
brw_inst_src0_hstride(devinfo, inst),
brw_inst_src0_width(devinfo, inst),
brw_inst_src0_vstride(devinfo, inst));
@@ -1200,7 +1198,6 @@ src1(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst)
brw_inst_src1_ia_subreg_nr(devinfo, inst),
brw_inst_src1_negate(devinfo, inst),
brw_inst_src1_abs(devinfo, inst),
- brw_inst_src1_address_mode(devinfo, inst),
brw_inst_src1_hstride(devinfo, inst),
brw_inst_src1_width(devinfo, inst),
brw_inst_src1_vstride(devinfo, inst));
diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c
index 1f4a3516fa2..40ec87d38f0 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.c
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@@ -261,7 +261,7 @@ void
brw_disassemble(const struct brw_device_info *devinfo,
void *assembly, int start, int end, FILE *out)
{
- bool dump_hex = false;
+ bool dump_hex = (INTEL_DEBUG & DEBUG_HEX) != 0;
for (int offset = start; offset < end;) {
brw_inst *insn = assembly + offset;
diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c
index f787ea3d4f8..07ace6bfbcb 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -1407,6 +1407,9 @@ void
brw_compact_instructions(struct brw_codegen *p, int start_offset,
int num_annotations, struct annotation *annotation)
{
+ if (unlikely(INTEL_DEBUG & DEBUG_NO_COMPACTION))
+ return;
+
const struct brw_device_info *devinfo = p->devinfo;
void *store = p->store + start_offset / 16;
/* For an instruction at byte offset 16*i before compaction, this is the
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index bf2fee9ed48..a6fbb542919 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -410,7 +410,7 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.dw1.bits.indirect_offset);
} else {
- brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.dw1.bits.indirect_offset);
+ brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.dw1.bits.indirect_offset);
}
}
@@ -2511,12 +2511,20 @@ brw_send_indirect_message(struct brw_codegen *p,
struct brw_reg desc)
{
const struct brw_device_info *devinfo = p->devinfo;
- struct brw_inst *send, *setup;
+ struct brw_inst *send;
+ int setup;
assert(desc.type == BRW_REGISTER_TYPE_UD);
+ /* We hold on to the setup instruction (the SEND in the direct case, the OR
+ * in the indirect case) by its index in the instruction store. The
+ * pointer returned by next_insn() may become invalid if emitting the SEND
+ * in the indirect case reallocs the store.
+ */
+
if (desc.file == BRW_IMMEDIATE_VALUE) {
- setup = send = next_insn(p, BRW_OPCODE_SEND);
+ setup = p->nr_insn;
+ send = next_insn(p, BRW_OPCODE_SEND);
brw_set_src1(p, send, desc);
} else {
@@ -2531,7 +2539,8 @@ brw_send_indirect_message(struct brw_codegen *p,
* caller can specify additional descriptor bits with the usual
* brw_set_*_message() helper functions.
*/
- setup = brw_OR(p, addr, desc, brw_imm_ud(0));
+ setup = p->nr_insn;
+ brw_OR(p, addr, desc, brw_imm_ud(0));
brw_pop_insn_state(p);
@@ -2543,7 +2552,7 @@ brw_send_indirect_message(struct brw_codegen *p,
brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
brw_inst_set_sfid(devinfo, send, sfid);
- return setup;
+ return &p->store[setup];
}
static struct brw_inst *
@@ -2906,11 +2915,10 @@ brw_untyped_surface_read(struct brw_codegen *p,
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
HSW_SFID_DATAPORT_DATA_CACHE_1 :
GEN7_SFID_DATAPORT_DATA_CACHE);
- const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1);
struct brw_inst *insn = brw_send_indirect_surface_message(
p, sfid, dst, payload, surface, msg_length,
brw_surface_payload_size(p, num_channels, true, true),
- align1);
+ false);
brw_set_dp_untyped_surface_read_message(
p, insn, num_channels);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 8320cd77299..e218a85a363 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -88,8 +88,6 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
case IMM:
case UNIFORM:
unreachable("Invalid destination register file");
- default:
- unreachable("Invalid register file");
}
this->writes_accumulator = false;
@@ -538,18 +536,6 @@ fs_visitor::get_timestamp(const fs_builder &bld)
*/
bld.group(4, 0).exec_all().MOV(dst, ts);
- /* The caller wants the low 32 bits of the timestamp. Since it's running
- * at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds,
- * which is plenty of time for our purposes. It is identical across the
- * EUs, but since it's tracking GPU core speed it will increment at a
- * varying rate as render P-states change.
- *
- * The caller could also check if render P-states have changed (or anything
- * else that might disrupt timing) by setting smear to 2 and checking if
- * that field is != 0.
- */
- dst.set_smear(0);
-
return dst;
}
@@ -557,6 +543,14 @@ void
fs_visitor::emit_shader_time_begin()
{
shader_start_time = get_timestamp(bld.annotate("shader time start"));
+
+ /* We want only the low 32 bits of the timestamp. Since it's running
+ * at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds,
+ * which is plenty of time for our purposes. It is identical across the
+ * EUs, but since it's tracking GPU core speed it will increment at a
+ * varying rate as render P-states change.
+ */
+ shader_start_time.set_smear(0);
}
void
@@ -570,6 +564,15 @@ fs_visitor::emit_shader_time_end()
fs_reg shader_end_time = get_timestamp(ibld);
+ /* We only use the low 32 bits of the timestamp - see
+ * emit_shader_time_begin().
+ *
+ * We could also check if render P-states have changed (or anything
+ * else that might disrupt timing) by setting smear to 2 and checking if
+ * that field is != 0.
+ */
+ shader_end_time.set_smear(0);
+
/* Check that there weren't any timestamp reset events (assuming these
* were the only two timestamp reads that happened).
*/
@@ -700,10 +703,10 @@ fs_inst::components_read(unsigned i) const
return 2;
case FS_OPCODE_FB_WRITE_LOGICAL:
- assert(src[6].file == IMM);
+ assert(src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM);
/* First/second FB write color. */
if (i < 2)
- return src[6].fixed_hw_reg.dw1.ud;
+ return src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud;
else
return 1;
@@ -841,9 +844,8 @@ fs_inst::regs_read(int arg) const
REG_SIZE);
case MRF:
unreachable("MRF registers are not allowed as sources");
- default:
- unreachable("Invalid register file");
}
+ return 0;
}
bool
@@ -1283,9 +1285,9 @@ fs_visitor::emit_sampleid_setup()
fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::int_type));
if (key->compute_sample_id) {
- fs_reg t1 = vgrf(glsl_type::int_type);
- fs_reg t2 = vgrf(glsl_type::int_type);
- t2.type = BRW_REGISTER_TYPE_UW;
+ fs_reg t1(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_D);
+ t1.set_smear(0);
+ fs_reg t2(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_W);
/* The PS will be run in MSDISPMODE_PERSAMPLE. For example with
* 8x multisampling, subspan 0 will represent sample N (where N
@@ -1306,13 +1308,13 @@ fs_visitor::emit_sampleid_setup()
* are sample 1 of subspan 0; the third group is sample 0 of
* subspan 1, and finally sample 1 of subspan 1.
*/
- abld.exec_all()
- .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)),
+ abld.exec_all().group(1, 0)
+ .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
fs_reg(0xc0));
- abld.exec_all().SHR(t1, t1, fs_reg(5));
+ abld.exec_all().group(1, 0).SHR(t1, t1, fs_reg(5));
/* This works for both SIMD8 and SIMD16 */
- abld.exec_all()
+ abld.exec_all().group(4, 0)
.MOV(t2, brw_imm_v(key->persample_2x ? 0x1010 : 0x3210));
/* This special instruction takes care of setting vstride=1,
@@ -1443,6 +1445,9 @@ fs_visitor::calculate_urb_setup()
}
}
} else {
+ bool include_vue_header =
+ nir->info.inputs_read & (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+
/* We have enough input varyings that the SF/SBE pipeline stage can't
* arbitrarily rearrange them to suit our whim; we have to put them
* in an order that matches the output of the previous pipeline stage
@@ -1452,15 +1457,14 @@ fs_visitor::calculate_urb_setup()
brw_compute_vue_map(devinfo, &prev_stage_vue_map,
key->input_slots_valid,
nir->info.separate_shader);
- int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
+ int first_slot =
+ include_vue_header ? 0 : 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
+
assert(prev_stage_vue_map.num_slots <= first_slot + 32);
for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
slot++) {
int varying = prev_stage_vue_map.slot_to_varying[slot];
- /* Note that varying == BRW_VARYING_SLOT_COUNT when a slot is
- * unused.
- */
- if (varying != BRW_VARYING_SLOT_COUNT &&
+ if (varying != BRW_VARYING_SLOT_PAD &&
(nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK &
BITFIELD64_BIT(varying))) {
prog_data->urb_setup[varying] = slot - first_slot;
@@ -2615,7 +2619,7 @@ fs_visitor::eliminate_find_live_channel()
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
if (depth == 0) {
inst->opcode = BRW_OPCODE_MOV;
- inst->src[0] = fs_reg(0);
+ inst->src[0] = fs_reg(0u);
inst->sources = 1;
inst->force_writemask_all = true;
progress = true;
@@ -2643,8 +2647,9 @@ fs_visitor::emit_repclear_shader()
fs_inst *mov;
if (uniforms == 1) {
- mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
- fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
+ mov = bld.exec_all().group(4, 0)
+ .MOV(brw_message_reg(color_mrf),
+ fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
} else {
struct brw_reg reg =
brw_reg(BRW_GENERAL_REGISTER_FILE,
@@ -2653,8 +2658,8 @@ fs_visitor::emit_repclear_shader()
BRW_WIDTH_2,
BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
- mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
- fs_reg(reg));
+ mov = bld.exec_all().group(4, 0)
+ .MOV(vec4(brw_message_reg(color_mrf)), fs_reg(reg));
}
fs_inst *write;
@@ -3366,15 +3371,17 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
const brw_wm_prog_key *key,
const fs_visitor::thread_payload &payload)
{
- assert(inst->src[6].file == IMM);
+ assert(inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM);
const brw_device_info *devinfo = bld.shader->devinfo;
- const fs_reg &color0 = inst->src[0];
- const fs_reg &color1 = inst->src[1];
- const fs_reg &src0_alpha = inst->src[2];
- const fs_reg &src_depth = inst->src[3];
- const fs_reg &dst_depth = inst->src[4];
- fs_reg sample_mask = inst->src[5];
- const unsigned components = inst->src[6].fixed_hw_reg.dw1.ud;
+ const fs_reg &color0 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR0];
+ const fs_reg &color1 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR1];
+ const fs_reg &src0_alpha = inst->src[FB_WRITE_LOGICAL_SRC_SRC0_ALPHA];
+ const fs_reg &src_depth = inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH];
+ const fs_reg &dst_depth = inst->src[FB_WRITE_LOGICAL_SRC_DST_DEPTH];
+ const fs_reg &src_stencil = inst->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL];
+ fs_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK];
+ const unsigned components =
+ inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud;
/* We can potentially have a message length of up to 15, so we have to set
* base_mrf to either 0 or 1 in order to fit in m0..m15.
@@ -3464,6 +3471,17 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
length++;
}
+ if (src_stencil.file != BAD_FILE) {
+ assert(devinfo->gen >= 9);
+ assert(bld.dispatch_width() != 16);
+
+ sources[length] = bld.vgrf(BRW_REGISTER_TYPE_UD);
+ bld.exec_all().annotate("FB write OS")
+ .emit(FS_OPCODE_PACK_STENCIL_REF, sources[length],
+ retype(src_stencil, BRW_REGISTER_TYPE_UB));
+ length++;
+ }
+
fs_inst *load;
if (devinfo->gen >= 7) {
/* Send from the GRF */
@@ -4073,7 +4091,7 @@ fs_visitor::lower_logical_sends()
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_UNTYPED_SURFACE_READ,
- fs_reg(0xffff));
+ fs_reg());
break;
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
@@ -4202,10 +4220,12 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
/* Gen6 doesn't support SIMD16 depth writes but we cannot handle them
* here.
*/
- assert(devinfo->gen != 6 || inst->src[3].file == BAD_FILE ||
+ assert(devinfo->gen != 6 ||
+ inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH].file == BAD_FILE ||
inst->exec_size == 8);
/* Dual-source FB writes are unsupported in SIMD16 mode. */
- return (inst->src[1].file != BAD_FILE ? 8 : inst->exec_size);
+ return (inst->src[FB_WRITE_LOGICAL_SRC_COLOR1].file != BAD_FILE ?
+ 8 : inst->exec_size);
case SHADER_OPCODE_TXD_LOGICAL:
/* TXD is unsupported in SIMD16 mode. */
@@ -4499,9 +4519,8 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
if (inst->dst.fixed_hw_reg.subnr)
fprintf(file, "+%d", inst->dst.fixed_hw_reg.subnr);
break;
- default:
- fprintf(file, "???");
- break;
+ case IMM:
+ unreachable("not reached");
}
fprintf(file, ":%s, ", brw_reg_type_letters(inst->dst.type));
@@ -4594,9 +4613,6 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
if (inst->src[i].fixed_hw_reg.abs)
fprintf(file, "|");
break;
- default:
- fprintf(file, "???");
- break;
}
if (inst->src[i].abs)
fprintf(file, "|");
@@ -4977,8 +4993,7 @@ fs_visitor::allocate_registers()
if (failed)
return;
- if (!allocated_without_spills)
- schedule_instructions(SCHEDULE_POST);
+ schedule_instructions(SCHEDULE_POST);
if (last_scratch > 0)
prog_data->total_scratch = brw_get_scratch_size(last_scratch);
@@ -5236,6 +5251,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
prog_data->uses_omask =
shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
prog_data->computed_depth_mode = computed_depth_mode(shader);
+ prog_data->computed_stencil =
+ shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 50e98becf03..8058b344b7a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -145,6 +145,8 @@ public:
void assign_vs_urb_setup();
bool assign_regs(bool allow_spilling);
void assign_regs_trivial();
+ void calculate_payload_ranges(int payload_node_count,
+ int *payload_last_use_ip);
void setup_payload_interference(struct ra_graph *g, int payload_reg_count,
int first_payload_node);
int choose_spill_reg(struct ra_graph *g);
@@ -337,6 +339,7 @@ public:
int *push_constant_loc;
fs_reg frag_depth;
+ fs_reg frag_stencil;
fs_reg sample_mask;
fs_reg outputs[VARYING_SLOT_MAX];
unsigned output_components[VARYING_SLOT_MAX];
@@ -427,6 +430,8 @@ private:
void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload);
void generate_urb_write(fs_inst *inst, struct brw_reg payload);
void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
+ void generate_stencil_ref_packing(fs_inst *inst, struct brw_reg dst,
+ struct brw_reg src);
void generate_barrier(fs_inst *inst, struct brw_reg src);
void generate_blorp_fb_write(fs_inst *inst);
void generate_linterp(fs_inst *inst, struct brw_reg dst,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index df10a9de293..f121f3463d3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -390,14 +390,21 @@ namespace brw {
src_reg
emit_uniformize(const src_reg &src) const
{
+ /* FIXME: We use a vector chan_index and dst to allow constant and
+ * copy propagation to move result all the way into the consuming
+ * instruction (typically a surface index or sampler index for a
+ * send). This uses 1 or 3 extra hw registers in 16 or 32 wide
+ * dispatch. Once we teach const/copy propagation about scalars we
+ * should go back to scalar destinations here.
+ */
const fs_builder ubld = exec_all();
- const dst_reg chan_index = component(vgrf(BRW_REGISTER_TYPE_UD), 0);
- const dst_reg dst = component(vgrf(src.type), 0);
+ const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD);
+ const dst_reg dst = vgrf(src.type);
ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
- ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index);
+ ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0));
- return src_reg(dst);
+ return src_reg(component(dst, 0));
}
/**
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 5589716239a..26204827156 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -416,9 +416,10 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
inst->src[arg].subreg_offset = offset % 32;
}
break;
- default:
- unreachable("Invalid register file");
- break;
+
+ case MRF:
+ case IMM:
+ unreachable("not reached");
}
if (has_source_modifiers) {
@@ -612,6 +613,21 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
}
break;
+ case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+ case SHADER_OPCODE_TYPED_ATOMIC:
+ case SHADER_OPCODE_TYPED_SURFACE_READ:
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE:
+ /* We only propagate into the surface argument of the
+ * instruction. Everything else goes through LOAD_PAYLOAD.
+ */
+ if (i == 1) {
+ inst->src[i] = val;
+ progress = true;
+ }
+ break;
+
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
case SHADER_OPCODE_BROADCAST:
inst->src[i] = val;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index c7628dcc2f4..3a28c8d591d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -93,7 +93,8 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
case SHADER_OPCODE_LOAD_PAYLOAD:
return !inst->is_copy_payload(v->alloc);
default:
- return inst->is_send_from_grf() && !inst->has_side_effects();
+ return inst->is_send_from_grf() && !inst->has_side_effects() &&
+ !inst->is_volatile();
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index bb7e792044f..e207a77fdc1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -42,9 +42,13 @@ static uint32_t brw_file_from_reg(fs_reg *reg)
return BRW_MESSAGE_REGISTER_FILE;
case IMM:
return BRW_IMMEDIATE_VALUE;
- default:
+ case BAD_FILE:
+ case HW_REG:
+ case ATTR:
+ case UNIFORM:
unreachable("not reached");
}
+ return 0;
}
static struct brw_reg
@@ -116,7 +120,8 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
/* Probably unused. */
brw_reg = brw_null_reg();
break;
- default:
+ case ATTR:
+ case UNIFORM:
unreachable("not reached");
}
if (reg->abs)
@@ -317,6 +322,14 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
brw_imm_ud(inst->target));
}
+ /* Set computed stencil to render target */
+ if (prog_data->computed_stencil) {
+ brw_OR(p,
+ vec1(retype(payload, BRW_REGISTER_TYPE_UD)),
+ vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
+ brw_imm_ud(0x1 << 14));
+ }
+
implied_header = brw_null_reg();
} else {
implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
@@ -437,6 +450,47 @@ fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload)
}
void
+fs_generator::generate_stencil_ref_packing(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ assert(dispatch_width == 8);
+ assert(devinfo->gen >= 9);
+
+ /* Stencil value updates are provided in 8 slots of 1 byte per slot.
+ * Presumably, in order to save memory bandwidth, the stencil reference
+ * values written from the FS need to be packed into 2 dwords (this makes
+ * sense because the stencil values are limited to 1 byte each and this is
+ * a SIMD8 send, so stencil slots 0-3 go in dw0, and 4-7 in dw1.)
+ *
+ * The spec is confusing here because in the payload definition of MDP_RTW_S8
+ * (Message Data Payload for Render Target Writes with Stencil 8b) the
+ * stencil value seems to be dw4.0-dw4.7. However, if you look at the type of
+ * dw4 it is type MDPR_STENCIL (Message Data Payload Register) which is the
+ * packed values specified above and diagrammed below:
+ *
+ * 31 0
+ * --------------------------------
+ * DW | |
+ * 2-7 | IGNORED |
+ * | |
+ * --------------------------------
+ * DW1 | STC | STC | STC | STC |
+ * | slot7 | slot6 | slot5 | slot4|
+ * --------------------------------
+ * DW0 | STC | STC | STC | STC |
+ * | slot3 | slot2 | slot1 | slot0|
+ * --------------------------------
+ */
+
+ src.vstride = BRW_VERTICAL_STRIDE_4;
+ src.width = BRW_WIDTH_1;
+ src.hstride = BRW_HORIZONTAL_STRIDE_0;
+ assert(src.type == BRW_REGISTER_TYPE_UB);
+ brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UB), src);
+}
+
+void
fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src)
{
brw_barrier(p, src);
@@ -1455,18 +1509,18 @@ fs_generator::generate_set_sample_id(fs_inst *inst,
assert(src0.type == BRW_REGISTER_TYPE_D ||
src0.type == BRW_REGISTER_TYPE_UD);
- brw_push_insn_state(p);
- brw_set_default_exec_size(p, BRW_EXECUTE_8);
- brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
- brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- struct brw_reg reg = retype(stride(src1, 1, 4, 0), BRW_REGISTER_TYPE_UW);
- if (dispatch_width == 8) {
+ struct brw_reg reg = stride(src1, 1, 4, 0);
+ if (devinfo->gen >= 8 || dispatch_width == 8) {
brw_ADD(p, dst, src0, reg);
} else if (dispatch_width == 16) {
+ brw_push_insn_state(p);
+ brw_set_default_exec_size(p, BRW_EXECUTE_8);
+ brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
brw_ADD(p, firsthalf(dst), firsthalf(src0), reg);
+ brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_ADD(p, sechalf(dst), sechalf(src0), suboffset(reg, 2));
+ brw_pop_insn_state(p);
}
- brw_pop_insn_state(p);
}
void
@@ -2182,6 +2236,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
generate_barrier(inst, src[0]);
break;
+ case FS_OPCODE_PACK_STENCIL_REF:
+ generate_stencil_ref_packing(inst, dst, src[0]);
+ break;
+
default:
unreachable("Unsupported opcode");
@@ -2216,9 +2274,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
if (unlikely(debug_flag)) {
fprintf(stderr, "Native code for %s\n"
- "SIMD%d shader: %d instructions. %d loops. %d:%d spills:fills. Promoted %u constants. Compacted %d to %d"
+ "SIMD%d shader: %d instructions. %d loops. %u cycles. %d:%d spills:fills. Promoted %u constants. Compacted %d to %d"
" bytes (%.0f%%)\n",
- shader_name, dispatch_width, before_size / 16, loop_count,
+ shader_name, dispatch_width, before_size / 16, loop_count, cfg->cycle_count,
spill_count, fill_count, promoted_constants, before_size, after_size,
100.0f * (before_size - after_size) / before_size);
@@ -2228,12 +2286,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
}
compiler->shader_debug_log(log_data,
- "%s SIMD%d shader: %d inst, %d loops, "
+ "%s SIMD%d shader: %d inst, %d loops, %u cycles, "
"%d:%d spills:fills, Promoted %u constants, "
"compacted %d to %d bytes.\n",
stage_abbrev, dispatch_width, before_size / 16,
- loop_count, spill_count, fill_count,
- promoted_constants, before_size, after_size);
+ loop_count, cfg->cycle_count, spill_count,
+ fill_count, promoted_constants, before_size,
+ after_size);
return start_offset;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 7b5a0482519..486741bea31 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -71,6 +71,14 @@ fs_visitor::nir_setup_inputs()
var->data.origin_upper_left);
emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
input, reg), 0xF);
+ } else if (var->data.location == VARYING_SLOT_LAYER) {
+ struct brw_reg reg = suboffset(interp_reg(VARYING_SLOT_LAYER, 1), 3);
+ reg.type = BRW_REGISTER_TYPE_D;
+ bld.emit(FS_OPCODE_CINTERP, retype(input, BRW_REGISTER_TYPE_D), reg);
+ } else if (var->data.location == VARYING_SLOT_VIEWPORT) {
+ struct brw_reg reg = suboffset(interp_reg(VARYING_SLOT_VIEWPORT, 2), 3);
+ reg.type = BRW_REGISTER_TYPE_D;
+ bld.emit(FS_OPCODE_CINTERP, retype(input, BRW_REGISTER_TYPE_D), reg);
} else {
emit_general_interpolation(input, var->name, var->type,
(glsl_interp_qualifier) var->data.interpolation,
@@ -114,6 +122,8 @@ fs_visitor::nir_setup_outputs()
}
} else if (var->data.location == FRAG_RESULT_DEPTH) {
this->frag_depth = reg;
+ } else if (var->data.location == FRAG_RESULT_STENCIL) {
+ this->frag_stencil = reg;
} else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
this->sample_mask = reg;
} else {
@@ -896,12 +906,11 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
* from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
* subtract the result from 31 to convert the MSB count into an LSB count.
*/
-
bld.CMP(bld.null_reg_d(), result, fs_reg(-1), BRW_CONDITIONAL_NZ);
- fs_reg neg_result(result);
- neg_result.negate = true;
- inst = bld.ADD(result, neg_result, fs_reg(31));
+
+ inst = bld.ADD(result, result, fs_reg(31));
inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->src[0].negate = true;
break;
}
@@ -1322,6 +1331,15 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
+ case nir_intrinsic_shader_clock: {
+ /* We cannot do anything if there is an event, so ignore it for now */
+ fs_reg shader_clock = get_timestamp(bld);
+ const fs_reg srcs[] = { shader_clock.set_smear(0), shader_clock.set_smear(1) };
+
+ bld.LOAD_PAYLOAD(dest, srcs, ARRAY_SIZE(srcs), 0);
+ break;
+ }
+
case nir_intrinsic_image_size: {
/* Get the referenced image variable and type. */
const nir_variable *var = instr->variables[0]->var;
@@ -1509,7 +1527,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
surf_index = vgrf(glsl_type::uint_type);
bld.ADD(surf_index, get_nir_src(instr->src[0]),
fs_reg(stage_prog_data->binding_table.ssbo_start));
- surf_index = bld.emit_uniformize(surf_index);
/* Assume this may touch any UBO. It would be nice to provide
* a tighter bound, but the array information is already lowered away.
@@ -1520,34 +1537,21 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
}
/* Get the offset to read from */
- fs_reg offset_reg = vgrf(glsl_type::uint_type);
- unsigned const_offset_bytes = 0;
+ fs_reg offset_reg;
if (has_indirect) {
- bld.MOV(offset_reg, get_nir_src(instr->src[1]));
+ offset_reg = get_nir_src(instr->src[1]);
} else {
- const_offset_bytes = instr->const_index[0];
- bld.MOV(offset_reg, fs_reg(const_offset_bytes));
+ offset_reg = fs_reg(instr->const_index[0]);
}
/* Read the vector */
- for (int i = 0; i < instr->num_components; i++) {
- fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
- 1 /* dims */, 1 /* size */,
- BRW_PREDICATE_NONE);
- read_result.type = dest.type;
- bld.MOV(dest, read_result);
- dest = offset(dest, bld, 1);
-
- /* Vector components are stored contiguous in memory */
- if (i < instr->num_components) {
- if (!has_indirect) {
- const_offset_bytes += 4;
- bld.MOV(offset_reg, fs_reg(const_offset_bytes));
- } else {
- bld.ADD(offset_reg, offset_reg, brw_imm_ud(4));
- }
- }
- }
+ fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg,
+ 1 /* dims */,
+ instr->num_components,
+ BRW_PREDICATE_NONE);
+ read_result.type = dest.type;
+ for (int i = 0; i < instr->num_components; i++)
+ bld.MOV(offset(dest, bld, i), offset(read_result, bld, i));
break;
}
@@ -1765,52 +1769,46 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
surf_index = vgrf(glsl_type::uint_type);
bld.ADD(surf_index, get_nir_src(instr->src[1]),
fs_reg(stage_prog_data->binding_table.ssbo_start));
- surf_index = bld.emit_uniformize(surf_index);
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ssbo_start +
nir->info.num_ssbos - 1);
}
- /* Offset */
- fs_reg offset_reg = vgrf(glsl_type::uint_type);
- unsigned const_offset_bytes = 0;
- if (has_indirect) {
- bld.MOV(offset_reg, get_nir_src(instr->src[2]));
- } else {
- const_offset_bytes = instr->const_index[0];
- bld.MOV(offset_reg, fs_reg(const_offset_bytes));
- }
-
/* Value */
fs_reg val_reg = get_nir_src(instr->src[0]);
/* Writemask */
unsigned writemask = instr->const_index[1];
- /* Write each component present in the writemask */
- unsigned skipped_channels = 0;
- for (int i = 0; i < instr->num_components; i++) {
- int component_mask = 1 << i;
- if (writemask & component_mask) {
- if (skipped_channels) {
- if (!has_indirect) {
- const_offset_bytes += 4 * skipped_channels;
- bld.MOV(offset_reg, fs_reg(const_offset_bytes));
- } else {
- bld.ADD(offset_reg, offset_reg,
- brw_imm_ud(4 * skipped_channels));
- }
- skipped_channels = 0;
- }
+ /* Combine groups of consecutive enabled channels in one write
+ * message. We use ffs to find the first enabled channel and then ffs on
+ * the bit-inverse, down-shifted writemask to determine the length of
+ * the block of enabled bits.
+ */
+ while (writemask) {
+ unsigned first_component = ffs(writemask) - 1;
+ unsigned length = ffs(~(writemask >> first_component)) - 1;
+ fs_reg offset_reg;
- emit_untyped_write(bld, surf_index, offset_reg,
- offset(val_reg, bld, i),
- 1 /* dims */, 1 /* size */,
- BRW_PREDICATE_NONE);
+ if (!has_indirect) {
+ offset_reg = fs_reg(instr->const_index[0] + 4 * first_component);
+ } else {
+ offset_reg = vgrf(glsl_type::uint_type);
+ bld.ADD(offset_reg,
+ retype(get_nir_src(instr->src[2]), BRW_REGISTER_TYPE_UD),
+ fs_reg(4 * first_component));
}
- skipped_channels++;
+ emit_untyped_write(bld, surf_index, offset_reg,
+ offset(val_reg, bld, first_component),
+ 1 /* dims */, length,
+ BRW_PREDICATE_NONE);
+
+ /* Clear the bits in the writemask that we just wrote, then try
+ * again to see if more channels are left.
+ */
+ writemask &= (15 << (first_component + length));
}
break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 36388fad98d..9251d9552a5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -330,32 +330,12 @@ count_to_loop_end(const bblock_t *block)
unreachable("not reached");
}
-/**
- * Sets up interference between thread payload registers and the virtual GRFs
- * to be allocated for program temporaries.
- *
- * We want to be able to reallocate the payload for our virtual GRFs, notably
- * because the setup coefficients for a full set of 16 FS inputs takes up 8 of
- * our 128 registers.
- *
- * The layout of the payload registers is:
- *
- * 0..payload.num_regs-1: fixed function setup (including bary coordinates).
- * payload.num_regs..payload.num_regs+curb_read_lengh-1: uniform data
- * payload.num_regs+curb_read_lengh..first_non_payload_grf-1: setup coefficients.
- *
- * And we have payload_node_count nodes covering these registers in order
- * (note that in SIMD16, a node is two registers).
- */
-void
-fs_visitor::setup_payload_interference(struct ra_graph *g,
- int payload_node_count,
- int first_payload_node)
+void fs_visitor::calculate_payload_ranges(int payload_node_count,
+ int *payload_last_use_ip)
{
int loop_depth = 0;
int loop_end_ip = 0;
- int payload_last_use_ip[payload_node_count];
for (int i = 0; i < payload_node_count; i++)
payload_last_use_ip[i] = -1;
@@ -426,6 +406,33 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
ip++;
}
+}
+
+
+/**
+ * Sets up interference between thread payload registers and the virtual GRFs
+ * to be allocated for program temporaries.
+ *
+ * We want to be able to reallocate the payload for our virtual GRFs, notably
+ * because the setup coefficients for a full set of 16 FS inputs takes up 8 of
+ * our 128 registers.
+ *
+ * The layout of the payload registers is:
+ *
+ * 0..payload.num_regs-1: fixed function setup (including bary coordinates).
+ * payload.num_regs..payload.num_regs+curb_read_length-1: uniform data
+ * payload.num_regs+curb_read_length..first_non_payload_grf-1: setup coefficients.
+ *
+ * And we have payload_node_count nodes covering these registers in order
+ * (note that in SIMD16, a node is two registers).
+ */
+void
+fs_visitor::setup_payload_interference(struct ra_graph *g,
+ int payload_node_count,
+ int first_payload_node)
+{
+ int payload_last_use_ip[payload_node_count];
+ calculate_payload_ranges(payload_node_count, payload_last_use_ip);
for (int i = 0; i < payload_node_count; i++) {
if (payload_last_use_ip[i] == -1)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 7cc4f3c927a..5c57944ca39 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -697,7 +697,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
const fs_reg dst_depth = (payload.dest_depth_reg ?
fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)) :
fs_reg());
- fs_reg src_depth;
+ fs_reg src_depth, src_stencil;
if (source_depth_to_render_target) {
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
@@ -706,10 +706,14 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
src_depth = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0));
}
+ if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
+ src_stencil = frag_stencil;
+
const fs_reg sources[] = {
- color0, color1, src0_alpha, src_depth, dst_depth, sample_mask,
- fs_reg(components)
+ color0, color1, src0_alpha, src_depth, dst_depth, src_stencil,
+ sample_mask, fs_reg(components)
};
+ assert(ARRAY_SIZE(sources) - 1 == FB_WRITE_LOGICAL_SRC_COMPONENTS);
fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(),
sources, ARRAY_SIZE(sources));
@@ -740,6 +744,16 @@ fs_visitor::emit_fb_writes()
no16("Missing support for simd16 depth writes on gen6\n");
}
+ if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
+ /* From the 'Render Target Write message' section of the docs:
+ * "Output Stencil is not supported with SIMD16 Render Target Write
+ * Messages."
+ *
+ * FINISHME: split 16 into 2 8s
+ */
+ no16("FINISHME: support 2 simd8 writes for gl_FragStencilRefARB\n");
+ }
+
if (do_dual_src) {
const fs_builder abld = bld.annotate("FB dual-source write");
diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
index 00125c0f405..76ed237d88a 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
@@ -105,8 +105,8 @@ brw_upload_gs_abo_surfaces(struct brw_context *brw)
if (prog) {
/* BRW_NEW_GS_PROG_DATA */
- brw_upload_abo_surfaces(brw, prog, &brw->gs.base,
- &brw->gs.prog_data->base.base);
+ brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY],
+ &brw->gs.base, &brw->gs.prog_data->base.base);
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index 7726e4b78a0..4417555f18e 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -97,7 +97,9 @@ byte_offset(fs_reg reg, unsigned delta)
case MRF:
reg.reg += delta / 32;
break;
- default:
+ case IMM:
+ case HW_REG:
+ case UNIFORM:
assert(delta == 0);
}
reg.subreg_offset += delta % 32;
@@ -119,7 +121,7 @@ horiz_offset(fs_reg reg, unsigned delta)
case MRF:
case ATTR:
return byte_offset(reg, delta * reg.stride * type_sz(reg.type));
- default:
+ case HW_REG:
assert(delta == 0);
}
return reg;
@@ -163,7 +165,6 @@ half(fs_reg reg, unsigned idx)
case ATTR:
case HW_REG:
- default:
unreachable("Cannot take half of this register type");
}
return reg;
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 1b57b65db27..29642c6d2a4 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -161,9 +161,6 @@ public:
const src_reg &src1 = src_reg(),
const src_reg &src2 = src_reg());
- struct brw_reg get_dst(unsigned gen);
- struct brw_reg get_src(const struct brw_vue_prog_data *prog_data, int i);
-
dst_reg dst;
src_reg src[3];
@@ -186,6 +183,27 @@ public:
return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2;
}
+ bool reads_flag(unsigned c)
+ {
+ if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2)
+ return true;
+
+ switch (predicate) {
+ case BRW_PREDICATE_NONE:
+ return false;
+ case BRW_PREDICATE_ALIGN16_REPLICATE_X:
+ return c == 0;
+ case BRW_PREDICATE_ALIGN16_REPLICATE_Y:
+ return c == 1;
+ case BRW_PREDICATE_ALIGN16_REPLICATE_Z:
+ return c == 2;
+ case BRW_PREDICATE_ALIGN16_REPLICATE_W:
+ return c == 3;
+ default:
+ return true;
+ }
+ }
+
bool writes_flag()
{
return (conditional_mod && (opcode != BRW_OPCODE_SEL &&
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 9a33188cb5c..8c1a34ee17a 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -205,6 +205,9 @@ brw_create_nir(struct brw_context *brw,
if (shader_prog) {
nir_lower_samplers(nir, shader_prog);
nir_validate_shader(nir);
+
+ nir_lower_atomics(nir, shader_prog);
+ nir_validate_shader(nir);
}
brw_postprocess_nir(nir, brw->intelScreen->devinfo, is_scalar);
@@ -278,9 +281,6 @@ brw_postprocess_nir(nir_shader *nir,
nir_lower_system_values(nir);
nir_validate_shader(nir);
- nir_lower_atomics(nir);
- nir_validate_shader(nir);
-
nir_optimize(nir, is_scalar);
if (devinfo->gen >= 6) {
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index 87e7e011541..083c46a3726 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -205,7 +205,7 @@ enum PACKED brw_reg_type {
/** @} */
/** Immediates only: @{ */
- BRW_REGISTER_TYPE_UV,
+ BRW_REGISTER_TYPE_UV, /* Gen6+ */
BRW_REGISTER_TYPE_V,
BRW_REGISTER_TYPE_VF,
/** @} */
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index b710c60148c..88c45f74333 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -26,6 +26,7 @@
*/
#include "brw_fs.h"
+#include "brw_fs_live_variables.h"
#include "brw_vec4.h"
#include "brw_cfg.h"
#include "brw_shader.h"
@@ -400,22 +401,49 @@ schedule_node::set_latency_gen7(bool is_haswell)
class instruction_scheduler {
public:
instruction_scheduler(backend_shader *s, int grf_count,
+ int hw_reg_count, int block_count,
instruction_scheduler_mode mode)
{
this->bs = s;
this->mem_ctx = ralloc_context(NULL);
this->grf_count = grf_count;
+ this->hw_reg_count = hw_reg_count;
this->instructions.make_empty();
this->instructions_to_schedule = 0;
this->post_reg_alloc = (mode == SCHEDULE_POST);
this->mode = mode;
this->time = 0;
if (!post_reg_alloc) {
- this->remaining_grf_uses = rzalloc_array(mem_ctx, int, grf_count);
- this->grf_active = rzalloc_array(mem_ctx, bool, grf_count);
+ this->reg_pressure_in = rzalloc_array(mem_ctx, int, block_count);
+
+ this->livein = ralloc_array(mem_ctx, BITSET_WORD *, block_count);
+ for (int i = 0; i < block_count; i++)
+ this->livein[i] = rzalloc_array(mem_ctx, BITSET_WORD,
+ BITSET_WORDS(grf_count));
+
+ this->liveout = ralloc_array(mem_ctx, BITSET_WORD *, block_count);
+ for (int i = 0; i < block_count; i++)
+ this->liveout[i] = rzalloc_array(mem_ctx, BITSET_WORD,
+ BITSET_WORDS(grf_count));
+
+ this->hw_liveout = ralloc_array(mem_ctx, BITSET_WORD *, block_count);
+ for (int i = 0; i < block_count; i++)
+ this->hw_liveout[i] = rzalloc_array(mem_ctx, BITSET_WORD,
+ BITSET_WORDS(hw_reg_count));
+
+ this->written = rzalloc_array(mem_ctx, bool, grf_count);
+
+ this->reads_remaining = rzalloc_array(mem_ctx, int, grf_count);
+
+ this->hw_reads_remaining = rzalloc_array(mem_ctx, int, hw_reg_count);
} else {
- this->remaining_grf_uses = NULL;
- this->grf_active = NULL;
+ this->reg_pressure_in = NULL;
+ this->livein = NULL;
+ this->liveout = NULL;
+ this->hw_liveout = NULL;
+ this->written = NULL;
+ this->reads_remaining = NULL;
+ this->hw_reads_remaining = NULL;
}
}
@@ -442,7 +470,8 @@ public:
*/
virtual int issue_time(backend_instruction *inst) = 0;
- virtual void count_remaining_grf_uses(backend_instruction *inst) = 0;
+ virtual void count_reads_remaining(backend_instruction *inst) = 0;
+ virtual void setup_liveness(cfg_t *cfg) = 0;
virtual void update_register_pressure(backend_instruction *inst) = 0;
virtual int get_register_pressure_benefit(backend_instruction *inst) = 0;
@@ -453,33 +482,63 @@ public:
bool post_reg_alloc;
int instructions_to_schedule;
int grf_count;
+ int hw_reg_count;
int time;
+ int reg_pressure;
+ int block_idx;
exec_list instructions;
backend_shader *bs;
instruction_scheduler_mode mode;
- /**
- * Number of instructions left to schedule that reference each vgrf.
- *
- * Used so that we can prefer scheduling instructions that will end the
- * live intervals of multiple variables, to reduce register pressure.
+ /*
+ * The register pressure at the beginning of each basic block.
*/
- int *remaining_grf_uses;
- /**
- * Tracks whether each VGRF has had an instruction scheduled that uses it.
- *
- * This is used to estimate whether scheduling a new instruction will
- * increase register pressure.
+ int *reg_pressure_in;
+
+ /*
+ * The virtual GRFs whose range overlaps the beginning of each basic block.
+ */
+
+ BITSET_WORD **livein;
+
+ /*
+ * The virtual GRFs whose range overlaps the end of each basic block.
+ */
+
+ BITSET_WORD **liveout;
+
+ /*
+ * The hardware GRFs whose range overlaps the end of each basic block.
+ */
+
+ BITSET_WORD **hw_liveout;
+
+ /*
+ * Whether we've scheduled a write for this virtual GRF yet.
+ */
+
+ bool *written;
+
+ /*
+ * How many reads we haven't scheduled for this virtual GRF yet.
+ */
+
+ int *reads_remaining;
+
+ /*
+ * How many reads we haven't scheduled for this hardware GRF yet.
*/
- bool *grf_active;
+
+ int *hw_reads_remaining;
};
class fs_instruction_scheduler : public instruction_scheduler
{
public:
- fs_instruction_scheduler(fs_visitor *v, int grf_count,
+ fs_instruction_scheduler(fs_visitor *v, int grf_count, int hw_reg_count,
+ int block_count,
instruction_scheduler_mode mode);
void calculate_deps();
bool is_compressed(fs_inst *inst);
@@ -487,35 +546,109 @@ public:
int issue_time(backend_instruction *inst);
fs_visitor *v;
- void count_remaining_grf_uses(backend_instruction *inst);
+ void count_reads_remaining(backend_instruction *inst);
+ void setup_liveness(cfg_t *cfg);
void update_register_pressure(backend_instruction *inst);
int get_register_pressure_benefit(backend_instruction *inst);
};
fs_instruction_scheduler::fs_instruction_scheduler(fs_visitor *v,
- int grf_count,
+ int grf_count, int hw_reg_count,
+ int block_count,
instruction_scheduler_mode mode)
- : instruction_scheduler(v, grf_count, mode),
+ : instruction_scheduler(v, grf_count, hw_reg_count, block_count, mode),
v(v)
{
}
+static bool
+is_src_duplicate(fs_inst *inst, int src)
+{
+ for (int i = 0; i < src; i++)
+ if (inst->src[i].equals(inst->src[src]))
+ return true;
+
+ return false;
+}
+
void
-fs_instruction_scheduler::count_remaining_grf_uses(backend_instruction *be)
+fs_instruction_scheduler::count_reads_remaining(backend_instruction *be)
{
fs_inst *inst = (fs_inst *)be;
- if (!remaining_grf_uses)
+ if (!reads_remaining)
return;
- if (inst->dst.file == GRF)
- remaining_grf_uses[inst->dst.reg]++;
-
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file != GRF)
+ if (is_src_duplicate(inst, i))
continue;
- remaining_grf_uses[inst->src[i].reg]++;
+ if (inst->src[i].file == GRF) {
+ reads_remaining[inst->src[i].reg]++;
+ } else if (inst->src[i].file == HW_REG &&
+ inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+ if (inst->src[i].fixed_hw_reg.nr >= hw_reg_count)
+ continue;
+
+ for (int j = 0; j < inst->regs_read(i); j++)
+ hw_reads_remaining[inst->src[i].fixed_hw_reg.nr + j]++;
+ }
+ }
+}
+
+void
+fs_instruction_scheduler::setup_liveness(cfg_t *cfg)
+{
+ /* First, compute liveness on a per-GRF level using the in/out sets from
+ * liveness calculation.
+ */
+ for (int block = 0; block < cfg->num_blocks; block++) {
+ for (int i = 0; i < v->live_intervals->num_vars; i++) {
+ if (BITSET_TEST(v->live_intervals->block_data[block].livein, i)) {
+ int vgrf = v->live_intervals->vgrf_from_var[i];
+ if (!BITSET_TEST(livein[block], vgrf)) {
+ reg_pressure_in[block] += v->alloc.sizes[vgrf];
+ BITSET_SET(livein[block], vgrf);
+ }
+ }
+
+ if (BITSET_TEST(v->live_intervals->block_data[block].liveout, i))
+ BITSET_SET(liveout[block], v->live_intervals->vgrf_from_var[i]);
+ }
+ }
+
+ /* Now, extend the live in/live out sets for when a range crosses a block
+ * boundary, which matches what our register allocator/interference code
+ * does to account for force_writemask_all and incompatible exec_masks.
+ */
+ for (int block = 0; block < cfg->num_blocks - 1; block++) {
+ for (int i = 0; i < grf_count; i++) {
+ if (v->virtual_grf_start[i] <= cfg->blocks[block]->end_ip &&
+ v->virtual_grf_end[i] >= cfg->blocks[block + 1]->start_ip) {
+ if (!BITSET_TEST(livein[block + 1], i)) {
+ reg_pressure_in[block + 1] += v->alloc.sizes[i];
+ BITSET_SET(livein[block + 1], i);
+ }
+
+ BITSET_SET(liveout[block], i);
+ }
+ }
+ }
+
+ int payload_last_use_ip[hw_reg_count];
+ v->calculate_payload_ranges(hw_reg_count, payload_last_use_ip);
+
+ for (int i = 0; i < hw_reg_count; i++) {
+ if (payload_last_use_ip[i] == -1)
+ continue;
+
+ for (int block = 0; block < cfg->num_blocks; block++) {
+ if (cfg->blocks[block]->start_ip <= payload_last_use_ip[i])
+ reg_pressure_in[block]++;
+
+ if (cfg->blocks[block]->end_ip <= payload_last_use_ip[i])
+ BITSET_SET(hw_liveout[block], i);
+ }
}
}
@@ -524,18 +657,24 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be)
{
fs_inst *inst = (fs_inst *)be;
- if (!remaining_grf_uses)
+ if (!reads_remaining)
return;
if (inst->dst.file == GRF) {
- remaining_grf_uses[inst->dst.reg]--;
- grf_active[inst->dst.reg] = true;
+ written[inst->dst.reg] = true;
}
for (int i = 0; i < inst->sources; i++) {
+ if (is_src_duplicate(inst, i))
+ continue;
+
if (inst->src[i].file == GRF) {
- remaining_grf_uses[inst->src[i].reg]--;
- grf_active[inst->src[i].reg] = true;
+ reads_remaining[inst->src[i].reg]--;
+ } else if (inst->src[i].file == HW_REG &&
+ inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE &&
+ inst->src[i].fixed_hw_reg.nr < hw_reg_count) {
+ for (int off = 0; off < inst->regs_read(i); off++)
+ hw_reads_remaining[inst->src[i].fixed_hw_reg.nr + off]--;
}
}
}
@@ -547,20 +686,31 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
int benefit = 0;
if (inst->dst.file == GRF) {
- if (remaining_grf_uses[inst->dst.reg] == 1)
- benefit += v->alloc.sizes[inst->dst.reg];
- if (!grf_active[inst->dst.reg])
+ if (!BITSET_TEST(livein[block_idx], inst->dst.reg) &&
+ !written[inst->dst.reg])
benefit -= v->alloc.sizes[inst->dst.reg];
}
for (int i = 0; i < inst->sources; i++) {
- if (inst->src[i].file != GRF)
+ if (is_src_duplicate(inst, i))
continue;
- if (remaining_grf_uses[inst->src[i].reg] == 1)
+ if (inst->src[i].file == GRF &&
+ !BITSET_TEST(liveout[block_idx], inst->src[i].reg) &&
+ reads_remaining[inst->src[i].reg] == 1)
benefit += v->alloc.sizes[inst->src[i].reg];
- if (!grf_active[inst->src[i].reg])
- benefit -= v->alloc.sizes[inst->src[i].reg];
+
+ if (inst->src[i].file == HW_REG &&
+ inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE &&
+ inst->src[i].fixed_hw_reg.nr < hw_reg_count) {
+ for (int off = 0; off < inst->regs_read(i); off++) {
+ int reg = inst->src[i].fixed_hw_reg.nr + off;
+ if (!BITSET_TEST(hw_liveout[block_idx], reg) &&
+ hw_reads_remaining[reg] == 1) {
+ benefit++;
+ }
+ }
+ }
}
return benefit;
@@ -575,20 +725,26 @@ public:
int issue_time(backend_instruction *inst);
vec4_visitor *v;
- void count_remaining_grf_uses(backend_instruction *inst);
+ void count_reads_remaining(backend_instruction *inst);
+ void setup_liveness(cfg_t *cfg);
void update_register_pressure(backend_instruction *inst);
int get_register_pressure_benefit(backend_instruction *inst);
};
vec4_instruction_scheduler::vec4_instruction_scheduler(vec4_visitor *v,
int grf_count)
- : instruction_scheduler(v, grf_count, SCHEDULE_POST),
+ : instruction_scheduler(v, grf_count, 0, 0, SCHEDULE_POST),
v(v)
{
}
void
-vec4_instruction_scheduler::count_remaining_grf_uses(backend_instruction *be)
+vec4_instruction_scheduler::count_reads_remaining(backend_instruction *be)
+{
+}
+
+void
+vec4_instruction_scheduler::setup_liveness(cfg_t *cfg)
{
}
@@ -822,7 +978,7 @@ fs_instruction_scheduler::calculate_deps()
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM &&
(inst->src[i].file != HW_REG ||
- inst->src[i].fixed_hw_reg.file != IMM)) {
+ inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
assert(inst->src[i].file != MRF);
add_barrier_deps(n);
}
@@ -927,10 +1083,10 @@ fs_instruction_scheduler::calculate_deps()
if (inst->src[i].file == GRF) {
if (post_reg_alloc) {
for (int r = 0; r < inst->regs_read(i); r++)
- add_dep(n, last_grf_write[inst->src[i].reg + r]);
+ add_dep(n, last_grf_write[inst->src[i].reg + r], 0);
} else {
for (int r = 0; r < inst->regs_read(i); r++) {
- add_dep(n, last_grf_write[inst->src[i].reg * 16 + inst->src[i].reg_offset + r]);
+ add_dep(n, last_grf_write[inst->src[i].reg * 16 + inst->src[i].reg_offset + r], 0);
}
}
} else if (inst->src[i].file == HW_REG &&
@@ -941,17 +1097,17 @@ fs_instruction_scheduler::calculate_deps()
if (inst->src[i].fixed_hw_reg.vstride == BRW_VERTICAL_STRIDE_0)
size = 1;
for (int r = 0; r < size; r++)
- add_dep(n, last_grf_write[inst->src[i].fixed_hw_reg.nr + r]);
+ add_dep(n, last_grf_write[inst->src[i].fixed_hw_reg.nr + r], 0);
} else {
- add_dep(n, last_fixed_grf_write);
+ add_dep(n, last_fixed_grf_write, 0);
}
} else if (inst->src[i].is_accumulator()) {
- add_dep(n, last_accumulator_write);
+ add_dep(n, last_accumulator_write, 0);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM &&
(inst->src[i].file != HW_REG ||
- inst->src[i].fixed_hw_reg.file != IMM)) {
+ inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
assert(inst->src[i].file != MRF);
add_barrier_deps(n);
}
@@ -1080,7 +1236,7 @@ vec4_instruction_scheduler::calculate_deps()
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM &&
(inst->src[i].file != HW_REG ||
- inst->src[i].fixed_hw_reg.file != IMM)) {
+ inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
/* No reads from MRF, and ATTR is already translated away */
assert(inst->src[i].file != MRF &&
inst->src[i].file != ATTR);
@@ -1177,7 +1333,7 @@ vec4_instruction_scheduler::calculate_deps()
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM &&
(inst->src[i].file != HW_REG ||
- inst->src[i].fixed_hw_reg.file != IMM)) {
+ inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
assert(inst->src[i].file != MRF &&
inst->src[i].file != ATTR);
add_barrier_deps(n);
@@ -1387,6 +1543,9 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
const struct brw_device_info *devinfo = bs->devinfo;
backend_instruction *inst = block->end();
time = 0;
+ if (!post_reg_alloc)
+ reg_pressure = reg_pressure_in[block->num];
+ block_idx = block->num;
/* Remove non-DAG heads from the list. */
foreach_in_list_safe(schedule_node, n, &instructions) {
@@ -1403,23 +1562,30 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
chosen->remove();
inst->insert_before(block, chosen->inst);
instructions_to_schedule--;
- update_register_pressure(chosen->inst);
- /* Update the clock for how soon an instruction could start after the
- * chosen one.
- */
- time += issue_time(chosen->inst);
+ if (!post_reg_alloc) {
+ reg_pressure -= get_register_pressure_benefit(chosen->inst);
+ update_register_pressure(chosen->inst);
+ }
/* If we expected a delay for scheduling, then bump the clock to reflect
- * that as well. In reality, the hardware will switch to another
- * hyperthread and may not return to dispatching our thread for a while
- * even after we're unblocked.
+ * that. In reality, the hardware will switch to another hyperthread
+ * and may not return to dispatching our thread for a while even after
+ * we're unblocked. After this, we have the time when the chosen
+ * instruction will start executing.
*/
time = MAX2(time, chosen->unblocked_time);
+ /* Update the clock for how soon an instruction could start after the
+ * chosen one.
+ */
+ time += issue_time(chosen->inst);
+
if (debug) {
fprintf(stderr, "clock %4d, scheduled: ", time);
bs->dump_instruction(chosen->inst);
+ if (!post_reg_alloc)
+ fprintf(stderr, "(register pressure %d)\n", reg_pressure);
}
/* Now that we've scheduled a new instruction, some of its
@@ -1466,30 +1632,53 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
if (block->end()->opcode == BRW_OPCODE_NOP)
block->end()->remove(block);
assert(instructions_to_schedule == 0);
+
+ block->cycle_count = time;
+}
+
+static unsigned get_cycle_count(cfg_t *cfg) /* whole-program estimate: sum of every block's cycle_count, each scaled by a loop multiplier */
+{
+ unsigned count = 0, multiplier = 1;
+ foreach_block(block, cfg) {
+ if (block->start()->opcode == BRW_OPCODE_DO)
+ multiplier *= 10; /* assume that loops execute ~10 times */
+
+ count += block->cycle_count * multiplier; /* cycle_count is the value stored on the block at the end of schedule_instructions() */
+
+ if (block->end()->opcode == BRW_OPCODE_WHILE)
+ multiplier /= 10; /* block ends in WHILE: take one x10 factor back off */
+ }
+
+ return count;
}
void
instruction_scheduler::run(cfg_t *cfg)
{
- if (debug) {
+ if (debug && !post_reg_alloc) {
fprintf(stderr, "\nInstructions before scheduling (reg_alloc %d)\n",
post_reg_alloc);
- bs->dump_instructions();
+ bs->dump_instructions();
}
- /* Populate the remaining GRF uses array to improve the pre-regalloc
- * scheduling.
- */
- if (remaining_grf_uses) {
- foreach_block_and_inst(block, backend_instruction, inst, cfg) {
- count_remaining_grf_uses(inst);
- }
- }
+ if (!post_reg_alloc)
+ setup_liveness(cfg);
foreach_block(block, cfg) {
if (block->end_ip - block->start_ip <= 1)
continue;
+ if (reads_remaining) {
+ memset(reads_remaining, 0,
+ grf_count * sizeof(*reads_remaining));
+ memset(hw_reads_remaining, 0,
+ hw_reg_count * sizeof(*hw_reads_remaining));
+ memset(written, 0, grf_count * sizeof(*written));
+
+ foreach_inst_in_block(fs_inst, inst, block)
+ count_reads_remaining(inst);
+ }
+
add_insts_from_block(block);
calculate_deps();
@@ -1501,23 +1690,29 @@ instruction_scheduler::run(cfg_t *cfg)
schedule_instructions(block);
}
- if (debug) {
+ if (debug && !post_reg_alloc) {
fprintf(stderr, "\nInstructions after scheduling (reg_alloc %d)\n",
post_reg_alloc);
bs->dump_instructions();
}
+
+ cfg->cycle_count = get_cycle_count(cfg);
}
void
fs_visitor::schedule_instructions(instruction_scheduler_mode mode)
{
+ if (mode != SCHEDULE_POST)
+ calculate_live_intervals();
+
int grf_count;
if (mode == SCHEDULE_POST)
grf_count = grf_used;
else
grf_count = alloc.count;
- fs_instruction_scheduler sched(this, grf_count, mode);
+ fs_instruction_scheduler sched(this, grf_count, first_non_payload_grf,
+ cfg->num_blocks, mode);
sched.run(cfg);
if (unlikely(debug_enabled) && mode == SCHEDULE_POST) {
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index e48f559afa7..063cb84a958 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -298,6 +298,8 @@ brw_instruction_name(enum opcode op)
return "fb_write";
case FS_OPCODE_FB_WRITE_LOGICAL:
return "fb_write_logical";
+ case FS_OPCODE_PACK_STENCIL_REF:
+ return "pack_stencil_ref";
case FS_OPCODE_BLORP_FB_WRITE:
return "blorp_fb_write";
case FS_OPCODE_REP_FB_WRITE:
@@ -988,6 +990,20 @@ backend_instruction::has_side_effects() const
}
}
+bool
+backend_instruction::is_volatile() const /* returns true only for the four surface-read opcodes listed in the switch */
+{
+ switch (opcode) {
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
+ case SHADER_OPCODE_TYPED_SURFACE_READ:
+ case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
+ return true;
+ default:
+ return false; /* every other opcode is reported as not volatile */
+ }
+}
+
#ifndef NDEBUG
static bool
inst_is_in_block(const bblock_t *block, const backend_instruction *inst)
@@ -1178,9 +1194,9 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage,
stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
}
- if (shader_prog && shader_prog->NumAtomicBuffers) {
+ if (shader && shader->NumAtomicBuffers) {
stage_prog_data->binding_table.abo_start = next_binding_table_offset;
- next_binding_table_offset += shader_prog->NumAtomicBuffers;
+ next_binding_table_offset += shader->NumAtomicBuffers;
} else {
stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 8899b30c1ae..f4647cca4f9 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -115,6 +115,12 @@ struct backend_instruction : public exec_node {
* optimize these out unless you know what you are doing.
*/
bool has_side_effects() const;
+
+ /**
+ * True if the instruction might be affected by side effects of other
+ * instructions.
+ */
+ bool is_volatile() const;
#else
struct backend_instruction {
struct exec_node link;
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index dc2b9415673..2aa1248fea6 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -49,6 +49,7 @@ extern const struct brw_tracked_state brw_clip_unit;
extern const struct brw_tracked_state brw_vs_pull_constants;
extern const struct brw_tracked_state brw_gs_pull_constants;
extern const struct brw_tracked_state brw_wm_pull_constants;
+extern const struct brw_tracked_state brw_cs_pull_constants;
extern const struct brw_tracked_state brw_constant_buffer;
extern const struct brw_tracked_state brw_curbe_offsets;
extern const struct brw_tracked_state brw_invariant_state;
@@ -220,7 +221,7 @@ bool brw_search_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key,
GLuint key_size,
- uint32_t *inout_offset, void *out_aux);
+ uint32_t *inout_offset, void *inout_aux);
void brw_state_cache_check_size( struct brw_context *brw );
void brw_init_caches( struct brw_context *brw );
@@ -345,7 +346,8 @@ calculate_attr_overrides(const struct brw_context *brw,
uint16_t *attr_overrides,
uint32_t *point_sprite_enables,
uint32_t *flat_enables,
- uint32_t *urb_entry_read_length);
+ uint32_t *urb_entry_read_length,
+ uint32_t *urb_entry_read_offset);
/* gen6_surface_state.c */
void gen6_init_vtable_surface_functions(struct brw_context *brw);
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index 2fbcd146750..f7c0a2037d9 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -137,7 +137,7 @@ bool
brw_search_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key, GLuint key_size,
- uint32_t *inout_offset, void *out_aux)
+ uint32_t *inout_offset, void *inout_aux)
{
struct brw_context *brw = cache->brw;
struct brw_cache_item *item;
@@ -155,11 +155,12 @@ brw_search_cache(struct brw_cache *cache,
if (item == NULL)
return false;
- *(void **)out_aux = ((char *)item->key + item->key_size);
+ void *aux = ((char *) item->key) + item->key_size;
- if (item->offset != *inout_offset) {
+ if (item->offset != *inout_offset || aux != *((void **) inout_aux)) {
brw->ctx.NewDriverState |= (1 << cache_id);
*inout_offset = item->offset;
+ *((void **) inout_aux) = aux;
}
return true;
@@ -349,11 +350,6 @@ brw_init_caches(struct brw_context *brw)
4096, 64);
if (brw->has_llc)
drm_intel_gem_bo_map_unsynchronized(cache->bo);
-
- cache->aux_free[BRW_CACHE_VS_PROG] = brw_stage_prog_data_free;
- cache->aux_free[BRW_CACHE_GS_PROG] = brw_stage_prog_data_free;
- cache->aux_free[BRW_CACHE_FS_PROG] = brw_stage_prog_data_free;
- cache->aux_free[BRW_CACHE_CS_PROG] = brw_stage_prog_data_free;
}
static void
@@ -367,9 +363,12 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
for (i = 0; i < cache->size; i++) {
for (c = cache->items[i]; c; c = next) {
next = c->next;
- if (cache->aux_free[c->cache_id]) {
+ if (c->cache_id == BRW_CACHE_VS_PROG ||
+ c->cache_id == BRW_CACHE_GS_PROG ||
+ c->cache_id == BRW_CACHE_FS_PROG ||
+ c->cache_id == BRW_CACHE_CS_PROG) {
const void *item_aux = c->key + c->key_size;
- cache->aux_free[c->cache_id](item_aux);
+ brw_stage_prog_data_free(item_aux);
}
free((void *)c->key);
free(c);
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 79b8301954e..0344b8a7fb0 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -259,6 +259,7 @@ static const struct brw_tracked_state *gen7_compute_atoms[] =
&brw_state_base_address,
&brw_cs_image_surfaces,
&gen7_cs_push_constants,
+ &brw_cs_pull_constants,
&brw_cs_ubo_surfaces,
&brw_cs_abo_surfaces,
&brw_texture_surfaces,
@@ -353,6 +354,7 @@ static const struct brw_tracked_state *gen8_compute_atoms[] =
&gen8_state_base_address,
&brw_cs_image_surfaces,
&gen7_cs_push_constants,
+ &brw_cs_pull_constants,
&brw_cs_ubo_surfaces,
&brw_cs_abo_surfaces,
&brw_texture_surfaces,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 3e7078d0b32..01eb1580953 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1370,9 +1370,10 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
vec4_instruction *inst = (vec4_instruction *)be_inst;
if (inst->predicate) {
- fprintf(file, "(%cf0.%d) ",
+ fprintf(file, "(%cf0.%d%s) ",
inst->predicate_inverse ? '-' : '+',
- inst->flag_subreg);
+ inst->flag_subreg,
+ pred_ctrl_align16[inst->predicate]);
}
fprintf(file, "%s", brw_instruction_name(inst->opcode));
@@ -1426,9 +1427,10 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
case BAD_FILE:
fprintf(file, "(null)");
break;
- default:
- fprintf(file, "???");
- break;
+ case IMM:
+ case ATTR:
+ case UNIFORM:
+ unreachable("not reached");
}
if (inst->dst.writemask != WRITEMASK_XYZW) {
fprintf(file, ".");
@@ -1520,9 +1522,8 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
case BAD_FILE:
fprintf(file, "(null)");
break;
- default:
- fprintf(file, "???");
- break;
+ case MRF:
+ unreachable("not reached");
}
/* Don't print .0; and only VGRFs have reg_offsets and sizes */
@@ -1787,13 +1788,100 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
emit(MOV(offset, src_reg(index * SHADER_TIME_STRIDE)));
time.type = BRW_REGISTER_TYPE_UD;
- emit(MOV(time, src_reg(value)));
+ emit(MOV(time, value));
vec4_instruction *inst =
emit(SHADER_OPCODE_SHADER_TIME_ADD, dst_reg(), src_reg(dst));
inst->mlen = 2;
}
+void
+vec4_visitor::convert_to_hw_regs() /* fills in fixed_hw_reg for the sources and the destination of every instruction in cfg, chosen by each register's file */
+{
+ foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
+ for (int i = 0; i < 3; i++) { /* all three source slots are visited for every instruction */
+ struct src_reg &src = inst->src[i];
+ struct brw_reg reg;
+ switch (src.file) {
+ case GRF:
+ reg = brw_vec8_grf(src.reg + src.reg_offset, 0);
+ reg.type = src.type;
+ reg.dw1.bits.swizzle = src.swizzle;
+ reg.abs = src.abs;
+ reg.negate = src.negate;
+ break;
+
+ case IMM:
+ reg = brw_imm_reg(src.type);
+ reg.dw1.ud = src.fixed_hw_reg.dw1.ud; /* the immediate's bits are carried over from the existing fixed_hw_reg */
+ break;
+
+ case UNIFORM:
+ reg = stride(brw_vec4_grf(prog_data->base.dispatch_grf_start_reg + /* uniform index = reg + reg_offset; index / 2 is added to the start register ... */
+ (src.reg + src.reg_offset) / 2,
+ ((src.reg + src.reg_offset) % 2) * 4), /* ... and (index % 2) * 4 is passed as the second argument */
+ 0, 4, 1);
+ reg.type = src.type;
+ reg.dw1.bits.swizzle = src.swizzle;
+ reg.abs = src.abs;
+ reg.negate = src.negate;
+
+ /* This should have been moved to pull constants. */
+ assert(!src.reladdr);
+ break;
+
+ case HW_REG:
+ assert(src.type == src.fixed_hw_reg.type);
+ continue; /* already a hardware register: skips the assignment after the switch */
+
+ case BAD_FILE:
+ /* Probably unused. */
+ reg = brw_null_reg();
+ break;
+
+ case MRF:
+ case ATTR:
+ unreachable("not reached");
+ }
+ src.fixed_hw_reg = reg; /* reached for every file except HW_REG, which took the continue above */
+ }
+
+ dst_reg &dst = inst->dst;
+ struct brw_reg reg;
+
+ switch (inst->dst.file) {
+ case GRF:
+ reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
+ reg.type = dst.type;
+ reg.dw1.bits.writemask = dst.writemask;
+ break;
+
+ case MRF:
+ assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen)); /* bit 7 of the MRF number is masked off before the range check */
+ reg = brw_message_reg(dst.reg + dst.reg_offset);
+ reg.type = dst.type;
+ reg.dw1.bits.writemask = dst.writemask;
+ break;
+
+ case HW_REG:
+ assert(dst.type == dst.fixed_hw_reg.type);
+ reg = dst.fixed_hw_reg; /* existing hardware register is copied and stored back unchanged below */
+ break;
+
+ case BAD_FILE:
+ reg = brw_null_reg();
+ break;
+
+ case IMM:
+ case ATTR:
+ case UNIFORM:
+ unreachable("not reached");
+ }
+
+ dst.fixed_hw_reg = reg; /* unlike the source loop, the destination is always written back */
+ }
+}
+
bool
vec4_visitor::run()
{
@@ -1862,6 +1950,7 @@ vec4_visitor::run()
OPT(dead_code_eliminate);
OPT(dead_control_flow_eliminate, this);
OPT(opt_copy_propagation);
+ OPT(opt_cmod_propagation);
OPT(opt_cse);
OPT(opt_algebraic);
OPT(opt_register_coalesce);
@@ -1914,6 +2003,8 @@ vec4_visitor::run()
opt_set_dependency_control();
+ convert_to_hw_regs();
+
if (last_scratch > 0) {
prog_data->base.total_scratch =
brw_get_scratch_size(last_scratch * REG_SIZE);
@@ -2020,9 +2111,9 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
return NULL;
}
- vec4_generator g(compiler, log_data, &prog_data->base,
- mem_ctx, INTEL_DEBUG & DEBUG_VS, "vertex", "VS");
- assembly = g.generate_assembly(v.cfg, final_assembly_size, shader);
+ assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
+ shader, &prog_data->base, v.cfg,
+ final_assembly_size);
}
return assembly;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index d861b2e85df..ec8abf49cd8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -52,6 +52,15 @@ extern "C" {
extern "C" {
#endif
+const unsigned *
+brw_vec4_generate_assembly(const struct brw_compiler *compiler,
+ void *log_data,
+ void *mem_ctx,
+ const nir_shader *nir,
+ struct brw_vue_prog_data *prog_data,
+ const struct cfg_t *cfg,
+ unsigned *out_assembly_size);
+
#ifdef __cplusplus
} /* extern "C" */
@@ -149,6 +158,7 @@ public:
int var_range_start(unsigned v, unsigned n) const;
int var_range_end(unsigned v, unsigned n) const;
bool virtual_grf_interferes(int a, int b);
+ bool opt_cmod_propagation();
bool opt_copy_propagation(bool do_constant_prop = true);
bool opt_cse_local(bblock_t *block);
bool opt_cse();
@@ -158,6 +168,7 @@ public:
bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
void opt_set_dependency_control();
void opt_schedule_instructions();
+ void convert_to_hw_regs();
vec4_instruction *emit(vec4_instruction *inst);
@@ -381,117 +392,6 @@ private:
unsigned last_scratch; /**< measured in 32-byte (register size) units */
};
-
-/**
- * The vertex shader code generator.
- *
- * Translates VS IR to actual i965 assembly code.
- */
-class vec4_generator
-{
-public:
- vec4_generator(const struct brw_compiler *compiler, void *log_data,
- struct brw_vue_prog_data *prog_data,
- void *mem_ctx,
- bool debug_flag,
- const char *stage_name,
- const char *stage_abbrev);
- ~vec4_generator();
-
- const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size,
- const nir_shader *nir);
-
-private:
- void generate_code(const cfg_t *cfg, const nir_shader *nir);
-
- void generate_math1_gen4(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src);
- void generate_math2_gen4(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_math_gen6(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
-
- void generate_tex(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg sampler_index);
-
- void generate_vs_urb_write(vec4_instruction *inst);
- void generate_gs_urb_write(vec4_instruction *inst);
- void generate_gs_urb_write_allocate(vec4_instruction *inst);
- void generate_gs_thread_end(vec4_instruction *inst);
- void generate_gs_set_write_offset(struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_gs_set_vertex_count(struct brw_reg dst,
- struct brw_reg src);
- void generate_gs_svb_write(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_gs_svb_set_destination_index(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src);
- void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src);
- void generate_gs_prepare_channel_masks(struct brw_reg dst);
- void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
- void generate_gs_get_instance_id(struct brw_reg dst);
- void generate_gs_ff_sync_set_primitives(struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1,
- struct brw_reg src2);
- void generate_gs_ff_sync(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1);
- void generate_gs_set_primitive_id(struct brw_reg dst);
- void generate_oword_dual_block_offsets(struct brw_reg m1,
- struct brw_reg index);
- void generate_scratch_write(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg index);
- void generate_scratch_read(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg index);
- void generate_pull_constant_load(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg index,
- struct brw_reg offset);
- void generate_pull_constant_load_gen7(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg surf_index,
- struct brw_reg offset);
- void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
- struct brw_reg dst);
-
- void generate_get_buffer_size(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg index);
-
- void generate_unpack_flags(struct brw_reg dst);
-
- const struct brw_compiler *compiler;
- void *log_data; /* Passed to compiler->*_log functions */
-
- const struct brw_device_info *devinfo;
-
- struct brw_codegen *p;
-
- struct brw_vue_prog_data *prog_data;
-
- void *mem_ctx;
- const char *stage_name;
- const char *stage_abbrev;
- const bool debug_flag;
-};
-
} /* namespace brw */
#endif /* __cplusplus */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
new file mode 100644
index 00000000000..329f24269ce
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
@@ -0,0 +1,157 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/** @file brw_vec4_cmod_propagation.cpp
+ *
+ * Really similar to brw_fs_cmod_propagation but adapted to vec4 needs. Check
+ * brw_fs_cmod_propagation for further details on the rationale behind this
+ * optimization.
+ */
+
+#include "brw_vec4.h"
+#include "brw_cfg.h"
+
+namespace brw {
+
+static bool
+opt_cmod_propagation_local(bblock_t *block) /* walks block backwards and removes flag-only AND/CMP/MOV instructions whose condition an earlier instruction already provides or can take over; returns true if any instruction was removed */
+{
+ bool progress = false;
+ int ip = block->end_ip + 1; /* decremented per instruction below; not otherwise read in this function */
+
+ foreach_inst_in_block_reverse_safe(vec4_instruction, inst, block) {
+ ip--;
+
+ if ((inst->opcode != BRW_OPCODE_AND &&
+ inst->opcode != BRW_OPCODE_CMP &&
+ inst->opcode != BRW_OPCODE_MOV) ||
+ inst->predicate != BRW_PREDICATE_NONE ||
+ !inst->dst.is_null() ||
+ inst->src[0].file != GRF ||
+ inst->src[0].abs)
+ continue; /* candidates: unpredicated AND/CMP/MOV with a null destination and a GRF src[0] without abs */
+
+ if (inst->opcode == BRW_OPCODE_AND &&
+ !(inst->src[1].is_one() &&
+ inst->conditional_mod == BRW_CONDITIONAL_NZ &&
+ !inst->src[0].negate))
+ continue; /* an AND only qualifies as "and.nz null, src0, 1" with src0 not negated */
+
+ if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero())
+ continue; /* a CMP only qualifies when its second operand is zero */
+
+ if (inst->opcode == BRW_OPCODE_MOV &&
+ inst->conditional_mod != BRW_CONDITIONAL_NZ)
+ continue; /* a MOV only qualifies with the .nz conditional modifier */
+
+ bool read_flag = false; /* becomes true once an instruction scanned before the writer reads the flag */
+ foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst, inst) {
+ if (inst->src[0].in_range(scan_inst->dst,
+ scan_inst->regs_written)) { /* scan_inst is the nearest earlier writer overlapping inst's source */
+ if ((scan_inst->predicate && scan_inst->opcode != BRW_OPCODE_SEL) ||
+ scan_inst->dst.reg_offset != inst->src[0].reg_offset ||
+ (scan_inst->dst.writemask != WRITEMASK_X &&
+ scan_inst->dst.writemask != WRITEMASK_XYZW) ||
+ (scan_inst->dst.writemask == WRITEMASK_XYZW &&
+ inst->src[0].swizzle != BRW_SWIZZLE_XYZW) ||
+ (inst->dst.writemask & ~scan_inst->dst.writemask) != 0) {
+ break; /* writer is predicated (and not a SEL), or its reg_offset, writemask or the swizzle does not line up: leave inst alone */
+ }
+
+ /* CMP's result is the same regardless of dest type. */
+ if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
+ scan_inst->opcode == BRW_OPCODE_CMP &&
+ (inst->dst.type == BRW_REGISTER_TYPE_D ||
+ inst->dst.type == BRW_REGISTER_TYPE_UD)) {
+ inst->remove(block);
+ progress = true;
+ break;
+ }
+
+ /* If the AND wasn't handled by the previous case, it isn't safe
+ * to remove it.
+ */
+ if (inst->opcode == BRW_OPCODE_AND)
+ break;
+
+ /* Comparisons operate differently for ints and floats */
+ if (scan_inst->dst.type != inst->dst.type &&
+ (scan_inst->dst.type == BRW_REGISTER_TYPE_F ||
+ inst->dst.type == BRW_REGISTER_TYPE_F))
+ break;
+
+ /* If the instruction generating inst's source also wrote the
+ * flag, and inst is doing a simple .nz comparison, then inst
+ * is redundant - the appropriate value is already in the flag
+ * register. Delete inst.
+ */
+ if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
+ !inst->src[0].negate &&
+ scan_inst->writes_flag()) {
+ inst->remove(block);
+ progress = true;
+ break;
+ }
+
+ /* Otherwise, try propagating the conditional. */
+ enum brw_conditional_mod cond =
+ inst->src[0].negate ? brw_swap_cmod(inst->conditional_mod)
+ : inst->conditional_mod; /* a negated source goes through brw_swap_cmod() first */
+
+ if (scan_inst->can_do_cmod() &&
+ ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
+ scan_inst->conditional_mod == cond)) { /* a new cmod is only added when nothing scanned in between read the flag */
+ scan_inst->conditional_mod = cond;
+ inst->remove(block);
+ progress = true;
+ }
+ break; /* propagated or not, the search stops at the first overlapping writer */
+ }
+
+ if (scan_inst->writes_flag())
+ break; /* a flag write by a non-overlapping instruction ends the search */
+
+ read_flag = read_flag || scan_inst->reads_flag();
+ }
+ }
+
+ return progress;
+}
+
+bool
+vec4_visitor::opt_cmod_propagation() /* runs opt_cmod_propagation_local() on every block of cfg; returns true if any block reported progress */
+{
+ bool progress = false;
+
+ foreach_block_reverse(block, cfg) {
+ progress = opt_cmod_propagation_local(block) || progress; /* call is on the left of ||, so it still runs once progress is already true */
+ }
+
+ if (progress)
+ invalidate_live_intervals(); /* only called when at least one block reported progress */
+
+ return progress;
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
index 8fc7a365bfc..284e0a8d0a5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
@@ -78,13 +78,19 @@ vec4_visitor::dead_code_eliminate()
sizeof(BITSET_WORD));
foreach_inst_in_block_reverse(vec4_instruction, inst, block) {
- if (inst->dst.file == GRF && !inst->has_side_effects()) {
+ if ((inst->dst.file == GRF && !inst->has_side_effects()) ||
+ (inst->dst.is_null() && inst->writes_flag())){
bool result_live[4] = { false };
- for (unsigned i = 0; i < inst->regs_written; i++) {
- for (int c = 0; c < 4; c++)
- result_live[c] |= BITSET_TEST(
- live, var_from_reg(alloc, offset(inst->dst, i), c));
+ if (inst->dst.file == GRF) {
+ for (unsigned i = 0; i < inst->regs_written; i++) {
+ for (int c = 0; c < 4; c++)
+ result_live[c] |= BITSET_TEST(
+ live, var_from_reg(alloc, offset(inst->dst, i), c));
+ }
+ } else {
+ for (unsigned c = 0; c < 4; c++)
+ result_live[c] = BITSET_TEST(flag_live, c);
}
/* If the instruction can't do writemasking, then it's all or
@@ -117,7 +123,11 @@ vec4_visitor::dead_code_eliminate()
}
if (inst->dst.is_null() && inst->writes_flag()) {
- if (!BITSET_TEST(flag_live, 0)) {
+ bool combined_live = false;
+ for (unsigned c = 0; c < 4; c++)
+ combined_live |= BITSET_TEST(flag_live, c);
+
+ if (!combined_live) {
inst->opcode = BRW_OPCODE_NOP;
progress = true;
continue;
@@ -136,7 +146,8 @@ vec4_visitor::dead_code_eliminate()
}
if (inst->writes_flag()) {
- BITSET_CLEAR(flag_live, 0);
+ for (unsigned c = 0; c < 4; c++)
+ BITSET_CLEAR(flag_live, c);
}
for (int i = 0; i < 3; i++) {
@@ -150,8 +161,10 @@ vec4_visitor::dead_code_eliminate()
}
}
- if (inst->reads_flag()) {
- BITSET_SET(flag_live, 0);
+ for (unsigned c = 0; c < 4; c++) {
+ if (inst->reads_flag(c)) {
+ BITSET_SET(flag_live, c);
+ }
}
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index a84f6c47471..8bc21df5ffc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -20,146 +20,17 @@
* IN THE SOFTWARE.
*/
-#include <ctype.h>
#include "glsl/glsl_parser_extras.h"
#include "brw_vec4.h"
#include "brw_cfg.h"
-extern "C" {
-#include "brw_eu.h"
-#include "main/macros.h"
-#include "program/prog_print.h"
-#include "program/prog_parameter.h"
-};
+using namespace brw;
-namespace brw {
-
-struct brw_reg
-vec4_instruction::get_dst(unsigned gen)
-{
- struct brw_reg brw_reg;
-
- switch (dst.file) {
- case GRF:
- brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
- brw_reg = retype(brw_reg, dst.type);
- brw_reg.dw1.bits.writemask = dst.writemask;
- break;
-
- case MRF:
- assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(gen));
- brw_reg = brw_message_reg(dst.reg + dst.reg_offset);
- brw_reg = retype(brw_reg, dst.type);
- brw_reg.dw1.bits.writemask = dst.writemask;
- break;
-
- case HW_REG:
- assert(dst.type == dst.fixed_hw_reg.type);
- brw_reg = dst.fixed_hw_reg;
- break;
-
- case BAD_FILE:
- brw_reg = brw_null_reg();
- break;
-
- default:
- unreachable("not reached");
- }
- return brw_reg;
-}
-
-struct brw_reg
-vec4_instruction::get_src(const struct brw_vue_prog_data *prog_data, int i)
-{
- struct brw_reg brw_reg;
-
- switch (src[i].file) {
- case GRF:
- brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0);
- brw_reg = retype(brw_reg, src[i].type);
- brw_reg.dw1.bits.swizzle = src[i].swizzle;
- if (src[i].abs)
- brw_reg = brw_abs(brw_reg);
- if (src[i].negate)
- brw_reg = negate(brw_reg);
- break;
-
- case IMM:
- switch (src[i].type) {
- case BRW_REGISTER_TYPE_F:
- brw_reg = brw_imm_f(src[i].fixed_hw_reg.dw1.f);
- break;
- case BRW_REGISTER_TYPE_D:
- brw_reg = brw_imm_d(src[i].fixed_hw_reg.dw1.d);
- break;
- case BRW_REGISTER_TYPE_UD:
- brw_reg = brw_imm_ud(src[i].fixed_hw_reg.dw1.ud);
- break;
- case BRW_REGISTER_TYPE_VF:
- brw_reg = brw_imm_vf(src[i].fixed_hw_reg.dw1.ud);
- break;
- default:
- unreachable("not reached");
- }
- break;
-
- case UNIFORM:
- brw_reg = stride(brw_vec4_grf(prog_data->base.dispatch_grf_start_reg +
- (src[i].reg + src[i].reg_offset) / 2,
- ((src[i].reg + src[i].reg_offset) % 2) * 4),
- 0, 4, 1);
- brw_reg = retype(brw_reg, src[i].type);
- brw_reg.dw1.bits.swizzle = src[i].swizzle;
- if (src[i].abs)
- brw_reg = brw_abs(brw_reg);
- if (src[i].negate)
- brw_reg = negate(brw_reg);
-
- /* This should have been moved to pull constants. */
- assert(!src[i].reladdr);
- break;
-
- case HW_REG:
- assert(src[i].type == src[i].fixed_hw_reg.type);
- brw_reg = src[i].fixed_hw_reg;
- break;
-
- case BAD_FILE:
- /* Probably unused. */
- brw_reg = brw_null_reg();
- break;
- case ATTR:
- default:
- unreachable("not reached");
- }
-
- return brw_reg;
-}
-
-vec4_generator::vec4_generator(const struct brw_compiler *compiler,
- void *log_data,
- struct brw_vue_prog_data *prog_data,
- void *mem_ctx,
- bool debug_flag,
- const char *stage_name,
- const char *stage_abbrev)
- : compiler(compiler), log_data(log_data), devinfo(compiler->devinfo),
- prog_data(prog_data),
- mem_ctx(mem_ctx), stage_name(stage_name), stage_abbrev(stage_abbrev),
- debug_flag(debug_flag)
-{
- p = rzalloc(mem_ctx, struct brw_codegen);
- brw_init_codegen(devinfo, p, mem_ctx);
-}
-
-vec4_generator::~vec4_generator()
-{
-}
-
-void
-vec4_generator::generate_math1_gen4(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src)
+static void
+generate_math1_gen4(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
{
gen4_math(p,
dst,
@@ -178,11 +49,12 @@ check_gen6_math_src_arg(struct brw_reg src)
assert(src.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
}
-void
-vec4_generator::generate_math_gen6(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
+static void
+generate_math_gen6(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
/* Can't do writemask because math can't be align16. */
assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
@@ -196,11 +68,12 @@ vec4_generator::generate_math_gen6(vec4_instruction *inst,
brw_set_default_access_mode(p, BRW_ALIGN_16);
}
-void
-vec4_generator::generate_math2_gen4(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
+static void
+generate_math2_gen4(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
/* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
* "Message Payload":
@@ -229,12 +102,15 @@ vec4_generator::generate_math2_gen4(vec4_instruction *inst,
BRW_MATH_PRECISION_FULL);
}
-void
-vec4_generator::generate_tex(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg sampler_index)
+static void
+generate_tex(struct brw_codegen *p,
+ struct brw_vue_prog_data *prog_data,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg sampler_index)
{
+ const struct brw_device_info *devinfo = p->devinfo;
int msg_type = -1;
if (devinfo->gen >= 5) {
@@ -440,8 +316,8 @@ vec4_generator::generate_tex(vec4_instruction *inst,
}
}
-void
-vec4_generator::generate_vs_urb_write(vec4_instruction *inst)
+static void
+generate_vs_urb_write(struct brw_codegen *p, vec4_instruction *inst)
{
brw_urb_WRITE(p,
brw_null_reg(), /* dest */
@@ -454,8 +330,8 @@ vec4_generator::generate_vs_urb_write(vec4_instruction *inst)
BRW_URB_SWIZZLE_INTERLEAVE);
}
-void
-vec4_generator::generate_gs_urb_write(vec4_instruction *inst)
+static void
+generate_gs_urb_write(struct brw_codegen *p, vec4_instruction *inst)
{
struct brw_reg src = brw_message_reg(inst->base_mrf);
brw_urb_WRITE(p,
@@ -469,14 +345,14 @@ vec4_generator::generate_gs_urb_write(vec4_instruction *inst)
BRW_URB_SWIZZLE_INTERLEAVE);
}
-void
-vec4_generator::generate_gs_urb_write_allocate(vec4_instruction *inst)
+static void
+generate_gs_urb_write_allocate(struct brw_codegen *p, vec4_instruction *inst)
{
struct brw_reg src = brw_message_reg(inst->base_mrf);
/* We pass the temporary passed in src0 as the writeback register */
brw_urb_WRITE(p,
- inst->get_src(this->prog_data, 0), /* dest */
+ inst->src[0].fixed_hw_reg, /* dest */
inst->base_mrf, /* starting mrf reg nr */
src,
BRW_URB_WRITE_ALLOCATE_COMPLETE,
@@ -489,14 +365,13 @@ vec4_generator::generate_gs_urb_write_allocate(vec4_instruction *inst)
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- brw_MOV(p, get_element_ud(inst->get_dst(devinfo->gen), 0),
- get_element_ud(inst->get_src(this->prog_data, 0), 0));
- brw_set_default_access_mode(p, BRW_ALIGN_16);
+ brw_MOV(p, get_element_ud(inst->dst.fixed_hw_reg, 0),
+ get_element_ud(inst->src[0].fixed_hw_reg, 0));
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_thread_end(vec4_instruction *inst)
+static void
+generate_gs_thread_end(struct brw_codegen *p, vec4_instruction *inst)
{
struct brw_reg src = brw_message_reg(inst->base_mrf);
brw_urb_WRITE(p,
@@ -510,10 +385,11 @@ vec4_generator::generate_gs_thread_end(vec4_instruction *inst)
BRW_URB_SWIZZLE_INTERLEAVE);
}
-void
-vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
+static void
+generate_gs_set_write_offset(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
/* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
* Header: M0.3):
@@ -536,29 +412,29 @@ vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- assert(devinfo->gen >= 7 &&
+ assert(p->devinfo->gen >= 7 &&
src1.file == BRW_IMMEDIATE_VALUE &&
src1.type == BRW_REGISTER_TYPE_UD &&
src1.dw1.ud <= USHRT_MAX);
- if (src0.file == IMM) {
+ if (src0.file == BRW_IMMEDIATE_VALUE) {
brw_MOV(p, suboffset(stride(dst, 2, 2, 1), 3),
brw_imm_ud(src0.dw1.ud * src1.dw1.ud));
} else {
brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
retype(src1, BRW_REGISTER_TYPE_UW));
}
- brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_set_vertex_count(struct brw_reg dst,
- struct brw_reg src)
+static void
+generate_gs_set_vertex_count(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg src)
{
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- if (devinfo->gen >= 8) {
+ if (p->devinfo->gen >= 8) {
/* Move the vertex count into the second MRF for the EOT write. */
brw_MOV(p, retype(brw_message_reg(dst.nr + 1), BRW_REGISTER_TYPE_UD),
src);
@@ -580,16 +456,17 @@ vec4_generator::generate_gs_set_vertex_count(struct brw_reg dst,
brw_MOV(p,
suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4),
stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0));
- brw_set_default_access_mode(p, BRW_ALIGN_16);
}
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_svb_write(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
+static void
+generate_gs_svb_write(struct brw_codegen *p,
+ struct brw_vue_prog_data *prog_data,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
int binding = inst->sol_binding;
bool final_write = inst->sol_final_write;
@@ -623,12 +500,12 @@ vec4_generator::generate_gs_svb_write(vec4_instruction *inst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_svb_set_destination_index(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src)
+static void
+generate_gs_svb_set_destination_index(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
{
-
int vertex = inst->sol_vertex;
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
@@ -637,8 +514,10 @@ vec4_generator::generate_gs_svb_set_destination_index(vec4_instruction *inst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src)
+static void
+generate_gs_set_dword_2(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg src)
{
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
@@ -647,8 +526,9 @@ vec4_generator::generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src)
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
+static void
+generate_gs_prepare_channel_masks(struct brw_codegen *p,
+ struct brw_reg dst)
{
/* We want to left shift just DWORD 4 (the x component belonging to the
* second geometry shader invocation) by 4 bits. So generate the
@@ -664,9 +544,10 @@ vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
- struct brw_reg src)
+static void
+generate_gs_set_channel_masks(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg src)
{
/* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
* Header: M0.5):
@@ -727,8 +608,9 @@ vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_get_instance_id(struct brw_reg dst)
+static void
+generate_gs_get_instance_id(struct brw_codegen *p,
+ struct brw_reg dst)
{
/* We want to right shift R0.0 & R0.1 by GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT
* and store into dst.0 & dst.4. So generate the instruction:
@@ -744,11 +626,12 @@ vec4_generator::generate_gs_get_instance_id(struct brw_reg dst)
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1,
- struct brw_reg src2)
+static void
+generate_gs_ff_sync_set_primitives(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1,
+ struct brw_reg src2)
{
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
@@ -765,11 +648,12 @@ vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_ff_sync(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
+static void
+generate_gs_ff_sync(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
/* This opcode uses an implied MRF register for:
* - the header of the ff_sync message. And as such it is expected to be
@@ -811,8 +695,8 @@ vec4_generator::generate_gs_ff_sync(vec4_instruction *inst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_gs_set_primitive_id(struct brw_reg dst)
+static void
+generate_gs_set_primitive_id(struct brw_codegen *p, struct brw_reg dst)
{
/* In gen6, PrimitiveID is delivered in R0.1 of the payload */
struct brw_reg src = brw_vec8_grf(0, 0);
@@ -823,13 +707,14 @@ vec4_generator::generate_gs_set_primitive_id(struct brw_reg dst)
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
- struct brw_reg index)
+static void
+generate_oword_dual_block_offsets(struct brw_codegen *p,
+ struct brw_reg m1,
+ struct brw_reg index)
{
int second_vertex_offset;
- if (devinfo->gen >= 6)
+ if (p->devinfo->gen >= 6)
second_vertex_offset = 1;
else
second_vertex_offset = 16;
@@ -860,8 +745,9 @@ vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_unpack_flags(struct brw_reg dst)
+static void
+generate_unpack_flags(struct brw_codegen *p,
+ struct brw_reg dst)
{
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
@@ -878,16 +764,18 @@ vec4_generator::generate_unpack_flags(struct brw_reg dst)
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_scratch_read(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg index)
+static void
+generate_scratch_read(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg index)
{
+ const struct brw_device_info *devinfo = p->devinfo;
struct brw_reg header = brw_vec8_grf(0, 0);
gen6_resolve_implied_move(p, &header, inst->base_mrf);
- generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
+ generate_oword_dual_block_offsets(p, brw_message_reg(inst->base_mrf + 1),
index);
uint32_t msg_type;
@@ -906,7 +794,7 @@ vec4_generator::generate_scratch_read(vec4_instruction *inst,
brw_set_dest(p, send, dst);
brw_set_src0(p, send, header);
if (devinfo->gen < 6)
- brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf);
+ brw_inst_set_cond_modifier(devinfo, send, inst->base_mrf);
brw_set_dp_read_message(p, send,
255, /* binding table index: stateless access */
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
@@ -917,12 +805,14 @@ vec4_generator::generate_scratch_read(vec4_instruction *inst,
1 /* rlen */);
}
-void
-vec4_generator::generate_scratch_write(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg index)
+static void
+generate_scratch_write(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg index)
{
+ const struct brw_device_info *devinfo = p->devinfo;
struct brw_reg header = brw_vec8_grf(0, 0);
bool write_commit;
@@ -933,7 +823,7 @@ vec4_generator::generate_scratch_write(vec4_instruction *inst,
gen6_resolve_implied_move(p, &header, inst->base_mrf);
- generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
+ generate_oword_dual_block_offsets(p, brw_message_reg(inst->base_mrf + 1),
index);
brw_MOV(p,
@@ -990,12 +880,15 @@ vec4_generator::generate_scratch_write(vec4_instruction *inst,
write_commit);
}
-void
-vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg index,
- struct brw_reg offset)
+static void
+generate_pull_constant_load(struct brw_codegen *p,
+ struct brw_vue_prog_data *prog_data,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg index,
+ struct brw_reg offset)
{
+ const struct brw_device_info *devinfo = p->devinfo;
assert(index.file == BRW_IMMEDIATE_VALUE &&
index.type == BRW_REGISTER_TYPE_UD);
uint32_t surf_index = index.dw1.ud;
@@ -1036,13 +929,15 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
brw_mark_surface_used(&prog_data->base, surf_index);
}
-void
-vec4_generator::generate_get_buffer_size(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src,
- struct brw_reg surf_index)
+static void
+generate_get_buffer_size(struct brw_codegen *p,
+ struct brw_vue_prog_data *prog_data,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg surf_index)
{
- assert(devinfo->gen >= 7);
+ assert(p->devinfo->gen >= 7);
assert(surf_index.type == BRW_REGISTER_TYPE_UD &&
surf_index.file == BRW_IMMEDIATE_VALUE);
@@ -1062,11 +957,13 @@ vec4_generator::generate_get_buffer_size(vec4_instruction *inst,
brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
}
-void
-vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg surf_index,
- struct brw_reg offset)
+static void
+generate_pull_constant_load_gen7(struct brw_codegen *p,
+ struct brw_vue_prog_data *prog_data,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg surf_index,
+ struct brw_reg offset)
{
assert(surf_index.type == BRW_REGISTER_TYPE_UD);
@@ -1123,9 +1020,10 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
}
}
-void
-vec4_generator::generate_set_simd4x2_header_gen9(vec4_instruction *inst,
- struct brw_reg dst)
+static void
+generate_set_simd4x2_header_gen9(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst)
{
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
@@ -1140,9 +1038,18 @@ vec4_generator::generate_set_simd4x2_header_gen9(vec4_instruction *inst,
brw_pop_insn_state(p);
}
-void
-vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
+static void
+generate_code(struct brw_codegen *p,
+ const struct brw_compiler *compiler,
+ void *log_data,
+ const nir_shader *nir,
+ struct brw_vue_prog_data *prog_data,
+ const struct cfg_t *cfg)
{
+ const struct brw_device_info *devinfo = p->devinfo;
+ const char *stage_abbrev = _mesa_shader_stage_to_abbrev(nir->stage);
+ bool debug_flag = INTEL_DEBUG &
+ intel_debug_flag_for_shader_stage(nir->stage);
struct annotation_info annotation;
memset(&annotation, 0, sizeof(annotation));
int loop_count = 0;
@@ -1154,9 +1061,9 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset);
for (unsigned int i = 0; i < 3; i++) {
- src[i] = inst->get_src(this->prog_data, i);
+ src[i] = inst->src[i].fixed_hw_reg;
}
- dst = inst->get_dst(devinfo->gen);
+ dst = inst->dst.fixed_hw_reg;
brw_set_default_predicate_control(p, inst->predicate);
brw_set_default_predicate_inverse(p, inst->predicate_inverse);
@@ -1383,9 +1290,9 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
gen6_math(p, dst, brw_math_function(inst->opcode), src[0],
brw_null_reg());
} else if (devinfo->gen == 6) {
- generate_math_gen6(inst, dst, src[0], brw_null_reg());
+ generate_math_gen6(p, inst, dst, src[0], brw_null_reg());
} else {
- generate_math1_gen4(inst, dst, src[0]);
+ generate_math1_gen4(p, inst, dst, src[0]);
}
break;
@@ -1396,9 +1303,9 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
if (devinfo->gen >= 7) {
gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
} else if (devinfo->gen == 6) {
- generate_math_gen6(inst, dst, src[0], src[1]);
+ generate_math_gen6(p, inst, dst, src[0], src[1]);
} else {
- generate_math2_gen4(inst, dst, src[0], src[1]);
+ generate_math2_gen4(p, inst, dst, src[0], src[1]);
}
break;
@@ -1412,92 +1319,92 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
case SHADER_OPCODE_SAMPLEINFO:
- generate_tex(inst, dst, src[0], src[1]);
+ generate_tex(p, prog_data, inst, dst, src[0], src[1]);
break;
case VS_OPCODE_URB_WRITE:
- generate_vs_urb_write(inst);
+ generate_vs_urb_write(p, inst);
break;
case SHADER_OPCODE_GEN4_SCRATCH_READ:
- generate_scratch_read(inst, dst, src[0]);
+ generate_scratch_read(p, inst, dst, src[0]);
break;
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
- generate_scratch_write(inst, dst, src[0], src[1]);
+ generate_scratch_write(p, inst, dst, src[0], src[1]);
break;
case VS_OPCODE_PULL_CONSTANT_LOAD:
- generate_pull_constant_load(inst, dst, src[0], src[1]);
+ generate_pull_constant_load(p, prog_data, inst, dst, src[0], src[1]);
break;
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
- generate_pull_constant_load_gen7(inst, dst, src[0], src[1]);
+ generate_pull_constant_load_gen7(p, prog_data, inst, dst, src[0], src[1]);
break;
case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
- generate_set_simd4x2_header_gen9(inst, dst);
+ generate_set_simd4x2_header_gen9(p, inst, dst);
break;
case VS_OPCODE_GET_BUFFER_SIZE:
- generate_get_buffer_size(inst, dst, src[0], src[1]);
+ generate_get_buffer_size(p, prog_data, inst, dst, src[0], src[1]);
break;
case GS_OPCODE_URB_WRITE:
- generate_gs_urb_write(inst);
+ generate_gs_urb_write(p, inst);
break;
case GS_OPCODE_URB_WRITE_ALLOCATE:
- generate_gs_urb_write_allocate(inst);
+ generate_gs_urb_write_allocate(p, inst);
break;
case GS_OPCODE_SVB_WRITE:
- generate_gs_svb_write(inst, dst, src[0], src[1]);
+ generate_gs_svb_write(p, prog_data, inst, dst, src[0], src[1]);
break;
case GS_OPCODE_SVB_SET_DST_INDEX:
- generate_gs_svb_set_destination_index(inst, dst, src[0]);
+ generate_gs_svb_set_destination_index(p, inst, dst, src[0]);
break;
case GS_OPCODE_THREAD_END:
- generate_gs_thread_end(inst);
+ generate_gs_thread_end(p, inst);
break;
case GS_OPCODE_SET_WRITE_OFFSET:
- generate_gs_set_write_offset(dst, src[0], src[1]);
+ generate_gs_set_write_offset(p, dst, src[0], src[1]);
break;
case GS_OPCODE_SET_VERTEX_COUNT:
- generate_gs_set_vertex_count(dst, src[0]);
+ generate_gs_set_vertex_count(p, dst, src[0]);
break;
case GS_OPCODE_FF_SYNC:
- generate_gs_ff_sync(inst, dst, src[0], src[1]);
+ generate_gs_ff_sync(p, inst, dst, src[0], src[1]);
break;
case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
- generate_gs_ff_sync_set_primitives(dst, src[0], src[1], src[2]);
+ generate_gs_ff_sync_set_primitives(p, dst, src[0], src[1], src[2]);
break;
case GS_OPCODE_SET_PRIMITIVE_ID:
- generate_gs_set_primitive_id(dst);
+ generate_gs_set_primitive_id(p, dst);
break;
case GS_OPCODE_SET_DWORD_2:
- generate_gs_set_dword_2(dst, src[0]);
+ generate_gs_set_dword_2(p, dst, src[0]);
break;
case GS_OPCODE_PREPARE_CHANNEL_MASKS:
- generate_gs_prepare_channel_masks(dst);
+ generate_gs_prepare_channel_masks(p, dst);
break;
case GS_OPCODE_SET_CHANNEL_MASKS:
- generate_gs_set_channel_masks(dst, src[0]);
+ generate_gs_set_channel_masks(p, dst, src[0]);
break;
case GS_OPCODE_GET_INSTANCE_ID:
- generate_gs_get_instance_id(dst);
+ generate_gs_get_instance_id(p, dst);
break;
case SHADER_OPCODE_SHADER_TIME_ADD:
@@ -1556,7 +1463,7 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
break;
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
- generate_unpack_flags(dst);
+ generate_unpack_flags(p, dst);
break;
case VEC4_OPCODE_MOV_BYTES: {
@@ -1651,10 +1558,10 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
nir->info.label ? nir->info.label : "unnamed",
_mesa_shader_stage_to_string(nir->stage), nir->info.name);
- fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. Compacted %d to %d"
- " bytes (%.0f%%)\n",
+ fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles."
+ "Compacted %d to %d bytes (%.0f%%)\n",
stage_abbrev,
- before_size / 16, loop_count, before_size, after_size,
+ before_size / 16, loop_count, cfg->cycle_count, before_size, after_size,
100.0f * (before_size - after_size) / before_size);
dump_assembly(p->store, annotation.ann_count, annotation.ann,
@@ -1663,21 +1570,27 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir)
}
compiler->shader_debug_log(log_data,
- "%s vec4 shader: %d inst, %d loops, "
+ "%s vec4 shader: %d inst, %d loops, %u cycles, "
"compacted %d to %d bytes.\n",
- stage_abbrev, before_size / 16, loop_count,
+ stage_abbrev, before_size / 16,
+ loop_count, cfg->cycle_count,
before_size, after_size);
}
-const unsigned *
-vec4_generator::generate_assembly(const cfg_t *cfg,
- unsigned *assembly_size,
- const nir_shader *nir)
+extern "C" const unsigned *
+brw_vec4_generate_assembly(const struct brw_compiler *compiler,
+ void *log_data,
+ void *mem_ctx,
+ const nir_shader *nir,
+ struct brw_vue_prog_data *prog_data,
+ const struct cfg_t *cfg,
+ unsigned *out_assembly_size)
{
+ struct brw_codegen *p = rzalloc(mem_ctx, struct brw_codegen);
+ brw_init_codegen(compiler->devinfo, p, mem_ctx);
brw_set_default_access_mode(p, BRW_ALIGN_16);
- generate_code(cfg, nir);
- return brw_get_program(p, assembly_size);
-}
+ generate_code(p, compiler, log_data, nir, prog_data, cfg);
-} /* namespace brw */
+ return brw_get_program(p, out_assembly_size);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 9402489e628..cfb5cd95cb1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -768,7 +768,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
output_size_bytes += 32;
assert(output_size_bytes >= 1);
- int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
+ unsigned max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
if (compiler->devinfo->gen == 6)
max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
if (output_size_bytes > max_output_size_bytes)
@@ -824,9 +824,9 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader,
mem_ctx, true /* no_spills */, shader_time_index);
if (v.run()) {
- vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx,
- INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
- return g.generate_assembly(v.cfg, final_assembly_size, shader);
+ return brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
+ shader, &prog_data->base, v.cfg,
+ final_assembly_size);
}
}
}
@@ -875,9 +875,9 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
if (error_str)
*error_str = ralloc_strdup(mem_ctx, gs->fail_msg);
} else {
- vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx,
- INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
- ret = g.generate_assembly(gs->cfg, final_assembly_size, shader);
+ ret = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, shader,
+ &prog_data->base, gs->cfg,
+ final_assembly_size);
}
delete gs;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
index 678237901f2..aa9a6572eee 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
@@ -86,9 +86,10 @@ vec4_live_variables::setup_def_use()
}
}
}
- if (inst->reads_flag()) {
- if (!BITSET_TEST(bd->flag_def, 0)) {
- BITSET_SET(bd->flag_use, 0);
+ for (unsigned c = 0; c < 4; c++) {
+ if (inst->reads_flag(c) &&
+ !BITSET_TEST(bd->flag_def, c)) {
+ BITSET_SET(bd->flag_use, c);
}
}
@@ -110,8 +111,11 @@ vec4_live_variables::setup_def_use()
}
}
if (inst->writes_flag()) {
- if (!BITSET_TEST(bd->flag_use, 0)) {
- BITSET_SET(bd->flag_def, 0);
+ for (unsigned c = 0; c < 4; c++) {
+ if ((inst->dst.writemask & (1 << c)) &&
+ !BITSET_TEST(bd->flag_use, c)) {
+ BITSET_SET(bd->flag_def, c);
+ }
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index e79a9f3b5b9..1fb1773f856 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -193,7 +193,9 @@ vec4_visitor::nir_emit_if(nir_if *if_stmt)
vec4_instruction *inst = emit(MOV(dst_null_d(), condition));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
- emit(IF(BRW_PREDICATE_NORMAL));
+ /* Predicating on the X channel alone is enough here: the condition is
+ * a single value, so only its X component is meaningful. */
+ emit(IF(BRW_PREDICATE_ALIGN16_REPLICATE_X));
nir_emit_cf_list(&if_stmt->then_list);
@@ -806,6 +808,16 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
+ case nir_intrinsic_shader_clock: {
+ /* We cannot do anything if there is an event, so ignore it for now */
+ const src_reg shader_clock = get_timestamp();
+ const enum brw_reg_type type = brw_type_for_base_type(glsl_type::uvec2_type);
+
+ dest = get_nir_dest(instr->dest, type);
+ emit(MOV(dest, shader_clock));
+ break;
+ }
+
default:
unreachable("Unknown intrinsic");
}
@@ -1144,26 +1156,10 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_ball_iequal3:
case nir_op_ball_fequal4:
case nir_op_ball_iequal4: {
- dst_reg tmp = dst_reg(this, glsl_type::bool_type);
-
- switch (instr->op) {
- case nir_op_ball_fequal2:
- case nir_op_ball_iequal2:
- tmp.writemask = WRITEMASK_XY;
- break;
- case nir_op_ball_fequal3:
- case nir_op_ball_iequal3:
- tmp.writemask = WRITEMASK_XYZ;
- break;
- case nir_op_ball_fequal4:
- case nir_op_ball_iequal4:
- tmp.writemask = WRITEMASK_XYZW;
- break;
- default:
- unreachable("not reached");
- }
+ unsigned swiz =
+ brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]);
- emit(CMP(tmp, op[0], op[1],
+ emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz),
brw_conditional_for_nir_comparison(instr->op)));
emit(MOV(dst, src_reg(0)));
inst = emit(MOV(dst, src_reg(~0)));
@@ -1177,26 +1173,10 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_bany_inequal3:
case nir_op_bany_fnequal4:
case nir_op_bany_inequal4: {
- dst_reg tmp = dst_reg(this, glsl_type::bool_type);
+ unsigned swiz =
+ brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]);
- switch (instr->op) {
- case nir_op_bany_fnequal2:
- case nir_op_bany_inequal2:
- tmp.writemask = WRITEMASK_XY;
- break;
- case nir_op_bany_fnequal3:
- case nir_op_bany_inequal3:
- tmp.writemask = WRITEMASK_XYZ;
- break;
- case nir_op_bany_fnequal4:
- case nir_op_bany_inequal4:
- tmp.writemask = WRITEMASK_XYZW;
- break;
- default:
- unreachable("not reached");
- }
-
- emit(CMP(tmp, op[0], op[1],
+ emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz),
brw_conditional_for_nir_comparison(instr->op)));
emit(MOV(dst, src_reg(0)));
@@ -1321,26 +1301,18 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_ufind_msb:
case nir_op_ifind_msb: {
- src_reg temp = src_reg(this, glsl_type::uint_type);
-
- inst = emit(FBH(dst_reg(temp), op[0]));
- inst->dst.writemask = WRITEMASK_XYZW;
+ emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));
/* FBH counts from the MSB side, while GLSL's findMSB() wants the count
* from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
* subtract the result from 31 to convert the MSB count into an LSB count.
*/
+ src_reg src(dst);
+ emit(CMP(dst_null_d(), src, src_reg(-1), BRW_CONDITIONAL_NZ));
- /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
- temp.swizzle = BRW_SWIZZLE_NOOP;
- emit(MOV(dst, temp));
-
- src_reg src_tmp = src_reg(dst);
- emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ));
-
- src_tmp.negate = true;
- inst = emit(ADD(dst, src_tmp, src_reg(31)));
+ inst = emit(ADD(dst, src, src_reg(31)));
inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->src[0].negate = true;
break;
}
@@ -1461,11 +1433,11 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_bany2:
case nir_op_bany3:
case nir_op_bany4: {
- dst_reg tmp = dst_reg(this, glsl_type::bool_type);
- tmp.writemask = brw_writemask_for_size(nir_op_infos[instr->op].input_sizes[0]);
-
- emit(CMP(tmp, op[0], src_reg(0), BRW_CONDITIONAL_NZ));
+ unsigned swiz =
+ brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]);
+ emit(CMP(dst_null_d(), swizzle(op[0], swiz), src_reg(0),
+ BRW_CONDITIONAL_NZ));
emit(MOV(dst, src_reg(0)));
inst = emit(MOV(dst, src_reg(~0)));
inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 6d155285820..92b089d7ff6 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -883,6 +883,18 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
uint32_t sampler,
src_reg sampler_reg)
{
+ /* The sampler can only meaningfully compute LOD for fragment shader
+ * messages. For all other stages, we change the opcode to TXL and hardcode
+ * the LOD to 0.
+ *
+ * textureQueryLevels() is implemented in terms of TXS so we need to pass a
+ * valid LOD argument.
+ */
+ if (op == ir_tex || op == ir_query_levels) {
+ assert(lod.file == BAD_FILE);
+ lod = src_reg(0.0f);
+ }
+
enum opcode opcode;
switch (op) {
case ir_tex: opcode = SHADER_OPCODE_TXL; break;
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 5db4b3a86af..0b805b1c0c4 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -311,7 +311,8 @@ brw_vs_populate_key(struct brw_context *brw,
key->program_string_id = vp->id;
if (ctx->Transform.ClipPlanesEnabled != 0 &&
- ctx->API == API_OPENGL_COMPAT &&
+ (ctx->API == API_OPENGL_COMPAT ||
+ ctx->API == API_OPENGLES) &&
vp->program.Base.ClipDistanceArraySize == 0) {
key->nr_userclip_plane_consts =
_mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index f65258a52a5..d7473845c72 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -177,8 +177,8 @@ brw_upload_vs_abo_surfaces(struct brw_context *brw)
if (prog) {
/* BRW_NEW_VS_PROG_DATA */
- brw_upload_abo_surfaces(brw, prog, &brw->vs.base,
- &brw->vs.prog_data->base.base);
+ brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX],
+ &brw->vs.base, &brw->vs.prog_data->base.base);
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 6ebe6481c32..f88f8d59196 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1029,7 +1029,7 @@ const struct brw_tracked_state brw_cs_ubo_surfaces = {
void
brw_upload_abo_surfaces(struct brw_context *brw,
- struct gl_shader_program *prog,
+ struct gl_shader *shader,
struct brw_stage_state *stage_state,
struct brw_stage_prog_data *prog_data)
{
@@ -1037,21 +1037,22 @@ brw_upload_abo_surfaces(struct brw_context *brw,
uint32_t *surf_offsets =
&stage_state->surf_offset[prog_data->binding_table.abo_start];
- for (unsigned i = 0; i < prog->NumAtomicBuffers; i++) {
- struct gl_atomic_buffer_binding *binding =
- &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
- struct intel_buffer_object *intel_bo =
- intel_buffer_object(binding->BufferObject);
- drm_intel_bo *bo = intel_bufferobj_buffer(
- brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
-
- brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
- binding->Offset, BRW_SURFACEFORMAT_RAW,
- bo->size - binding->Offset, 1, true);
- }
+ if (shader && shader->NumAtomicBuffers) {
+ for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
+ struct gl_atomic_buffer_binding *binding =
+ &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
+ struct intel_buffer_object *intel_bo =
+ intel_buffer_object(binding->BufferObject);
+ drm_intel_bo *bo = intel_bufferobj_buffer(
+ brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
+
+ brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
+ binding->Offset, BRW_SURFACEFORMAT_RAW,
+ bo->size - binding->Offset, 1, true);
+ }
- if (prog->NumAtomicBuffers)
brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
+ }
}
static void
@@ -1063,8 +1064,8 @@ brw_upload_wm_abo_surfaces(struct brw_context *brw)
if (prog) {
/* BRW_NEW_FS_PROG_DATA */
- brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
- &brw->wm.prog_data->base);
+ brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
+ &brw->wm.base, &brw->wm.prog_data->base);
}
}
@@ -1088,8 +1089,8 @@ brw_upload_cs_abo_surfaces(struct brw_context *brw)
if (prog) {
/* BRW_NEW_CS_PROG_DATA */
- brw_upload_abo_surfaces(brw, prog, &brw->cs.base,
- &brw->cs.prog_data->base);
+ brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
+ &brw->cs.base, &brw->cs.prog_data->base);
}
}
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index 4068f2844a2..2634e6ba6fd 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -60,6 +60,23 @@ get_attr_override(const struct brw_vue_map *vue_map, int urb_entry_read_offset,
/* Find the VUE slot for this attribute. */
int slot = vue_map->varying_to_slot[fs_attr];
+ /* Viewport and Layer are stored in the VUE header. We need to override
+ * them to zero if earlier stages didn't write them, as GL requires that
+ * they read back as zero when not explicitly set.
+ */
+ if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) {
+ unsigned override =
+ ATTRIBUTE_0_OVERRIDE_X | ATTRIBUTE_0_OVERRIDE_W |
+ ATTRIBUTE_CONST_0000 << ATTRIBUTE_0_CONST_SOURCE_SHIFT;
+
+ if (!(vue_map->slots_valid & VARYING_BIT_LAYER))
+ override |= ATTRIBUTE_0_OVERRIDE_Y;
+ if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT))
+ override |= ATTRIBUTE_0_OVERRIDE_Z;
+
+ return override;
+ }
+
/* If there was only a back color written but not front, use back
* as the color instead of undefined
*/
@@ -159,14 +176,30 @@ calculate_attr_overrides(const struct brw_context *brw,
uint16_t *attr_overrides,
uint32_t *point_sprite_enables,
uint32_t *flat_enables,
- uint32_t *urb_entry_read_length)
+ uint32_t *urb_entry_read_length,
+ uint32_t *urb_entry_read_offset)
{
- const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
uint32_t max_source_attr = 0;
*point_sprite_enables = 0;
*flat_enables = 0;
+ *urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
+
+ /* BRW_NEW_FRAGMENT_PROGRAM
+ *
+ * If the fragment shader reads VARYING_SLOT_LAYER or VARYING_SLOT_VIEWPORT,
+ * then we need to pass in the full vertex header. Otherwise, we can program
+ * the SF to start reading at an offset of 1 (2 varying slots) to skip
+ * unnecessary data:
+ * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
+ * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+
+ */
+
+ bool fs_needs_vue_header = brw->fragment_program->Base.InputsRead &
+ (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+
+ *urb_entry_read_offset = fs_needs_vue_header ? 0 : 1;
+
/* _NEW_LIGHT */
bool shade_model_flat = brw->ctx.Light.ShadeModel == GL_FLAT;
@@ -228,7 +261,7 @@ calculate_attr_overrides(const struct brw_context *brw,
/* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
uint16_t attr_override = point_sprite ? 0 :
get_attr_override(&brw->vue_map_geom_out,
- urb_entry_read_offset, attr,
+ *urb_entry_read_offset, attr,
brw->ctx.VertexProgram._TwoSideEnabled,
&max_source_attr);
@@ -276,7 +309,6 @@ upload_sf_state(struct brw_context *brw)
bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
- const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
float point_size;
uint16_t attr_overrides[16];
uint32_t point_sprite_origin;
@@ -411,8 +443,10 @@ upload_sf_state(struct brw_context *brw)
* _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
*/
uint32_t urb_entry_read_length;
+ uint32_t urb_entry_read_offset;
calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables,
- &flat_enables, &urb_entry_read_length);
+ &flat_enables, &urb_entry_read_length,
+ &urb_entry_read_offset);
dw1 |= (urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c
index 6aeb0cb243f..2d7c04f4ad2 100644
--- a/src/mesa/drivers/dri/i965/gen7_cs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c
@@ -285,3 +285,34 @@ const struct brw_tracked_state gen7_cs_push_constants = {
},
.emit = gen7_upload_cs_push_constants,
};
+
+/**
+ * Creates a new CS constant buffer reflecting the current CS program's
+ * constants, if needed by the CS program.
+ *
+ * This is a thin wrapper: it gathers the compute program, the CS stage state
+ * and the CS program data from \p brw and forwards them to
+ * brw_upload_pull_constants() together with BRW_NEW_SURFACES.
+ */
+static void
+brw_upload_cs_pull_constants(struct brw_context *brw)
+{
+ /* BRW_NEW_CS_PROG_DATA */
+ const struct brw_stage_prog_data *cs_prog_data = &brw->cs.prog_data->base;
+
+ /* BRW_NEW_COMPUTE_PROGRAM */
+ struct brw_compute_program *compute_prog =
+ (struct brw_compute_program *) brw->compute_program;
+
+ /* _NEW_PROGRAM_CONSTANTS */
+ brw_upload_pull_constants(brw, BRW_NEW_SURFACES,
+ &compute_prog->program.Base,
+ &brw->cs.base,
+ cs_prog_data,
+ true);
+}
+
+/* Tracked-state entry that ties brw_upload_cs_pull_constants() to the Mesa
+ * and brw dirty bits listed below.
+ */
+const struct brw_tracked_state brw_cs_pull_constants = {
+ .emit = brw_upload_cs_pull_constants,
+ .dirty = {
+ .brw = BRW_NEW_CS_PROG_DATA | BRW_NEW_COMPUTE_PROGRAM | BRW_NEW_BATCH,
+ .mesa = _NEW_PROGRAM_CONSTANTS,
+ },
+};
diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c
index 698b3d491bc..b1f13aceba4 100644
--- a/src/mesa/drivers/dri/i965/gen7_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c
@@ -40,7 +40,6 @@ upload_sbe_state(struct brw_context *brw)
uint32_t point_sprite_enables;
uint32_t flat_enables;
int i;
- const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
uint16_t attr_overrides[16];
/* _NEW_BUFFERS */
bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
@@ -65,8 +64,10 @@ upload_sbe_state(struct brw_context *brw)
* _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA
*/
uint32_t urb_entry_read_length;
+ uint32_t urb_entry_read_offset;
calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables,
- &flat_enables, &urb_entry_read_length);
+ &flat_enables, &urb_entry_read_length,
+ &urb_entry_read_offset);
dw1 |= urb_entry_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
urb_entry_read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index 8f0507413a7..10e433b1d59 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -95,6 +95,11 @@ gen8_upload_ps_extra(struct brw_context *brw,
!brw_color_buffer_write_enabled(brw))
dw1 |= GEN8_PSX_SHADER_HAS_UAV;
+ if (prog_data->computed_stencil) {
+ assert(brw->gen >= 9);
+ dw1 |= GEN9_PSX_SHADER_COMPUTES_STENCIL;
+ }
+
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2));
OUT_BATCH(dw1);
diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c
index 6b655ee493e..8b6f31f3be6 100644
--- a/src/mesa/drivers/dri/i965/gen8_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c
@@ -37,6 +37,7 @@ upload_sbe(struct brw_context *brw)
uint32_t num_outputs = brw->wm.prog_data->num_varying_inputs;
uint16_t attr_overrides[VARYING_SLOT_MAX];
uint32_t urb_entry_read_length;
+ uint32_t urb_entry_read_offset;
uint32_t point_sprite_enables;
uint32_t flat_enables;
int sbe_cmd_length;
@@ -66,7 +67,8 @@ upload_sbe(struct brw_context *brw)
calculate_attr_overrides(brw, attr_overrides,
&point_sprite_enables,
&flat_enables,
- &urb_entry_read_length);
+ &urb_entry_read_length,
+ &urb_entry_read_offset);
/* Typically, the URB entry read length and offset should be programmed in
* 3DSTATE_VS and 3DSTATE_GS; SBE inherits it from the last active stage
@@ -78,7 +80,7 @@ upload_sbe(struct brw_context *brw)
*/
dw1 |=
urb_entry_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
- BRW_SF_URB_ENTRY_READ_OFFSET << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT |
+ urb_entry_read_offset << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT |
GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH |
GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET;
diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index 18b86652fd2..140a6544983 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -183,6 +183,14 @@ gen8_emit_buffer_surface_state(struct brw_context *brw,
}
static void
+gen8_emit_fast_clear_color(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ uint32_t *surf)
+{
+ /* Merge the miptree's fast-clear colour bits into dword 7 of the surface
+ * state without disturbing bits that are already set there. The brw
+ * argument is not used by this implementation.
+ */
+ surf[7] = surf[7] | mt->fast_clear_color_value;
+}
+
+static void
gen8_emit_texture_surface_state(struct brw_context *brw,
struct intel_mipmap_tree *mt,
GLenum target,
@@ -284,11 +292,10 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
SET_FIELD((aux_mt->pitch / tile_w) - 1,
GEN8_SURFACE_AUX_PITCH) |
aux_mode;
- } else {
- surf[6] = 0;
}
- surf[7] = mt->fast_clear_color_value |
+ gen8_emit_fast_clear_color(brw, mt, surf);
+ surf[7] |=
SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0)), GEN7_SURFACE_SCS_R) |
SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1)), GEN7_SURFACE_SCS_G) |
SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2)), GEN7_SURFACE_SCS_B) |
@@ -302,11 +309,7 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
aux_mt->bo, 0,
I915_GEM_DOMAIN_SAMPLER,
(rw ? I915_GEM_DOMAIN_SAMPLER : 0));
- } else {
- surf[10] = 0;
- surf[11] = 0;
}
- surf[12] = 0;
/* Emit relocation to surface contents */
drm_intel_bo_emit_reloc(brw->batch.bo,
@@ -514,15 +517,13 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
SET_FIELD((aux_mt->pitch / tile_w) - 1,
GEN8_SURFACE_AUX_PITCH) |
aux_mode;
- } else {
- surf[6] = 0;
}
- surf[7] = mt->fast_clear_color_value |
- SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
- SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
- SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
- SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
+ gen8_emit_fast_clear_color(brw, mt, surf);
+ surf[7] |= SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
+ SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
+ SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
+ SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
assert(mt->offset % mt->cpp == 0);
*((uint64_t *) &surf[8]) = mt->bo->offset64 + mt->offset; /* reloc */
@@ -533,11 +534,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
offset + 10 * 4,
aux_mt->bo, 0,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
- } else {
- surf[10] = 0;
- surf[11] = 0;
}
- surf[12] = 0;
drm_intel_bo_emit_reloc(brw->batch.bo,
offset + 8 * 4,
diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c
index f7c02c8a38d..c00d2e786f3 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.c
+++ b/src/mesa/drivers/dri/i965/intel_debug.c
@@ -73,6 +73,8 @@ static const struct debug_control debug_control[] = {
{ "spill_fs", DEBUG_SPILL_FS },
{ "spill_vec4", DEBUG_SPILL_VEC4 },
{ "cs", DEBUG_CS },
+ { "hex", DEBUG_HEX },
+ { "nocompact", DEBUG_NO_COMPACTION },
{ NULL, 0 }
};
diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h
index 0a6e1b90b98..98bd7e93956 100644
--- a/src/mesa/drivers/dri/i965/intel_debug.h
+++ b/src/mesa/drivers/dri/i965/intel_debug.h
@@ -67,6 +67,8 @@ extern uint64_t INTEL_DEBUG;
#define DEBUG_SPILL_FS (1ull << 31)
#define DEBUG_SPILL_VEC4 (1ull << 32)
#define DEBUG_CS (1ull << 33)
+#define DEBUG_HEX (1ull << 34)
+#define DEBUG_NO_COMPACTION (1ull << 35)
#ifdef HAVE_ANDROID_PLATFORM
#define LOG_TAG "INTEL-MESA"
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index 3f9afd16c71..4643ea3e87b 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -287,6 +287,7 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.ARB_conditional_render_inverted = true;
ctx->Extensions.ARB_draw_buffers_blend = true;
ctx->Extensions.ARB_ES3_compatibility = true;
+ ctx->Extensions.ARB_fragment_layer_viewport = true;
ctx->Extensions.ARB_sample_shading = true;
ctx->Extensions.ARB_shading_language_420pack = true;
ctx->Extensions.ARB_shading_language_packing = true;
@@ -324,6 +325,7 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.ARB_framebuffer_no_attachments = true;
ctx->Extensions.ARB_gpu_shader5 = true;
ctx->Extensions.ARB_shader_atomic_counters = true;
+ ctx->Extensions.ARB_shader_clock = true;
ctx->Extensions.ARB_shader_image_load_store = true;
ctx->Extensions.ARB_shader_image_size = true;
ctx->Extensions.ARB_shader_texture_image_samples = true;
@@ -358,6 +360,7 @@ intelInitExtensions(struct gl_context *ctx)
if (brw->gen >= 9) {
ctx->Extensions.KHR_texture_compression_astc_ldr = true;
ctx->Extensions.KHR_texture_compression_astc_hdr = true;
+ ctx->Extensions.ARB_shader_stencil_export = true;
}
if (ctx->API == API_OPENGL_CORE)
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
index 5a6b0dd1ec5..3a4a53a07e6 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -343,19 +343,15 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx,
if (image->planar_format && image->planar_format->nplanes > 1) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glEGLImageTargetRenderbufferStorage(planar buffers are not "
- "supported as render targets.");
+ "supported as render targets.)");
return;
}
/* __DRIimage is opaque to the core so it has to be checked here */
- switch (image->format) {
- case MESA_FORMAT_R8G8B8A8_UNORM:
+ if (!brw->format_supported_as_render_target[image->format]) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glEGLImageTargetRenderbufferStorage(unsupported image format");
+ "glEGLImageTargetRenderbufferStorage(unsupported image format)");
return;
- break;
- default:
- break;
}
irb = intel_renderbuffer(rb);
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index 590c45d93ea..fb95fb629ad 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1357,7 +1357,16 @@ set_max_gl_versions(struct intel_screen *screen)
}
}
-static int
+/**
+ * Return the revision (generally the revid field of the PCI header) of the
+ * graphics device.
+ *
+ * XXX: This function is useful to keep around even if it is not currently in
+ * use. It is necessary for new platforms and revision specific workarounds or
+ * features. Please don't remove it so that we know it at least continues to
+ * build.
+ */
+static __attribute__((__unused__)) int
brw_get_revision(int fd)
{
struct drm_i915_getparam gp;
@@ -1416,8 +1425,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
return false;
intelScreen->deviceID = drm_intel_bufmgr_gem_get_devid(intelScreen->bufmgr);
- intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID,
- brw_get_revision(psp->fd));
+ intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID);
if (!intelScreen->devinfo)
return false;
diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
index 5f80f90a91d..62d39f70ec4 100644
--- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
@@ -84,7 +84,7 @@ instruction(bblock_t *block, int num)
static bool
cmod_propagation(fs_visitor *v)
{
- const bool print = false;
+ const bool print = getenv("TEST_DEBUG");
if (print) {
fprintf(stderr, "= Before =\n");
diff --git a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
new file mode 100644
index 00000000000..9aa2fcc7907
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
@@ -0,0 +1,822 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Based on test_fs_cmod_propagation.cpp
+ */
+
+#include <gtest/gtest.h>
+#include "brw_vec4.h"
+#include "brw_vec4_builder.h"
+#include "brw_cfg.h"
+#include "program/program.h"
+
+using namespace brw;
+
+class cmod_propagation_test : public ::testing::Test { /* gtest fixture used by every TEST_F in this file */
+ virtual void SetUp(); /* allocates the members below and constructs the visitor */
+
+public:
+ struct brw_compiler *compiler; /* zero-initialised in SetUp(); its devinfo field points at devinfo */
+ struct brw_device_info *devinfo; /* zero-initialised in SetUp(), then gen is set to 4 */
+ struct gl_context *ctx; /* zero-initialised in SetUp() */
+ struct gl_shader_program *shader_prog; /* NOTE(review): SetUp() does not assign this member */
+ struct brw_vertex_program *vp; /* ralloc'ed in SetUp() and initialised as a GL_VERTEX_SHADER program */
+ vec4_visitor *v; /* visitor under test; SetUp() creates a cmod_propagation_vec4_visitor */
+};
+
+class cmod_propagation_vec4_visitor : public vec4_visitor /* concrete vec4_visitor subclass so the tests can instantiate one */
+{
+public:
+ cmod_propagation_vec4_visitor(struct brw_compiler *compiler,
+ nir_shader *shader)
+ : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL,
+ false, -1) {} /* only compiler and shader are forwarded; the remaining base-constructor arguments are NULL/false/-1 */
+
+protected:
+ /* Dummy implementation for pure virtual methods.
+ *
+ * Every override below is marked unreachable(): the tests in this file
+ * emit instructions through a vec4_builder and call calculate_cfg() and
+ * opt_cmod_propagation() directly, and none of them calls these hooks
+ * itself.
+ */
+ virtual dst_reg *make_reg_for_system_value(int location,
+ const glsl_type *type)
+ {
+ unreachable("Not reached");
+ }
+
+ virtual void setup_payload()
+ {
+ unreachable("Not reached");
+ }
+
+ virtual void emit_prolog()
+ {
+ unreachable("Not reached");
+ }
+
+ virtual void emit_program_code()
+ {
+ unreachable("Not reached");
+ }
+
+ virtual void emit_thread_end()
+ {
+ unreachable("Not reached");
+ }
+
+ virtual void emit_urb_write_header(int mrf)
+ {
+ unreachable("Not reached");
+ }
+
+ virtual vec4_instruction *emit_urb_write_opcode(bool complete)
+ {
+ unreachable("Not reached");
+ }
+};
+
+
+void cmod_propagation_test::SetUp() /* allocates the fixture members and constructs the visitor for a MESA_SHADER_VERTEX nir_shader */
+{
+ ctx = (struct gl_context *)calloc(1, sizeof(*ctx)); /* NOTE(review): calloc results are not checked, and no TearDown() is declared on the fixture to free them */
+ compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
+ devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
+ compiler->devinfo = devinfo; /* compiler is what gets handed to the visitor below */
+
+ vp = ralloc(NULL, struct brw_vertex_program); /* NULL ralloc context, i.e. no parent allocation */
+
+ nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL);
+
+ v = new cmod_propagation_vec4_visitor(compiler, shader);
+
+ _mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0);
+
+ devinfo->gen = 4; /* set after the visitor is constructed; visible to it only through compiler->devinfo */
+}
+
+/* Returns the instruction reached by following `num` next-links from the
+ * first instruction of `block` (num == 0 yields the first instruction).
+ */
+static vec4_instruction *
+instruction(bblock_t *block, int num)
+{
+ vec4_instruction *cur = (vec4_instruction *)block->start();
+ for (int remaining = num; remaining > 0; remaining--)
+ cur = (vec4_instruction *)cur->next;
+ return cur;
+}
+
+/* Runs opt_cmod_propagation() on the visitor and returns its result. When
+ * the TEST_DEBUG environment variable is set, the instruction list is dumped
+ * to stderr before and after the pass.
+ */
+static bool
+cmod_propagation(vec4_visitor *v)
+{
+ const bool verbose = getenv("TEST_DEBUG") != NULL;
+
+ if (verbose) {
+ fprintf(stderr, "= Before =\n");
+ v->dump_instructions();
+ }
+
+ const bool progress = v->opt_cmod_propagation();
+
+ if (verbose) {
+ fprintf(stderr, "\n= After =\n");
+ v->dump_instructions();
+ }
+
+ return progress;
+}
+
+TEST_F(cmod_propagation_test, basic)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ dst_reg dest_null = bld.null_reg_f();
+ dest_null.writemask = WRITEMASK_X; /* narrow the null destination to .x, as in the listing below */
+
+ bld.ADD(dest, src0, src1);
+ bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_GE);
+
+ /* Purpose: a CMP of the ADD result against 0.0f that writes only the
+ * null register should be folded into the ADD. The pass must report
+ * progress, the block must shrink from two instructions to one, and the
+ * ADD must carry the GE conditional modifier.
+ *
+ * = Before =
+ *
+ * 0: add dest.x src0.xxxx src1.xxxx
+ * 1: cmp.ge.f0 null.x dest.xxxx 0.0f
+ *
+ * = After =
+ * 0: add.ge.f0 dest.x src0.xxxx src1.xxxx
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, basic_different_dst_writemask)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ dst_reg dest_null = bld.null_reg_f(); /* writemask deliberately left untouched, unlike in the "basic" test */
+
+ bld.ADD(dest, src0, src1);
+ bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_GE);
+
+ /* Purpose: same ADD + CMP pair as the "basic" test, but the CMP's null
+ * destination keeps the writemask it was created with (shown as .xyzw
+ * below) while the ADD writes only .x. The pass must report no progress
+ * and leave both instructions and their conditional modifiers as they
+ * were.
+ *
+ * = Before =
+ *
+ * 0: add dest.x src0 src1
+ * 1: cmp.ge.f0 null.xyzw dest 0.0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, andz_one)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::int_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ src_reg one(1);
+
+ bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
+ set_condmod(BRW_CONDITIONAL_Z,
+ bld.AND(bld.null_reg_d(), src_reg(dest), one));
+
+ /* Purpose: a CMP.l writing dest (as F) followed by an AND.z of dest (as
+ * D) with 1 into the null register. The pass must report no progress
+ * and leave both instructions in place.
+ *
+ * NOTE(review): the AND is built with BRW_CONDITIONAL_Z but the final
+ * check below compares against BRW_CONDITIONAL_EQ; presumably the two
+ * names are aliases for the same value - confirm in brw_defines.h.
+ *
+ * = Before =
+ * 0: cmp.l.f0 dest:F src0:F 0F
+ * 1: and.z.f0 null:D dest:D 1D
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_AND, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_EQ, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, non_cmod_instruction)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::uint_type);
+ src_reg src0 = src_reg(v, glsl_type::uint_type);
+ src_reg zero(0u);
+ bld.FBL(dest, src0);
+ bld.CMP(bld.null_reg_ud(), src_reg(dest), zero, BRW_CONDITIONAL_GE);
+
+ /* Purpose: the value compared against zero is produced by an FBL. The
+ * pass must report no progress and keep both the FBL and the CMP.ge.
+ * Judging by the test name, FBL is presumably an opcode that cannot
+ * carry a conditional modifier - confirm against the pass itself.
+ *
+ * = Before =
+ *
+ * 0: fbl dest src0
+ * 1: cmp.ge.f0 null dest 0u
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_FBL, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, intervening_flag_write)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg src2 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ bld.ADD(dest, src0, src1);
+ bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE);
+ bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_GE);
+
+ /* Purpose: an unrelated CMP (on src2) that also writes f0 sits between
+ * the ADD and the CMP of its result. The pass must report no progress
+ * and leave all three instructions unchanged.
+ *
+ * = Before =
+ *
+ * 0: add dest src0 src1
+ * 1: cmp.ge.f0 null src2 0.0f
+ * 2: cmp.ge.f0 null dest 0.0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(2, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(2, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, intervening_flag_read)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest0 = dst_reg(v, glsl_type::float_type);
+ dst_reg dest1 = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg src2 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ bld.ADD(dest0, src0, src1);
+ set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
+ bld.CMP(bld.null_reg_f(), src_reg(dest0), zero, BRW_CONDITIONAL_GE);
+
+ /* Purpose: a predicated SEL (which reads f0) sits between the ADD and
+ * the CMP of its result, and the ADD does not yet carry a conditional
+ * modifier. The pass must report no progress and leave all three
+ * instructions unchanged.
+ *
+ * = Before =
+ *
+ * 0: add dest0 src0 src1
+ * 1: (+f0) sel dest1 src2 0.0f
+ * 2: cmp.ge.f0 null dest0 0.0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(2, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(2, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, intervening_dest_write)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::vec4_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg src2 = src_reg(v, glsl_type::vec2_type);
+ src_reg zero(0.0f);
+ bld.ADD(offset(dest, 2), src0, src1);
+ bld.emit(SHADER_OPCODE_TEX, dest, src2)
+ ->regs_written = 4;
+ bld.CMP(bld.null_reg_f(), offset(src_reg(dest), 2), zero, BRW_CONDITIONAL_GE);
+
+ /* Purpose: a TEX that writes four registers starting at dest+0 sits
+ * between the ADD to dest+2 and the CMP of dest+2, so the value the CMP
+ * reads is no longer the ADD's result. The pass must report no progress
+ * and leave all three instructions, including their conditional
+ * modifiers, unchanged.
+ *
+ * = Before =
+ *
+ * 0: add dest+2 src0 src1
+ * 1: tex rlen 4 dest+0 src2
+ * 2: cmp.ge.f0 null dest+2 0.0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(2, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(2, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(SHADER_OPCODE_TEX, instruction(block0, 1)->opcode);
+ /* Check the TEX itself (instruction 1), not the ADD a second time. */
+ EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 1)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, intervening_flag_read_same_value)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest0 = dst_reg(v, glsl_type::float_type);
+ dst_reg dest1 = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg src2 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ dst_reg dest_null = bld.null_reg_f();
+ dest_null.writemask = WRITEMASK_X; /* narrow the null destination to .x, as in the listing below */
+
+ set_condmod(BRW_CONDITIONAL_GE, bld.ADD(dest0, src0, src1));
+ set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
+ bld.CMP(dest_null, src_reg(dest0), zero, BRW_CONDITIONAL_GE);
+
+ /* Purpose: like intervening_flag_read, but here the ADD already carries
+ * the same GE conditional modifier that the trailing CMP would compute.
+ * The pass must report progress and drop the CMP, leaving the ADD.ge and
+ * the predicated SEL in place.
+ *
+ * = Before =
+ *
+ * 0: add.ge.f0 dest0 src0 src1
+ * 1: (+f0) sel dest1 src2 0.0f
+ * 2: cmp.ge.f0 null.x dest0 0.0f
+ *
+ * = After =
+ * 0: add.ge.f0 dest0 src0 src1
+ * 1: (+f0) sel dest1 src2 0.0f
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(2, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
+}
+
+TEST_F(cmod_propagation_test, negate)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ bld.ADD(dest, src0, src1);
+ src_reg tmp_src = src_reg(dest);
+ tmp_src.negate = true;
+ dst_reg dest_null = bld.null_reg_f();
+ dest_null.writemask = WRITEMASK_X;
+ bld.CMP(dest_null, tmp_src, zero, BRW_CONDITIONAL_GE);
+
+ /* Purpose: the CMP reads the ADD result through a negate source
+ * modifier. The pass must report progress, remove the CMP and put the
+ * flipped condition (GE becomes LE) on the ADD.
+ *
+ * = Before =
+ *
+ * 0: add dest src0 src1
+ * 1: cmp.ge.f0 null.x -dest 0.0f
+ *
+ * = After =
+ * 0: add.le.f0 dest src0 src1
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+ /* ASSERT rather than EXPECT, as in the other tests of this file: stop
+ * here if the block bounds are wrong instead of inspecting instructions
+ * of a block that is not in the expected shape.
+ */
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_LE, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, movnz)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::float_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg src1 = src_reg(v, glsl_type::float_type);
+ dst_reg dest_null = bld.null_reg_f();
+ dest_null.writemask = WRITEMASK_X; /* narrow the null destination to .x, as in the listing below */
+
+ bld.CMP(dest, src0, src1, BRW_CONDITIONAL_L);
+ set_condmod(BRW_CONDITIONAL_NZ,
+ bld.MOV(dest_null, src_reg(dest)));
+
+ /* Purpose: a MOV.nz of a CMP result into the null register. The pass
+ * must report progress and remove the MOV, leaving the CMP with its
+ * original L conditional modifier.
+ *
+ * = Before =
+ *
+ * 0: cmp.l.f0 dest:F src0:F src1:F
+ * 1: mov.nz.f0 null.x dest:F
+ *
+ * = After =
+ * 0: cmp.l.f0 dest src0:F src1:F
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, different_types_cmod_with_zero)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::int_type);
+ src_reg src0 = src_reg(v, glsl_type::int_type);
+ src_reg src1 = src_reg(v, glsl_type::int_type);
+ src_reg zero(0.0f);
+ bld.ADD(dest, src0, src1);
+ bld.CMP(bld.null_reg_f(), retype(src_reg(dest), BRW_REGISTER_TYPE_F), zero,
+ BRW_CONDITIONAL_GE);
+
+ /* Purpose: the ADD produces an integer (D) result, while the CMP reads
+ * the same register retyped as float (F). The pass must report no
+ * progress and keep both instructions.
+ *
+ * = Before =
+ *
+ * 0: add dest:D src0:D src1:D
+ * 1: cmp.ge.f0 null:F dest:F 0.0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, andnz_non_one)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::int_type);
+ src_reg src0 = src_reg(v, glsl_type::float_type);
+ src_reg zero(0.0f);
+ src_reg nonone(38); /* arbitrary mask value other than 1 */
+
+ bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L);
+ set_condmod(BRW_CONDITIONAL_NZ,
+ bld.AND(bld.null_reg_d(), src_reg(dest), nonone));
+
+ /* Purpose: a CMP.l followed by an AND.nz of its result with a constant
+ * that is not 1. The pass must report no progress and leave both
+ * instructions and their conditional modifiers unchanged.
+ *
+ * = Before =
+ * 0: cmp.l.f0 dest:F src0:F 0F
+ * 1: and.nz.f0 null:D dest:D 38D
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_AND, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod);
+}
+
+/* Note that the "basic" test uses glsl_type::float_type operands, while this
+ * one uses glsl_type::vec4_type. */
+TEST_F(cmod_propagation_test, basic_vec4)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::vec4_type);
+ src_reg src0 = src_reg(v, glsl_type::vec4_type);
+ src_reg src1 = src_reg(v, glsl_type::vec4_type);
+ src_reg zero(0.0f);
+
+ bld.MUL(dest, src0, src1);
+ bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_NZ);
+
+ /* A MUL followed by a CMP.nz of its full result against 0.0f. The test
+ * expects the pass to report progress, leaving a single MUL that carries
+ * the NZ conditional modifier.
+ *
+ * = Before =
+ * 0: mul dest.xyzw src0.xyzw src1.xyzw
+ * 1: cmp.nz.f0.0 null.xyzw dest.xyzw 0.0f
+ *
+ * = After =
+ * 0: mul.nz.f0.0 dest.xyzw src0.xyzw src1.xyzw
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, basic_vec4_different_dst_writemask)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::vec4_type);
+ dest.writemask = WRITEMASK_X;
+ src_reg src0 = src_reg(v, glsl_type::vec4_type);
+ src_reg src1 = src_reg(v, glsl_type::vec4_type);
+ src_reg zero(0.0f);
+ dst_reg dest_null = bld.null_reg_f();
+
+ bld.MUL(dest, src0, src1);
+ bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_NZ);
+
+ /* The MUL destination is restricted to WRITEMASK_X, while the null
+ * destination of the CMP keeps the writemask returned by null_reg_f().
+ * The test expects the pass to report no progress and to leave the MUL
+ * without a conditional modifier.
+ *
+ * = Before =
+ * 0: mul dest.x src0 src1
+ * 1: cmp.nz.f0.0 null dest 0.0f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, mad_one_component_vec4)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::vec4_type);
+ dest.writemask = WRITEMASK_X;
+ src_reg src0 = src_reg(v, glsl_type::vec4_type);
+ src_reg src1 = src_reg(v, glsl_type::vec4_type);
+ src_reg src2 = src_reg(v, glsl_type::vec4_type);
+ src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX;
+ src2.negate = true;
+ src_reg zero(0.0f);
+ src_reg tmp(dest);
+ tmp.swizzle = BRW_SWIZZLE_XXXX;
+ dst_reg dest_null = bld.null_reg_f();
+ dest_null.writemask = WRITEMASK_X;
+
+ bld.MAD(dest, src0, src1, src2);
+ bld.CMP(dest_null, tmp, zero, BRW_CONDITIONAL_L);
+
+ /* Both the MAD destination and the CMP null destination are limited to
+ * WRITEMASK_X. The test expects the pass to report progress, leaving a
+ * single MAD that carries the L conditional modifier.
+ *
+ * = Before =
+ *
+ * 0: mad dest.x:F src0.xxxx:F src1.xxxx:F -src2.xxxx:F
+ * 1: cmp.l.f0.0 null.x:F dest.xxxx:F 0.0f
+ *
+ * = After =
+ * 0: mad.l.f0 dest.x:F src0.xxxx:F src1.xxxx:F -src2.xxxx:F
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_MAD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, mad_more_one_component_vec4)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::vec4_type);
+ dest.writemask = WRITEMASK_XW;
+ src_reg src0 = src_reg(v, glsl_type::vec4_type);
+ src_reg src1 = src_reg(v, glsl_type::vec4_type);
+ src_reg src2 = src_reg(v, glsl_type::vec4_type);
+ src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX;
+ src2.negate = true;
+ src_reg zero(0.0f);
+ src_reg tmp(dest);
+ tmp.swizzle = BRW_SWIZZLE_XXXX;
+ dst_reg dest_null = bld.null_reg_f();
+
+ bld.MAD(dest, src0, src1, src2);
+ bld.CMP(dest_null, tmp, zero, BRW_CONDITIONAL_L);
+
+ /* Here the MAD writes two components (WRITEMASK_XW) and the CMP null
+ * destination keeps the writemask returned by null_reg_f(). The test
+ * expects the pass to report no progress and to leave the MAD without a
+ * conditional modifier.
+ *
+ * = Before =
+ *
+ * 0: mad dest.xw:F src0.xxxx:F src1.xxxx:F -src2.xxxx:F
+ * 1: cmp.l.f0.0 null:F dest.xxxx:F 0.0f
+ *
+ * = After =
+ * (No changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_MAD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, cmp_mov_vec4)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::ivec4_type);
+ dest.writemask = WRITEMASK_X;
+ src_reg src0 = src_reg(v, glsl_type::ivec4_type);
+ src0.swizzle = BRW_SWIZZLE_XXXX;
+ src0.file = UNIFORM;
+ src_reg nonone = retype(src_reg(16), BRW_REGISTER_TYPE_D);
+ src_reg mov_src = src_reg(dest);
+ mov_src.swizzle = BRW_SWIZZLE_XXXX;
+ dst_reg dest_null = bld.null_reg_d();
+ dest_null.writemask = WRITEMASK_X;
+
+ bld.CMP(dest, src0, nonone, BRW_CONDITIONAL_GE);
+ set_condmod(BRW_CONDITIONAL_NZ,
+ bld.MOV(dest_null, mov_src));
+
+ /* A CMP.ge into dest.x followed by a MOV.nz of dest.xxxx into null.x.
+ * The test expects the pass to report progress, leaving only the CMP
+ * with its GE conditional modifier intact.
+ *
+ * = Before =
+ *
+ * 0: cmp.ge.f0 dest.x:D u.xxxx:D 16D
+ * 1: mov.nz.f0 null.x:D dest.xxxx:D
+ *
+ * = After =
+ * 0: cmp.ge.f0 dest.x:D u.xxxx:D 16D
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_TRUE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, mul_cmp_different_channels_vec4)
+{
+ const vec4_builder bld = vec4_builder(v).at_end();
+ dst_reg dest = dst_reg(v, glsl_type::vec4_type);
+ src_reg src0 = src_reg(v, glsl_type::vec4_type);
+ src_reg src1 = src_reg(v, glsl_type::vec4_type);
+ src_reg zero(0.0f);
+ src_reg cmp_src = src_reg(dest);
+ cmp_src.swizzle = BRW_SWIZZLE4(0,1,3,2);
+
+ bld.MUL(dest, src0, src1);
+ bld.CMP(bld.null_reg_f(), cmp_src, zero, BRW_CONDITIONAL_NZ);
+
+ /* The CMP reads the MUL result through a swizzle that swaps the last two
+ * channels (xywz). The test expects the pass to report no progress and
+ * to leave the MUL without a conditional modifier.
+ *
+ * = Before =
+ * 0: mul dest src0 src1
+ * 1: cmp.nz.f0.0 null dest.xywz 0.0f
+ *
+ * = After =
+ * (No changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+
+ EXPECT_FALSE(cmod_propagation(v));
+
+ ASSERT_EQ(0, block0->start_ip);
+ ASSERT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod);
+}
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c b/src/mesa/drivers/dri/nouveau/nouveau_context.c
index a049d9b8de7..cb854b81933 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_context.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c
@@ -188,7 +188,7 @@ nouveau_context_init(struct gl_context *ctx, gl_api api,
ctx->Extensions.EXT_blend_minmax = true;
ctx->Extensions.EXT_texture_filter_anisotropic = true;
ctx->Extensions.NV_texture_env_combine4 = true;
- ctx->Const.MaxColorAttachments = 1;
+ ctx->Const.MaxDrawBuffers = ctx->Const.MaxColorAttachments = 1;
/* This effectively disables 3D textures */
ctx->Const.Max3DTextureLevels = 1;
diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c
index a46c1944e96..a49018953ae 100644
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -698,16 +698,39 @@ valid_draw_indirect(struct gl_context *ctx,
{
const GLsizeiptr end = (GLsizeiptr)indirect + size;
+ /* OpenGL ES 3.1 spec. section 10.5:
+ *
+ * "DrawArraysIndirect requires that all data sourced for the
+ * command, including the DrawArraysIndirectCommand
+ * structure, be in buffer objects, and may not be called when
+ * the default vertex array object is bound."
+ */
+ if (ctx->Array.VAO == ctx->Array.DefaultVAO) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "(no VAO bound)");
+ return GL_FALSE;
+ }
+
if (!_mesa_valid_prim_mode(ctx, mode, name))
return GL_FALSE;
+ /* OpenGL ES 3.1 specification, section 10.5:
+ *
+ * "An INVALID_OPERATION error is generated if
+ * transform feedback is active and not paused."
+ */
+ if (_mesa_is_gles31(ctx) && _mesa_is_xfb_active_and_unpaused(ctx)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "%s(TransformFeedback is active and not paused)", name);
+ }
- /* From the ARB_draw_indirect specification:
- * "An INVALID_OPERATION error is generated [...] if <indirect> is no
- * word aligned."
+ /* From OpenGL version 4.4. section 10.5
+ * and OpenGL ES 3.1, section 10.6:
+ *
+ * "An INVALID_VALUE error is generated if indirect is not a
+ * multiple of the size, in basic machine units, of uint."
*/
if ((GLsizeiptr)indirect & (sizeof(GLuint) - 1)) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
+ _mesa_error(ctx, GL_INVALID_VALUE,
"%s(indirect is not aligned)", name);
return GL_FALSE;
}
@@ -895,7 +918,12 @@ check_valid_to_compute(struct gl_context *ctx, const char *function)
return false;
}
- prog = ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE];
+ /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders:
+ *
+ * "An INVALID_OPERATION error is generated if there is no active program
+ * for the compute shader stage."
+ */
+ prog = ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
if (prog == NULL || prog->_LinkedShaders[MESA_SHADER_COMPUTE] == NULL) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"%s(no active compute shader)",
@@ -917,6 +945,24 @@ _mesa_validate_DispatchCompute(struct gl_context *ctx,
return GL_FALSE;
for (i = 0; i < 3; i++) {
+ /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders:
+ *
+ * "An INVALID_VALUE error is generated if any of num_groups_x,
+ * num_groups_y and num_groups_z are greater than or equal to the
+ * maximum work group count for the corresponding dimension."
+ *
+ * However, the "or equal to" portion appears to be a specification
+ * bug. In all other areas, the specification appears to indicate that
+ * the number of workgroups can match the MAX_COMPUTE_WORK_GROUP_COUNT
+ * value. For example, under DispatchComputeIndirect:
+ *
+ * "If any of num_groups_x, num_groups_y or num_groups_z is greater than
+ * the value of MAX_COMPUTE_WORK_GROUP_COUNT for the corresponding
+ * dimension then the results are undefined."
+ *
+ * Additionally, the OpenGLES 3.1 specification does not contain "or
+ * equal to" as an error condition.
+ */
if (num_groups[i] > ctx->Const.MaxComputeWorkGroupCount[i]) {
_mesa_error(ctx, GL_INVALID_VALUE,
"glDispatchCompute(num_groups_%c)", 'x' + i);
@@ -937,24 +983,29 @@ valid_dispatch_indirect(struct gl_context *ctx,
if (!check_valid_to_compute(ctx, name))
return GL_FALSE;
- /* From the ARB_compute_shader specification:
+ /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders:
*
- * "An INVALID_OPERATION error is generated [...] if <indirect> is less
- * than zero or not a multiple of the size, in basic machine units, of
- * uint."
+ * "An INVALID_VALUE error is generated if indirect is negative or is not a
+ * multiple of four."
*/
if ((GLintptr)indirect & (sizeof(GLuint) - 1)) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
+ _mesa_error(ctx, GL_INVALID_VALUE,
"%s(indirect is not aligned)", name);
return GL_FALSE;
}
if ((GLintptr)indirect < 0) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
+ _mesa_error(ctx, GL_INVALID_VALUE,
"%s(indirect is less than zero)", name);
return GL_FALSE;
}
+ /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders:
+ *
+ * "An INVALID_OPERATION error is generated if no buffer is bound to the
+ * DRAW_INDIRECT_BUFFER binding, or if the command would source data
+ * beyond the end of the buffer object."
+ */
if (!_mesa_is_bufferobj(ctx->DispatchIndirectBuffer)) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"%s: no buffer bound to DISPATCH_INDIRECT_BUFFER", name);
@@ -967,11 +1018,6 @@ valid_dispatch_indirect(struct gl_context *ctx,
return GL_FALSE;
}
- /* From the ARB_compute_shader specification:
- *
- * "An INVALID_OPERATION error is generated if this command sources data
- * beyond the end of the buffer object [...]"
- */
if (ctx->DispatchIndirectBuffer->Size < end) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"%s(DISPATCH_INDIRECT_BUFFER too small)", name);
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index b2c88c37366..d964f030ecb 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -152,6 +152,7 @@ static const struct extension extension_table[] = {
{ "GL_ARB_separate_shader_objects", o(dummy_true), GL, 2010 },
{ "GL_ARB_shader_atomic_counters", o(ARB_shader_atomic_counters), GL, 2011 },
{ "GL_ARB_shader_bit_encoding", o(ARB_shader_bit_encoding), GL, 2010 },
+ { "GL_ARB_shader_clock", o(ARB_shader_clock), GL, 2015 },
{ "GL_ARB_shader_image_load_store", o(ARB_shader_image_load_store), GL, 2011 },
{ "GL_ARB_shader_image_size", o(ARB_shader_image_size), GL, 2012 },
{ "GL_ARB_shader_objects", o(dummy_true), GL, 2002 },
@@ -229,6 +230,7 @@ static const struct extension extension_table[] = {
{ "GL_EXT_depth_bounds_test", o(EXT_depth_bounds_test), GL, 2002 },
{ "GL_EXT_draw_buffers", o(dummy_true), ES2, 2012 },
{ "GL_EXT_draw_buffers2", o(EXT_draw_buffers2), GL, 2006 },
+ { "GL_EXT_draw_elements_base_vertex", o(ARB_draw_elements_base_vertex), ES2, 2014 },
{ "GL_EXT_draw_instanced", o(ARB_draw_instanced), GL, 2006 },
{ "GL_EXT_draw_range_elements", o(dummy_true), GLL, 1997 },
{ "GL_EXT_fog_coord", o(dummy_true), GLL, 1999 },
@@ -305,6 +307,7 @@ static const struct extension extension_table[] = {
{ "GL_OES_depth32", o(dummy_false), DISABLE, 2005 },
{ "GL_OES_depth_texture", o(ARB_depth_texture), ES2, 2006 },
{ "GL_OES_depth_texture_cube_map", o(OES_depth_texture_cube_map), ES2, 2012 },
+ { "GL_OES_draw_elements_base_vertex", o(ARB_draw_elements_base_vertex), ES2, 2014 },
{ "GL_OES_draw_texture", o(OES_draw_texture), ES1, 2004 },
{ "GL_OES_EGL_sync", o(dummy_true), ES1 | ES2, 2010 },
/* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index c295615b475..fbc7b8f8602 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -460,6 +460,7 @@ descriptor=[
[ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONST(MAX_COMPUTE_UNIFORM_COMPONENTS), extra_ARB_compute_shader_es31" ],
[ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader_es31" ],
[ "DISPATCH_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_compute_shader_es31" ],
+ [ "MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxCombinedUniformComponents), extra_ARB_compute_shader_es31" ],
# GL_ARB_framebuffer_no_attachments / GLES 3.1
["MAX_FRAMEBUFFER_WIDTH", "CONTEXT_INT(Const.MaxFramebufferWidth), extra_ARB_framebuffer_no_attachments"],
diff --git a/src/mesa/main/lines.c b/src/mesa/main/lines.c
index c020fb3eb9e..93b80af0dc4 100644
--- a/src/mesa/main/lines.c
+++ b/src/mesa/main/lines.c
@@ -45,6 +45,10 @@ _mesa_LineWidth( GLfloat width )
if (MESA_VERBOSE & VERBOSE_API)
_mesa_debug(ctx, "glLineWidth %f\n", width);
+ /* If width is unchanged, there can't be an error */
+ if (ctx->Line.Width == width)
+ return;
+
if (width <= 0.0F) {
_mesa_error( ctx, GL_INVALID_VALUE, "glLineWidth" );
return;
@@ -68,9 +72,6 @@ _mesa_LineWidth( GLfloat width )
return;
}
- if (ctx->Line.Width == width)
- return;
-
FLUSH_VERTICES(ctx, _NEW_LINE);
ctx->Line.Width = width;
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index ab4fa083672..02dd257d79d 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2292,6 +2292,7 @@ struct gl_shader
struct exec_list *ir;
struct exec_list *packed_varyings;
+ struct exec_list *fragdata_arrays;
struct glsl_symbol_table *symbols;
bool uses_builtin_functions;
@@ -2389,6 +2390,9 @@ struct gl_shader
*/
GLuint NumImages;
+ struct gl_active_atomic_buffer **AtomicBuffers;
+ unsigned NumAtomicBuffers;
+
/**
* Whether early fragment tests are enabled as defined by
* ARB_shader_image_load_store.
@@ -3680,6 +3684,7 @@ struct gl_extensions
GLboolean ARB_seamless_cube_map;
GLboolean ARB_shader_atomic_counters;
GLboolean ARB_shader_bit_encoding;
+ GLboolean ARB_shader_clock;
GLboolean ARB_shader_image_load_store;
GLboolean ARB_shader_image_size;
GLboolean ARB_shader_precision;
@@ -4501,7 +4506,7 @@ static inline bool
_mesa_active_fragment_shader_has_atomic_ops(const struct gl_context *ctx)
{
return ctx->Shader._CurrentFragmentProgram != NULL &&
- ctx->Shader._CurrentFragmentProgram->NumAtomicBuffers > 0;
+ ctx->Shader._CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]->NumAtomicBuffers > 0;
}
#ifdef __cplusplus
diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c
index 51ee10ff858..699a2ae47eb 100644
--- a/src/mesa/main/pipelineobj.c
+++ b/src/mesa/main/pipelineobj.c
@@ -230,6 +230,10 @@ _mesa_UseProgramStages(GLuint pipeline, GLbitfield stages, GLuint program)
struct gl_shader_program *shProg = NULL;
GLbitfield any_valid_stages;
+ if (MESA_VERBOSE & VERBOSE_API)
+ _mesa_debug(ctx, "glUseProgramStages(%u, 0x%x, %u)\n",
+ pipeline, stages, program);
+
if (!pipe) {
_mesa_error(ctx, GL_INVALID_OPERATION, "glUseProgramStages(pipeline)");
return;
@@ -251,6 +255,8 @@ _mesa_UseProgramStages(GLuint pipeline, GLbitfield stages, GLuint program)
if (_mesa_has_tessellation(ctx))
any_valid_stages |= GL_TESS_CONTROL_SHADER_BIT |
GL_TESS_EVALUATION_SHADER_BIT;
+ if (_mesa_has_compute_shaders(ctx))
+ any_valid_stages |= GL_COMPUTE_SHADER_BIT;
if (stages != GL_ALL_SHADER_BITS && (stages & ~any_valid_stages) != 0) {
_mesa_error(ctx, GL_INVALID_VALUE, "glUseProgramStages(Stages)");
@@ -332,6 +338,9 @@ _mesa_UseProgramStages(GLuint pipeline, GLbitfield stages, GLuint program)
if ((stages & GL_TESS_EVALUATION_SHADER_BIT) != 0)
_mesa_use_shader_program(ctx, GL_TESS_EVALUATION_SHADER, shProg, pipe);
+
+ if ((stages & GL_COMPUTE_SHADER_BIT) != 0)
+ _mesa_use_shader_program(ctx, GL_COMPUTE_SHADER, shProg, pipe);
}
/**
@@ -345,6 +354,9 @@ _mesa_ActiveShaderProgram(GLuint pipeline, GLuint program)
struct gl_shader_program *shProg = NULL;
struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline);
+ if (MESA_VERBOSE & VERBOSE_API)
+ _mesa_debug(ctx, "glActiveShaderProgram(%u, %u)\n", pipeline, program);
+
if (program != 0) {
shProg = _mesa_lookup_shader_program_err(ctx, program,
"glActiveShaderProgram(program)");
@@ -380,6 +392,9 @@ _mesa_BindProgramPipeline(GLuint pipeline)
GET_CURRENT_CONTEXT(ctx);
struct gl_pipeline_object *newObj = NULL;
+ if (MESA_VERBOSE & VERBOSE_API)
+ _mesa_debug(ctx, "glBindProgramPipeline(%u)\n", pipeline);
+
/* Rebinding the same pipeline object: no change.
*/
if (ctx->_Shader->Name == pipeline)
@@ -467,6 +482,9 @@ _mesa_DeleteProgramPipelines(GLsizei n, const GLuint *pipelines)
GET_CURRENT_CONTEXT(ctx);
GLsizei i;
+ if (MESA_VERBOSE & VERBOSE_API)
+ _mesa_debug(ctx, "glDeleteProgramPipelines(%d, %p)\n", n, pipelines);
+
if (n < 0) {
_mesa_error(ctx, GL_INVALID_VALUE, "glDeleteProgramPipelines(n<0)");
return;
@@ -551,6 +569,9 @@ _mesa_GenProgramPipelines(GLsizei n, GLuint *pipelines)
{
GET_CURRENT_CONTEXT(ctx);
+ if (MESA_VERBOSE & VERBOSE_API)
+ _mesa_debug(ctx, "glGenProgramPipelines(%d, %p)\n", n, pipelines);
+
create_program_pipelines(ctx, n, pipelines, false);
}
@@ -559,6 +580,9 @@ _mesa_CreateProgramPipelines(GLsizei n, GLuint *pipelines)
{
GET_CURRENT_CONTEXT(ctx);
+ if (MESA_VERBOSE & VERBOSE_API)
+ _mesa_debug(ctx, "glCreateProgramPipelines(%d, %p)\n", n, pipelines);
+
create_program_pipelines(ctx, n, pipelines, true);
}
@@ -574,6 +598,9 @@ _mesa_IsProgramPipeline(GLuint pipeline)
{
GET_CURRENT_CONTEXT(ctx);
+ if (MESA_VERBOSE & VERBOSE_API)
+ _mesa_debug(ctx, "glIsProgramPipeline(%u)\n", pipeline);
+
struct gl_pipeline_object *obj = _mesa_lookup_pipeline_object(ctx, pipeline);
if (obj == NULL)
return GL_FALSE;
@@ -590,6 +617,10 @@ _mesa_GetProgramPipelineiv(GLuint pipeline, GLenum pname, GLint *params)
GET_CURRENT_CONTEXT(ctx);
struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline);
+ if (MESA_VERBOSE & VERBOSE_API)
+ _mesa_debug(ctx, "glGetProgramPipelineiv(%u, %d, %p)\n",
+ pipeline, pname, params);
+
/* Are geometry shaders available in this context?
*/
const bool has_gs = _mesa_has_geometry_shaders(ctx);
@@ -643,6 +674,12 @@ _mesa_GetProgramPipelineiv(GLuint pipeline, GLenum pname, GLint *params)
*params = pipe->CurrentProgram[MESA_SHADER_FRAGMENT]
? pipe->CurrentProgram[MESA_SHADER_FRAGMENT]->Name : 0;
return;
+ case GL_COMPUTE_SHADER:
+ if (!_mesa_has_compute_shaders(ctx))
+ break;
+ *params = pipe->CurrentProgram[MESA_SHADER_COMPUTE]
+ ? pipe->CurrentProgram[MESA_SHADER_COMPUTE]->Name : 0;
+ return;
default:
break;
}
@@ -857,6 +894,9 @@ _mesa_ValidateProgramPipeline(GLuint pipeline)
{
GET_CURRENT_CONTEXT(ctx);
+ if (MESA_VERBOSE & VERBOSE_API)
+ _mesa_debug(ctx, "glValidateProgramPipeline(%u)\n", pipeline);
+
struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline);
if (!pipe) {
@@ -875,6 +915,10 @@ _mesa_GetProgramPipelineInfoLog(GLuint pipeline, GLsizei bufSize,
{
GET_CURRENT_CONTEXT(ctx);
+ if (MESA_VERBOSE & VERBOSE_API)
+ _mesa_debug(ctx, "glGetProgramPipelineInfoLog(%u, %d, %p, %p)\n",
+ pipeline, bufSize, length, infoLog);
+
struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline);
if (!pipe) {
diff --git a/src/mesa/main/program_resource.c b/src/mesa/main/program_resource.c
index eb71fdde703..b7e25fe3840 100644
--- a/src/mesa/main/program_resource.c
+++ b/src/mesa/main/program_resource.c
@@ -119,7 +119,6 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface,
case GL_MAX_NUM_ACTIVE_VARIABLES:
switch (programInterface) {
case GL_UNIFORM_BLOCK:
- case GL_SHADER_STORAGE_BLOCK:
for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) {
if (shProg->ProgramResourceList[i].Type == programInterface) {
struct gl_uniform_block *block =
@@ -129,6 +128,26 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface,
}
}
break;
+ case GL_SHADER_STORAGE_BLOCK:
+ for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) {
+ if (shProg->ProgramResourceList[i].Type == programInterface) {
+ struct gl_uniform_block *block =
+ (struct gl_uniform_block *)
+ shProg->ProgramResourceList[i].Data;
+ GLint block_params = 0;
+ for (unsigned j = 0; j < block->NumUniforms; j++) {
+ const char *iname = block->Uniforms[j].IndexName;
+ struct gl_program_resource *uni =
+ _mesa_program_resource_find_name(shProg, GL_BUFFER_VARIABLE,
+ iname, NULL);
+ if (!uni)
+ continue;
+ block_params++;
+ }
+ *params = MAX2(*params, block_params);
+ }
+ }
+ break;
case GL_ATOMIC_COUNTER_BUFFER:
for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) {
if (shProg->ProgramResourceList[i].Type == programInterface) {
diff --git a/src/mesa/main/rastpos.c b/src/mesa/main/rastpos.c
index 54b2125a80f..b468219e688 100644
--- a/src/mesa/main/rastpos.c
+++ b/src/mesa/main/rastpos.c
@@ -36,6 +36,447 @@
#include "rastpos.h"
#include "state.h"
#include "main/dispatch.h"
+#include "main/viewport.h"
+#include "util/simple_list.h"
+
+
+
+/**
+ * Test a point against the X and Y bounds of the view volume.
+ *
+ * The point is considered inside when -w <= x <= w and -w <= y <= w, with
+ * x, y and w read from v[0], v[1] and v[3]. v[2] is not examined here.
+ *
+ * \param v vertex vector describing the point to clip.
+ *
+ * \return zero if outside view volume, or one if inside.
+ */
+static GLuint
+viewclip_point_xy( const GLfloat v[] )
+{
+ if ( v[0] > v[3] || v[0] < -v[3]
+ || v[1] > v[3] || v[1] < -v[3] ) {
+ return 0;
+ }
+ else {
+ return 1;
+ }
+}
+
+
+/**
+ * Clip a point against the far/near Z clipping planes.
+ *
+ * The point is considered inside when -w <= z <= w, with z and w read from
+ * v[2] and v[3]. v[0] and v[1] are not examined here.
+ *
+ * \param v vertex vector describing the point to clip.
+ *
+ * \return zero if outside view volume, or one if inside.
+ */
+static GLuint
+viewclip_point_z( const GLfloat v[] )
+{
+ if (v[2] > v[3] || v[2] < -v[3] ) {
+ return 0;
+ }
+ else {
+ return 1;
+ }
+}
+
+
+/**
+ * Clip a point against the user clipping planes.
+ *
+ * For each plane index below ctx->Const.MaxClipPlanes whose bit is set in
+ * ctx->Transform.ClipPlanesEnabled, the four-component dot product of v
+ * with ctx->Transform._ClipUserPlane[p] is evaluated. The point is clipped
+ * as soon as one of those dot products is negative.
+ *
+ * \param ctx GL context.
+ * \param v vertex vector describing the point to clip.
+ *
+ * \return zero if the point was clipped, or one otherwise.
+ */
+static GLuint
+userclip_point( struct gl_context *ctx, const GLfloat v[] )
+{
+ GLuint p;
+
+ for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
+ if (ctx->Transform.ClipPlanesEnabled & (1 << p)) {
+ GLfloat dot = v[0] * ctx->Transform._ClipUserPlane[p][0]
+ + v[1] * ctx->Transform._ClipUserPlane[p][1]
+ + v[2] * ctx->Transform._ClipUserPlane[p][2]
+ + v[3] * ctx->Transform._ClipUserPlane[p][3];
+ if (dot < 0.0F) {
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+}
+
+
+/**
+ * Compute lighting for the raster position. RGB modes computed.
+ *
+ * The diffuse accumulator starts from ctx->Light._BaseColor[0], with alpha
+ * taken from the front diffuse material alpha clamped to [0, 1]; the
+ * specular accumulator starts at (0, 0, 0, 1). Every light in
+ * ctx->Light.EnabledList then adds its attenuated contribution, using the
+ * index-0 (_MatAmbient[0], _MatDiffuse[0], _MatSpecular[0]) material
+ * products. The specular term goes into Rspec only when the color control
+ * is GL_SEPARATE_SPECULAR_COLOR; otherwise it is folded into Rcolor.
+ * Both results are clamped to [0, 1] per component before being returned.
+ *
+ * \param ctx the context
+ * \param vertex vertex location
+ * \param normal normal vector
+ * \param Rcolor returned color
+ * \param Rspec returned specular color (if separate specular enabled)
+ */
+static void
+shade_rastpos(struct gl_context *ctx,
+ const GLfloat vertex[4],
+ const GLfloat normal[3],
+ GLfloat Rcolor[4],
+ GLfloat Rspec[4])
+{
+ /*const*/ GLfloat (*base)[3] = ctx->Light._BaseColor;
+ const struct gl_light *light;
+ GLfloat diffuseColor[4], specularColor[4]; /* for RGB mode only */
+
+ COPY_3V(diffuseColor, base[0]);
+ diffuseColor[3] = CLAMP(
+ ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_DIFFUSE][3], 0.0F, 1.0F );
+ ASSIGN_4V(specularColor, 0.0, 0.0, 0.0, 1.0);
+
+ foreach (light, &ctx->Light.EnabledList) {
+ GLfloat attenuation = 1.0;
+ GLfloat VP[3]; /* vector from vertex to light pos */
+ GLfloat n_dot_VP;
+ GLfloat diffuseContrib[3], specularContrib[3];
+
+ if (!(light->_Flags & LIGHT_POSITIONAL)) {
+ /* light at infinity */
+ COPY_3V(VP, light->_VP_inf_norm);
+ attenuation = light->_VP_inf_spot_attenuation;
+ }
+ else {
+ /* local/positional light */
+ GLfloat d;
+
+ /* VP = vector from vertex pos to light[i].pos */
+ SUB_3V(VP, light->_Position, vertex);
+ /* d = length(VP) */
+ d = (GLfloat) LEN_3FV( VP );
+ if (d > 1.0e-6F) {
+ /* normalize VP */
+ GLfloat invd = 1.0F / d;
+ SELF_SCALE_SCALAR_3V(VP, invd);
+ }
+
+ /* distance attenuation: 1 / (constant + d * (linear + d * quadratic)) */
+ attenuation = 1.0F / (light->ConstantAttenuation + d *
+ (light->LinearAttenuation + d *
+ light->QuadraticAttenuation));
+
+ if (light->_Flags & LIGHT_SPOT) {
+ GLfloat PV_dot_dir = - DOT3(VP, light->_NormSpotDirection);
+
+ if (PV_dot_dir<light->_CosCutoff) { /* below the cutoff cosine: skip this light */
+ continue;
+ }
+ else {
+ GLfloat spot = powf(PV_dot_dir, light->SpotExponent);
+ attenuation *= spot;
+ }
+ }
+ }
+
+ if (attenuation < 1e-3F) /* skip lights attenuated below 1e-3 */
+ continue;
+
+ n_dot_VP = DOT3( normal, VP );
+
+ if (n_dot_VP < 0.0F) { /* normal points away from the light: ambient term only */
+ ACC_SCALE_SCALAR_3V(diffuseColor, attenuation, light->_MatAmbient[0]);
+ continue;
+ }
+
+ /* Ambient + diffuse */
+ COPY_3V(diffuseContrib, light->_MatAmbient[0]);
+ ACC_SCALE_SCALAR_3V(diffuseContrib, n_dot_VP, light->_MatDiffuse[0]);
+
+ /* Specular */
+ {
+ const GLfloat *h;
+ GLfloat n_dot_h;
+
+ ASSIGN_3V(specularContrib, 0.0, 0.0, 0.0);
+
+ if (ctx->Light.Model.LocalViewer) {
+ GLfloat v[3];
+ COPY_3V(v, vertex);
+ NORMALIZE_3FV(v);
+ SUB_3V(VP, VP, v);
+ NORMALIZE_3FV(VP);
+ h = VP;
+ }
+ else if (light->_Flags & LIGHT_POSITIONAL) {
+ ACC_3V(VP, ctx->_EyeZDir);
+ NORMALIZE_3FV(VP);
+ h = VP;
+ }
+ else {
+ h = light->_h_inf_norm;
+ }
+
+ n_dot_h = DOT3(normal, h);
+
+ if (n_dot_h > 0.0F) {
+ GLfloat shine;
+ GLfloat spec_coef;
+
+ shine = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS][0];
+ spec_coef = powf(n_dot_h, shine);
+
+ if (spec_coef > 1.0e-10F) {
+ if (ctx->Light.Model.ColorControl==GL_SEPARATE_SPECULAR_COLOR) {
+ ACC_SCALE_SCALAR_3V( specularContrib, spec_coef,
+ light->_MatSpecular[0]);
+ }
+ else {
+ ACC_SCALE_SCALAR_3V( diffuseContrib, spec_coef,
+ light->_MatSpecular[0]);
+ }
+ }
+ }
+ }
+
+ ACC_SCALE_SCALAR_3V( diffuseColor, attenuation, diffuseContrib );
+ ACC_SCALE_SCALAR_3V( specularColor, attenuation, specularContrib );
+ }
+
+ Rcolor[0] = CLAMP(diffuseColor[0], 0.0F, 1.0F);
+ Rcolor[1] = CLAMP(diffuseColor[1], 0.0F, 1.0F);
+ Rcolor[2] = CLAMP(diffuseColor[2], 0.0F, 1.0F);
+ Rcolor[3] = CLAMP(diffuseColor[3], 0.0F, 1.0F);
+ Rspec[0] = CLAMP(specularColor[0], 0.0F, 1.0F);
+ Rspec[1] = CLAMP(specularColor[1], 0.0F, 1.0F);
+ Rspec[2] = CLAMP(specularColor[2], 0.0F, 1.0F);
+ Rspec[3] = CLAMP(specularColor[3], 0.0F, 1.0F);
+}
+
+
+/**
+ * Do texgen needed for glRasterPos.
+ *
+ * Each of the S, T, R and Q coordinates is regenerated only when its bit is
+ * set in the unit's TexGenEnabled mask; other components of texcoord are
+ * left as passed in. S and T accept the object-linear, eye-linear,
+ * sphere-map, reflection-map and normal-map modes; R accepts the same set
+ * minus sphere-map; Q accepts only the two linear modes. Any other mode
+ * is reported through _mesa_problem() and the function returns at once,
+ * without processing the remaining coordinates.
+ *
+ * \param ctx rendering context
+ * \param vObj object-space vertex coordinate
+ * \param vEye eye-space vertex coordinate
+ * \param normal vertex normal
+ * \param unit texture unit number
+ * \param texcoord incoming texcoord and resulting texcoord
+ */
+static void
+compute_texgen(struct gl_context *ctx, const GLfloat vObj[4], const GLfloat vEye[4],
+ const GLfloat normal[3], GLuint unit, GLfloat texcoord[4])
+{
+ const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+
+ /* always compute sphere map terms, just in case:
+ * u is the normalized eye-space position, (rx, ry, rz) is
+ * u - 2 * dot(normal, u) * normal, and mInv is
+ * 0.5 / sqrt(rx^2 + ry^2 + (rz + 1)^2), or zero when that sum is not
+ * positive.
+ */
+ GLfloat u[3], two_nu, rx, ry, rz, m, mInv;
+ COPY_3V(u, vEye);
+ NORMALIZE_3FV(u);
+ two_nu = 2.0F * DOT3(normal, u);
+ rx = u[0] - normal[0] * two_nu;
+ ry = u[1] - normal[1] * two_nu;
+ rz = u[2] - normal[2] * two_nu;
+ m = rx * rx + ry * ry + (rz + 1.0F) * (rz + 1.0F);
+ if (m > 0.0F)
+ mInv = 0.5F * (1.0f / sqrtf(m));
+ else
+ mInv = 0.0F;
+
+ if (texUnit->TexGenEnabled & S_BIT) {
+ switch (texUnit->GenS.Mode) {
+ case GL_OBJECT_LINEAR:
+ texcoord[0] = DOT4(vObj, texUnit->GenS.ObjectPlane);
+ break;
+ case GL_EYE_LINEAR:
+ texcoord[0] = DOT4(vEye, texUnit->GenS.EyePlane);
+ break;
+ case GL_SPHERE_MAP:
+ texcoord[0] = rx * mInv + 0.5F;
+ break;
+ case GL_REFLECTION_MAP:
+ texcoord[0] = rx;
+ break;
+ case GL_NORMAL_MAP:
+ texcoord[0] = normal[0];
+ break;
+ default:
+ _mesa_problem(ctx, "Bad S texgen in compute_texgen()");
+ return;
+ }
+ }
+
+ if (texUnit->TexGenEnabled & T_BIT) {
+ switch (texUnit->GenT.Mode) {
+ case GL_OBJECT_LINEAR:
+ texcoord[1] = DOT4(vObj, texUnit->GenT.ObjectPlane);
+ break;
+ case GL_EYE_LINEAR:
+ texcoord[1] = DOT4(vEye, texUnit->GenT.EyePlane);
+ break;
+ case GL_SPHERE_MAP:
+ texcoord[1] = ry * mInv + 0.5F;
+ break;
+ case GL_REFLECTION_MAP:
+ texcoord[1] = ry;
+ break;
+ case GL_NORMAL_MAP:
+ texcoord[1] = normal[1];
+ break;
+ default:
+ _mesa_problem(ctx, "Bad T texgen in compute_texgen()");
+ return;
+ }
+ }
+
+ if (texUnit->TexGenEnabled & R_BIT) {
+ switch (texUnit->GenR.Mode) {
+ case GL_OBJECT_LINEAR:
+ texcoord[2] = DOT4(vObj, texUnit->GenR.ObjectPlane);
+ break;
+ case GL_EYE_LINEAR:
+ texcoord[2] = DOT4(vEye, texUnit->GenR.EyePlane);
+ break;
+ case GL_REFLECTION_MAP:
+ texcoord[2] = rz;
+ break;
+ case GL_NORMAL_MAP:
+ texcoord[2] = normal[2];
+ break;
+ default:
+ _mesa_problem(ctx, "Bad R texgen in compute_texgen()");
+ return;
+ }
+ }
+
+ if (texUnit->TexGenEnabled & Q_BIT) {
+ switch (texUnit->GenQ.Mode) {
+ case GL_OBJECT_LINEAR:
+ texcoord[3] = DOT4(vObj, texUnit->GenQ.ObjectPlane);
+ break;
+ case GL_EYE_LINEAR:
+ texcoord[3] = DOT4(vEye, texUnit->GenQ.EyePlane);
+ break;
+ default:
+ _mesa_problem(ctx, "Bad Q texgen in compute_texgen()");
+ return;
+ }
+ }
+}
+
+
+/**
+ * glRasterPos transformation. Typically called via ctx->Driver.RasterPos().
+ *
+ * Transforms vObj by the top modelview and projection matrices and clips
+ * the result. The Z test is skipped when ctx->Transform.DepthClamp is set,
+ * and the X/Y test is skipped when ctx->Transform.RasterPositionUnclipped
+ * is set. If any clip test rejects the point, ctx->Current.RasterPosValid
+ * is set to GL_FALSE and the function returns without touching the rest of
+ * the raster state. Otherwise it updates ctx->Current.RasterPos,
+ * RasterDistance, RasterColor, RasterSecondaryColor and RasterTexCoords,
+ * sets RasterPosValid to GL_TRUE and, in GL_SELECT render mode, passes the
+ * window Z to _mesa_update_hitflag().
+ *
+ * When a vertex program is enabled the function only reports a problem and
+ * returns.
+ *
+ * NOTE(review): with DepthClamp enabled the code clamps RasterPos[3], which
+ * was just assigned clip W, to the viewport Near/Far range; confirm whether
+ * RasterPos[2] (window Z) was intended.
+ *
+ * \param ctx the context
+ * \param vObj vertex position in object space
+ */
+void
+_mesa_RasterPos(struct gl_context *ctx, const GLfloat vObj[4])
+{
+ if (ctx->VertexProgram._Enabled) {
+ /* XXX implement this */
+ _mesa_problem(ctx, "Vertex programs not implemented for glRasterPos");
+ return;
+ }
+ else {
+ GLfloat eye[4], clip[4], ndc[3], d;
+ GLfloat *norm, eyenorm[3];
+ GLfloat *objnorm = ctx->Current.Attrib[VERT_ATTRIB_NORMAL];
+ float scale[3], translate[3];
+
+ /* apply modelview matrix: eye = MV * obj */
+ TRANSFORM_POINT( eye, ctx->ModelviewMatrixStack.Top->m, vObj );
+ /* apply projection matrix: clip = Proj * eye */
+ TRANSFORM_POINT( clip, ctx->ProjectionMatrixStack.Top->m, eye );
+
+ /* clip to view volume. */
+ if (!ctx->Transform.DepthClamp) {
+ if (viewclip_point_z(clip) == 0) {
+ ctx->Current.RasterPosValid = GL_FALSE;
+ return;
+ }
+ }
+ if (!ctx->Transform.RasterPositionUnclipped) {
+ if (viewclip_point_xy(clip) == 0) {
+ ctx->Current.RasterPosValid = GL_FALSE;
+ return;
+ }
+ }
+
+ /* clip to user clipping planes */
+ if (ctx->Transform.ClipPlanesEnabled && !userclip_point(ctx, clip)) {
+ ctx->Current.RasterPosValid = GL_FALSE;
+ return;
+ }
+
+ /* ndc = clip / W (a zero W is treated as a divisor of one) */
+ d = (clip[3] == 0.0F) ? 1.0F : 1.0F / clip[3];
+ ndc[0] = clip[0] * d;
+ ndc[1] = clip[1] * d;
+ ndc[2] = clip[2] * d;
+ /* wincoord = viewport_mapping(ndc), using viewport 0 */
+ _mesa_get_viewport_xform(ctx, 0, scale, translate);
+ ctx->Current.RasterPos[0] = ndc[0] * scale[0] + translate[0];
+ ctx->Current.RasterPos[1] = ndc[1] * scale[1] + translate[1];
+ ctx->Current.RasterPos[2] = ndc[2] * scale[2] + translate[2];
+ ctx->Current.RasterPos[3] = clip[3];
+
+ if (ctx->Transform.DepthClamp) {
+ ctx->Current.RasterPos[3] = CLAMP(ctx->Current.RasterPos[3],
+ ctx->ViewportArray[0].Near,
+ ctx->ViewportArray[0].Far);
+ }
+
+ /* compute raster distance: the current fog coordinate if that is the
+ * fog source, otherwise the length of the eye-space position
+ */
+ if (ctx->Fog.FogCoordinateSource == GL_FOG_COORDINATE_EXT)
+ ctx->Current.RasterDistance = ctx->Current.Attrib[VERT_ATTRIB_FOG][0];
+ else
+ ctx->Current.RasterDistance =
+ sqrtf( eye[0]*eye[0] + eye[1]*eye[1] + eye[2]*eye[2] );
+
+ /* compute transformed normal vector (for lighting or texgen) */
+ if (ctx->_NeedEyeCoords) {
+ const GLfloat *inv = ctx->ModelviewMatrixStack.Top->inv;
+ TRANSFORM_NORMAL( eyenorm, objnorm, inv );
+ norm = eyenorm;
+ }
+ else {
+ norm = objnorm;
+ }
+
+ /* update raster color */
+ if (ctx->Light.Enabled) {
+ /* lighting */
+ shade_rastpos( ctx, vObj, norm,
+ ctx->Current.RasterColor,
+ ctx->Current.RasterSecondaryColor );
+ }
+ else {
+ /* use current color */
+ COPY_4FV(ctx->Current.RasterColor,
+ ctx->Current.Attrib[VERT_ATTRIB_COLOR0]);
+ COPY_4FV(ctx->Current.RasterSecondaryColor,
+ ctx->Current.Attrib[VERT_ATTRIB_COLOR1]);
+ }
+
+ /* texture coords: start from the current texcoord of each unit, apply
+ * texgen if any is enabled, then the unit's texture matrix
+ */
+ {
+ GLuint u;
+ for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
+ GLfloat tc[4];
+ COPY_4V(tc, ctx->Current.Attrib[VERT_ATTRIB_TEX0 + u]);
+ if (ctx->Texture.Unit[u].TexGenEnabled) {
+ compute_texgen(ctx, vObj, eye, norm, u, tc);
+ }
+ TRANSFORM_POINT(ctx->Current.RasterTexCoords[u],
+ ctx->TextureMatrixStack[u].Top->m, tc);
+ }
+ }
+
+ ctx->Current.RasterPosValid = GL_TRUE;
+ }
+
+ if (ctx->RenderMode == GL_SELECT) {
+ _mesa_update_hitflag( ctx, ctx->Current.RasterPos[2] );
+ }
+}
/**
diff --git a/src/mesa/main/rastpos.h b/src/mesa/main/rastpos.h
index dc28c68d41b..90b8f957b9f 100644
--- a/src/mesa/main/rastpos.h
+++ b/src/mesa/main/rastpos.h
@@ -41,6 +41,9 @@ struct gl_context;
extern void
_mesa_init_rastpos(struct gl_context *ctx);
+void
+_mesa_RasterPos(struct gl_context *ctx, const GLfloat vObj[4]);
+
void GLAPIENTRY
_mesa_RasterPos2d(GLdouble x, GLdouble y);
void GLAPIENTRY
diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index 8182d3dcc04..dd51bba3386 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -543,13 +543,55 @@ _mesa_program_resource_find_name(struct gl_shader_program *shProg,
/* Resource basename. */
const char *rname = _mesa_program_resource_name(res);
unsigned baselen = strlen(rname);
+ unsigned baselen_without_array_index = baselen;
+ const char *rname_last_square_bracket = strrchr(rname, '[');
+ bool found = false;
+ bool rname_has_array_index_zero = false;
+ /* From ARB_program_interface_query spec:
+ *
+ * "uint GetProgramResourceIndex(uint program, enum programInterface,
+ * const char *name);
+ * [...]
+ * If <name> exactly matches the name string of one of the active
+ * resources for <programInterface>, the index of the matched resource is
+ * returned. Additionally, if <name> would exactly match the name string
+ * of an active resource if "[0]" were appended to <name>, the index of
+ * the matched resource is returned. [...]"
+ *
+ * "A string provided to GetProgramResourceLocation or
+ * GetProgramResourceLocationIndex is considered to match an active variable
+ * if:
+ *
+ * * the string exactly matches the name of the active variable;
+ *
+ * * if the string identifies the base name of an active array, where the
+ * string would exactly match the name of the variable if the suffix
+ * "[0]" were appended to the string; [...]"
+ */
+ /* Remove array's index from interface block name comparison only if
+ * array's index is zero and the resulting string length is the same
+ * as the provided name's length.
+ */
+ if (rname_last_square_bracket) {
+ baselen_without_array_index -= strlen(rname_last_square_bracket);
+ rname_has_array_index_zero =
+ (strncmp(rname_last_square_bracket, "[0]\0", 4) == 0) &&
+ (baselen_without_array_index == strlen(name));
+ }
+
+ if (strncmp(rname, name, baselen) == 0)
+ found = true;
+ else if (rname_has_array_index_zero &&
+ strncmp(rname, name, baselen_without_array_index) == 0)
+ found = true;
- if (strncmp(rname, name, baselen) == 0) {
+ if (found) {
switch (programInterface) {
case GL_UNIFORM_BLOCK:
case GL_SHADER_STORAGE_BLOCK:
/* Basename match, check if array or struct. */
- if (name[baselen] == '\0' ||
+ if (rname_has_array_index_zero ||
+ name[baselen] == '\0' ||
name[baselen] == '[' ||
name[baselen] == '.') {
return res;
@@ -627,6 +669,20 @@ _mesa_program_resource_index(struct gl_shader_program *shProg,
}
}
+/**
+ * Find a program resource that points to given data.
+ */
+static struct gl_program_resource*
+program_resource_find_data(struct gl_shader_program *shProg, void *data)
+{
+ struct gl_program_resource *res = shProg->ProgramResourceList;
+ for (unsigned i = 0; i < shProg->NumProgramResourceList; i++, res++) {
+ if (res->Data == data)
+ return res;
+ }
+ return NULL;
+}
+
/* Find a program resource with specific index in given interface.
*/
struct gl_program_resource *
@@ -808,6 +864,14 @@ program_resource_location(struct gl_shader_program *shProg,
if (RESOURCE_UNI(res)->builtin)
return -1;
+ /* From page 79 of the OpenGL 4.2 spec:
+ *
+ * "A valid name cannot be a structure, an array of structures, or any
+ * portion of a single vector or a matrix."
+ */
+ if (RESOURCE_UNI(res)->type->without_array()->is_record())
+ return -1;
+
/* From the GL_ARB_uniform_buffer_object spec:
*
* "The value -1 will be returned if <name> does not correspond to an
@@ -1016,8 +1080,18 @@ get_buffer_property(struct gl_shader_program *shProg,
*val = RESOURCE_ATC(res)->NumUniforms;
return 1;
case GL_ACTIVE_VARIABLES:
- for (unsigned i = 0; i < RESOURCE_ATC(res)->NumUniforms; i++)
- *val++ = RESOURCE_ATC(res)->Uniforms[i];
+ for (unsigned i = 0; i < RESOURCE_ATC(res)->NumUniforms; i++) {
+ /* Active atomic buffer contains index to UniformStorage. Find
+ * out gl_program_resource via data pointer and then calculate
+ * index of that uniform.
+ */
+ unsigned idx = RESOURCE_ATC(res)->Uniforms[i];
+ struct gl_program_resource *uni =
+ program_resource_find_data(shProg,
+ &shProg->UniformStorage[idx]);
+ assert(uni);
+ *val++ = _mesa_program_resource_index(shProg, uni);
+ }
return RESOURCE_ATC(res)->NumUniforms;
}
}
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 765602e50db..ac40891f435 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -630,9 +630,16 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname,
case GL_ACTIVE_ATTRIBUTE_MAX_LENGTH:
*params = _mesa_longest_attribute_name_length(shProg);
return;
- case GL_ACTIVE_UNIFORMS:
- *params = shProg->NumUniformStorage - shProg->NumHiddenUniforms;
+ case GL_ACTIVE_UNIFORMS: {
+ unsigned i;
+ const unsigned num_uniforms =
+ shProg->NumUniformStorage - shProg->NumHiddenUniforms;
+ for (*params = 0, i = 0; i < num_uniforms; i++) {
+ if (!shProg->UniformStorage[i].is_shader_storage)
+ (*params)++;
+ }
return;
+ }
case GL_ACTIVE_UNIFORM_MAX_LENGTH: {
unsigned i;
GLint max_len = 0;
@@ -640,6 +647,9 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname,
shProg->NumUniformStorage - shProg->NumHiddenUniforms;
for (i = 0; i < num_uniforms; i++) {
+ if (shProg->UniformStorage[i].is_shader_storage)
+ continue;
+
/* Add one for the terminating NUL character for a non-array, and
* 4 for the "[0]" and the NUL for an array.
*/
diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c
index 84973d3fe5d..a8ac19e40d7 100644
--- a/src/mesa/main/texcompress.c
+++ b/src/mesa/main/texcompress.c
@@ -243,28 +243,6 @@ _mesa_gl_compressed_format_base_format(GLenum format)
* what GL_NUM_COMPRESSED_TEXTURE_FORMATS and
* GL_COMPRESSED_TEXTURE_FORMATS return."
*
- * The KHR_texture_compression_astc_hdr spec says:
- *
- * "Interactions with OpenGL 4.2
- *
- * OpenGL 4.2 supports the feature that compressed textures can be
- * compressed online, by passing the compressed texture format enum as
- * the internal format when uploading a texture using TexImage1D,
- * TexImage2D or TexImage3D (see Section 3.9.3, Texture Image
- * Specification, subsection Encoding of Special Internal Formats).
- *
- * Due to the complexity of the ASTC compression algorithm, it is not
- * usually suitable for online use, and therefore ASTC support will be
- * limited to pre-compressed textures only. Where on-device compression
- * is required, a domain-specific limited compressor will typically
- * be used, and this is therefore not suitable for implementation in
- * the driver.
- *
- * In particular, the ASTC format specifiers will not be added to
- * Table 3.14, and thus will not be accepted by the TexImage*D
- * functions, and will not be returned by the (already deprecated)
- * COMPRESSED_TEXTURE_FORMATS query."
- *
* There is no formal spec for GL_ATI_texture_compression_3dc. Since the
* formats added by this extension are luminance-alpha formats, it is
* reasonable to expect them to follow the same rules as
@@ -286,7 +264,8 @@ GLuint
_mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats)
{
GLuint n = 0;
- if (ctx->Extensions.TDFX_texture_compression_FXT1) {
+ if (_mesa_is_desktop_gl(ctx) &&
+ ctx->Extensions.TDFX_texture_compression_FXT1) {
if (formats) {
formats[n++] = GL_COMPRESSED_RGB_FXT1_3DFX;
formats[n++] = GL_COMPRESSED_RGBA_FXT1_3DFX;
@@ -396,6 +375,69 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats)
n += 10;
}
}
+
+ /* The KHR_texture_compression_astc_hdr spec says:
+ *
+ * "Interactions with OpenGL 4.2
+ *
+ * OpenGL 4.2 supports the feature that compressed textures can be
+ * compressed online, by passing the compressed texture format enum as
+ * the internal format when uploading a texture using TexImage1D,
+ * TexImage2D or TexImage3D (see Section 3.9.3, Texture Image
+ * Specification, subsection Encoding of Special Internal Formats).
+ *
+ * Due to the complexity of the ASTC compression algorithm, it is not
+ * usually suitable for online use, and therefore ASTC support will be
+ * limited to pre-compressed textures only. Where on-device compression
+ * is required, a domain-specific limited compressor will typically
+ * be used, and this is therefore not suitable for implementation in
+ * the driver.
+ *
+ * In particular, the ASTC format specifiers will not be added to
+ * Table 3.14, and thus will not be accepted by the TexImage*D
+ * functions, and will not be returned by the (already deprecated)
+ * COMPRESSED_TEXTURE_FORMATS query."
+ *
+ * The ES and the desktop specs diverge here. In OpenGL ES, the COMPRESSED_TEXTURE_FORMATS
+ * query returns the set of supported specific compressed formats.
+ */
+ if (ctx->API == API_OPENGLES2 &&
+ ctx->Extensions.KHR_texture_compression_astc_ldr) {
+ if (formats) {
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_4x4_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_5x4_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_5x5_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_6x5_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_6x6_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_8x5_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_8x6_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_8x8_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_10x5_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_10x6_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_10x8_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_10x10_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_12x10_KHR;
+ formats[n++] = GL_COMPRESSED_RGBA_ASTC_12x12_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR;
+ formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR;
+ }
+ else {
+ n += 28;
+ }
+ }
+
return n;
}
diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index 682b72755c7..945890aeeb5 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -297,8 +297,7 @@ get_tex_rgba_compressed(struct gl_context *ctx, GLuint dimensions,
uint8_t rebaseSwizzle[4];
/* Decompress into temp float buffer, then pack into user buffer */
- tempImage = malloc(width * height * depth
- * 4 * sizeof(GLfloat));
+ tempImage = malloc(width * height * depth * 4 * sizeof(GLfloat));
if (!tempImage) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage()");
return;
diff --git a/src/mesa/main/vdpau.c b/src/mesa/main/vdpau.c
index 0efa56e4f41..44be3a37443 100644
--- a/src/mesa/main/vdpau.c
+++ b/src/mesa/main/vdpau.c
@@ -163,9 +163,10 @@ register_surface(struct gl_context *ctx, GLboolean isOutput,
return (GLintptr)NULL;
}
- if (tex->Target == 0)
+ if (tex->Target == 0) {
tex->Target = target;
- else if (tex->Target != target) {
+ tex->TargetIndex = _mesa_tex_target_to_index(ctx, target);
+ } else if (tex->Target != target) {
_mesa_unlock_texture(ctx, tex);
free(surf);
_mesa_error(ctx, GL_INVALID_OPERATION,
diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c
index acaa85d9356..20f8b3df99d 100644
--- a/src/mesa/state_tracker/st_atom_constbuf.c
+++ b/src/mesa/state_tracker/st_atom_constbuf.c
@@ -73,7 +73,8 @@ void st_upload_constants( struct st_context *st,
* the parameters list are explicitly set by the user with glUniform,
* glProgramParameter(), etc.
*/
- _mesa_load_state_parameters(st->ctx, params);
+ if (params->StateFlags)
+ _mesa_load_state_parameters(st->ctx, params);
/* We always need to get a new buffer, to keep the drivers simple and
* avoid gratuitous rendering synchronization.
diff --git a/src/mesa/state_tracker/st_cb_copyimage.c b/src/mesa/state_tracker/st_cb_copyimage.c
new file mode 100644
index 00000000000..75114cdb712
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_copyimage.c
@@ -0,0 +1,582 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "state_tracker/st_context.h"
+#include "state_tracker/st_cb_copyimage.h"
+#include "state_tracker/st_cb_fbo.h"
+#include "state_tracker/st_texture.h"
+
+#include "util/u_box.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+
+
+/**
+ * Return an equivalent canonical format without "X" channels.
+ *
+ * Copying between incompatible formats is easier when the format is
+ * canonicalized, meaning that it is in a standard form.
+ *
+ * The returned format has the same component sizes and swizzles as
+ * the source format, the type is changed to UINT or UNORM, depending on
+ * which one has the most swizzle combinations in their group.
+ *
+ * If it's not an array format, return a memcpy-equivalent array format.
+ *
+ * The key feature is that swizzled versions of formats of the same
+ * component size always return the same component type.
+ *
+ * X returns A.
+ * Luminance, intensity, alpha, depth, stencil, and 8-bit and 16-bit packed
+ * formats are not supported. (same as ARB_copy_image)
+ */
+static enum pipe_format
+get_canonical_format(enum pipe_format format)
+{
+ const struct util_format_description *desc =
+ util_format_description(format);
+
+ /* Packed formats. Return the equivalent array format. */
+ if (format == PIPE_FORMAT_R11G11B10_FLOAT ||
+ format == PIPE_FORMAT_R9G9B9E5_FLOAT)
+ return get_canonical_format(PIPE_FORMAT_R8G8B8A8_UINT);
+
+ if (desc->nr_channels == 4 &&
+ desc->channel[0].size == 10 &&
+ desc->channel[1].size == 10 &&
+ desc->channel[2].size == 10 &&
+ desc->channel[3].size == 2) {
+ if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X &&
+ desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_Y &&
+ desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_Z)
+ return get_canonical_format(PIPE_FORMAT_R8G8B8A8_UINT);
+
+ return PIPE_FORMAT_NONE;
+ }
+
+#define RETURN_FOR_SWIZZLE1(x, format) \
+ if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_##x) \
+ return format
+
+#define RETURN_FOR_SWIZZLE2(x, y, format) \
+ if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_##x && \
+ desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_##y) \
+ return format
+
+#define RETURN_FOR_SWIZZLE3(x, y, z, format) \
+ if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_##x && \
+ desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_##y && \
+ desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_##z) \
+ return format
+
+#define RETURN_FOR_SWIZZLE4(x, y, z, w, format) \
+ if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_##x && \
+ desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_##y && \
+ desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_##z && \
+ desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_##w) \
+ return format
+
+ /* Array formats. */
+ if (desc->is_array) {
+ switch (desc->nr_channels) {
+ case 1:
+ switch (desc->channel[0].size) {
+ case 8:
+ RETURN_FOR_SWIZZLE1(X, PIPE_FORMAT_R8_UINT);
+ break;
+
+ case 16:
+ RETURN_FOR_SWIZZLE1(X, PIPE_FORMAT_R16_UINT);
+ break;
+
+ case 32:
+ RETURN_FOR_SWIZZLE1(X, PIPE_FORMAT_R32_UINT);
+ break;
+ }
+ break;
+
+ case 2:
+ switch (desc->channel[0].size) {
+ case 8:
+ /* All formats in each group must be of the same type.
+ * We can't use UINT for R8G8 while using UNORM for G8R8.
+ */
+ RETURN_FOR_SWIZZLE2(X, Y, PIPE_FORMAT_R8G8_UNORM);
+ RETURN_FOR_SWIZZLE2(Y, X, PIPE_FORMAT_G8R8_UNORM);
+ break;
+
+ case 16:
+ RETURN_FOR_SWIZZLE2(X, Y, PIPE_FORMAT_R16G16_UNORM);
+ RETURN_FOR_SWIZZLE2(Y, X, PIPE_FORMAT_G16R16_UNORM);
+ break;
+
+ case 32:
+ RETURN_FOR_SWIZZLE2(X, Y, PIPE_FORMAT_R32G32_UINT);
+ break;
+ }
+ break;
+
+ case 3:
+ switch (desc->channel[0].size) {
+ case 8:
+ RETURN_FOR_SWIZZLE3(X, Y, Z, PIPE_FORMAT_R8G8B8_UINT);
+ break;
+
+ case 16:
+ RETURN_FOR_SWIZZLE3(X, Y, Z, PIPE_FORMAT_R16G16B16_UINT);
+ break;
+
+ case 32:
+ RETURN_FOR_SWIZZLE3(X, Y, Z, PIPE_FORMAT_R32G32B32_UINT);
+ break;
+ }
+ break;
+
+ case 4:
+ switch (desc->channel[0].size) {
+ case 8:
+ RETURN_FOR_SWIZZLE4(X, Y, Z, W, PIPE_FORMAT_R8G8B8A8_UNORM);
+ RETURN_FOR_SWIZZLE4(X, Y, Z, 1, PIPE_FORMAT_R8G8B8A8_UNORM);
+ RETURN_FOR_SWIZZLE4(Z, Y, X, W, PIPE_FORMAT_B8G8R8A8_UNORM);
+ RETURN_FOR_SWIZZLE4(Z, Y, X, 1, PIPE_FORMAT_B8G8R8A8_UNORM);
+ RETURN_FOR_SWIZZLE4(W, Z, Y, X, PIPE_FORMAT_A8B8G8R8_UNORM);
+ RETURN_FOR_SWIZZLE4(1, Z, Y, X, PIPE_FORMAT_A8B8G8R8_UNORM);
+ RETURN_FOR_SWIZZLE4(W, X, Y, Z, PIPE_FORMAT_A8R8G8B8_UNORM);
+ RETURN_FOR_SWIZZLE4(1, X, Y, Z, PIPE_FORMAT_A8R8G8B8_UNORM);
+ break;
+
+ case 16:
+ RETURN_FOR_SWIZZLE4(X, Y, Z, W, PIPE_FORMAT_R16G16B16A16_UINT);
+ RETURN_FOR_SWIZZLE4(X, Y, Z, 1, PIPE_FORMAT_R16G16B16A16_UINT);
+ break;
+
+ case 32:
+ RETURN_FOR_SWIZZLE4(X, Y, Z, W, PIPE_FORMAT_R32G32B32A32_UINT);
+ RETURN_FOR_SWIZZLE4(X, Y, Z, 1, PIPE_FORMAT_R32G32B32A32_UINT);
+ break;
+ }
+ }
+
+ assert(!"unknown array format");
+ return PIPE_FORMAT_NONE;
+ }
+
+ assert(!"unknown packed format");
+ return PIPE_FORMAT_NONE;
+}
+
+/**
+ * Return true if the swizzle is XYZW in case of a 4-channel format,
+ * XYZ in case of a 3-channel format, XY in case of a 2-channel format, or X in case of a 1-channel format.
+ */
+static bool
+has_identity_swizzle(const struct util_format_description *desc)
+{
+ int i;
+
+ for (i = 0; i < desc->nr_channels; i++)
+ if (desc->swizzle[i] != UTIL_FORMAT_SWIZZLE_X + i)
+ return false;
+
+ return true;
+}
+
+/**
+ * Return a canonical format for the given bits and channel size.
+ */
+static enum pipe_format
+canonical_format_from_bits(unsigned bits, unsigned channel_size)
+{
+ switch (bits) {
+ case 8:
+ if (channel_size == 8)
+ return get_canonical_format(PIPE_FORMAT_R8_UINT);
+ break;
+
+ case 16:
+ if (channel_size == 8)
+ return get_canonical_format(PIPE_FORMAT_R8G8_UINT);
+ if (channel_size == 16)
+ return get_canonical_format(PIPE_FORMAT_R16_UINT);
+ break;
+
+ case 32:
+ if (channel_size == 8)
+ return get_canonical_format(PIPE_FORMAT_R8G8B8A8_UINT);
+ if (channel_size == 16)
+ return get_canonical_format(PIPE_FORMAT_R16G16_UINT);
+ if (channel_size == 32)
+ return get_canonical_format(PIPE_FORMAT_R32_UINT);
+ break;
+
+ case 64:
+ if (channel_size == 16)
+ return get_canonical_format(PIPE_FORMAT_R16G16B16A16_UINT);
+ if (channel_size == 32)
+ return get_canonical_format(PIPE_FORMAT_R32G32_UINT);
+ break;
+
+ case 128:
+ if (channel_size == 32)
+ return get_canonical_format(PIPE_FORMAT_R32G32B32A32_UINT);
+ break;
+ }
+
+ assert(!"impossible format");
+ return PIPE_FORMAT_NONE;
+}
+
+static void
+blit(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ enum pipe_format dst_format,
+ unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ enum pipe_format src_format,
+ unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ struct pipe_blit_info blit = {{0}};
+
+ blit.src.resource = src;
+ blit.dst.resource = dst;
+ blit.src.format = src_format;
+ blit.dst.format = dst_format;
+ blit.src.level = src_level;
+ blit.dst.level = dst_level;
+ blit.src.box = *src_box;
+ u_box_3d(dstx, dsty, dstz, src_box->width, src_box->height,
+ src_box->depth, &blit.dst.box);
+ blit.mask = PIPE_MASK_RGBA;
+ blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+ pipe->blit(pipe, &blit);
+}
+
+static void
+swizzled_copy(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ const struct util_format_description *src_desc, *dst_desc;
+ unsigned bits;
+ enum pipe_format blit_src_format, blit_dst_format;
+
+ /* Get equivalent canonical formats. Those are always array formats and
+ * copying between compatible canonical formats behaves either like
+ * memcpy or like swizzled memcpy. The idea is that we won't have to care
+ * about the channel type from this point on.
+ * Only the swizzle and channel size.
+ */
+ blit_src_format = get_canonical_format(src->format);
+ blit_dst_format = get_canonical_format(dst->format);
+
+ assert(blit_src_format != PIPE_FORMAT_NONE);
+ assert(blit_dst_format != PIPE_FORMAT_NONE);
+
+ src_desc = util_format_description(blit_src_format);
+ dst_desc = util_format_description(blit_dst_format);
+
+ assert(src_desc->block.bits == dst_desc->block.bits);
+ bits = src_desc->block.bits;
+
+ if (dst_desc->channel[0].size == src_desc->channel[0].size) {
+ /* Only the swizzle is different, which means we can just blit,
+ * e.g. RGBA -> BGRA.
+ */
+ } else if (has_identity_swizzle(src_desc)) {
+ /* Src is unswizzled and dst can be swizzled, so src is typecast
+ * to an equivalent dst-compatible format.
+ * e.g. R32 -> BGRA8 is realized as RGBA8 -> BGRA8
+ */
+ blit_src_format =
+ canonical_format_from_bits(bits, dst_desc->channel[0].size);
+ } else if (has_identity_swizzle(dst_desc)) {
+ /* Dst is unswizzled and src can be swizzled, so dst is typecast
+ * to an equivalent src-compatible format.
+ * e.g. BGRA8 -> R32 is realized as BGRA8 -> RGBA8
+ */
+ blit_dst_format =
+ canonical_format_from_bits(bits, src_desc->channel[0].size);
+ } else {
+ assert(!"This should have been handled by handle_complex_copy.");
+ return;
+ }
+
+ blit(pipe, dst, blit_dst_format, dst_level, dstx, dsty, dstz,
+ src, blit_src_format, src_level, src_box);
+}
+
+static bool
+same_size_and_swizzle(const struct util_format_description *d1,
+ const struct util_format_description *d2)
+{
+ int i;
+
+ if (d1->layout != d2->layout ||
+ d1->nr_channels != d2->nr_channels ||
+ d1->is_array != d2->is_array)
+ return false;
+
+ for (i = 0; i < d1->nr_channels; i++) {
+ if (d1->channel[i].size != d2->channel[i].size)
+ return false;
+
+ if (d1->swizzle[i] <= UTIL_FORMAT_SWIZZLE_W &&
+ d2->swizzle[i] <= UTIL_FORMAT_SWIZZLE_W &&
+ d1->swizzle[i] != d2->swizzle[i])
+ return false;
+ }
+
+ return true;
+}
+
+static struct pipe_resource *
+create_texture(struct pipe_screen *screen, enum pipe_format format,
+ unsigned nr_samples,
+ unsigned width, unsigned height, unsigned depth)
+{
+ struct pipe_resource templ;
+
+ memset(&templ, 0, sizeof(templ));
+ templ.format = format;
+ templ.width0 = width;
+ templ.height0 = height;
+ templ.depth0 = 1;
+ templ.array_size = depth;
+ templ.nr_samples = nr_samples;
+ templ.usage = PIPE_USAGE_DEFAULT;
+ templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+
+ if (depth > 1)
+ templ.target = PIPE_TEXTURE_2D_ARRAY;
+ else
+ templ.target = PIPE_TEXTURE_2D;
+
+ return screen->resource_create(screen, &templ);
+}
+
+/**
+ * Handle complex format conversions using 2 blits with a temporary texture
+ * in between, e.g. blitting from B10G10R10A2 to G16R16.
+ *
+ * This example is implemented this way:
+ * 1) First, blit from B10G10R10A2 to R10G10B10A2, which is canonical, so it
+ * can be reinterpreted as a different canonical format of the same bpp,
+ * such as R16G16. This blit only swaps R and B 10-bit components.
+ * 2) Finally, blit the result, which is R10G10B10A2, as R16G16 to G16R16.
+ * This blit only swaps R and G 16-bit components.
+ */
+static bool
+handle_complex_copy(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ unsigned src_level,
+ const struct pipe_box *src_box,
+ enum pipe_format noncanon_format,
+ enum pipe_format canon_format)
+{
+ struct pipe_box temp_box;
+ struct pipe_resource *temp = NULL;
+ const struct util_format_description *src_desc, *dst_desc;
+ const struct util_format_description *canon_desc, *noncanon_desc;
+ bool src_is_canon;
+ bool src_is_noncanon;
+ bool dst_is_canon;
+ bool dst_is_noncanon;
+
+ src_desc = util_format_description(src->format);
+ dst_desc = util_format_description(dst->format);
+ canon_desc = util_format_description(canon_format);
+ noncanon_desc = util_format_description(noncanon_format);
+
+ src_is_canon = same_size_and_swizzle(src_desc, canon_desc);
+ dst_is_canon = same_size_and_swizzle(dst_desc, canon_desc);
+ src_is_noncanon = same_size_and_swizzle(src_desc, noncanon_desc);
+ dst_is_noncanon = same_size_and_swizzle(dst_desc, noncanon_desc);
+
+ if (src_is_noncanon) {
+ /* Simple case - only types differ (e.g. UNORM and UINT). */
+ if (dst_is_noncanon) {
+ blit(pipe, dst, noncanon_format, dst_level, dstx, dsty, dstz, src,
+ noncanon_format, src_level, src_box);
+ return true;
+ }
+
+ /* Simple case - only types and swizzles differ. */
+ if (dst_is_canon) {
+ blit(pipe, dst, canon_format, dst_level, dstx, dsty, dstz, src,
+ noncanon_format, src_level, src_box);
+ return true;
+ }
+
+ /* Use the temporary texture. Src is converted to a canonical format,
+ * then proceed with the generic swizzled_copy.
+ */
+ temp = create_texture(pipe->screen, canon_format, src->nr_samples,
+ src_box->width,
+ src_box->height, src_box->depth);
+
+ u_box_3d(0, 0, 0, src_box->width, src_box->height, src_box->depth,
+ &temp_box);
+
+ blit(pipe, temp, canon_format, 0, 0, 0, 0, src, noncanon_format,
+ src_level, src_box);
+ swizzled_copy(pipe, dst, dst_level, dstx, dsty, dstz, temp, 0,
+ &temp_box);
+ pipe_resource_reference(&temp, NULL);
+ return true;
+ }
+
+ if (dst_is_noncanon) {
+ /* Simple case - only types and swizzles differ. */
+ if (src_is_canon) {
+ blit(pipe, dst, noncanon_format, dst_level, dstx, dsty, dstz, src,
+ canon_format, src_level, src_box);
+ return true;
+ }
+
+ /* Use the temporary texture. First, use the generic copy, but use
+ * a canonical format in the destination. Then convert the temporary to the non-canonical format of dst with a second blit. */
+ temp = create_texture(pipe->screen, canon_format, dst->nr_samples,
+ src_box->width,
+ src_box->height, src_box->depth);
+
+ u_box_3d(0, 0, 0, src_box->width, src_box->height, src_box->depth,
+ &temp_box);
+
+ swizzled_copy(pipe, temp, 0, 0, 0, 0, src, src_level, src_box);
+ blit(pipe, dst, noncanon_format, dst_level, dstx, dsty, dstz, temp,
+ canon_format, 0, &temp_box);
+ pipe_resource_reference(&temp, NULL);
+ return true;
+ }
+
+ return false;
+}
+
+static void
+copy_image(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ if (src->format == dst->format ||
+ util_format_is_compressed(src->format) ||
+ util_format_is_compressed(dst->format)) {
+ pipe->resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box);
+ return;
+ }
+
+ /* Copying to/from B10G10R10*2 needs 2 blits with R10G10B10A2
+ * as a temporary texture in between.
+ */
+ if (handle_complex_copy(pipe, dst, dst_level, dstx, dsty, dstz, src,
+ src_level, src_box, PIPE_FORMAT_B10G10R10A2_UINT,
+ PIPE_FORMAT_R10G10B10A2_UINT))
+ return;
+
+ /* Copying to/from G8R8 needs 2 blits with R8G8 as a temporary texture
+ * in between.
+ */
+ if (handle_complex_copy(pipe, dst, dst_level, dstx, dsty, dstz, src,
+ src_level, src_box, PIPE_FORMAT_G8R8_UNORM,
+ PIPE_FORMAT_R8G8_UNORM))
+ return;
+
+ /* Copying to/from G16R16 needs 2 blits with R16G16 as a temporary texture
+ * in between.
+ */
+ if (handle_complex_copy(pipe, dst, dst_level, dstx, dsty, dstz, src,
+ src_level, src_box, PIPE_FORMAT_G16R16_UNORM,
+ PIPE_FORMAT_R16G16_UNORM))
+ return;
+
+ /* Only allow non-identity swizzling on RGBA8 formats. */
+
+ /* Simple copy, memcpy with swizzling, no format conversion. */
+ swizzled_copy(pipe, dst, dst_level, dstx, dsty, dstz, src, src_level,
+ src_box);
+}
+
+static void
+st_CopyImageSubData(struct gl_context *ctx,
+ struct gl_texture_image *src_image,
+ struct gl_renderbuffer *src_renderbuffer,
+ int src_x, int src_y, int src_z,
+ struct gl_texture_image *dst_image,
+ struct gl_renderbuffer *dst_renderbuffer,
+ int dst_x, int dst_y, int dst_z,
+ int src_width, int src_height)
+{
+ struct st_context *st = st_context(ctx);
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_resource *src_res, *dst_res;
+ struct pipe_box box;
+ int src_level, dst_level;
+
+ if (src_image) {
+ struct st_texture_image *src = st_texture_image(src_image);
+ src_res = src->pt;
+ src_level = src_image->Level;
+ src_z += src_image->Face;
+ } else {
+ struct st_renderbuffer *src = st_renderbuffer(src_renderbuffer);
+ src_res = src->texture;
+ src_level = 0;
+ }
+
+ if (dst_image) {
+ struct st_texture_image *dst = st_texture_image(dst_image);
+ dst_res = dst->pt;
+ dst_level = dst_image->Level;
+ dst_z += dst_image->Face;
+ } else {
+ struct st_renderbuffer *dst = st_renderbuffer(dst_renderbuffer);
+ dst_res = dst->texture;
+ dst_level = 0;
+ }
+
+ u_box_2d_zslice(src_x, src_y, src_z, src_width, src_height, &box);
+
+ copy_image(pipe, dst_res, dst_level, dst_x, dst_y, dst_z,
+ src_res, src_level, &box);
+}
+
+void
+st_init_copy_image_functions(struct dd_function_table *functions)
+{
+ functions->CopyImageSubData = st_CopyImageSubData;
+}
diff --git a/src/mesa/state_tracker/st_cb_copyimage.h b/src/mesa/state_tracker/st_cb_copyimage.h
new file mode 100644
index 00000000000..d17f35c0953
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_copyimage.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef ST_CB_COPY_IMAGE_H
+#define ST_CB_COPY_IMAGE_H
+
+struct dd_function_table;
+
+extern void
+st_init_copy_image_functions(struct dd_function_table *functions);
+
+#endif /* ST_CB_COPY_IMAGE_H */
diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c
index b9997dacfd2..747b41464ae 100644
--- a/src/mesa/state_tracker/st_cb_rasterpos.c
+++ b/src/mesa/state_tracker/st_cb_rasterpos.c
@@ -39,6 +39,7 @@
#include "main/imports.h"
#include "main/macros.h"
#include "main/feedback.h"
+#include "main/rastpos.h"
#include "st_context.h"
#include "st_atom.h"
@@ -224,6 +225,15 @@ st_RasterPos(struct gl_context *ctx, const GLfloat v[4])
struct rastpos_stage *rs;
const struct gl_client_array **saved_arrays = ctx->Array._DrawArrays;
+ if (ctx->VertexProgram._Current == NULL ||
+ ctx->VertexProgram._Current == ctx->VertexProgram._TnlProgram) {
+ /* No vertex shader/program is enabled, use the simple/fast fixed-
+ * function implementation of RasterPos.
+ */
+ _mesa_RasterPos(ctx, v);
+ return;
+ }
+
if (st->rastpos_stage) {
/* get rastpos stage info */
rs = rastpos_stage(st->rastpos_stage);
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 5d25fed317e..d4c916e8057 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1873,55 +1873,6 @@ st_TextureView(struct gl_context *ctx,
return GL_TRUE;
}
-/* HACK: this is only enough for the most basic uses of CopyImage. Must fix
- * before actually exposing the extension.
- */
-static void
-st_CopyImageSubData(struct gl_context *ctx,
- struct gl_texture_image *src_image,
- struct gl_renderbuffer *src_renderbuffer,
- int src_x, int src_y, int src_z,
- struct gl_texture_image *dst_image,
- struct gl_renderbuffer *dst_renderbuffer,
- int dst_x, int dst_y, int dst_z,
- int src_width, int src_height)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- struct pipe_resource *src_res, *dst_res;
- struct pipe_box box;
- int src_level, dst_level;
-
- if (src_image) {
- struct st_texture_image *src = st_texture_image(src_image);
- src_res = src->pt;
- src_level = src_image->Level;
- }
- else {
- struct st_renderbuffer *src = st_renderbuffer(src_renderbuffer);
- src_res = src->texture;
- src_level = 0;
- }
-
- if (dst_image) {
- struct st_texture_image *dst = st_texture_image(dst_image);
- dst_res = dst->pt;
- dst_level = dst_image->Level;
- }
- else {
- struct st_renderbuffer *dst = st_renderbuffer(dst_renderbuffer);
- dst_res = dst->texture;
- dst_level = 0;
- }
-
- u_box_2d_zslice(src_x, src_y, src_z, src_width, src_height, &box);
- pipe->resource_copy_region(pipe, dst_res, dst_level,
- dst_x, dst_y, dst_z,
- src_res, src_level,
- &box);
-}
-
-
void
st_init_texture_functions(struct dd_function_table *functions)
{
@@ -1953,6 +1904,4 @@ st_init_texture_functions(struct dd_function_table *functions)
functions->AllocTextureStorage = st_AllocTextureStorage;
functions->TextureView = st_TextureView;
-
- functions->CopyImageSubData = st_CopyImageSubData;
}
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index 5abb17385c2..6e20fd1fda2 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -44,6 +44,7 @@
#include "st_cb_bufferobjects.h"
#include "st_cb_clear.h"
#include "st_cb_condrender.h"
+#include "st_cb_copyimage.h"
#include "st_cb_drawpixels.h"
#include "st_cb_rasterpos.h"
#include "st_cb_drawtex.h"
@@ -430,6 +431,7 @@ void st_init_driver_functions(struct pipe_screen *screen,
st_init_bufferobject_functions(functions);
st_init_clear_functions(functions);
st_init_bitmap_functions(functions);
+ st_init_copy_image_functions(functions);
st_init_drawpixels_functions(functions);
st_init_rasterpos_functions(functions);
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index d4724b46e0a..bd7cbccc20c 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -439,6 +439,7 @@ void st_init_extensions(struct pipe_screen *screen,
{ o(ARB_base_instance), PIPE_CAP_START_INSTANCE },
{ o(ARB_buffer_storage), PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT },
{ o(ARB_color_buffer_float), PIPE_CAP_VERTEX_COLOR_UNCLAMPED },
+ { o(ARB_copy_image), PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS },
{ o(ARB_depth_clamp), PIPE_CAP_DEPTH_CLIP_DISABLE },
{ o(ARB_depth_texture), PIPE_CAP_TEXTURE_SHADOW_MAP },
{ o(ARB_draw_buffers_blend), PIPE_CAP_INDEP_BLEND_FUNC },
diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c
index 26e1c21f6c5..b3700406df0 100644
--- a/src/mesa/state_tracker/st_gen_mipmap.c
+++ b/src/mesa/state_tracker/st_gen_mipmap.c
@@ -61,6 +61,8 @@ compute_num_levels(struct gl_context *ctx,
numLevels = texObj->BaseLevel + baseImage->MaxNumLevels;
numLevels = MIN2(numLevels, (GLuint) texObj->MaxLevel + 1);
+ if (texObj->Immutable)
+ numLevels = MIN2(numLevels, texObj->NumLevels);
assert(numLevels >= 1);
return numLevels;
@@ -99,38 +101,40 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target,
*/
stObj->lastLevel = lastLevel;
- if (pt->last_level < lastLevel) {
- /* The current gallium texture doesn't have space for all the
- * mipmap levels we need to generate. So allocate a new texture.
- */
- struct pipe_resource *oldTex = stObj->pt;
-
- /* create new texture with space for more levels */
- stObj->pt = st_texture_create(st,
- oldTex->target,
- oldTex->format,
- lastLevel,
- oldTex->width0,
- oldTex->height0,
- oldTex->depth0,
- oldTex->array_size,
- 0,
- oldTex->bind);
-
- /* This will copy the old texture's base image into the new texture
- * which we just allocated.
- */
- st_finalize_texture(ctx, st->pipe, texObj);
-
- /* release the old tex (will likely be freed too) */
- pipe_resource_reference(&oldTex, NULL);
- st_texture_release_all_sampler_views(st, stObj);
- }
- else {
- /* Make sure that the base texture image data is present in the
- * texture buffer.
- */
- st_finalize_texture(ctx, st->pipe, texObj);
+ if (!texObj->Immutable) {
+ if (pt->last_level < lastLevel) {
+ /* The current gallium texture doesn't have space for all the
+ * mipmap levels we need to generate. So allocate a new texture.
+ */
+ struct pipe_resource *oldTex = stObj->pt;
+
+ /* create new texture with space for more levels */
+ stObj->pt = st_texture_create(st,
+ oldTex->target,
+ oldTex->format,
+ lastLevel,
+ oldTex->width0,
+ oldTex->height0,
+ oldTex->depth0,
+ oldTex->array_size,
+ 0,
+ oldTex->bind);
+
+ /* This will copy the old texture's base image into the new texture
+ * which we just allocated.
+ */
+ st_finalize_texture(ctx, st->pipe, texObj);
+
+ /* release the old tex (will likely be freed too) */
+ pipe_resource_reference(&oldTex, NULL);
+ st_texture_release_all_sampler_views(st, stObj);
+ }
+ else {
+ /* Make sure that the base texture image data is present in the
+ * texture buffer.
+ */
+ st_finalize_texture(ctx, st->pipe, texObj);
+ }
}
pt = stObj->pt;
diff --git a/src/mesa/tnl/t_rasterpos.c b/src/mesa/tnl/t_rasterpos.c
deleted file mode 100644
index 4bd9ac8539e..00000000000
--- a/src/mesa/tnl/t_rasterpos.c
+++ /dev/null
@@ -1,478 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#include "c99_math.h"
-#include "main/glheader.h"
-#include "main/feedback.h"
-#include "main/light.h"
-#include "main/macros.h"
-#include "util/simple_list.h"
-#include "main/mtypes.h"
-#include "main/viewport.h"
-
-#include "math/m_matrix.h"
-#include "tnl/tnl.h"
-
-
-
-/**
- * Clip a point against the view volume.
- *
- * \param v vertex vector describing the point to clip.
- *
- * \return zero if outside view volume, or one if inside.
- */
-static GLuint
-viewclip_point_xy( const GLfloat v[] )
-{
- if ( v[0] > v[3] || v[0] < -v[3]
- || v[1] > v[3] || v[1] < -v[3] ) {
- return 0;
- }
- else {
- return 1;
- }
-}
-
-
-/**
- * Clip a point against the far/near Z clipping planes.
- *
- * \param v vertex vector describing the point to clip.
- *
- * \return zero if outside view volume, or one if inside.
- */
-static GLuint
-viewclip_point_z( const GLfloat v[] )
-{
- if (v[2] > v[3] || v[2] < -v[3] ) {
- return 0;
- }
- else {
- return 1;
- }
-}
-
-
-/**
- * Clip a point against the user clipping planes.
- *
- * \param ctx GL context.
- * \param v vertex vector describing the point to clip.
- *
- * \return zero if the point was clipped, or one otherwise.
- */
-static GLuint
-userclip_point( struct gl_context *ctx, const GLfloat v[] )
-{
- GLuint p;
-
- for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
- if (ctx->Transform.ClipPlanesEnabled & (1 << p)) {
- GLfloat dot = v[0] * ctx->Transform._ClipUserPlane[p][0]
- + v[1] * ctx->Transform._ClipUserPlane[p][1]
- + v[2] * ctx->Transform._ClipUserPlane[p][2]
- + v[3] * ctx->Transform._ClipUserPlane[p][3];
- if (dot < 0.0F) {
- return 0;
- }
- }
- }
-
- return 1;
-}
-
-
-/**
- * Compute lighting for the raster position. RGB modes computed.
- * \param ctx the context
- * \param vertex vertex location
- * \param normal normal vector
- * \param Rcolor returned color
- * \param Rspec returned specular color (if separate specular enabled)
- */
-static void
-shade_rastpos(struct gl_context *ctx,
- const GLfloat vertex[4],
- const GLfloat normal[3],
- GLfloat Rcolor[4],
- GLfloat Rspec[4])
-{
- /*const*/ GLfloat (*base)[3] = ctx->Light._BaseColor;
- const struct gl_light *light;
- GLfloat diffuseColor[4], specularColor[4]; /* for RGB mode only */
-
- COPY_3V(diffuseColor, base[0]);
- diffuseColor[3] = CLAMP(
- ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_DIFFUSE][3], 0.0F, 1.0F );
- ASSIGN_4V(specularColor, 0.0, 0.0, 0.0, 1.0);
-
- foreach (light, &ctx->Light.EnabledList) {
- GLfloat attenuation = 1.0;
- GLfloat VP[3]; /* vector from vertex to light pos */
- GLfloat n_dot_VP;
- GLfloat diffuseContrib[3], specularContrib[3];
-
- if (!(light->_Flags & LIGHT_POSITIONAL)) {
- /* light at infinity */
- COPY_3V(VP, light->_VP_inf_norm);
- attenuation = light->_VP_inf_spot_attenuation;
- }
- else {
- /* local/positional light */
- GLfloat d;
-
- /* VP = vector from vertex pos to light[i].pos */
- SUB_3V(VP, light->_Position, vertex);
- /* d = length(VP) */
- d = (GLfloat) LEN_3FV( VP );
- if (d > 1.0e-6F) {
- /* normalize VP */
- GLfloat invd = 1.0F / d;
- SELF_SCALE_SCALAR_3V(VP, invd);
- }
-
- /* atti */
- attenuation = 1.0F / (light->ConstantAttenuation + d *
- (light->LinearAttenuation + d *
- light->QuadraticAttenuation));
-
- if (light->_Flags & LIGHT_SPOT) {
- GLfloat PV_dot_dir = - DOT3(VP, light->_NormSpotDirection);
-
- if (PV_dot_dir<light->_CosCutoff) {
- continue;
- }
- else {
- GLfloat spot = powf(PV_dot_dir, light->SpotExponent);
- attenuation *= spot;
- }
- }
- }
-
- if (attenuation < 1e-3F)
- continue;
-
- n_dot_VP = DOT3( normal, VP );
-
- if (n_dot_VP < 0.0F) {
- ACC_SCALE_SCALAR_3V(diffuseColor, attenuation, light->_MatAmbient[0]);
- continue;
- }
-
- /* Ambient + diffuse */
- COPY_3V(diffuseContrib, light->_MatAmbient[0]);
- ACC_SCALE_SCALAR_3V(diffuseContrib, n_dot_VP, light->_MatDiffuse[0]);
-
- /* Specular */
- {
- const GLfloat *h;
- GLfloat n_dot_h;
-
- ASSIGN_3V(specularContrib, 0.0, 0.0, 0.0);
-
- if (ctx->Light.Model.LocalViewer) {
- GLfloat v[3];
- COPY_3V(v, vertex);
- NORMALIZE_3FV(v);
- SUB_3V(VP, VP, v);
- NORMALIZE_3FV(VP);
- h = VP;
- }
- else if (light->_Flags & LIGHT_POSITIONAL) {
- ACC_3V(VP, ctx->_EyeZDir);
- NORMALIZE_3FV(VP);
- h = VP;
- }
- else {
- h = light->_h_inf_norm;
- }
-
- n_dot_h = DOT3(normal, h);
-
- if (n_dot_h > 0.0F) {
- GLfloat shine;
- GLfloat spec_coef;
-
- shine = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS][0];
- spec_coef = powf(n_dot_h, shine);
-
- if (spec_coef > 1.0e-10F) {
- if (ctx->Light.Model.ColorControl==GL_SEPARATE_SPECULAR_COLOR) {
- ACC_SCALE_SCALAR_3V( specularContrib, spec_coef,
- light->_MatSpecular[0]);
- }
- else {
- ACC_SCALE_SCALAR_3V( diffuseContrib, spec_coef,
- light->_MatSpecular[0]);
- }
- }
- }
- }
-
- ACC_SCALE_SCALAR_3V( diffuseColor, attenuation, diffuseContrib );
- ACC_SCALE_SCALAR_3V( specularColor, attenuation, specularContrib );
- }
-
- Rcolor[0] = CLAMP(diffuseColor[0], 0.0F, 1.0F);
- Rcolor[1] = CLAMP(diffuseColor[1], 0.0F, 1.0F);
- Rcolor[2] = CLAMP(diffuseColor[2], 0.0F, 1.0F);
- Rcolor[3] = CLAMP(diffuseColor[3], 0.0F, 1.0F);
- Rspec[0] = CLAMP(specularColor[0], 0.0F, 1.0F);
- Rspec[1] = CLAMP(specularColor[1], 0.0F, 1.0F);
- Rspec[2] = CLAMP(specularColor[2], 0.0F, 1.0F);
- Rspec[3] = CLAMP(specularColor[3], 0.0F, 1.0F);
-}
-
-
-/**
- * Do texgen needed for glRasterPos.
- * \param ctx rendering context
- * \param vObj object-space vertex coordinate
- * \param vEye eye-space vertex coordinate
- * \param normal vertex normal
- * \param unit texture unit number
- * \param texcoord incoming texcoord and resulting texcoord
- */
-static void
-compute_texgen(struct gl_context *ctx, const GLfloat vObj[4], const GLfloat vEye[4],
- const GLfloat normal[3], GLuint unit, GLfloat texcoord[4])
-{
- const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-
- /* always compute sphere map terms, just in case */
- GLfloat u[3], two_nu, rx, ry, rz, m, mInv;
- COPY_3V(u, vEye);
- NORMALIZE_3FV(u);
- two_nu = 2.0F * DOT3(normal, u);
- rx = u[0] - normal[0] * two_nu;
- ry = u[1] - normal[1] * two_nu;
- rz = u[2] - normal[2] * two_nu;
- m = rx * rx + ry * ry + (rz + 1.0F) * (rz + 1.0F);
- if (m > 0.0F)
- mInv = 0.5F * (1.0f / sqrtf(m));
- else
- mInv = 0.0F;
-
- if (texUnit->TexGenEnabled & S_BIT) {
- switch (texUnit->GenS.Mode) {
- case GL_OBJECT_LINEAR:
- texcoord[0] = DOT4(vObj, texUnit->GenS.ObjectPlane);
- break;
- case GL_EYE_LINEAR:
- texcoord[0] = DOT4(vEye, texUnit->GenS.EyePlane);
- break;
- case GL_SPHERE_MAP:
- texcoord[0] = rx * mInv + 0.5F;
- break;
- case GL_REFLECTION_MAP:
- texcoord[0] = rx;
- break;
- case GL_NORMAL_MAP:
- texcoord[0] = normal[0];
- break;
- default:
- _mesa_problem(ctx, "Bad S texgen in compute_texgen()");
- return;
- }
- }
-
- if (texUnit->TexGenEnabled & T_BIT) {
- switch (texUnit->GenT.Mode) {
- case GL_OBJECT_LINEAR:
- texcoord[1] = DOT4(vObj, texUnit->GenT.ObjectPlane);
- break;
- case GL_EYE_LINEAR:
- texcoord[1] = DOT4(vEye, texUnit->GenT.EyePlane);
- break;
- case GL_SPHERE_MAP:
- texcoord[1] = ry * mInv + 0.5F;
- break;
- case GL_REFLECTION_MAP:
- texcoord[1] = ry;
- break;
- case GL_NORMAL_MAP:
- texcoord[1] = normal[1];
- break;
- default:
- _mesa_problem(ctx, "Bad T texgen in compute_texgen()");
- return;
- }
- }
-
- if (texUnit->TexGenEnabled & R_BIT) {
- switch (texUnit->GenR.Mode) {
- case GL_OBJECT_LINEAR:
- texcoord[2] = DOT4(vObj, texUnit->GenR.ObjectPlane);
- break;
- case GL_EYE_LINEAR:
- texcoord[2] = DOT4(vEye, texUnit->GenR.EyePlane);
- break;
- case GL_REFLECTION_MAP:
- texcoord[2] = rz;
- break;
- case GL_NORMAL_MAP:
- texcoord[2] = normal[2];
- break;
- default:
- _mesa_problem(ctx, "Bad R texgen in compute_texgen()");
- return;
- }
- }
-
- if (texUnit->TexGenEnabled & Q_BIT) {
- switch (texUnit->GenQ.Mode) {
- case GL_OBJECT_LINEAR:
- texcoord[3] = DOT4(vObj, texUnit->GenQ.ObjectPlane);
- break;
- case GL_EYE_LINEAR:
- texcoord[3] = DOT4(vEye, texUnit->GenQ.EyePlane);
- break;
- default:
- _mesa_problem(ctx, "Bad Q texgen in compute_texgen()");
- return;
- }
- }
-}
-
-
-/**
- * glRasterPos transformation. Typically called via ctx->Driver.RasterPos().
- * XXX some of this code (such as viewport xform, clip testing and setting
- * of ctx->Current.Raster* fields) could get lifted up into the
- * main/rasterpos.c code.
- *
- * \param vObj vertex position in object space
- */
-void
-_tnl_RasterPos(struct gl_context *ctx, const GLfloat vObj[4])
-{
- if (ctx->VertexProgram._Enabled) {
- /* XXX implement this */
- _mesa_problem(ctx, "Vertex programs not implemented for glRasterPos");
- return;
- }
- else {
- GLfloat eye[4], clip[4], ndc[3], d;
- GLfloat *norm, eyenorm[3];
- GLfloat *objnorm = ctx->Current.Attrib[VERT_ATTRIB_NORMAL];
- float scale[3], translate[3];
-
- /* apply modelview matrix: eye = MV * obj */
- TRANSFORM_POINT( eye, ctx->ModelviewMatrixStack.Top->m, vObj );
- /* apply projection matrix: clip = Proj * eye */
- TRANSFORM_POINT( clip, ctx->ProjectionMatrixStack.Top->m, eye );
-
- /* clip to view volume. */
- if (!ctx->Transform.DepthClamp) {
- if (viewclip_point_z(clip) == 0) {
- ctx->Current.RasterPosValid = GL_FALSE;
- return;
- }
- }
- if (!ctx->Transform.RasterPositionUnclipped) {
- if (viewclip_point_xy(clip) == 0) {
- ctx->Current.RasterPosValid = GL_FALSE;
- return;
- }
- }
-
- /* clip to user clipping planes */
- if (ctx->Transform.ClipPlanesEnabled && !userclip_point(ctx, clip)) {
- ctx->Current.RasterPosValid = GL_FALSE;
- return;
- }
-
- /* ndc = clip / W */
- d = (clip[3] == 0.0F) ? 1.0F : 1.0F / clip[3];
- ndc[0] = clip[0] * d;
- ndc[1] = clip[1] * d;
- ndc[2] = clip[2] * d;
- /* wincoord = viewport_mapping(ndc) */
- _mesa_get_viewport_xform(ctx, 0, scale, translate);
- ctx->Current.RasterPos[0] = ndc[0] * scale[0] + translate[0];
- ctx->Current.RasterPos[1] = ndc[1] * scale[1] + translate[1];
- ctx->Current.RasterPos[2] = ndc[2] * scale[2] + translate[2];
- ctx->Current.RasterPos[3] = clip[3];
-
- if (ctx->Transform.DepthClamp) {
- ctx->Current.RasterPos[3] = CLAMP(ctx->Current.RasterPos[3],
- ctx->ViewportArray[0].Near,
- ctx->ViewportArray[0].Far);
- }
-
- /* compute raster distance */
- if (ctx->Fog.FogCoordinateSource == GL_FOG_COORDINATE_EXT)
- ctx->Current.RasterDistance = ctx->Current.Attrib[VERT_ATTRIB_FOG][0];
- else
- ctx->Current.RasterDistance =
- sqrtf( eye[0]*eye[0] + eye[1]*eye[1] + eye[2]*eye[2] );
-
- /* compute transformed normal vector (for lighting or texgen) */
- if (ctx->_NeedEyeCoords) {
- const GLfloat *inv = ctx->ModelviewMatrixStack.Top->inv;
- TRANSFORM_NORMAL( eyenorm, objnorm, inv );
- norm = eyenorm;
- }
- else {
- norm = objnorm;
- }
-
- /* update raster color */
- if (ctx->Light.Enabled) {
- /* lighting */
- shade_rastpos( ctx, vObj, norm,
- ctx->Current.RasterColor,
- ctx->Current.RasterSecondaryColor );
- }
- else {
- /* use current color */
- COPY_4FV(ctx->Current.RasterColor,
- ctx->Current.Attrib[VERT_ATTRIB_COLOR0]);
- COPY_4FV(ctx->Current.RasterSecondaryColor,
- ctx->Current.Attrib[VERT_ATTRIB_COLOR1]);
- }
-
- /* texture coords */
- {
- GLuint u;
- for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
- GLfloat tc[4];
- COPY_4V(tc, ctx->Current.Attrib[VERT_ATTRIB_TEX0 + u]);
- if (ctx->Texture.Unit[u].TexGenEnabled) {
- compute_texgen(ctx, vObj, eye, norm, u, tc);
- }
- TRANSFORM_POINT(ctx->Current.RasterTexCoords[u],
- ctx->TextureMatrixStack[u].Top->m, tc);
- }
- }
-
- ctx->Current.RasterPosValid = GL_TRUE;
- }
-
- if (ctx->RenderMode == GL_SELECT) {
- _mesa_update_hitflag( ctx, ctx->Current.RasterPos[2] );
- }
-}
diff --git a/src/mesa/vbo/vbo_context.h b/src/mesa/vbo/vbo_context.h
index e6b9d890d5f..6293a8b9edc 100644
--- a/src/mesa/vbo/vbo_context.h
+++ b/src/mesa/vbo/vbo_context.h
@@ -207,7 +207,8 @@ vbo_compute_max_verts(const struct vbo_exec_context *exec)
{
unsigned n = (VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) /
(exec->vtx.vertex_size * sizeof(GLfloat));
- assert(n > 0);
+ if (n == 0)
+ return 0;
/* Subtract one so we're always sure to have room for an extra
* vertex for GL_LINE_LOOP -> GL_LINE_STRIP conversion.
*/
diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index a23d5aa08aa..a614b26cae4 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -132,8 +132,7 @@ static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec )
static void
vbo_exec_vtx_wrap(struct vbo_exec_context *exec)
{
- fi_type *data = exec->vtx.copied.buffer;
- GLuint i;
+ unsigned numComponents;
/* Run pipeline on current vertices, copy wrapped vertices
* to exec->vtx.copied.
@@ -149,13 +148,12 @@ vbo_exec_vtx_wrap(struct vbo_exec_context *exec)
*/
assert(exec->vtx.max_vert - exec->vtx.vert_count > exec->vtx.copied.nr);
- for (i = 0 ; i < exec->vtx.copied.nr ; i++) {
- memcpy( exec->vtx.buffer_ptr, data,
- exec->vtx.vertex_size * sizeof(GLfloat));
- exec->vtx.buffer_ptr += exec->vtx.vertex_size;
- data += exec->vtx.vertex_size;
- exec->vtx.vert_count++;
- }
+ numComponents = exec->vtx.copied.nr * exec->vtx.vertex_size;
+ memcpy(exec->vtx.buffer_ptr,
+ exec->vtx.copied.buffer,
+ numComponents * sizeof(fi_type));
+ exec->vtx.buffer_ptr += numComponents;
+ exec->vtx.vert_count += exec->vtx.copied.nr;
exec->vtx.copied.nr = 0;
}
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 34d2c1d3d6b..e27fdd90532 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -1807,13 +1807,20 @@ vbo_initialize_exec_dispatch(const struct gl_context *ctx,
SET_EvalMesh2(exec, vbo_exec_EvalMesh2);
}
- if (_mesa_is_desktop_gl(ctx)) {
+ if (ctx->API != API_OPENGLES &&
+ ctx->Extensions.ARB_draw_elements_base_vertex) {
SET_DrawElementsBaseVertex(exec, vbo_exec_DrawElementsBaseVertex);
- SET_DrawRangeElementsBaseVertex(exec, vbo_exec_DrawRangeElementsBaseVertex);
SET_MultiDrawElementsBaseVertex(exec, vbo_exec_MultiDrawElementsBaseVertex);
+
+ if (_mesa_is_desktop_gl(ctx) || _mesa_is_gles3(ctx)) {
+ SET_DrawRangeElementsBaseVertex(exec, vbo_exec_DrawRangeElementsBaseVertex);
+ SET_DrawElementsInstancedBaseVertex(exec, vbo_exec_DrawElementsInstancedBaseVertex);
+ }
+ }
+
+ if (_mesa_is_desktop_gl(ctx)) {
SET_DrawArraysInstancedBaseInstance(exec, vbo_exec_DrawArraysInstancedBaseInstance);
SET_DrawElementsInstancedBaseInstance(exec, vbo_exec_DrawElementsInstancedBaseInstance);
- SET_DrawElementsInstancedBaseVertex(exec, vbo_exec_DrawElementsInstancedBaseVertex);
SET_DrawElementsInstancedBaseVertexBaseInstance(exec, vbo_exec_DrawElementsInstancedBaseVertexBaseInstance);
}
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index d49aa15b1b7..97a1dfdeb3f 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -601,8 +601,7 @@ static void
_save_wrap_filled_vertex(struct gl_context *ctx)
{
struct vbo_save_context *save = &vbo_context(ctx)->save;
- fi_type *data = save->copied.buffer;
- GLuint i;
+ unsigned numComponents;
/* Emit a glEnd to close off the last vertex list.
*/
@@ -612,12 +611,12 @@ _save_wrap_filled_vertex(struct gl_context *ctx)
*/
assert(save->max_vert - save->vert_count > save->copied.nr);
- for (i = 0; i < save->copied.nr; i++) {
- memcpy(save->buffer_ptr, data, save->vertex_size * sizeof(GLfloat));
- data += save->vertex_size;
- save->buffer_ptr += save->vertex_size;
- save->vert_count++;
- }
+ numComponents = save->copied.nr * save->vertex_size;
+ memcpy(save->buffer_ptr,
+ save->copied.buffer,
+ numComponents * sizeof(fi_type));
+ save->buffer_ptr += numComponents;
+ save->vert_count += save->copied.nr;
}