diff options
Diffstat (limited to 'src/mesa/drivers/dri')
62 files changed, 3674 insertions, 3039 deletions
diff --git a/src/mesa/drivers/dri/Makefile.defines b/src/mesa/drivers/dri/Makefile.defines new file mode 100644 index 00000000000..19b6de8b85a --- /dev/null +++ b/src/mesa/drivers/dri/Makefile.defines @@ -0,0 +1,34 @@ +# -*-makefile-*- + +COMMON_GALLIUM_SOURCES = \ + ../common/utils.c \ + ../common/vblank.c \ + ../common/dri_util.c \ + ../common/xmlconfig.c + +COMMON_SOURCES = $(COMMON_GALLIUM_SOURCES) \ + ../../common/driverfuncs.c \ + ../common/texmem.c \ + ../common/drirenderbuffer.c + +INCLUDES = $(SHARED_INCLUDES) $(EXPAT_INCLUDES) + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(CXX_SOURCES:.cpp=.o) \ + $(ASM_SOURCES:.S=.o) + + +### Include directories +SHARED_INCLUDES = \ + -I. \ + -I$(TOP)/src/mesa/drivers/dri/common \ + -Iserver \ + -I$(TOP)/include \ + -I$(TOP)/src/mapi \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/egl/main \ + -I$(TOP)/src/egl/drivers/dri \ + $(LIBDRM_CFLAGS) + +INCLUDES += $(API_DEFINES) +CXXFLAGS += $(API_DEFINES) diff --git a/src/mesa/drivers/dri/Makefile.template b/src/mesa/drivers/dri/Makefile.targets index d1a119379ed..436b2a3c477 100644 --- a/src/mesa/drivers/dri/Makefile.template +++ b/src/mesa/drivers/dri/Makefile.targets @@ -1,38 +1,5 @@ # -*-makefile-*- -COMMON_GALLIUM_SOURCES = \ - ../common/utils.c \ - ../common/vblank.c \ - ../common/dri_util.c \ - ../common/xmlconfig.c - -COMMON_SOURCES = $(COMMON_GALLIUM_SOURCES) \ - ../../common/driverfuncs.c \ - ../common/texmem.c \ - ../common/drirenderbuffer.c - -INCLUDES = $(SHARED_INCLUDES) $(EXPAT_INCLUDES) - -OBJECTS = $(C_SOURCES:.c=.o) \ - $(CXX_SOURCES:.cpp=.o) \ - $(ASM_SOURCES:.S=.o) - - -### Include directories -SHARED_INCLUDES = \ - -I. \ - -I$(TOP)/src/mesa/drivers/dri/common \ - -Iserver \ - -I$(TOP)/include \ - -I$(TOP)/src/mapi \ - -I$(TOP)/src/mesa \ - -I$(TOP)/src/egl/main \ - -I$(TOP)/src/egl/drivers/dri \ - $(LIBDRM_CFLAGS) - -CFLAGS += $(API_DEFINES) -CXXFLAGS += $(API_DEFINES) - ##### RULES ##### .c.o: @@ -45,7 +12,7 @@ CXXFLAGS += $(API_DEFINES) $(CC) -c $(INCLUDES) $(DRI_CFLAGS) $(DRIVER_DEFINES) $< -o $@ -##### TARGETS ##### +#### TARGETS ##### default: subdirs lib @@ -55,7 +22,7 @@ lib: symlinks subdirs depend @$(MAKE) $(LIBNAME) $(TOP)/$(LIB_DIR)/$(LIBNAME) $(LIBNAME): $(OBJECTS) $(EXTRA_MODULES) $(MESA_MODULES) Makefile \ - $(TOP)/src/mesa/drivers/dri/Makefile.template $(TOP)/src/mesa/drivers/dri/common/dri_test.o + $(TOP)/src/mesa/drivers/dri/Makefile.targets $(TOP)/src/mesa/drivers/dri/common/dri_test.o $(MKLIB) -o [email protected] -noprefix -linker '$(CXX)' -ldflags '$(LDFLAGS)' \ $(OBJECTS) $(EXTRA_MODULES) $(DRI_LIB_DEPS) $(CXX) $(CFLAGS) -o [email protected] $(TOP)/src/mesa/drivers/dri/common/dri_test.o [email protected] $(DRI_LIB_DEPS) diff --git a/src/mesa/drivers/dri/i810/Makefile b/src/mesa/drivers/dri/i810/Makefile index 54a837d5ea9..edc6dd21732 100644 --- a/src/mesa/drivers/dri/i810/Makefile +++ b/src/mesa/drivers/dri/i810/Makefile @@ -5,6 +5,8 @@ include $(TOP)/configs/current LIBNAME = i810_dri.so +include ../Makefile.defines + DRIVER_SOURCES = \ i810context.c \ i810ioctl.c \ @@ -24,5 +26,5 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../Makefile.targets diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile index 65fd658c047..79e03f2f1d1 100644 --- a/src/mesa/drivers/dri/i915/Makefile +++ b/src/mesa/drivers/dri/i915/Makefile @@ -4,6 +4,8 @@ include $(TOP)/configs/current LIBNAME = i915_dri.so +include ../Makefile.defines + DRIVER_SOURCES = \ i830_context.c \ i830_state.c \ @@ -63,7 +65,7 @@ DRIVER_DEFINES = -I../intel -DI915 \ INCLUDES += $(INTEL_CFLAGS) DRI_LIB_DEPS += $(INTEL_LIBS) -include ../Makefile.template +include ../Makefile.targets intel_decode.o: ../intel/intel_decode.c diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 89650b618e4..820feba04ba 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -687,6 +687,14 @@ i915_assert_not_dirty( struct intel_context *intel ) (void) dirty; } +/** Return false; i915 does not support HiZ. */ +static bool +i915_is_hiz_depth_format(struct intel_context *intel, + gl_format format) +{ + return false; +} + void i915InitVtbl(struct i915_context *i915) { @@ -702,4 +710,5 @@ i915InitVtbl(struct i915_context *i915) i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty; i915->intel.vtbl.finish_batch = intel_finish_vb; i915->intel.vtbl.render_target_supported = i915_render_target_supported; + i915->intel.vtbl.is_hiz_depth_format = i915_is_hiz_depth_format; } diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index b96f42bfe88..44f28cd9d15 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -4,6 +4,8 @@ include $(TOP)/configs/current LIBNAME = i965_dri.so +include ../Makefile.defines + DRIVER_SOURCES = \ intel_batchbuffer.c \ intel_blit.c \ @@ -116,10 +118,13 @@ C_SOURCES = \ CXX_SOURCES = \ brw_cubemap_normalize.cpp \ brw_fs.cpp \ + brw_fs_emit.cpp \ + brw_fs_visitor.cpp \ brw_fs_channel_expressions.cpp \ brw_fs_reg_allocate.cpp \ brw_fs_schedule_instructions.cpp \ - brw_fs_vector_splitting.cpp + brw_fs_vector_splitting.cpp \ + brw_shader.cpp ASM_SOURCES = @@ -128,7 +133,7 @@ DRIVER_DEFINES = -I../intel INCLUDES += $(INTEL_CFLAGS) DRI_LIB_DEPS += $(INTEL_LIBS) -include ../Makefile.template +include ../Makefile.targets intel_decode.o: ../intel/intel_decode.c intel_tex_layout.o: ../intel/intel_tex_layout.c diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index df753abed02..0256ab9061f 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -94,9 +94,9 @@ GLboolean brwCreateContext( int api, ctx->Const.MaxVertexTextureImageUnits + ctx->Const.MaxTextureImageUnits; - /* Mesa limits textures to 4kx4k; it would be nice to fix that someday - */ - ctx->Const.MaxTextureLevels = 13; + ctx->Const.MaxTextureLevels = 14; /* 8192 */ + if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS) + ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS; ctx->Const.Max3DTextureLevels = 9; ctx->Const.MaxCubeTextureLevels = 12; ctx->Const.MaxTextureRectSize = (1<<12); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 1d2ef066db2..621b6f8990b 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -231,8 +231,8 @@ struct brw_wm_prog_data { GLuint first_curbe_grf; GLuint first_curbe_grf_16; - GLuint total_grf; - GLuint total_grf_16; + GLuint reg_blocks; + GLuint reg_blocks_16; GLuint total_scratch; GLuint nr_params; /**< number of float params/constants */ @@ -863,6 +863,17 @@ float convert_param(enum param_conversion conversion, float param) } } +/** + * Pre-gen6, the register file of the EUs was shared between threads, + * and each thread used some subset allocated on a 16-register block + * granularity. The unit states wanted these block counts. + */ +static inline int +brw_register_blocks(int reg_count) +{ + return ALIGN(reg_count, 16) / 16 - 1; +} + GLboolean brw_do_cubemap_normalize(struct exec_list *instructions); #endif diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 5eb7892bb08..6d41b1e69d3 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -28,7 +28,10 @@ * Authors: * Keith Whitwell <[email protected]> */ - + +#define INTEL_MASK(high, low) (((1<<((high)-(low)+1))-1)<<(low)) +#define SET_FIELD(value, field) (((value) << field ## _SHIFT) & field ## _MASK) +#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT) #ifndef BRW_DEFINES_H #define BRW_DEFINES_H @@ -243,8 +246,17 @@ #define BRW_STENCILOP_DECR 6 #define BRW_STENCILOP_INVERT 7 +/* Surface state DW0 */ +#define BRW_SURFACE_RC_READ_WRITE (1 << 8) +#define BRW_SURFACE_MIPLAYOUT_SHIFT 10 #define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 #define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1 +#define BRW_SURFACE_CUBEFACE_ENABLES 0x3f +#define BRW_SURFACE_BLEND_ENABLED (1 << 13) +#define BRW_SURFACE_WRITEDISABLE_B_SHIFT 14 +#define BRW_SURFACE_WRITEDISABLE_G_SHIFT 15 +#define BRW_SURFACE_WRITEDISABLE_R_SHIFT 16 +#define BRW_SURFACE_WRITEDISABLE_A_SHIFT 17 #define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 #define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001 @@ -403,10 +415,14 @@ #define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D #define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E #define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F +#define BRW_SURFACE_FORMAT_SHIFT 18 +#define BRW_SURFACE_FORMAT_MASK INTEL_MASK(26, 18) #define BRW_SURFACERETURNFORMAT_FLOAT32 0 #define BRW_SURFACERETURNFORMAT_S1 1 +#define BRW_SURFACE_TYPE_SHIFT 29 +#define BRW_SURFACE_TYPE_MASK INTEL_MASK(31, 29) #define BRW_SURFACE_1D 0 #define BRW_SURFACE_2D 1 #define BRW_SURFACE_3D 2 @@ -414,6 +430,32 @@ #define BRW_SURFACE_BUFFER 4 #define BRW_SURFACE_NULL 7 +/* Surface state DW2 */ +#define BRW_SURFACE_HEIGHT_SHIFT 19 +#define BRW_SURFACE_HEIGHT_MASK INTEL_MASK(31, 19) +#define BRW_SURFACE_WIDTH_SHIFT 6 +#define BRW_SURFACE_WIDTH_MASK INTEL_MASK(18, 6) +#define BRW_SURFACE_LOD_SHIFT 2 +#define BRW_SURFACE_LOD_MASK INTEL_MASK(5, 2) + +/* Surface state DW3 */ +#define BRW_SURFACE_DEPTH_SHIFT 21 +#define BRW_SURFACE_DEPTH_MASK INTEL_MASK(31, 21) +#define BRW_SURFACE_PITCH_SHIFT 3 +#define BRW_SURFACE_PITCH_MASK INTEL_MASK(19, 3) +#define BRW_SURFACE_TILED (1 << 1) +#define BRW_SURFACE_TILED_Y (1 << 0) + +/* Surface state DW4 */ +#define BRW_SURFACE_MIN_LOD_SHIFT 28 +#define BRW_SURFACE_MIN_LOD_MASK INTEL_MASK(31, 28) + +/* Surface state DW5 */ +#define BRW_SURFACE_X_OFFSET_SHIFT 25 +#define BRW_SURFACE_X_OFFSET_MASK INTEL_MASK(31, 25) +#define BRW_SURFACE_Y_OFFSET_SHIFT 20 +#define BRW_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 20) + #define BRW_TEXCOORDMODE_WRAP 0 #define BRW_TEXCOORDMODE_MIRROR 1 #define BRW_TEXCOORDMODE_CLAMP 2 diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 2d41302d15a..e7370f36064 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -93,8 +93,6 @@ static void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, struct brw_reg dest) { - struct intel_context *intel = &p->brw->intel; - if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && dest.file != BRW_MESSAGE_REGISTER_FILE) assert(dest.nr < 128); @@ -1254,7 +1252,6 @@ struct brw_instruction *gen6_CONT(struct brw_compile *p, struct brw_instruction *do_insn) { struct brw_instruction *insn; - int br = 2; insn = next_insn(p, BRW_OPCODE_CONTINUE); brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 6d545afab64..09033aecd7c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -19,10 +19,13 @@ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. + */ + +/** @file brw_fs.cpp * - * Authors: - * Eric Anholt <[email protected]> - * + * This file drives the GLSL IR -> LIR translation, contains the + * optimizations on the LIR, and drives the generation of native code + * from the LIR. */ extern "C" { @@ -34,7 +37,6 @@ extern "C" { #include "main/uniforms.h" #include "program/prog_parameter.h" #include "program/prog_print.h" -#include "program/prog_optimize.h" #include "program/register_allocate.h" #include "program/sampler.h" #include "program/hash_table.h" @@ -42,113 +44,15 @@ extern "C" { #include "brw_eu.h" #include "brw_wm.h" } +#include "brw_shader.h" #include "brw_fs.h" #include "../glsl/glsl_types.h" -#include "../glsl/ir_optimization.h" #include "../glsl/ir_print_visitor.h" #define MAX_INSTRUCTION (1 << 30) -static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg); - -struct gl_shader * -brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) -{ - struct brw_shader *shader; - - shader = rzalloc(NULL, struct brw_shader); - if (shader) { - shader->base.Type = type; - shader->base.Name = name; - _mesa_init_shader(ctx, &shader->base); - } - - return &shader->base; -} - -struct gl_shader_program * -brw_new_shader_program(struct gl_context *ctx, GLuint name) -{ - struct brw_shader_program *prog; - prog = rzalloc(NULL, struct brw_shader_program); - if (prog) { - prog->base.Name = name; - _mesa_init_shader_program(ctx, &prog->base); - } - return &prog->base; -} - -GLboolean -brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) -{ - struct brw_context *brw = brw_context(ctx); - struct intel_context *intel = &brw->intel; - - struct brw_shader *shader = - (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; - if (shader != NULL) { - void *mem_ctx = ralloc_context(NULL); - bool progress; - - if (shader->ir) - ralloc_free(shader->ir); - shader->ir = new(shader) exec_list; - clone_ir_list(mem_ctx, shader->ir, shader->base.ir); - - do_mat_op_to_vec(shader->ir); - lower_instructions(shader->ir, - MOD_TO_FRACT | - DIV_TO_MUL_RCP | - SUB_TO_ADD_NEG | - EXP_TO_EXP2 | - LOG_TO_LOG2); - - /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, - * if-statements need to be flattened. - */ - if (intel->gen < 6) - lower_if_to_cond_assign(shader->ir, 16); - - do_lower_texture_projection(shader->ir); - do_vec_index_to_cond_assign(shader->ir); - brw_do_cubemap_normalize(shader->ir); - lower_noise(shader->ir); - lower_quadop_vector(shader->ir, false); - lower_variable_index_to_cond_assign(shader->ir, - GL_TRUE, /* input */ - GL_TRUE, /* output */ - GL_TRUE, /* temp */ - GL_TRUE /* uniform */ - ); - - do { - progress = false; - - brw_do_channel_expressions(shader->ir); - brw_do_vector_splitting(shader->ir); - - progress = do_lower_jumps(shader->ir, true, true, - true, /* main return */ - false, /* continue */ - false /* loops */ - ) || progress; - - progress = do_common_optimization(shader->ir, true, 32) || progress; - } while (progress); - - validate_ir_tree(shader->ir); - reparent_ir(shader->ir, shader->ir); - ralloc_free(mem_ctx); - } - - if (!_mesa_ir_link_shader(ctx, prog)) - return GL_FALSE; - - return GL_TRUE; -} - -static int -type_size(const struct glsl_type *type) +int +fs_visitor::type_size(const struct glsl_type *type) { unsigned int size, i; @@ -180,17 +84,23 @@ type_size(const struct glsl_type *type) void fs_visitor::fail(const char *format, ...) { - if (!failed) { - failed = true; + va_list va; + char *msg; - if (INTEL_DEBUG & DEBUG_WM) { - fprintf(stderr, "FS compile failed: "); + if (failed) + return; - va_list va; - va_start(va, format); - vfprintf(stderr, format, va); - va_end(va); - } + failed = true; + + va_start(va, format); + msg = ralloc_vasprintf(mem_ctx, format, va); + va_end(va); + msg = ralloc_asprintf(mem_ctx, "FS compile failed: %s\n", msg); + + this->fail_msg = msg; + + if (INTEL_DEBUG & DEBUG_WM) { + fprintf(stderr, msg); } } @@ -297,38 +207,13 @@ fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type) this->type = type; } -int -brw_type_for_base_type(const struct glsl_type *type) -{ - switch (type->base_type) { - case GLSL_TYPE_FLOAT: - return BRW_REGISTER_TYPE_F; - case GLSL_TYPE_INT: - case GLSL_TYPE_BOOL: - return BRW_REGISTER_TYPE_D; - case GLSL_TYPE_UINT: - return BRW_REGISTER_TYPE_UD; - case GLSL_TYPE_ARRAY: - case GLSL_TYPE_STRUCT: - case GLSL_TYPE_SAMPLER: - /* These should be overridden with the type of the member when - * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely - * way to trip up if we don't. - */ - return BRW_REGISTER_TYPE_UD; - default: - assert(!"not reached"); - return BRW_REGISTER_TYPE_F; - } -} - /** Automatic reg constructor. */ fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type) { init(); this->file = GRF; - this->reg = v->virtual_grf_alloc(type_size(type)); + this->reg = v->virtual_grf_alloc(v->type_size(type)); this->reg_offset = 0; this->type = brw_type_for_base_type(type); } @@ -584,7 +469,7 @@ fs_visitor::emit_general_interpolation(ir_variable *ir) attr.reg_offset++; } - if (intel->gen < 6 && !(is_gl_Color && c->key.linear_color)) { + if (intel->gen < 6) { attr.reg_offset -= type->vector_elements; for (unsigned int k = 0; k < type->vector_elements; k++) { emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w); @@ -706,2134 +591,6 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) return inst; } -void -fs_visitor::visit(ir_variable *ir) -{ - fs_reg *reg = NULL; - - if (variable_storage(ir)) - return; - - if (strcmp(ir->name, "gl_FragColor") == 0) { - this->frag_color = ir; - } else if (strcmp(ir->name, "gl_FragData") == 0) { - this->frag_data = ir; - } else if (strcmp(ir->name, "gl_FragDepth") == 0) { - this->frag_depth = ir; - } - - if (ir->mode == ir_var_in) { - if (!strcmp(ir->name, "gl_FragCoord")) { - reg = emit_fragcoord_interpolation(ir); - } else if (!strcmp(ir->name, "gl_FrontFacing")) { - reg = emit_frontfacing_interpolation(ir); - } else { - reg = emit_general_interpolation(ir); - } - assert(reg); - hash_table_insert(this->variable_ht, reg, ir); - return; - } - - if (ir->mode == ir_var_uniform) { - int param_index = c->prog_data.nr_params; - - if (c->dispatch_width == 16) { - if (!variable_storage(ir)) { - fail("Failed to find uniform '%s' in 16-wide\n", ir->name); - } - return; - } - - if (!strncmp(ir->name, "gl_", 3)) { - setup_builtin_uniform_values(ir); - } else { - setup_uniform_values(ir->location, ir->type); - } - - reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); - reg->type = brw_type_for_base_type(ir->type); - } - - if (!reg) - reg = new(this->mem_ctx) fs_reg(this, ir->type); - - hash_table_insert(this->variable_ht, reg, ir); -} - -void -fs_visitor::visit(ir_dereference_variable *ir) -{ - fs_reg *reg = variable_storage(ir->var); - this->result = *reg; -} - -void -fs_visitor::visit(ir_dereference_record *ir) -{ - const glsl_type *struct_type = ir->record->type; - - ir->record->accept(this); - - unsigned int offset = 0; - for (unsigned int i = 0; i < struct_type->length; i++) { - if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) - break; - offset += type_size(struct_type->fields.structure[i].type); - } - this->result.reg_offset += offset; - this->result.type = brw_type_for_base_type(ir->type); -} - -void -fs_visitor::visit(ir_dereference_array *ir) -{ - ir_constant *index; - int element_size; - - ir->array->accept(this); - index = ir->array_index->as_constant(); - - element_size = type_size(ir->type); - this->result.type = brw_type_for_base_type(ir->type); - - if (index) { - assert(this->result.file == UNIFORM || - (this->result.file == GRF && - this->result.reg != 0)); - this->result.reg_offset += index->value.i[0] * element_size; - } else { - assert(!"FINISHME: non-constant array element"); - } -} - -/* Instruction selection: Produce a MOV.sat instead of - * MIN(MAX(val, 0), 1) when possible. - */ -bool -fs_visitor::try_emit_saturate(ir_expression *ir) -{ - ir_rvalue *sat_val = ir->as_rvalue_to_saturate(); - - if (!sat_val) - return false; - - sat_val->accept(this); - fs_reg src = this->result; - - this->result = fs_reg(this, ir->type); - fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src); - inst->saturate = true; - - return true; -} - -static uint32_t -brw_conditional_for_comparison(unsigned int op) -{ - switch (op) { - case ir_binop_less: - return BRW_CONDITIONAL_L; - case ir_binop_greater: - return BRW_CONDITIONAL_G; - case ir_binop_lequal: - return BRW_CONDITIONAL_LE; - case ir_binop_gequal: - return BRW_CONDITIONAL_GE; - case ir_binop_equal: - case ir_binop_all_equal: /* same as equal for scalars */ - return BRW_CONDITIONAL_Z; - case ir_binop_nequal: - case ir_binop_any_nequal: /* same as nequal for scalars */ - return BRW_CONDITIONAL_NZ; - default: - assert(!"not reached: bad operation for comparison"); - return BRW_CONDITIONAL_NZ; - } -} - -void -fs_visitor::visit(ir_expression *ir) -{ - unsigned int operand; - fs_reg op[2], temp; - fs_inst *inst; - - assert(ir->get_num_operands() <= 2); - - if (try_emit_saturate(ir)) - return; - - for (operand = 0; operand < ir->get_num_operands(); operand++) { - ir->operands[operand]->accept(this); - if (this->result.file == BAD_FILE) { - ir_print_visitor v; - fail("Failed to get tree for expression operand:\n"); - ir->operands[operand]->accept(&v); - } - op[operand] = this->result; - - /* Matrix expression operands should have been broken down to vector - * operations already. - */ - assert(!ir->operands[operand]->type->is_matrix()); - /* And then those vector operands should have been broken down to scalar. - */ - assert(!ir->operands[operand]->type->is_vector()); - } - - /* Storage for our result. If our result goes into an assignment, it will - * just get copy-propagated out, so no worries. - */ - this->result = fs_reg(this, ir->type); - - switch (ir->operation) { - case ir_unop_logic_not: - /* Note that BRW_OPCODE_NOT is not appropriate here, since it is - * ones complement of the whole register, not just bit 0. - */ - emit(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1)); - break; - case ir_unop_neg: - op[0].negate = !op[0].negate; - this->result = op[0]; - break; - case ir_unop_abs: - op[0].abs = true; - op[0].negate = false; - this->result = op[0]; - break; - case ir_unop_sign: - temp = fs_reg(this, ir->type); - - emit(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)); - - inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)); - inst->conditional_mod = BRW_CONDITIONAL_G; - inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)); - inst->predicated = true; - - inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)); - inst->conditional_mod = BRW_CONDITIONAL_L; - inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)); - inst->predicated = true; - - break; - case ir_unop_rcp: - emit_math(FS_OPCODE_RCP, this->result, op[0]); - break; - - case ir_unop_exp2: - emit_math(FS_OPCODE_EXP2, this->result, op[0]); - break; - case ir_unop_log2: - emit_math(FS_OPCODE_LOG2, this->result, op[0]); - break; - case ir_unop_exp: - case ir_unop_log: - assert(!"not reached: should be handled by ir_explog_to_explog2"); - break; - case ir_unop_sin: - case ir_unop_sin_reduced: - emit_math(FS_OPCODE_SIN, this->result, op[0]); - break; - case ir_unop_cos: - case ir_unop_cos_reduced: - emit_math(FS_OPCODE_COS, this->result, op[0]); - break; - - case ir_unop_dFdx: - emit(FS_OPCODE_DDX, this->result, op[0]); - break; - case ir_unop_dFdy: - emit(FS_OPCODE_DDY, this->result, op[0]); - break; - - case ir_binop_add: - emit(BRW_OPCODE_ADD, this->result, op[0], op[1]); - break; - case ir_binop_sub: - assert(!"not reached: should be handled by ir_sub_to_add_neg"); - break; - - case ir_binop_mul: - emit(BRW_OPCODE_MUL, this->result, op[0], op[1]); - break; - case ir_binop_div: - assert(!"not reached: should be handled by ir_div_to_mul_rcp"); - break; - case ir_binop_mod: - assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); - break; - - case ir_binop_less: - case ir_binop_greater: - case ir_binop_lequal: - case ir_binop_gequal: - case ir_binop_equal: - case ir_binop_all_equal: - case ir_binop_nequal: - case ir_binop_any_nequal: - temp = this->result; - /* original gen4 does implicit conversion before comparison. */ - if (intel->gen < 5) - temp.type = op[0].type; - - inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); - inst->conditional_mod = brw_conditional_for_comparison(ir->operation); - emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)); - break; - - case ir_binop_logic_xor: - emit(BRW_OPCODE_XOR, this->result, op[0], op[1]); - break; - - case ir_binop_logic_or: - emit(BRW_OPCODE_OR, this->result, op[0], op[1]); - break; - - case ir_binop_logic_and: - emit(BRW_OPCODE_AND, this->result, op[0], op[1]); - break; - - case ir_binop_dot: - case ir_unop_any: - assert(!"not reached: should be handled by brw_fs_channel_expressions"); - break; - - case ir_unop_noise: - assert(!"not reached: should be handled by lower_noise"); - break; - - case ir_quadop_vector: - assert(!"not reached: should be handled by lower_quadop_vector"); - break; - - case ir_unop_sqrt: - emit_math(FS_OPCODE_SQRT, this->result, op[0]); - break; - - case ir_unop_rsq: - emit_math(FS_OPCODE_RSQ, this->result, op[0]); - break; - - case ir_unop_i2f: - case ir_unop_b2f: - case ir_unop_b2i: - case ir_unop_f2i: - emit(BRW_OPCODE_MOV, this->result, op[0]); - break; - case ir_unop_f2b: - case ir_unop_i2b: - temp = this->result; - /* original gen4 does implicit conversion before comparison. */ - if (intel->gen < 5) - temp.type = op[0].type; - - inst = emit(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - inst = emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(1)); - break; - - case ir_unop_trunc: - emit(BRW_OPCODE_RNDZ, this->result, op[0]); - break; - case ir_unop_ceil: - op[0].negate = !op[0].negate; - inst = emit(BRW_OPCODE_RNDD, this->result, op[0]); - this->result.negate = true; - break; - case ir_unop_floor: - inst = emit(BRW_OPCODE_RNDD, this->result, op[0]); - break; - case ir_unop_fract: - inst = emit(BRW_OPCODE_FRC, this->result, op[0]); - break; - case ir_unop_round_even: - emit(BRW_OPCODE_RNDE, this->result, op[0]); - break; - - case ir_binop_min: - inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_L; - - inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); - inst->predicated = true; - break; - case ir_binop_max: - inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_G; - - inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); - inst->predicated = true; - break; - - case ir_binop_pow: - emit_math(FS_OPCODE_POW, this->result, op[0], op[1]); - break; - - case ir_unop_bit_not: - inst = emit(BRW_OPCODE_NOT, this->result, op[0]); - break; - case ir_binop_bit_and: - inst = emit(BRW_OPCODE_AND, this->result, op[0], op[1]); - break; - case ir_binop_bit_xor: - inst = emit(BRW_OPCODE_XOR, this->result, op[0], op[1]); - break; - case ir_binop_bit_or: - inst = emit(BRW_OPCODE_OR, this->result, op[0], op[1]); - break; - - case ir_unop_u2f: - case ir_binop_lshift: - case ir_binop_rshift: - assert(!"GLSL 1.30 features unsupported"); - break; - } -} - -void -fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, - const glsl_type *type, bool predicated) -{ - switch (type->base_type) { - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_BOOL: - for (unsigned int i = 0; i < type->components(); i++) { - l.type = brw_type_for_base_type(type); - r.type = brw_type_for_base_type(type); - - fs_inst *inst = emit(BRW_OPCODE_MOV, l, r); - inst->predicated = predicated; - - l.reg_offset++; - r.reg_offset++; - } - break; - case GLSL_TYPE_ARRAY: - for (unsigned int i = 0; i < type->length; i++) { - emit_assignment_writes(l, r, type->fields.array, predicated); - } - break; - - case GLSL_TYPE_STRUCT: - for (unsigned int i = 0; i < type->length; i++) { - emit_assignment_writes(l, r, type->fields.structure[i].type, - predicated); - } - break; - - case GLSL_TYPE_SAMPLER: - break; - - default: - assert(!"not reached"); - break; - } -} - -void -fs_visitor::visit(ir_assignment *ir) -{ - struct fs_reg l, r; - fs_inst *inst; - - /* FINISHME: arrays on the lhs */ - ir->lhs->accept(this); - l = this->result; - - ir->rhs->accept(this); - r = this->result; - - assert(l.file != BAD_FILE); - assert(r.file != BAD_FILE); - - if (ir->condition) { - emit_bool_to_cond_code(ir->condition); - } - - if (ir->lhs->type->is_scalar() || - ir->lhs->type->is_vector()) { - for (int i = 0; i < ir->lhs->type->vector_elements; i++) { - if (ir->write_mask & (1 << i)) { - inst = emit(BRW_OPCODE_MOV, l, r); - if (ir->condition) - inst->predicated = true; - r.reg_offset++; - } - l.reg_offset++; - } - } else { - emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); - } -} - -fs_inst * -fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, - int sampler) -{ - int mlen; - int base_mrf = 1; - bool simd16 = false; - fs_reg orig_dst; - - /* g0 header. */ - mlen = 1; - - if (ir->shadow_comparitor) { - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - fs_inst *inst = emit(BRW_OPCODE_MOV, - fs_reg(MRF, base_mrf + mlen + i), coordinate); - if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) - inst->saturate = true; - - coordinate.reg_offset++; - } - /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ - mlen += 3; - - if (ir->op == ir_tex) { - /* There's no plain shadow compare message, so we use shadow - * compare with a bias of 0.0. - */ - emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)); - mlen++; - } else if (ir->op == ir_txb) { - ir->lod_info.bias->accept(this); - emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); - mlen++; - } else { - assert(ir->op == ir_txl); - ir->lod_info.lod->accept(this); - emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); - mlen++; - } - - ir->shadow_comparitor->accept(this); - emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); - mlen++; - } else if (ir->op == ir_tex) { - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), - coordinate); - if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) - inst->saturate = true; - coordinate.reg_offset++; - } - /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ - mlen += 3; - } else if (ir->op == ir_txd) { - assert(!"TXD isn't supported on gen4 yet."); - } else { - /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod - * instructions. We'll need to do SIMD16 here. - */ - assert(ir->op == ir_txb || ir->op == ir_txl); - - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, - base_mrf + mlen + i * 2), - coordinate); - if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) - inst->saturate = true; - coordinate.reg_offset++; - } - - /* lod/bias appears after u/v/r. */ - mlen += 6; - - if (ir->op == ir_txb) { - ir->lod_info.bias->accept(this); - emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); - mlen++; - } else { - ir->lod_info.lod->accept(this); - emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); - mlen++; - } - - /* The unused upper half. */ - mlen++; - - /* Now, since we're doing simd16, the return is 2 interleaved - * vec4s where the odd-indexed ones are junk. We'll need to move - * this weirdness around to the expected layout. - */ - simd16 = true; - orig_dst = dst; - dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, - 2)); - dst.type = BRW_REGISTER_TYPE_F; - } - - fs_inst *inst = NULL; - switch (ir->op) { - case ir_tex: - inst = emit(FS_OPCODE_TEX, dst); - break; - case ir_txb: - inst = emit(FS_OPCODE_TXB, dst); - break; - case ir_txl: - inst = emit(FS_OPCODE_TXL, dst); - break; - case ir_txd: - inst = emit(FS_OPCODE_TXD, dst); - break; - case ir_txf: - assert(!"GLSL 1.30 features unsupported"); - break; - } - inst->base_mrf = base_mrf; - inst->mlen = mlen; - inst->header_present = true; - - if (simd16) { - for (int i = 0; i < 4; i++) { - emit(BRW_OPCODE_MOV, orig_dst, dst); - orig_dst.reg_offset++; - dst.reg_offset += 2; - } - } - - return inst; -} - -/* gen5's sampler has slots for u, v, r, array index, then optional - * parameters like shadow comparitor or LOD bias. If optional - * parameters aren't present, those base slots are optional and don't - * need to be included in the message. - * - * We don't fill in the unnecessary slots regardless, which may look - * surprising in the disassembly. - */ -fs_inst * -fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, - int sampler) -{ - int mlen = 0; - int base_mrf = 2; - int reg_width = c->dispatch_width / 8; - bool header_present = false; - - if (ir->offset) { - /* The offsets set up by the ir_texture visitor are in the - * m1 header, so we can't go headerless. - */ - header_present = true; - mlen++; - base_mrf--; - } - - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - fs_inst *inst = emit(BRW_OPCODE_MOV, - fs_reg(MRF, base_mrf + mlen + i * reg_width), - coordinate); - if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) - inst->saturate = true; - coordinate.reg_offset++; - } - mlen += ir->coordinate->type->vector_elements * reg_width; - - if (ir->shadow_comparitor) { - mlen = MAX2(mlen, header_present + 4 * reg_width); - - ir->shadow_comparitor->accept(this); - emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); - mlen += reg_width; - } - - fs_inst *inst = NULL; - switch (ir->op) { - case ir_tex: - inst = emit(FS_OPCODE_TEX, dst); - break; - case ir_txb: - ir->lod_info.bias->accept(this); - mlen = MAX2(mlen, header_present + 4 * reg_width); - emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); - mlen += reg_width; - - inst = emit(FS_OPCODE_TXB, dst); - - break; - case ir_txl: - ir->lod_info.lod->accept(this); - mlen = MAX2(mlen, header_present + 4 * reg_width); - emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); - mlen += reg_width; - - inst = emit(FS_OPCODE_TXL, dst); - break; - case ir_txd: - case ir_txf: - assert(!"GLSL 1.30 features unsupported"); - break; - } - inst->base_mrf = base_mrf; - inst->mlen = mlen; - inst->header_present = header_present; - - if (mlen > 11) { - fail("Message length >11 disallowed by hardware\n"); - } - - return inst; -} - -fs_inst * -fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, - int sampler) -{ - int mlen = 0; - int base_mrf = 2; - int reg_width = c->dispatch_width / 8; - bool header_present = false; - - if (ir->offset) { - /* The offsets set up by the ir_texture visitor are in the - * m1 header, so we can't go headerless. - */ - header_present = true; - mlen++; - base_mrf--; - } - - if (ir->shadow_comparitor) { - ir->shadow_comparitor->accept(this); - emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); - mlen += reg_width; - } - - /* Set up the LOD info */ - switch (ir->op) { - case ir_tex: - break; - case ir_txb: - ir->lod_info.bias->accept(this); - emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); - mlen += reg_width; - break; - case ir_txl: - ir->lod_info.lod->accept(this); - emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); - mlen += reg_width; - break; - case ir_txd: - case ir_txf: - assert(!"GLSL 1.30 features unsupported"); - break; - } - - /* Set up the coordinate */ - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), - coordinate); - if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) - inst->saturate = true; - coordinate.reg_offset++; - mlen += reg_width; - } - - /* Generate the SEND */ - fs_inst *inst = NULL; - switch (ir->op) { - case ir_tex: inst = emit(FS_OPCODE_TEX, dst); break; - case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break; - case ir_txl: inst = emit(FS_OPCODE_TXL, dst); break; - case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break; - case ir_txf: assert(!"TXF unsupported."); - } - inst->base_mrf = base_mrf; - inst->mlen = mlen; - inst->header_present = header_present; - - if (mlen > 11) { - fail("Message length >11 disallowed by hardware\n"); - } - - return inst; -} - -void -fs_visitor::visit(ir_texture *ir) -{ - int sampler; - fs_inst *inst = NULL; - - ir->coordinate->accept(this); - fs_reg coordinate = this->result; - - if (ir->offset != NULL) { - ir_constant *offset = ir->offset->as_constant(); - assert(offset != NULL); - - signed char offsets[3]; - for (unsigned i = 0; i < ir->offset->type->vector_elements; i++) - offsets[i] = (signed char) offset->value.i[i]; - - /* Combine all three offsets into a single unsigned dword: - * - * bits 11:8 - U Offset (X component) - * bits 7:4 - V Offset (Y component) - * bits 3:0 - R Offset (Z component) - */ - unsigned offset_bits = 0; - for (unsigned i = 0; i < ir->offset->type->vector_elements; i++) { - const unsigned shift = 4 * (2 - i); - offset_bits |= (offsets[i] << shift) & (0xF << shift); - } - - /* Explicitly set up the message header by copying g0 to msg reg m1. */ - emit(BRW_OPCODE_MOV, fs_reg(MRF, 1, BRW_REGISTER_TYPE_UD), - fs_reg(GRF, 0, BRW_REGISTER_TYPE_UD)); - - /* Then set the offset bits in DWord 2 of the message header. */ - emit(BRW_OPCODE_MOV, - fs_reg(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1, 2), - BRW_REGISTER_TYPE_UD)), - fs_reg(brw_imm_uw(offset_bits))); - } - - /* Should be lowered by do_lower_texture_projection */ - assert(!ir->projector); - - sampler = _mesa_get_sampler_uniform_value(ir->sampler, - ctx->Shader.CurrentFragmentProgram, - &brw->fragment_program->Base); - sampler = c->fp->program.Base.SamplerUnits[sampler]; - - /* The 965 requires the EU to do the normalization of GL rectangle - * texture coordinates. We use the program parameter state - * tracking to get the scaling factor. - */ - if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { - struct gl_program_parameter_list *params = c->fp->program.Base.Parameters; - int tokens[STATE_LENGTH] = { - STATE_INTERNAL, - STATE_TEXRECT_SCALE, - sampler, - 0, - 0 - }; - - if (c->dispatch_width == 16) { - fail("rectangle scale uniform setup not supported on 16-wide\n"); - this->result = fs_reg(this, ir->type); - return; - } - - c->prog_data.param_convert[c->prog_data.nr_params] = - PARAM_NO_CONVERT; - c->prog_data.param_convert[c->prog_data.nr_params + 1] = - PARAM_NO_CONVERT; - - fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params); - fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1); - GLuint index = _mesa_add_state_reference(params, - (gl_state_index *)tokens); - - this->param_index[c->prog_data.nr_params] = index; - this->param_offset[c->prog_data.nr_params] = 0; - c->prog_data.nr_params++; - this->param_index[c->prog_data.nr_params] = index; - this->param_offset[c->prog_data.nr_params] = 1; - c->prog_data.nr_params++; - - fs_reg dst = fs_reg(this, ir->coordinate->type); - fs_reg src = coordinate; - coordinate = dst; - - emit(BRW_OPCODE_MUL, dst, src, scale_x); - dst.reg_offset++; - src.reg_offset++; - emit(BRW_OPCODE_MUL, dst, src, scale_y); - } - - /* Writemasking doesn't eliminate channels on SIMD8 texture - * samples, so don't worry about them. - */ - fs_reg dst = fs_reg(this, glsl_type::vec4_type); - - if (intel->gen >= 7) { - inst = emit_texture_gen7(ir, dst, coordinate, sampler); - } else if (intel->gen >= 5) { - inst = emit_texture_gen5(ir, dst, coordinate, sampler); - } else { - inst = emit_texture_gen4(ir, dst, coordinate, sampler); - } - - /* If there's an offset, we already set up m1. To avoid the implied move, - * use the null register. Otherwise, we want an implied move from g0. - */ - if (ir->offset != NULL || !inst->header_present) - inst->src[0] = fs_reg(brw_null_reg()); - else - inst->src[0] = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); - - inst->sampler = sampler; - - this->result = dst; - - if (ir->shadow_comparitor) - inst->shadow_compare = true; - - if (ir->type == glsl_type::float_type) { - /* Ignore DEPTH_TEXTURE_MODE swizzling. */ - assert(ir->sampler->type->sampler_shadow); - } else if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) { - fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type); - - for (int i = 0; i < 4; i++) { - int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i); - fs_reg l = swizzle_dst; - l.reg_offset += i; - - if (swiz == SWIZZLE_ZERO) { - emit(BRW_OPCODE_MOV, l, fs_reg(0.0f)); - } else if (swiz == SWIZZLE_ONE) { - emit(BRW_OPCODE_MOV, l, fs_reg(1.0f)); - } else { - fs_reg r = dst; - r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i); - emit(BRW_OPCODE_MOV, l, r); - } - } - this->result = swizzle_dst; - } -} - -void -fs_visitor::visit(ir_swizzle *ir) -{ - ir->val->accept(this); - fs_reg val = this->result; - - if (ir->type->vector_elements == 1) { - this->result.reg_offset += ir->mask.x; - return; - } - - fs_reg result = fs_reg(this, ir->type); - this->result = result; - - for (unsigned int i = 0; i < ir->type->vector_elements; i++) { - fs_reg channel = val; - int swiz = 0; - - switch (i) { - case 0: - swiz = ir->mask.x; - break; - case 1: - swiz = ir->mask.y; - break; - case 2: - swiz = ir->mask.z; - break; - case 3: - swiz = ir->mask.w; - break; - } - - channel.reg_offset += swiz; - emit(BRW_OPCODE_MOV, result, channel); - result.reg_offset++; - } -} - -void -fs_visitor::visit(ir_discard *ir) -{ - assert(ir->condition == NULL); /* FINISHME */ - - emit(FS_OPCODE_DISCARD); - kill_emitted = true; -} - -void -fs_visitor::visit(ir_constant *ir) -{ - /* Set this->result to reg at the bottom of the function because some code - * paths will cause this visitor to be applied to other fields. This will - * cause the value stored in this->result to be modified. - * - * Make reg constant so that it doesn't get accidentally modified along the - * way. Yes, I actually had this problem. :( - */ - const fs_reg reg(this, ir->type); - fs_reg dst_reg = reg; - - if (ir->type->is_array()) { - const unsigned size = type_size(ir->type->fields.array); - - for (unsigned i = 0; i < ir->type->length; i++) { - ir->array_elements[i]->accept(this); - fs_reg src_reg = this->result; - - dst_reg.type = src_reg.type; - for (unsigned j = 0; j < size; j++) { - emit(BRW_OPCODE_MOV, dst_reg, src_reg); - src_reg.reg_offset++; - dst_reg.reg_offset++; - } - } - } else if (ir->type->is_record()) { - foreach_list(node, &ir->components) { - ir_instruction *const field = (ir_instruction *) node; - const unsigned size = type_size(field->type); - - field->accept(this); - fs_reg src_reg = this->result; - - dst_reg.type = src_reg.type; - for (unsigned j = 0; j < size; j++) { - emit(BRW_OPCODE_MOV, dst_reg, src_reg); - src_reg.reg_offset++; - dst_reg.reg_offset++; - } - } - } else { - const unsigned size = type_size(ir->type); - - for (unsigned i = 0; i < size; i++) { - switch (ir->type->base_type) { - case GLSL_TYPE_FLOAT: - emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i])); - break; - case GLSL_TYPE_UINT: - emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i])); - break; - case GLSL_TYPE_INT: - emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i])); - break; - case GLSL_TYPE_BOOL: - emit(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i])); - break; - default: - assert(!"Non-float/uint/int/bool constant"); - } - dst_reg.reg_offset++; - } - } - - this->result = reg; -} - -void -fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) -{ - ir_expression *expr = ir->as_expression(); - - if (expr) { - fs_reg op[2]; - fs_inst *inst; - - assert(expr->get_num_operands() <= 2); - for (unsigned int i = 0; i < expr->get_num_operands(); i++) { - assert(expr->operands[i]->type->is_scalar()); - - expr->operands[i]->accept(this); - op[i] = this->result; - } - - switch (expr->operation) { - case ir_unop_logic_not: - inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1)); - inst->conditional_mod = BRW_CONDITIONAL_Z; - break; - - case ir_binop_logic_xor: - inst = emit(BRW_OPCODE_XOR, reg_null_d, op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - - case ir_binop_logic_or: - inst = emit(BRW_OPCODE_OR, reg_null_d, op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - - case ir_binop_logic_and: - inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - - case ir_unop_f2b: - if (intel->gen >= 6) { - inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0.0f)); - } else { - inst = emit(BRW_OPCODE_MOV, reg_null_f, op[0]); - } - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - - case ir_unop_i2b: - if (intel->gen >= 6) { - inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0)); - } else { - inst = emit(BRW_OPCODE_MOV, reg_null_d, op[0]); - } - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - - case ir_binop_greater: - case ir_binop_gequal: - case ir_binop_less: - case ir_binop_lequal: - case ir_binop_equal: - case ir_binop_all_equal: - case ir_binop_nequal: - case ir_binop_any_nequal: - inst = emit(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]); - inst->conditional_mod = - brw_conditional_for_comparison(expr->operation); - break; - - default: - assert(!"not reached"); - fail("bad cond code\n"); - break; - } - return; - } - - ir->accept(this); - - if (intel->gen >= 6) { - fs_inst *inst = emit(BRW_OPCODE_AND, reg_null_d, this->result, fs_reg(1)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } else { - fs_inst *inst = emit(BRW_OPCODE_MOV, reg_null_d, this->result); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } -} - -/** - * Emit a gen6 IF statement with the comparison folded into the IF - * instruction. - */ -void -fs_visitor::emit_if_gen6(ir_if *ir) -{ - ir_expression *expr = ir->condition->as_expression(); - - if (expr) { - fs_reg op[2]; - fs_inst *inst; - fs_reg temp; - - assert(expr->get_num_operands() <= 2); - for (unsigned int i = 0; i < expr->get_num_operands(); i++) { - assert(expr->operands[i]->type->is_scalar()); - - expr->operands[i]->accept(this); - op[i] = this->result; - } - - switch (expr->operation) { - case ir_unop_logic_not: - inst = emit(BRW_OPCODE_IF, temp, op[0], fs_reg(0)); - inst->conditional_mod = BRW_CONDITIONAL_Z; - return; - - case ir_binop_logic_xor: - inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - return; - - case ir_binop_logic_or: - temp = fs_reg(this, glsl_type::bool_type); - emit(BRW_OPCODE_OR, temp, op[0], op[1]); - inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - return; - - case ir_binop_logic_and: - temp = fs_reg(this, glsl_type::bool_type); - emit(BRW_OPCODE_AND, temp, op[0], op[1]); - inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - return; - - case ir_unop_f2b: - inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - return; - - case ir_unop_i2b: - inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - return; - - case ir_binop_greater: - case ir_binop_gequal: - case ir_binop_less: - case ir_binop_lequal: - case ir_binop_equal: - case ir_binop_all_equal: - case ir_binop_nequal: - case ir_binop_any_nequal: - inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]); - inst->conditional_mod = - brw_conditional_for_comparison(expr->operation); - return; - default: - assert(!"not reached"); - inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - fail("bad condition\n"); - return; - } - return; - } - - ir->condition->accept(this); - - fs_inst *inst = emit(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0)); - inst->conditional_mod = BRW_CONDITIONAL_NZ; -} - -void -fs_visitor::visit(ir_if *ir) -{ - fs_inst *inst; - - if (intel->gen != 6 && c->dispatch_width == 16) { - fail("Can't support (non-uniform) control flow on 16-wide\n"); - } - - /* Don't point the annotation at the if statement, because then it plus - * the then and else blocks get printed. - */ - this->base_ir = ir->condition; - - if (intel->gen == 6) { - emit_if_gen6(ir); - } else { - emit_bool_to_cond_code(ir->condition); - - inst = emit(BRW_OPCODE_IF); - inst->predicated = true; - } - - foreach_iter(exec_list_iterator, iter, ir->then_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); - this->base_ir = ir; - - ir->accept(this); - } - - if (!ir->else_instructions.is_empty()) { - emit(BRW_OPCODE_ELSE); - - foreach_iter(exec_list_iterator, iter, ir->else_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); - this->base_ir = ir; - - ir->accept(this); - } - } - - emit(BRW_OPCODE_ENDIF); -} - -void -fs_visitor::visit(ir_loop *ir) -{ - fs_reg counter = reg_undef; - - if (c->dispatch_width == 16) { - fail("Can't support (non-uniform) control flow on 16-wide\n"); - } - - if (ir->counter) { - this->base_ir = ir->counter; - ir->counter->accept(this); - counter = *(variable_storage(ir->counter)); - - if (ir->from) { - this->base_ir = ir->from; - ir->from->accept(this); - - emit(BRW_OPCODE_MOV, counter, this->result); - } - } - - emit(BRW_OPCODE_DO); - - if (ir->to) { - this->base_ir = ir->to; - ir->to->accept(this); - - fs_inst *inst = emit(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result); - inst->conditional_mod = brw_conditional_for_comparison(ir->cmp); - - inst = emit(BRW_OPCODE_BREAK); - inst->predicated = true; - } - - foreach_iter(exec_list_iterator, iter, ir->body_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); - - this->base_ir = ir; - ir->accept(this); - } - - if (ir->increment) { - this->base_ir = ir->increment; - ir->increment->accept(this); - emit(BRW_OPCODE_ADD, counter, counter, this->result); - } - - emit(BRW_OPCODE_WHILE); -} - -void -fs_visitor::visit(ir_loop_jump *ir) -{ - switch (ir->mode) { - case ir_loop_jump::jump_break: - emit(BRW_OPCODE_BREAK); - break; - case ir_loop_jump::jump_continue: - emit(BRW_OPCODE_CONTINUE); - break; - } -} - -void -fs_visitor::visit(ir_call *ir) -{ - assert(!"FINISHME"); -} - -void -fs_visitor::visit(ir_return *ir) -{ - assert(!"FINISHME"); -} - -void -fs_visitor::visit(ir_function *ir) -{ - /* Ignore function bodies other than main() -- we shouldn't see calls to - * them since they should all be inlined before we get to ir_to_mesa. - */ - if (strcmp(ir->name, "main") == 0) { - const ir_function_signature *sig; - exec_list empty; - - sig = ir->matching_signature(&empty); - - assert(sig); - - foreach_iter(exec_list_iterator, iter, sig->body) { - ir_instruction *ir = (ir_instruction *)iter.get(); - this->base_ir = ir; - - ir->accept(this); - } - } -} - -void -fs_visitor::visit(ir_function_signature *ir) -{ - assert(!"not reached"); - (void)ir; -} - -fs_inst * -fs_visitor::emit(fs_inst inst) -{ - fs_inst *list_inst = new(mem_ctx) fs_inst; - *list_inst = inst; - - if (force_uncompressed_stack > 0) - list_inst->force_uncompressed = true; - else if (force_sechalf_stack > 0) - list_inst->force_sechalf = true; - - list_inst->annotation = this->current_annotation; - list_inst->ir = this->base_ir; - - this->instructions.push_tail(list_inst); - - return list_inst; -} - -/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ -void -fs_visitor::emit_dummy_fs() -{ - /* Everyone's favorite color. */ - emit(BRW_OPCODE_MOV, fs_reg(MRF, 2), fs_reg(1.0f)); - emit(BRW_OPCODE_MOV, fs_reg(MRF, 3), fs_reg(0.0f)); - emit(BRW_OPCODE_MOV, fs_reg(MRF, 4), fs_reg(1.0f)); - emit(BRW_OPCODE_MOV, fs_reg(MRF, 5), fs_reg(0.0f)); - - fs_inst *write; - write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0)); - write->base_mrf = 0; -} - -/* The register location here is relative to the start of the URB - * data. It will get adjusted to be a real location before - * generate_code() time. - */ -struct brw_reg -fs_visitor::interp_reg(int location, int channel) -{ - int regnr = urb_setup[location] * 2 + channel / 2; - int stride = (channel & 1) * 4; - - assert(urb_setup[location] != -1); - - return brw_vec1_grf(regnr, stride); -} - -/** Emits the interpolation for the varying inputs. */ -void -fs_visitor::emit_interpolation_setup_gen4() -{ - this->current_annotation = "compute pixel centers"; - this->pixel_x = fs_reg(this, glsl_type::uint_type); - this->pixel_y = fs_reg(this, glsl_type::uint_type); - this->pixel_x.type = BRW_REGISTER_TYPE_UW; - this->pixel_y.type = BRW_REGISTER_TYPE_UW; - - emit(FS_OPCODE_PIXEL_X, this->pixel_x); - emit(FS_OPCODE_PIXEL_Y, this->pixel_y); - - this->current_annotation = "compute pixel deltas from v0"; - if (brw->has_pln) { - this->delta_x = fs_reg(this, glsl_type::vec2_type); - this->delta_y = this->delta_x; - this->delta_y.reg_offset++; - } else { - this->delta_x = fs_reg(this, glsl_type::float_type); - this->delta_y = fs_reg(this, glsl_type::float_type); - } - emit(BRW_OPCODE_ADD, this->delta_x, - this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0)))); - emit(BRW_OPCODE_ADD, this->delta_y, - this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))); - - this->current_annotation = "compute pos.w and 1/pos.w"; - /* Compute wpos.w. It's always in our setup, since it's needed to - * interpolate the other attributes. - */ - this->wpos_w = fs_reg(this, glsl_type::float_type); - emit(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y, - interp_reg(FRAG_ATTRIB_WPOS, 3)); - /* Compute the pixel 1/W value from wpos.w. */ - this->pixel_w = fs_reg(this, glsl_type::float_type); - emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); - this->current_annotation = NULL; -} - -/** Emits the interpolation for the varying inputs. */ -void -fs_visitor::emit_interpolation_setup_gen6() -{ - struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); - - /* If the pixel centers end up used, the setup is the same as for gen4. */ - this->current_annotation = "compute pixel centers"; - fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type); - fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type); - int_pixel_x.type = BRW_REGISTER_TYPE_UW; - int_pixel_y.type = BRW_REGISTER_TYPE_UW; - emit(BRW_OPCODE_ADD, - int_pixel_x, - fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), - fs_reg(brw_imm_v(0x10101010))); - emit(BRW_OPCODE_ADD, - int_pixel_y, - fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), - fs_reg(brw_imm_v(0x11001100))); - - /* As of gen6, we can no longer mix float and int sources. We have - * to turn the integer pixel centers into floats for their actual - * use. - */ - this->pixel_x = fs_reg(this, glsl_type::float_type); - this->pixel_y = fs_reg(this, glsl_type::float_type); - emit(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x); - emit(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y); - - this->current_annotation = "compute pos.w"; - this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); - this->wpos_w = fs_reg(this, glsl_type::float_type); - emit_math(FS_OPCODE_RCP, this->wpos_w, this->pixel_w); - - this->delta_x = fs_reg(brw_vec8_grf(2, 0)); - this->delta_y = fs_reg(brw_vec8_grf(3, 0)); - - this->current_annotation = NULL; -} - -void -fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) -{ - int reg_width = c->dispatch_width / 8; - - if (c->dispatch_width == 8 || intel->gen == 6) { - /* SIMD8 write looks like: - * m + 0: r0 - * m + 1: r1 - * m + 2: g0 - * m + 3: g1 - * - * gen6 SIMD16 DP write looks like: - * m + 0: r0 - * m + 1: r1 - * m + 2: g0 - * m + 3: g1 - * m + 4: b0 - * m + 5: b1 - * m + 6: a0 - * m + 7: a1 - */ - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index * reg_width), - color); - } else { - /* pre-gen6 SIMD16 single source DP write looks like: - * m + 0: r0 - * m + 1: g0 - * m + 2: b0 - * m + 3: a0 - * m + 4: r1 - * m + 5: g1 - * m + 6: b1 - * m + 7: a1 - */ - if (brw->has_compr4) { - /* By setting the high bit of the MRF register number, we - * indicate that we want COMPR4 mode - instead of doing the - * usual destination + 1 for the second half we get - * destination + 4. - */ - emit(BRW_OPCODE_MOV, - fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), color); - } else { - push_force_uncompressed(); - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color); - pop_force_uncompressed(); - - push_force_sechalf(); - color.sechalf = true; - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color); - pop_force_sechalf(); - color.sechalf = false; - } - } -} - -void -fs_visitor::emit_fb_writes() -{ - this->current_annotation = "FB write header"; - GLboolean header_present = GL_TRUE; - int nr = 0; - int reg_width = c->dispatch_width / 8; - - if (intel->gen >= 6 && - !this->kill_emitted && - c->key.nr_color_regions == 1) { - header_present = false; - } - - if (header_present) { - /* m0, m1 header */ - nr += 2; - } - - if (c->aa_dest_stencil_reg) { - push_force_uncompressed(); - emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0))); - pop_force_uncompressed(); - } - - /* Reserve space for color. It'll be filled in per MRT below. */ - int color_mrf = nr; - nr += 4 * reg_width; - - if (c->source_depth_to_render_target) { - if (intel->gen == 6 && c->dispatch_width == 16) { - /* For outputting oDepth on gen6, SIMD8 writes have to be - * used. This would require 8-wide moves of each half to - * message regs, kind of like pre-gen5 SIMD16 FB writes. - * Just bail on doing so for now. - */ - fail("Missing support for simd16 depth writes on gen6\n"); - } - - if (c->computes_depth) { - /* Hand over gl_FragDepth. */ - assert(this->frag_depth); - fs_reg depth = *(variable_storage(this->frag_depth)); - - emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), depth); - } else { - /* Pass through the payload depth. */ - emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), - fs_reg(brw_vec8_grf(c->source_depth_reg, 0))); - } - nr += reg_width; - } - - if (c->dest_depth_reg) { - emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), - fs_reg(brw_vec8_grf(c->dest_depth_reg, 0))); - nr += reg_width; - } - - fs_reg color = reg_undef; - if (this->frag_color) - color = *(variable_storage(this->frag_color)); - else if (this->frag_data) { - color = *(variable_storage(this->frag_data)); - color.type = BRW_REGISTER_TYPE_F; - } - - for (int target = 0; target < c->key.nr_color_regions; target++) { - this->current_annotation = ralloc_asprintf(this->mem_ctx, - "FB write target %d", - target); - if (this->frag_color || this->frag_data) { - for (int i = 0; i < 4; i++) { - emit_color_write(i, color_mrf, color); - color.reg_offset++; - } - } - - if (this->frag_color) - color.reg_offset -= 4; - - fs_inst *inst = emit(FS_OPCODE_FB_WRITE); - inst->target = target; - inst->base_mrf = 0; - inst->mlen = nr; - if (target == c->key.nr_color_regions - 1) - inst->eot = true; - inst->header_present = header_present; - } - - if (c->key.nr_color_regions == 0) { - if (c->key.alpha_test && (this->frag_color || this->frag_data)) { - /* If the alpha test is enabled but there's no color buffer, - * we still need to send alpha out the pipeline to our null - * renderbuffer. - */ - color.reg_offset += 3; - emit_color_write(3, color_mrf, color); - } - - fs_inst *inst = emit(FS_OPCODE_FB_WRITE); - inst->base_mrf = 0; - inst->mlen = nr; - inst->eot = true; - inst->header_present = header_present; - } - - this->current_annotation = NULL; -} - -void -fs_visitor::generate_fb_write(fs_inst *inst) -{ - GLboolean eot = inst->eot; - struct brw_reg implied_header; - - /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied - * move, here's g1. - */ - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - - if (inst->header_present) { - if (intel->gen >= 6) { - brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); - brw_MOV(p, - retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD), - retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - - if (inst->target > 0) { - /* Set the render target index for choosing BLEND_STATE. */ - brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2), - BRW_REGISTER_TYPE_UD), - brw_imm_ud(inst->target)); - } - - implied_header = brw_null_reg(); - } else { - implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); - - brw_MOV(p, - brw_message_reg(inst->base_mrf + 1), - brw_vec8_grf(1, 0)); - } - } else { - implied_header = brw_null_reg(); - } - - brw_pop_insn_state(p); - - brw_fb_WRITE(p, - c->dispatch_width, - inst->base_mrf, - implied_header, - inst->target, - inst->mlen, - 0, - eot, - inst->header_present); -} - -/* Computes the integer pixel x,y values from the origin. - * - * This is the basis of gl_FragCoord computation, but is also used - * pre-gen6 for computing the deltas from v0 for computing - * interpolation. - */ -void -fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x) -{ - struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); - struct brw_reg src; - struct brw_reg deltas; - - if (is_x) { - src = stride(suboffset(g1_uw, 4), 2, 4, 0); - deltas = brw_imm_v(0x10101010); - } else { - src = stride(suboffset(g1_uw, 5), 2, 4, 0); - deltas = brw_imm_v(0x11001100); - } - - if (c->dispatch_width == 16) { - dst = vec16(dst); - } - - /* We do this 8 or 16-wide, but since the destination is UW we - * don't do compression in the 16-wide case. - */ - brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_ADD(p, dst, src, deltas); - brw_pop_insn_state(p); -} - -void -fs_visitor::generate_linterp(fs_inst *inst, - struct brw_reg dst, struct brw_reg *src) -{ - struct brw_reg delta_x = src[0]; - struct brw_reg delta_y = src[1]; - struct brw_reg interp = src[2]; - - if (brw->has_pln && - delta_y.nr == delta_x.nr + 1 && - (intel->gen >= 6 || (delta_x.nr & 1) == 0)) { - brw_PLN(p, dst, interp, delta_x); - } else { - brw_LINE(p, brw_null_reg(), interp, delta_x); - brw_MAC(p, dst, suboffset(interp, 1), delta_y); - } -} - -void -fs_visitor::generate_math(fs_inst *inst, - struct brw_reg dst, struct brw_reg *src) -{ - int op; - - switch (inst->opcode) { - case FS_OPCODE_RCP: - op = BRW_MATH_FUNCTION_INV; - break; - case FS_OPCODE_RSQ: - op = BRW_MATH_FUNCTION_RSQ; - break; - case FS_OPCODE_SQRT: - op = BRW_MATH_FUNCTION_SQRT; - break; - case FS_OPCODE_EXP2: - op = BRW_MATH_FUNCTION_EXP; - break; - case FS_OPCODE_LOG2: - op = BRW_MATH_FUNCTION_LOG; - break; - case FS_OPCODE_POW: - op = BRW_MATH_FUNCTION_POW; - break; - case FS_OPCODE_SIN: - op = BRW_MATH_FUNCTION_SIN; - break; - case FS_OPCODE_COS: - op = BRW_MATH_FUNCTION_COS; - break; - default: - assert(!"not reached: unknown math function"); - op = 0; - break; - } - - if (intel->gen >= 6) { - assert(inst->mlen == 0); - - if (inst->opcode == FS_OPCODE_POW) { - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_math2(p, dst, op, src[0], src[1]); - - if (c->dispatch_width == 16) { - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1])); - brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); - } - } else { - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_math(p, dst, - op, - inst->saturate ? BRW_MATH_SATURATE_SATURATE : - BRW_MATH_SATURATE_NONE, - 0, src[0], - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); - - if (c->dispatch_width == 16) { - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_math(p, sechalf(dst), - op, - inst->saturate ? BRW_MATH_SATURATE_SATURATE : - BRW_MATH_SATURATE_NONE, - 0, sechalf(src[0]), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); - brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); - } - } - } else /* gen <= 5 */{ - assert(inst->mlen >= 1); - - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_math(p, dst, - op, - inst->saturate ? BRW_MATH_SATURATE_SATURATE : - BRW_MATH_SATURATE_NONE, - inst->base_mrf, src[0], - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); - - if (c->dispatch_width == 16) { - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_math(p, sechalf(dst), - op, - inst->saturate ? BRW_MATH_SATURATE_SATURATE : - BRW_MATH_SATURATE_NONE, - inst->base_mrf + 1, sechalf(src[0]), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); - - brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); - } - } -} - -void -fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) -{ - int msg_type = -1; - int rlen = 4; - uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; - - if (c->dispatch_width == 16) - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; - - if (intel->gen >= 5) { - switch (inst->opcode) { - case FS_OPCODE_TEX: - if (inst->shadow_compare) { - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE; - } else { - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE; - } - break; - case FS_OPCODE_TXB: - if (inst->shadow_compare) { - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE; - } else { - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS; - } - break; - case FS_OPCODE_TXL: - if (inst->shadow_compare) { - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE; - } else { - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; - } - break; - case FS_OPCODE_TXD: - assert(!"TXD isn't supported on gen5+ yet."); - break; - } - } else { - switch (inst->opcode) { - case FS_OPCODE_TEX: - /* Note that G45 and older determines shadow compare and dispatch width - * from message length for most messages. - */ - assert(c->dispatch_width == 8); - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; - if (inst->shadow_compare) { - assert(inst->mlen == 6); - } else { - assert(inst->mlen <= 4); - } - break; - case FS_OPCODE_TXB: - if (inst->shadow_compare) { - assert(inst->mlen == 6); - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE; - } else { - assert(inst->mlen == 9); - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; - } - break; - case FS_OPCODE_TXL: - if (inst->shadow_compare) { - assert(inst->mlen == 6); - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE; - } else { - assert(inst->mlen == 9); - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD; - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; - } - break; - case FS_OPCODE_TXD: - assert(!"TXD isn't supported on gen4 yet."); - break; - } - } - assert(msg_type != -1); - - if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) { - rlen = 8; - dst = vec16(dst); - } - - brw_SAMPLE(p, - retype(dst, BRW_REGISTER_TYPE_UW), - inst->base_mrf, - src, - SURF_INDEX_TEXTURE(inst->sampler), - inst->sampler, - WRITEMASK_XYZW, - msg_type, - rlen, - inst->mlen, - 0, - inst->header_present, - simd_mode); -} - - -/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input - * looking like: - * - * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br - * - * and we're trying to produce: - * - * DDX DDY - * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) - * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) - * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) - * (ss0.br - ss0.bl) (ss0.tr - ss0.br) - * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) - * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) - * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) - * (ss1.br - ss1.bl) (ss1.tr - ss1.br) - * - * and add another set of two more subspans if in 16-pixel dispatch mode. - * - * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result - * for each pair, and vertstride = 2 jumps us 2 elements after processing a - * pair. But for DDY, it's harder, as we want to produce the pairs swizzled - * between each other. We could probably do it like ddx and swizzle the right - * order later, but bail for now and just produce - * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) - */ -void -fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src) -{ - struct brw_reg src0 = brw_reg(src.file, src.nr, 1, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_2, - BRW_WIDTH_2, - BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); - struct brw_reg src1 = brw_reg(src.file, src.nr, 0, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_2, - BRW_WIDTH_2, - BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); - brw_ADD(p, dst, src0, negate(src1)); -} - -void -fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src) -{ - struct brw_reg src0 = brw_reg(src.file, src.nr, 0, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_4, - BRW_WIDTH_4, - BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); - struct brw_reg src1 = brw_reg(src.file, src.nr, 2, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_4, - BRW_WIDTH_4, - BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); - brw_ADD(p, dst, src0, negate(src1)); -} - -void -fs_visitor::generate_discard(fs_inst *inst) -{ - struct brw_reg f0 = brw_flag_reg(); - - if (intel->gen >= 6) { - struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); - struct brw_reg some_register; - - /* As of gen6, we no longer have the mask register to look at, - * so life gets a bit more complicated. - */ - - /* Load the flag register with all ones. */ - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_MOV(p, f0, brw_imm_uw(0xffff)); - brw_pop_insn_state(p); - - /* Do a comparison that should always fail, to produce 0s in the flag - * reg where we have active channels. - */ - some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); - brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), - BRW_CONDITIONAL_NZ, some_register, some_register); - - /* Undo CMP's whacking of predication*/ - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_AND(p, g1, f0, g1); - brw_pop_insn_state(p); - } else { - struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); - struct brw_reg mask = brw_uw1_reg(mask.file, mask.nr, 0); - - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - - /* Unlike the 965, we have the mask reg, so we just need - * somewhere to invert that (containing channels to be disabled) - * so it can be ANDed with the mask of pixels still to be - * written. Use the flag reg for consistency with gen6+. - */ - brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */ - brw_AND(p, g0, f0, g0); - - brw_pop_insn_state(p); - } -} - -void -fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src) -{ - assert(inst->mlen != 0); - - brw_MOV(p, - retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD), - retype(src, BRW_REGISTER_TYPE_UD)); - brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1, - inst->offset); -} - -void -fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst) -{ - assert(inst->mlen != 0); - - /* Clear any post destination dependencies that would be ignored by - * the block read. See the B-Spec for pre-gen5 send instruction. - * - * This could use a better solution, since texture sampling and - * math reads could potentially run into it as well -- anywhere - * that we have a SEND with a destination that is a register that - * was written but not read within the last N instructions (what's - * N? unsure). This is rare because of dead code elimination, but - * not impossible. - */ - if (intel->gen == 4 && !intel->is_g4x) - brw_MOV(p, brw_null_reg(), dst); - - brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1, - inst->offset); - - if (intel->gen == 4 && !intel->is_g4x) { - /* gen4 errata: destination from a send can't be used as a - * destination until it's been read. Just read it so we don't - * have to worry. - */ - brw_MOV(p, brw_null_reg(), dst); - } -} - - -void -fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst) -{ - assert(inst->mlen != 0); - - /* Clear any post destination dependencies that would be ignored by - * the block read. See the B-Spec for pre-gen5 send instruction. - * - * This could use a better solution, since texture sampling and - * math reads could potentially run into it as well -- anywhere - * that we have a SEND with a destination that is a register that - * was written but not read within the last N instructions (what's - * N? unsure). This is rare because of dead code elimination, but - * not impossible. - */ - if (intel->gen == 4 && !intel->is_g4x) - brw_MOV(p, brw_null_reg(), dst); - - brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf), - inst->offset, SURF_INDEX_FRAG_CONST_BUFFER); - - if (intel->gen == 4 && !intel->is_g4x) { - /* gen4 errata: destination from a send can't be used as a - * destination until it's been read. Just read it so we don't - * have to worry. - */ - brw_MOV(p, brw_null_reg(), dst); - } -} - /** * To be called after the last _mesa_add_state_reference() call, to * set up prog_data.param[] for assign_curb_setup() and @@ -2892,7 +649,7 @@ fs_visitor::calculate_urb_setup() /* Figure out where each of the incoming setup attributes lands. */ if (intel->gen >= 6) { for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) { - if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)) { + if (fp->Base.InputsRead & BITFIELD64_BIT(i)) { urb_setup[i] = urb_next++; } } @@ -3276,12 +1033,16 @@ fs_visitor::propagate_constants() scan_inst->src[i] = inst->src[0]; progress = true; } else if (i == 0 && scan_inst->src[1].file != IMM) { - /* Fit this constant in by swapping the operands and - * flipping the predicate - */ scan_inst->src[0] = scan_inst->src[1]; scan_inst->src[1] = inst->src[0]; - scan_inst->predicate_inverse = !scan_inst->predicate_inverse; + + /* If this was predicated, flipping operands means + * we also need to flip the predicate. + */ + if (scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) { + scan_inst->predicate_inverse = + !scan_inst->predicate_inverse; + } progress = true; } break; @@ -3734,355 +1495,6 @@ fs_visitor::virtual_grf_interferes(int a, int b) return start < end; } -static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) -{ - struct brw_reg brw_reg; - - switch (reg->file) { - case GRF: - case ARF: - case MRF: - if (reg->smear == -1) { - brw_reg = brw_vec8_reg(reg->file, - reg->hw_reg, 0); - } else { - brw_reg = brw_vec1_reg(reg->file, - reg->hw_reg, reg->smear); - } - brw_reg = retype(brw_reg, reg->type); - if (reg->sechalf) - brw_reg = sechalf(brw_reg); - break; - case IMM: - switch (reg->type) { - case BRW_REGISTER_TYPE_F: - brw_reg = brw_imm_f(reg->imm.f); - break; - case BRW_REGISTER_TYPE_D: - brw_reg = brw_imm_d(reg->imm.i); - break; - case BRW_REGISTER_TYPE_UD: - brw_reg = brw_imm_ud(reg->imm.u); - break; - default: - assert(!"not reached"); - brw_reg = brw_null_reg(); - break; - } - break; - case FIXED_HW_REG: - brw_reg = reg->fixed_hw_reg; - break; - case BAD_FILE: - /* Probably unused. */ - brw_reg = brw_null_reg(); - break; - case UNIFORM: - assert(!"not reached"); - brw_reg = brw_null_reg(); - break; - default: - assert(!"not reached"); - brw_reg = brw_null_reg(); - break; - } - if (reg->abs) - brw_reg = brw_abs(brw_reg); - if (reg->negate) - brw_reg = negate(brw_reg); - - return brw_reg; -} - -void -fs_visitor::generate_code() -{ - int last_native_inst = p->nr_insn; - const char *last_annotation_string = NULL; - ir_instruction *last_annotation_ir = NULL; - - int loop_stack_array_size = 16; - int loop_stack_depth = 0; - brw_instruction **loop_stack = - rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size); - int *if_depth_in_loop = - rzalloc_array(this->mem_ctx, int, loop_stack_array_size); - - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - printf("Native code for fragment shader %d (%d-wide dispatch):\n", - ctx->Shader.CurrentFragmentProgram->Name, c->dispatch_width); - } - - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); - struct brw_reg src[3], dst; - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - if (last_annotation_ir != inst->ir) { - last_annotation_ir = inst->ir; - if (last_annotation_ir) { - printf(" "); - last_annotation_ir->print(); - printf("\n"); - } - } - if (last_annotation_string != inst->annotation) { - last_annotation_string = inst->annotation; - if (last_annotation_string) - printf(" %s\n", last_annotation_string); - } - } - - for (unsigned int i = 0; i < 3; i++) { - src[i] = brw_reg_from_fs_reg(&inst->src[i]); - } - dst = brw_reg_from_fs_reg(&inst->dst); - - brw_set_conditionalmod(p, inst->conditional_mod); - brw_set_predicate_control(p, inst->predicated); - brw_set_predicate_inverse(p, inst->predicate_inverse); - brw_set_saturate(p, inst->saturate); - - if (inst->force_uncompressed || c->dispatch_width == 8) { - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - } else if (inst->force_sechalf) { - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - } else { - brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); - } - - switch (inst->opcode) { - case BRW_OPCODE_MOV: - brw_MOV(p, dst, src[0]); - break; - case BRW_OPCODE_ADD: - brw_ADD(p, dst, src[0], src[1]); - break; - case BRW_OPCODE_MUL: - brw_MUL(p, dst, src[0], src[1]); - break; - - case BRW_OPCODE_FRC: - brw_FRC(p, dst, src[0]); - break; - case BRW_OPCODE_RNDD: - brw_RNDD(p, dst, src[0]); - break; - case BRW_OPCODE_RNDE: - brw_RNDE(p, dst, src[0]); - break; - case BRW_OPCODE_RNDZ: - brw_RNDZ(p, dst, src[0]); - break; - - case BRW_OPCODE_AND: - brw_AND(p, dst, src[0], src[1]); - break; - case BRW_OPCODE_OR: - brw_OR(p, dst, src[0], src[1]); - break; - case BRW_OPCODE_XOR: - brw_XOR(p, dst, src[0], src[1]); - break; - case BRW_OPCODE_NOT: - brw_NOT(p, dst, src[0]); - break; - case BRW_OPCODE_ASR: - brw_ASR(p, dst, src[0], src[1]); - break; - case BRW_OPCODE_SHR: - brw_SHR(p, dst, src[0], src[1]); - break; - case BRW_OPCODE_SHL: - brw_SHL(p, dst, src[0], src[1]); - break; - - case BRW_OPCODE_CMP: - brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); - break; - case BRW_OPCODE_SEL: - brw_SEL(p, dst, src[0], src[1]); - break; - - case BRW_OPCODE_IF: - if (inst->src[0].file != BAD_FILE) { - /* The instruction has an embedded compare (only allowed on gen6) */ - assert(intel->gen == 6); - gen6_IF(p, inst->conditional_mod, src[0], src[1]); - } else { - brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8); - } - if_depth_in_loop[loop_stack_depth]++; - break; - - case BRW_OPCODE_ELSE: - brw_ELSE(p); - break; - case BRW_OPCODE_ENDIF: - brw_ENDIF(p); - if_depth_in_loop[loop_stack_depth]--; - break; - - case BRW_OPCODE_DO: - loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); - if (loop_stack_array_size <= loop_stack_depth) { - loop_stack_array_size *= 2; - loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *, - loop_stack_array_size); - if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int, - loop_stack_array_size); - } - if_depth_in_loop[loop_stack_depth] = 0; - break; - - case BRW_OPCODE_BREAK: - brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case BRW_OPCODE_CONTINUE: - /* FINISHME: We need to write the loop instruction support still. */ - if (intel->gen >= 6) - gen6_CONT(p, loop_stack[loop_stack_depth - 1]); - else - brw_CONT(p, if_depth_in_loop[loop_stack_depth]); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - - case BRW_OPCODE_WHILE: { - struct brw_instruction *inst0, *inst1; - GLuint br = 1; - - if (intel->gen >= 5) - br = 2; - - assert(loop_stack_depth > 0); - loop_stack_depth--; - inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); - if (intel->gen < 6) { - /* patch all the BREAK/CONT instructions from last BGNLOOP */ - while (inst0 > loop_stack[loop_stack_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); - } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); - } - } - } - } - break; - - case FS_OPCODE_RCP: - case FS_OPCODE_RSQ: - case FS_OPCODE_SQRT: - case FS_OPCODE_EXP2: - case FS_OPCODE_LOG2: - case FS_OPCODE_POW: - case FS_OPCODE_SIN: - case FS_OPCODE_COS: - generate_math(inst, dst, src); - break; - case FS_OPCODE_PIXEL_X: - generate_pixel_xy(dst, true); - break; - case FS_OPCODE_PIXEL_Y: - generate_pixel_xy(dst, false); - break; - case FS_OPCODE_CINTERP: - brw_MOV(p, dst, src[0]); - break; - case FS_OPCODE_LINTERP: - generate_linterp(inst, dst, src); - break; - case FS_OPCODE_TEX: - case FS_OPCODE_TXB: - case FS_OPCODE_TXD: - case FS_OPCODE_TXL: - generate_tex(inst, dst, src[0]); - break; - case FS_OPCODE_DISCARD: - generate_discard(inst); - break; - case FS_OPCODE_DDX: - generate_ddx(inst, dst, src[0]); - break; - case FS_OPCODE_DDY: - generate_ddy(inst, dst, src[0]); - break; - - case FS_OPCODE_SPILL: - generate_spill(inst, src[0]); - break; - - case FS_OPCODE_UNSPILL: - generate_unspill(inst, dst); - break; - - case FS_OPCODE_PULL_CONSTANT_LOAD: - generate_pull_constant_load(inst, dst); - break; - - case FS_OPCODE_FB_WRITE: - generate_fb_write(inst); - break; - default: - if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { - _mesa_problem(ctx, "Unsupported opcode `%s' in FS", - brw_opcodes[inst->opcode].name); - } else { - _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode); - } - fail("unsupported opcode in FS\n"); - } - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { - if (0) { - printf("0x%08x 0x%08x 0x%08x 0x%08x ", - ((uint32_t *)&p->store[i])[3], - ((uint32_t *)&p->store[i])[2], - ((uint32_t *)&p->store[i])[1], - ((uint32_t *)&p->store[i])[0]); - } - brw_disasm(stdout, &p->store[i], intel->gen); - } - } - - last_native_inst = p->nr_insn; - } - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - printf("\n"); - } - - ralloc_free(loop_stack); - ralloc_free(if_depth_in_loop); - - brw_set_uip_jip(p); - - /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS - * emit issues, it doesn't get the jump distances into the output, - * which is often something we want to debug. So this is here in - * case you're doing that. - */ - if (0) { - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - for (unsigned int i = 0; i < p->nr_insn; i++) { - printf("0x%08x 0x%08x 0x%08x 0x%08x ", - ((uint32_t *)&p->store[i])[3], - ((uint32_t *)&p->store[i])[2], - ((uint32_t *)&p->store[i])[1], - ((uint32_t *)&p->store[i])[0]); - brw_disasm(stdout, &p->store[i], intel->gen); - } - } - } -} - bool fs_visitor::run() { @@ -4118,6 +1530,7 @@ fs_visitor::run() foreach_iter(exec_list_iterator, iter, *shader->ir) { ir_instruction *ir = (ir_instruction *)iter.get(); base_ir = ir; + this->result = reg_undef; ir->accept(this); } @@ -4171,9 +1584,9 @@ fs_visitor::run() generate_code(); if (c->dispatch_width == 8) { - c->prog_data.total_grf = grf_used; + c->prog_data.reg_blocks = brw_register_blocks(grf_used); } else { - c->prog_data.total_grf_16 = grf_used; + c->prog_data.reg_blocks_16 = brw_register_blocks(grf_used); c->prog_data.prog_offset_16 = prog_offset_16; /* Make sure we didn't try to sneak in an extra uniform */ @@ -4184,11 +1597,10 @@ fs_visitor::run() } bool -brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) +brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, + struct gl_shader_program *prog) { struct intel_context *intel = &brw->intel; - struct gl_context *ctx = &intel->ctx; - struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram; if (!prog) return false; @@ -4208,16 +1620,17 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) */ c->dispatch_width = 8; - fs_visitor v(c, shader); + fs_visitor v(c, prog, shader); if (!v.run()) { - /* FINISHME: Cleanly fail, test at link time, etc. */ - assert(!"not reached"); + prog->LinkStatus = GL_FALSE; + prog->InfoLog = ralloc_strdup(prog, v.fail_msg); + return false; } if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) { c->dispatch_width = 16; - fs_visitor v2(c, shader); + fs_visitor v2(c, prog, shader); v2.import_uniforms(v.variable_ht); v2.run(); } @@ -4226,3 +1639,73 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) return true; } + +bool +brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_wm_prog_key key; + struct gl_fragment_program *fp = prog->FragmentProgram; + struct brw_fragment_program *bfp = brw_fragment_program(fp); + + if (!fp) + return true; + + memset(&key, 0, sizeof(key)); + + if (fp->UsesKill) + key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) + key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + /* Just assume depth testing. */ + key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + + key.vp_outputs_written |= BITFIELD64_BIT(FRAG_ATTRIB_WPOS); + for (int i = 0; i < FRAG_ATTRIB_MAX; i++) { + int vp_index = -1; + + if (!(fp->Base.InputsRead & BITFIELD64_BIT(i))) + continue; + + key.proj_attrib_mask |= 1 << i; + + if (i <= FRAG_ATTRIB_TEX7) + vp_index = i; + else if (i >= FRAG_ATTRIB_VAR0) + vp_index = i - FRAG_ATTRIB_VAR0 + VERT_RESULT_VAR0; + + if (vp_index >= 0) + key.vp_outputs_written |= BITFIELD64_BIT(vp_index); + } + + key.clamp_fragment_color = true; + + for (int i = 0; i < BRW_MAX_TEX_UNIT; i++) { + /* FINISHME: depth compares might use (0,0,0,W) for example */ + key.tex_swizzles[i] = SWIZZLE_XYZW; + } + + if (fp->Base.InputsRead & FRAG_BIT_WPOS) { + key.drawable_height = ctx->DrawBuffer->Height; + key.render_to_fbo = ctx->DrawBuffer->Name != 0; + } + + key.nr_color_regions = 1; + + key.program_string_id = bfp->id; + + drm_intel_bo *old_prog_bo = brw->wm.prog_bo; + struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data; + brw->wm.prog_bo = NULL; + + bool success = do_wm_prog(brw, prog, bfp, &key); + + drm_intel_bo_unreference(brw->wm.prog_bo); + brw->wm.prog_bo = old_prog_bo; + brw->wm.prog_data = old_prog_data; + + return success; +} diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 4b355c979eb..7570dda1024 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -359,12 +359,14 @@ class fs_visitor : public ir_visitor { public: - fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader) + fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog, + struct brw_shader *shader) { this->c = c; this->p = &c->func; this->brw = p->brw; - this->fp = brw->fragment_program; + this->fp = prog->FragmentProgram; + this->prog = prog; this->intel = &brw->intel; this->ctx = &intel->ctx; this->mem_ctx = ralloc_context(NULL); @@ -466,6 +468,8 @@ public: return emit(fs_inst(opcode, dst, src0, src1, src2)); } + int type_size(const struct glsl_type *type); + bool run(); void setup_paramvalues_refs(); void assign_curb_setup(); @@ -542,6 +546,7 @@ public: struct brw_wm_compile *c; struct brw_compile *p; struct brw_shader *shader; + struct gl_shader_program *prog; void *mem_ctx; exec_list instructions; @@ -570,8 +575,12 @@ public: /** @} */ bool failed; + char *fail_msg; - /* Result of last visit() method. */ + /* On entry to a visit() method, this is the storage for the + * result. On exit, the visit() called may have changed it, in + * which case the parent must use the new storage instead. + */ fs_reg result; fs_reg pixel_x; @@ -590,3 +599,4 @@ public: GLboolean brw_do_channel_expressions(struct exec_list *instructions); GLboolean brw_do_vector_splitting(struct exec_list *instructions); +bool brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog); diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp new file mode 100644 index 00000000000..6b7c434949c --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -0,0 +1,875 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file brw_fs_emit.cpp + * + * This file supports emitting code from the FS LIR to the actual + * native instructions. + */ + +extern "C" { +#include "main/macros.h" +#include "brw_context.h" +#include "brw_eu.h" +} /* extern "C" */ + +#include "brw_fs.h" +#include "../glsl/ir_print_visitor.h" + +void +fs_visitor::generate_fb_write(fs_inst *inst) +{ + GLboolean eot = inst->eot; + struct brw_reg implied_header; + + /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied + * move, here's g1. + */ + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + if (inst->header_present) { + if (intel->gen >= 6) { + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + brw_MOV(p, + retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD), + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + if (inst->target > 0) { + /* Set the render target index for choosing BLEND_STATE. */ + brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2), + BRW_REGISTER_TYPE_UD), + brw_imm_ud(inst->target)); + } + + implied_header = brw_null_reg(); + } else { + implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); + + brw_MOV(p, + brw_message_reg(inst->base_mrf + 1), + brw_vec8_grf(1, 0)); + } + } else { + implied_header = brw_null_reg(); + } + + brw_pop_insn_state(p); + + brw_fb_WRITE(p, + c->dispatch_width, + inst->base_mrf, + implied_header, + inst->target, + inst->mlen, + 0, + eot, + inst->header_present); +} + +/* Computes the integer pixel x,y values from the origin. + * + * This is the basis of gl_FragCoord computation, but is also used + * pre-gen6 for computing the deltas from v0 for computing + * interpolation. + */ +void +fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x) +{ + struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); + struct brw_reg src; + struct brw_reg deltas; + + if (is_x) { + src = stride(suboffset(g1_uw, 4), 2, 4, 0); + deltas = brw_imm_v(0x10101010); + } else { + src = stride(suboffset(g1_uw, 5), 2, 4, 0); + deltas = brw_imm_v(0x11001100); + } + + if (c->dispatch_width == 16) { + dst = vec16(dst); + } + + /* We do this 8 or 16-wide, but since the destination is UW we + * don't do compression in the 16-wide case. + */ + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_ADD(p, dst, src, deltas); + brw_pop_insn_state(p); +} + +void +fs_visitor::generate_linterp(fs_inst *inst, + struct brw_reg dst, struct brw_reg *src) +{ + struct brw_reg delta_x = src[0]; + struct brw_reg delta_y = src[1]; + struct brw_reg interp = src[2]; + + if (brw->has_pln && + delta_y.nr == delta_x.nr + 1 && + (intel->gen >= 6 || (delta_x.nr & 1) == 0)) { + brw_PLN(p, dst, interp, delta_x); + } else { + brw_LINE(p, brw_null_reg(), interp, delta_x); + brw_MAC(p, dst, suboffset(interp, 1), delta_y); + } +} + +void +fs_visitor::generate_math(fs_inst *inst, + struct brw_reg dst, struct brw_reg *src) +{ + int op; + + switch (inst->opcode) { + case FS_OPCODE_RCP: + op = BRW_MATH_FUNCTION_INV; + break; + case FS_OPCODE_RSQ: + op = BRW_MATH_FUNCTION_RSQ; + break; + case FS_OPCODE_SQRT: + op = BRW_MATH_FUNCTION_SQRT; + break; + case FS_OPCODE_EXP2: + op = BRW_MATH_FUNCTION_EXP; + break; + case FS_OPCODE_LOG2: + op = BRW_MATH_FUNCTION_LOG; + break; + case FS_OPCODE_POW: + op = BRW_MATH_FUNCTION_POW; + break; + case FS_OPCODE_SIN: + op = BRW_MATH_FUNCTION_SIN; + break; + case FS_OPCODE_COS: + op = BRW_MATH_FUNCTION_COS; + break; + default: + assert(!"not reached: unknown math function"); + op = 0; + break; + } + + if (intel->gen >= 6) { + assert(inst->mlen == 0); + + if (inst->opcode == FS_OPCODE_POW) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math2(p, dst, op, src[0], src[1]); + + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1])); + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } + } else { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math(p, dst, + op, + inst->saturate ? BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE, + 0, src[0], + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, sechalf(dst), + op, + inst->saturate ? BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE, + 0, sechalf(src[0]), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } + } + } else /* gen <= 5 */{ + assert(inst->mlen >= 1); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math(p, dst, + op, + inst->saturate ? BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE, + inst->base_mrf, src[0], + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, sechalf(dst), + op, + inst->saturate ? BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE, + inst->base_mrf + 1, sechalf(src[0]), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } + } +} + +void +fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) +{ + int msg_type = -1; + int rlen = 4; + uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; + + if (c->dispatch_width == 16) + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; + + if (intel->gen >= 5) { + switch (inst->opcode) { + case FS_OPCODE_TEX: + if (inst->shadow_compare) { + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE; + } else { + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE; + } + break; + case FS_OPCODE_TXB: + if (inst->shadow_compare) { + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE; + } else { + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS; + } + break; + case FS_OPCODE_TXL: + if (inst->shadow_compare) { + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE; + } else { + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; + } + break; + case FS_OPCODE_TXD: + assert(!"TXD isn't supported on gen5+ yet."); + break; + } + } else { + switch (inst->opcode) { + case FS_OPCODE_TEX: + /* Note that G45 and older determines shadow compare and dispatch width + * from message length for most messages. + */ + assert(c->dispatch_width == 8); + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; + if (inst->shadow_compare) { + assert(inst->mlen == 6); + } else { + assert(inst->mlen <= 4); + } + break; + case FS_OPCODE_TXB: + if (inst->shadow_compare) { + assert(inst->mlen == 6); + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE; + } else { + assert(inst->mlen == 9); + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; + } + break; + case FS_OPCODE_TXL: + if (inst->shadow_compare) { + assert(inst->mlen == 6); + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE; + } else { + assert(inst->mlen == 9); + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD; + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; + } + break; + case FS_OPCODE_TXD: + assert(!"TXD isn't supported on gen4 yet."); + break; + } + } + assert(msg_type != -1); + + if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) { + rlen = 8; + dst = vec16(dst); + } + + brw_SAMPLE(p, + retype(dst, BRW_REGISTER_TYPE_UW), + inst->base_mrf, + src, + SURF_INDEX_TEXTURE(inst->sampler), + inst->sampler, + WRITEMASK_XYZW, + msg_type, + rlen, + inst->mlen, + 0, + inst->header_present, + simd_mode); +} + + +/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input + * looking like: + * + * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br + * + * and we're trying to produce: + * + * DDX DDY + * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) + * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) + * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) + * (ss0.br - ss0.bl) (ss0.tr - ss0.br) + * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) + * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) + * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) + * (ss1.br - ss1.bl) (ss1.tr - ss1.br) + * + * and add another set of two more subspans if in 16-pixel dispatch mode. + * + * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result + * for each pair, and vertstride = 2 jumps us 2 elements after processing a + * pair. But for DDY, it's harder, as we want to produce the pairs swizzled + * between each other. We could probably do it like ddx and swizzle the right + * order later, but bail for now and just produce + * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) + */ +void +fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src) +{ + struct brw_reg src0 = brw_reg(src.file, src.nr, 1, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + struct brw_reg src1 = brw_reg(src.file, src.nr, 0, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + brw_ADD(p, dst, src0, negate(src1)); +} + +void +fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src) +{ + struct brw_reg src0 = brw_reg(src.file, src.nr, 0, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + struct brw_reg src1 = brw_reg(src.file, src.nr, 2, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + brw_ADD(p, dst, src0, negate(src1)); +} + +void +fs_visitor::generate_discard(fs_inst *inst) +{ + struct brw_reg f0 = brw_flag_reg(); + + if (intel->gen >= 6) { + struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); + struct brw_reg some_register; + + /* As of gen6, we no longer have the mask register to look at, + * so life gets a bit more complicated. + */ + + /* Load the flag register with all ones. */ + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, f0, brw_imm_uw(0xffff)); + brw_pop_insn_state(p); + + /* Do a comparison that should always fail, to produce 0s in the flag + * reg where we have active channels. + */ + some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); + brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + BRW_CONDITIONAL_NZ, some_register, some_register); + + /* Undo CMP's whacking of predication*/ + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_AND(p, g1, f0, g1); + brw_pop_insn_state(p); + } else { + struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + /* Unlike the 965, we have the mask reg, so we just need + * somewhere to invert that (containing channels to be disabled) + * so it can be ANDed with the mask of pixels still to be + * written. Use the flag reg for consistency with gen6+. + */ + brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */ + brw_AND(p, g0, f0, g0); + + brw_pop_insn_state(p); + } +} + +void +fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src) +{ + assert(inst->mlen != 0); + + brw_MOV(p, + retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD), + retype(src, BRW_REGISTER_TYPE_UD)); + brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1, + inst->offset); +} + +void +fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst) +{ + assert(inst->mlen != 0); + + /* Clear any post destination dependencies that would be ignored by + * the block read. See the B-Spec for pre-gen5 send instruction. + * + * This could use a better solution, since texture sampling and + * math reads could potentially run into it as well -- anywhere + * that we have a SEND with a destination that is a register that + * was written but not read within the last N instructions (what's + * N? unsure). This is rare because of dead code elimination, but + * not impossible. + */ + if (intel->gen == 4 && !intel->is_g4x) + brw_MOV(p, brw_null_reg(), dst); + + brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1, + inst->offset); + + if (intel->gen == 4 && !intel->is_g4x) { + /* gen4 errata: destination from a send can't be used as a + * destination until it's been read. Just read it so we don't + * have to worry. + */ + brw_MOV(p, brw_null_reg(), dst); + } +} + +void +fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst) +{ + assert(inst->mlen != 0); + + /* Clear any post destination dependencies that would be ignored by + * the block read. See the B-Spec for pre-gen5 send instruction. + * + * This could use a better solution, since texture sampling and + * math reads could potentially run into it as well -- anywhere + * that we have a SEND with a destination that is a register that + * was written but not read within the last N instructions (what's + * N? unsure). This is rare because of dead code elimination, but + * not impossible. + */ + if (intel->gen == 4 && !intel->is_g4x) + brw_MOV(p, brw_null_reg(), dst); + + brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf), + inst->offset, SURF_INDEX_FRAG_CONST_BUFFER); + + if (intel->gen == 4 && !intel->is_g4x) { + /* gen4 errata: destination from a send can't be used as a + * destination until it's been read. Just read it so we don't + * have to worry. + */ + brw_MOV(p, brw_null_reg(), dst); + } +} + +static struct brw_reg +brw_reg_from_fs_reg(fs_reg *reg) +{ + struct brw_reg brw_reg; + + switch (reg->file) { + case GRF: + case ARF: + case MRF: + if (reg->smear == -1) { + brw_reg = brw_vec8_reg(reg->file, + reg->hw_reg, 0); + } else { + brw_reg = brw_vec1_reg(reg->file, + reg->hw_reg, reg->smear); + } + brw_reg = retype(brw_reg, reg->type); + if (reg->sechalf) + brw_reg = sechalf(brw_reg); + break; + case IMM: + switch (reg->type) { + case BRW_REGISTER_TYPE_F: + brw_reg = brw_imm_f(reg->imm.f); + break; + case BRW_REGISTER_TYPE_D: + brw_reg = brw_imm_d(reg->imm.i); + break; + case BRW_REGISTER_TYPE_UD: + brw_reg = brw_imm_ud(reg->imm.u); + break; + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; + } + break; + case FIXED_HW_REG: + brw_reg = reg->fixed_hw_reg; + break; + case BAD_FILE: + /* Probably unused. */ + brw_reg = brw_null_reg(); + break; + case UNIFORM: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; + } + if (reg->abs) + brw_reg = brw_abs(brw_reg); + if (reg->negate) + brw_reg = negate(brw_reg); + + return brw_reg; +} + +void +fs_visitor::generate_code() +{ + int last_native_inst = p->nr_insn; + const char *last_annotation_string = NULL; + ir_instruction *last_annotation_ir = NULL; + + int loop_stack_array_size = 16; + int loop_stack_depth = 0; + brw_instruction **loop_stack = + rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size); + int *if_depth_in_loop = + rzalloc_array(this->mem_ctx, int, loop_stack_array_size); + + + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { + printf("Native code for fragment shader %d (%d-wide dispatch):\n", + prog->Name, c->dispatch_width); + } + + foreach_iter(exec_list_iterator, iter, this->instructions) { + fs_inst *inst = (fs_inst *)iter.get(); + struct brw_reg src[3], dst; + + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { + if (last_annotation_ir != inst->ir) { + last_annotation_ir = inst->ir; + if (last_annotation_ir) { + printf(" "); + last_annotation_ir->print(); + printf("\n"); + } + } + if (last_annotation_string != inst->annotation) { + last_annotation_string = inst->annotation; + if (last_annotation_string) + printf(" %s\n", last_annotation_string); + } + } + + for (unsigned int i = 0; i < 3; i++) { + src[i] = brw_reg_from_fs_reg(&inst->src[i]); + } + dst = brw_reg_from_fs_reg(&inst->dst); + + brw_set_conditionalmod(p, inst->conditional_mod); + brw_set_predicate_control(p, inst->predicated); + brw_set_predicate_inverse(p, inst->predicate_inverse); + brw_set_saturate(p, inst->saturate); + + if (inst->force_uncompressed || c->dispatch_width == 8) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + } else if (inst->force_sechalf) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + } else { + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } + + switch (inst->opcode) { + case BRW_OPCODE_MOV: + brw_MOV(p, dst, src[0]); + break; + case BRW_OPCODE_ADD: + brw_ADD(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_MUL: + brw_MUL(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_FRC: + brw_FRC(p, dst, src[0]); + break; + case BRW_OPCODE_RNDD: + brw_RNDD(p, dst, src[0]); + break; + case BRW_OPCODE_RNDE: + brw_RNDE(p, dst, src[0]); + break; + case BRW_OPCODE_RNDZ: + brw_RNDZ(p, dst, src[0]); + break; + + case BRW_OPCODE_AND: + brw_AND(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_OR: + brw_OR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_XOR: + brw_XOR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_NOT: + brw_NOT(p, dst, src[0]); + break; + case BRW_OPCODE_ASR: + brw_ASR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_SHR: + brw_SHR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_SHL: + brw_SHL(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_CMP: + brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); + break; + case BRW_OPCODE_SEL: + brw_SEL(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_IF: + if (inst->src[0].file != BAD_FILE) { + /* The instruction has an embedded compare (only allowed on gen6) */ + assert(intel->gen == 6); + gen6_IF(p, inst->conditional_mod, src[0], src[1]); + } else { + brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8); + } + if_depth_in_loop[loop_stack_depth]++; + break; + + case BRW_OPCODE_ELSE: + brw_ELSE(p); + break; + case BRW_OPCODE_ENDIF: + brw_ENDIF(p); + if_depth_in_loop[loop_stack_depth]--; + break; + + case BRW_OPCODE_DO: + loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); + if (loop_stack_array_size <= loop_stack_depth) { + loop_stack_array_size *= 2; + loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *, + loop_stack_array_size); + if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int, + loop_stack_array_size); + } + if_depth_in_loop[loop_stack_depth] = 0; + break; + + case BRW_OPCODE_BREAK: + brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case BRW_OPCODE_CONTINUE: + /* FINISHME: We need to write the loop instruction support still. */ + if (intel->gen >= 6) + gen6_CONT(p, loop_stack[loop_stack_depth - 1]); + else + brw_CONT(p, if_depth_in_loop[loop_stack_depth]); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + + case BRW_OPCODE_WHILE: { + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + if (intel->gen >= 5) + br = 2; + + assert(loop_stack_depth > 0); + loop_stack_depth--; + inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); + if (intel->gen < 6) { + /* patch all the BREAK/CONT instructions from last BGNLOOP */ + while (inst0 > loop_stack[loop_stack_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + } + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + } + } + } + } + break; + + case FS_OPCODE_RCP: + case FS_OPCODE_RSQ: + case FS_OPCODE_SQRT: + case FS_OPCODE_EXP2: + case FS_OPCODE_LOG2: + case FS_OPCODE_POW: + case FS_OPCODE_SIN: + case FS_OPCODE_COS: + generate_math(inst, dst, src); + break; + case FS_OPCODE_PIXEL_X: + generate_pixel_xy(dst, true); + break; + case FS_OPCODE_PIXEL_Y: + generate_pixel_xy(dst, false); + break; + case FS_OPCODE_CINTERP: + brw_MOV(p, dst, src[0]); + break; + case FS_OPCODE_LINTERP: + generate_linterp(inst, dst, src); + break; + case FS_OPCODE_TEX: + case FS_OPCODE_TXB: + case FS_OPCODE_TXD: + case FS_OPCODE_TXL: + generate_tex(inst, dst, src[0]); + break; + case FS_OPCODE_DISCARD: + generate_discard(inst); + break; + case FS_OPCODE_DDX: + generate_ddx(inst, dst, src[0]); + break; + case FS_OPCODE_DDY: + generate_ddy(inst, dst, src[0]); + break; + + case FS_OPCODE_SPILL: + generate_spill(inst, src[0]); + break; + + case FS_OPCODE_UNSPILL: + generate_unspill(inst, dst); + break; + + case FS_OPCODE_PULL_CONSTANT_LOAD: + generate_pull_constant_load(inst, dst); + break; + + case FS_OPCODE_FB_WRITE: + generate_fb_write(inst); + break; + default: + if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { + _mesa_problem(ctx, "Unsupported opcode `%s' in FS", + brw_opcodes[inst->opcode].name); + } else { + _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode); + } + fail("unsupported opcode in FS\n"); + } + + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { + for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { + if (0) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)&p->store[i])[3], + ((uint32_t *)&p->store[i])[2], + ((uint32_t *)&p->store[i])[1], + ((uint32_t *)&p->store[i])[0]); + } + brw_disasm(stdout, &p->store[i], intel->gen); + } + } + + last_native_inst = p->nr_insn; + } + + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { + printf("\n"); + } + + ralloc_free(loop_stack); + ralloc_free(if_depth_in_loop); + + brw_set_uip_jip(p); + + /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS + * emit issues, it doesn't get the jump distances into the output, + * which is often something we want to debug. So this is here in + * case you're doing that. + */ + if (0) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { + for (unsigned int i = 0; i < p->nr_insn; i++) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)&p->store[i])[3], + ((uint32_t *)&p->store[i])[2], + ((uint32_t *)&p->store[i])[1], + ((uint32_t *)&p->store[i])[0]); + brw_disasm(stdout, &p->store[i], intel->gen); + } + } + } +} diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index fb1192c810a..d8218c26edb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -140,6 +140,7 @@ public: } void add_barrier_deps(schedule_node *n); void add_dep(schedule_node *before, schedule_node *after, int latency); + void add_dep(schedule_node *before, schedule_node *after); void add_inst(fs_inst *inst); void calculate_deps(); @@ -210,6 +211,15 @@ instruction_scheduler::add_dep(schedule_node *before, schedule_node *after, after->parent_count++; } +void +instruction_scheduler::add_dep(schedule_node *before, schedule_node *after) +{ + if (!before) + return; + + add_dep(before, after, before->latency); +} + /** * Sometimes we really want this node to execute after everything that * was before it and before everything that followed it. This adds @@ -253,6 +263,12 @@ instruction_scheduler::calculate_deps() schedule_node *last_grf_write[virtual_grf_count]; schedule_node *last_mrf_write[BRW_MAX_MRF]; schedule_node *last_conditional_mod = NULL; + /* Fixed HW registers are assumed to be separate from the virtual + * GRFs, so they can be tracked separately. We don't really write + * to fixed GRFs much, so don't bother tracking them on a more + * granular level. + */ + schedule_node *last_fixed_grf_write = NULL; /* The last instruction always needs to still be the last * instruction. Either it's flow control (IF, ELSE, ENDIF, DO, @@ -274,10 +290,11 @@ instruction_scheduler::calculate_deps() /* read-after-write deps. */ for (int i = 0; i < 3; i++) { if (inst->src[i].file == GRF) { - if (last_grf_write[inst->src[i].reg]) { - add_dep(last_grf_write[inst->src[i].reg], n, - last_grf_write[inst->src[i].reg]->latency); - } + add_dep(last_grf_write[inst->src[i].reg], n); + } else if (inst->src[i].file == FIXED_HW_REG && + (inst->src[i].fixed_hw_reg.file == + BRW_GENERAL_REGISTER_FILE)) { + add_dep(last_fixed_grf_write, n); } else if (inst->src[i].file != BAD_FILE && inst->src[i].file != IMM && inst->src[i].file != UNIFORM) { @@ -291,53 +308,41 @@ instruction_scheduler::calculate_deps() * instruction once it's sent, not when the result comes * back. */ - if (last_mrf_write[inst->base_mrf + i]) { - add_dep(last_mrf_write[inst->base_mrf + i], n, - last_mrf_write[inst->base_mrf + i]->latency); - } + add_dep(last_mrf_write[inst->base_mrf + i], n); } if (inst->predicated) { assert(last_conditional_mod); - add_dep(last_conditional_mod, n, last_conditional_mod->latency); + add_dep(last_conditional_mod, n); } /* write-after-write deps. */ if (inst->dst.file == GRF) { - if (last_grf_write[inst->dst.reg]) { - add_dep(last_grf_write[inst->dst.reg], n, - last_grf_write[inst->dst.reg]->latency); - } + add_dep(last_grf_write[inst->dst.reg], n); last_grf_write[inst->dst.reg] = n; } else if (inst->dst.file == MRF) { int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4; - if (last_mrf_write[reg]) { - add_dep(last_mrf_write[reg], n, - last_mrf_write[reg]->latency); - } + add_dep(last_mrf_write[reg], n); last_mrf_write[reg] = n; if (is_compressed(inst)) { if (inst->dst.hw_reg & BRW_MRF_COMPR4) reg += 4; else reg++; - if (last_mrf_write[reg]) { - add_dep(last_mrf_write[reg], n, - last_mrf_write[reg]->latency); - } + add_dep(last_mrf_write[reg], n); last_mrf_write[reg] = n; } + } else if (inst->dst.file == FIXED_HW_REG && + inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) { + last_fixed_grf_write = n; } else if (inst->dst.file != BAD_FILE) { add_barrier_deps(n); } if (inst->mlen > 0) { for (int i = 0; i < v->implied_mrf_writes(inst); i++) { - if (last_mrf_write[inst->base_mrf + i]) { - add_dep(last_mrf_write[inst->base_mrf + i], n, - last_mrf_write[inst->base_mrf + i]->latency); - } + add_dep(last_mrf_write[inst->base_mrf + i], n); last_mrf_write[inst->base_mrf + i] = n; } } @@ -352,6 +357,7 @@ instruction_scheduler::calculate_deps() memset(last_grf_write, 0, sizeof(last_grf_write)); memset(last_mrf_write, 0, sizeof(last_mrf_write)); last_conditional_mod = NULL; + last_fixed_grf_write = NULL; exec_node *node; exec_node *prev; @@ -364,9 +370,11 @@ instruction_scheduler::calculate_deps() /* write-after-read deps. */ for (int i = 0; i < 3; i++) { if (inst->src[i].file == GRF) { - if (last_grf_write[inst->src[i].reg]) { - add_dep(n, last_grf_write[inst->src[i].reg], n->latency); - } + add_dep(n, last_grf_write[inst->src[i].reg]); + } else if (inst->src[i].file == FIXED_HW_REG && + (inst->src[i].fixed_hw_reg.file == + BRW_GENERAL_REGISTER_FILE)) { + add_dep(n, last_fixed_grf_write); } else if (inst->src[i].file != BAD_FILE && inst->src[i].file != IMM && inst->src[i].file != UNIFORM) { @@ -384,9 +392,7 @@ instruction_scheduler::calculate_deps() } if (inst->predicated) { - if (last_conditional_mod) { - add_dep(n, last_conditional_mod, n->latency); - } + add_dep(n, last_conditional_mod); } /* Update the things this instruction wrote, so earlier reads @@ -407,6 +413,9 @@ instruction_scheduler::calculate_deps() last_mrf_write[reg] = n; } + } else if (inst->dst.file == FIXED_HW_REG && + inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) { + last_fixed_grf_write = n; } else if (inst->dst.file != BAD_FILE) { add_barrier_deps(n); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp new file mode 100644 index 00000000000..b4857871c78 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -0,0 +1,1734 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file brw_fs_visitor.cpp + * + * This file supports generating the FS LIR from the GLSL IR. The LIR + * makes it easier to do backend-specific optimizations than doing so + * in the GLSL IR or in the native code. + */ +extern "C" { + +#include <sys/types.h> + +#include "main/macros.h" +#include "main/shaderobj.h" +#include "main/uniforms.h" +#include "program/prog_parameter.h" +#include "program/prog_print.h" +#include "program/prog_optimize.h" +#include "program/register_allocate.h" +#include "program/sampler.h" +#include "program/hash_table.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" +} +#include "brw_shader.h" +#include "brw_fs.h" +#include "../glsl/glsl_types.h" +#include "../glsl/ir_optimization.h" +#include "../glsl/ir_print_visitor.h" + +void +fs_visitor::visit(ir_variable *ir) +{ + fs_reg *reg = NULL; + + if (variable_storage(ir)) + return; + + if (strcmp(ir->name, "gl_FragColor") == 0) { + this->frag_color = ir; + } else if (strcmp(ir->name, "gl_FragData") == 0) { + this->frag_data = ir; + } else if (strcmp(ir->name, "gl_FragDepth") == 0) { + this->frag_depth = ir; + } + + if (ir->mode == ir_var_in) { + if (!strcmp(ir->name, "gl_FragCoord")) { + reg = emit_fragcoord_interpolation(ir); + } else if (!strcmp(ir->name, "gl_FrontFacing")) { + reg = emit_frontfacing_interpolation(ir); + } else { + reg = emit_general_interpolation(ir); + } + assert(reg); + hash_table_insert(this->variable_ht, reg, ir); + return; + } + + if (ir->mode == ir_var_uniform) { + int param_index = c->prog_data.nr_params; + + if (c->dispatch_width == 16) { + if (!variable_storage(ir)) { + fail("Failed to find uniform '%s' in 16-wide\n", ir->name); + } + return; + } + + if (!strncmp(ir->name, "gl_", 3)) { + setup_builtin_uniform_values(ir); + } else { + setup_uniform_values(ir->location, ir->type); + } + + reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); + reg->type = brw_type_for_base_type(ir->type); + } + + if (!reg) + reg = new(this->mem_ctx) fs_reg(this, ir->type); + + hash_table_insert(this->variable_ht, reg, ir); +} + +void +fs_visitor::visit(ir_dereference_variable *ir) +{ + fs_reg *reg = variable_storage(ir->var); + this->result = *reg; +} + +void +fs_visitor::visit(ir_dereference_record *ir) +{ + const glsl_type *struct_type = ir->record->type; + + ir->record->accept(this); + + unsigned int offset = 0; + for (unsigned int i = 0; i < struct_type->length; i++) { + if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) + break; + offset += type_size(struct_type->fields.structure[i].type); + } + this->result.reg_offset += offset; + this->result.type = brw_type_for_base_type(ir->type); +} + +void +fs_visitor::visit(ir_dereference_array *ir) +{ + ir_constant *index; + int element_size; + + ir->array->accept(this); + index = ir->array_index->as_constant(); + + element_size = type_size(ir->type); + this->result.type = brw_type_for_base_type(ir->type); + + if (index) { + assert(this->result.file == UNIFORM || + (this->result.file == GRF && + this->result.reg != 0)); + this->result.reg_offset += index->value.i[0] * element_size; + } else { + assert(!"FINISHME: non-constant array element"); + } +} + +/* Instruction selection: Produce a MOV.sat instead of + * MIN(MAX(val, 0), 1) when possible. + */ +bool +fs_visitor::try_emit_saturate(ir_expression *ir) +{ + ir_rvalue *sat_val = ir->as_rvalue_to_saturate(); + + if (!sat_val) + return false; + + this->result = reg_undef; + sat_val->accept(this); + fs_reg src = this->result; + + this->result = fs_reg(this, ir->type); + fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src); + inst->saturate = true; + + return true; +} + +void +fs_visitor::visit(ir_expression *ir) +{ + unsigned int operand; + fs_reg op[2], temp; + fs_inst *inst; + + assert(ir->get_num_operands() <= 2); + + if (try_emit_saturate(ir)) + return; + + /* This is where our caller would like us to put the result, if possible. */ + fs_reg saved_result_storage = this->result; + + for (operand = 0; operand < ir->get_num_operands(); operand++) { + this->result = reg_undef; + ir->operands[operand]->accept(this); + if (this->result.file == BAD_FILE) { + ir_print_visitor v; + fail("Failed to get tree for expression operand:\n"); + ir->operands[operand]->accept(&v); + } + op[operand] = this->result; + + /* Matrix expression operands should have been broken down to vector + * operations already. + */ + assert(!ir->operands[operand]->type->is_matrix()); + /* And then those vector operands should have been broken down to scalar. + */ + assert(!ir->operands[operand]->type->is_vector()); + } + + /* Inherit storage from our parent if possible, and otherwise we + * alloc a temporary. + */ + if (saved_result_storage.file == BAD_FILE) { + this->result = fs_reg(this, ir->type); + } else { + this->result = saved_result_storage; + } + + switch (ir->operation) { + case ir_unop_logic_not: + /* Note that BRW_OPCODE_NOT is not appropriate here, since it is + * ones complement of the whole register, not just bit 0. + */ + emit(BRW_OPCODE_XOR, this->result, op[0], fs_reg(1)); + break; + case ir_unop_neg: + op[0].negate = !op[0].negate; + this->result = op[0]; + break; + case ir_unop_abs: + op[0].abs = true; + op[0].negate = false; + this->result = op[0]; + break; + case ir_unop_sign: + temp = fs_reg(this, ir->type); + + /* Unalias the destination. (imagine a = sign(a)) */ + this->result = fs_reg(this, ir->type); + + emit(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)); + + inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_G; + inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)); + inst->predicated = true; + + inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_L; + inst = emit(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)); + inst->predicated = true; + + break; + case ir_unop_rcp: + emit_math(FS_OPCODE_RCP, this->result, op[0]); + break; + + case ir_unop_exp2: + emit_math(FS_OPCODE_EXP2, this->result, op[0]); + break; + case ir_unop_log2: + emit_math(FS_OPCODE_LOG2, this->result, op[0]); + break; + case ir_unop_exp: + case ir_unop_log: + assert(!"not reached: should be handled by ir_explog_to_explog2"); + break; + case ir_unop_sin: + case ir_unop_sin_reduced: + emit_math(FS_OPCODE_SIN, this->result, op[0]); + break; + case ir_unop_cos: + case ir_unop_cos_reduced: + emit_math(FS_OPCODE_COS, this->result, op[0]); + break; + + case ir_unop_dFdx: + emit(FS_OPCODE_DDX, this->result, op[0]); + break; + case ir_unop_dFdy: + emit(FS_OPCODE_DDY, this->result, op[0]); + break; + + case ir_binop_add: + emit(BRW_OPCODE_ADD, this->result, op[0], op[1]); + break; + case ir_binop_sub: + assert(!"not reached: should be handled by ir_sub_to_add_neg"); + break; + + case ir_binop_mul: + emit(BRW_OPCODE_MUL, this->result, op[0], op[1]); + break; + case ir_binop_div: + assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + break; + case ir_binop_mod: + assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + break; + + case ir_binop_less: + case ir_binop_greater: + case ir_binop_lequal: + case ir_binop_gequal: + case ir_binop_equal: + case ir_binop_all_equal: + case ir_binop_nequal: + case ir_binop_any_nequal: + temp = this->result; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); + inst->conditional_mod = brw_conditional_for_comparison(ir->operation); + emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)); + break; + + case ir_binop_logic_xor: + emit(BRW_OPCODE_XOR, this->result, op[0], op[1]); + break; + + case ir_binop_logic_or: + emit(BRW_OPCODE_OR, this->result, op[0], op[1]); + break; + + case ir_binop_logic_and: + emit(BRW_OPCODE_AND, this->result, op[0], op[1]); + break; + + case ir_binop_dot: + case ir_unop_any: + assert(!"not reached: should be handled by brw_fs_channel_expressions"); + break; + + case ir_unop_noise: + assert(!"not reached: should be handled by lower_noise"); + break; + + case ir_quadop_vector: + assert(!"not reached: should be handled by lower_quadop_vector"); + break; + + case ir_unop_sqrt: + emit_math(FS_OPCODE_SQRT, this->result, op[0]); + break; + + case ir_unop_rsq: + emit_math(FS_OPCODE_RSQ, this->result, op[0]); + break; + + case ir_unop_i2f: + case ir_unop_b2f: + case ir_unop_b2i: + case ir_unop_f2i: + emit(BRW_OPCODE_MOV, this->result, op[0]); + break; + case ir_unop_f2b: + case ir_unop_i2b: + temp = this->result; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + inst = emit(BRW_OPCODE_AND, this->result, this->result, fs_reg(1)); + break; + + case ir_unop_trunc: + emit(BRW_OPCODE_RNDZ, this->result, op[0]); + break; + case ir_unop_ceil: + op[0].negate = !op[0].negate; + inst = emit(BRW_OPCODE_RNDD, this->result, op[0]); + this->result.negate = true; + break; + case ir_unop_floor: + inst = emit(BRW_OPCODE_RNDD, this->result, op[0]); + break; + case ir_unop_fract: + inst = emit(BRW_OPCODE_FRC, this->result, op[0]); + break; + case ir_unop_round_even: + emit(BRW_OPCODE_RNDE, this->result, op[0]); + break; + + case ir_binop_min: + if (intel->gen >= 6) { + inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_L; + } else { + /* Unalias the destination */ + this->result = fs_reg(this, ir->type); + + inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_L; + + inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); + inst->predicated = true; + } + break; + case ir_binop_max: + if (intel->gen >= 6) { + inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_GE; + } else { + /* Unalias the destination */ + this->result = fs_reg(this, ir->type); + + inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_G; + + inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]); + inst->predicated = true; + } + break; + + case ir_binop_pow: + emit_math(FS_OPCODE_POW, this->result, op[0], op[1]); + break; + + case ir_unop_bit_not: + inst = emit(BRW_OPCODE_NOT, this->result, op[0]); + break; + case ir_binop_bit_and: + inst = emit(BRW_OPCODE_AND, this->result, op[0], op[1]); + break; + case ir_binop_bit_xor: + inst = emit(BRW_OPCODE_XOR, this->result, op[0], op[1]); + break; + case ir_binop_bit_or: + inst = emit(BRW_OPCODE_OR, this->result, op[0], op[1]); + break; + + case ir_unop_u2f: + case ir_binop_lshift: + case ir_binop_rshift: + assert(!"GLSL 1.30 features unsupported"); + break; + } +} + +void +fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, + const glsl_type *type, bool predicated) +{ + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_BOOL: + for (unsigned int i = 0; i < type->components(); i++) { + l.type = brw_type_for_base_type(type); + r.type = brw_type_for_base_type(type); + + if (predicated || !l.equals(&r)) { + fs_inst *inst = emit(BRW_OPCODE_MOV, l, r); + inst->predicated = predicated; + } + + l.reg_offset++; + r.reg_offset++; + } + break; + case GLSL_TYPE_ARRAY: + for (unsigned int i = 0; i < type->length; i++) { + emit_assignment_writes(l, r, type->fields.array, predicated); + } + break; + + case GLSL_TYPE_STRUCT: + for (unsigned int i = 0; i < type->length; i++) { + emit_assignment_writes(l, r, type->fields.structure[i].type, + predicated); + } + break; + + case GLSL_TYPE_SAMPLER: + break; + + default: + assert(!"not reached"); + break; + } +} + +void +fs_visitor::visit(ir_assignment *ir) +{ + struct fs_reg l, r; + fs_inst *inst; + + /* FINISHME: arrays on the lhs */ + this->result = reg_undef; + ir->lhs->accept(this); + l = this->result; + + /* If we're doing a direct assignment, an RHS expression could + * drop its result right into our destination. Otherwise, tell it + * not to. + */ + if (ir->condition || + !(ir->lhs->type->is_scalar() || + (ir->lhs->type->is_vector() && + ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1))) { + this->result = reg_undef; + } + + ir->rhs->accept(this); + r = this->result; + + assert(l.file != BAD_FILE); + assert(r.file != BAD_FILE); + + if (ir->condition) { + emit_bool_to_cond_code(ir->condition); + } + + if (ir->lhs->type->is_scalar() || + ir->lhs->type->is_vector()) { + for (int i = 0; i < ir->lhs->type->vector_elements; i++) { + if (ir->write_mask & (1 << i)) { + if (ir->condition) { + inst = emit(BRW_OPCODE_MOV, l, r); + inst->predicated = true; + } else if (!l.equals(&r)) { + inst = emit(BRW_OPCODE_MOV, l, r); + } + + r.reg_offset++; + } + l.reg_offset++; + } + } else { + emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); + } +} + +fs_inst * +fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, + int sampler) +{ + int mlen; + int base_mrf = 1; + bool simd16 = false; + fs_reg orig_dst; + + /* g0 header. */ + mlen = 1; + + if (ir->shadow_comparitor) { + for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + fs_inst *inst = emit(BRW_OPCODE_MOV, + fs_reg(MRF, base_mrf + mlen + i), coordinate); + if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) + inst->saturate = true; + + coordinate.reg_offset++; + } + /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ + mlen += 3; + + if (ir->op == ir_tex) { + /* There's no plain shadow compare message, so we use shadow + * compare with a bias of 0.0. + */ + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)); + mlen++; + } else if (ir->op == ir_txb) { + this->result = reg_undef; + ir->lod_info.bias->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); + mlen++; + } else { + assert(ir->op == ir_txl); + this->result = reg_undef; + ir->lod_info.lod->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); + mlen++; + } + + this->result = reg_undef; + ir->shadow_comparitor->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); + mlen++; + } else if (ir->op == ir_tex) { + for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), + coordinate); + if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) + inst->saturate = true; + coordinate.reg_offset++; + } + /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ + mlen += 3; + } else if (ir->op == ir_txd) { + assert(!"TXD isn't supported on gen4 yet."); + } else { + /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod + * instructions. We'll need to do SIMD16 here. + */ + assert(ir->op == ir_txb || ir->op == ir_txl); + + for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, + base_mrf + mlen + i * 2), + coordinate); + if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) + inst->saturate = true; + coordinate.reg_offset++; + } + + /* lod/bias appears after u/v/r. */ + mlen += 6; + + if (ir->op == ir_txb) { + this->result = reg_undef; + ir->lod_info.bias->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); + mlen++; + } else { + this->result = reg_undef; + ir->lod_info.lod->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); + mlen++; + } + + /* The unused upper half. */ + mlen++; + + /* Now, since we're doing simd16, the return is 2 interleaved + * vec4s where the odd-indexed ones are junk. We'll need to move + * this weirdness around to the expected layout. + */ + simd16 = true; + orig_dst = dst; + dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, + 2)); + dst.type = BRW_REGISTER_TYPE_F; + } + + fs_inst *inst = NULL; + switch (ir->op) { + case ir_tex: + inst = emit(FS_OPCODE_TEX, dst); + break; + case ir_txb: + inst = emit(FS_OPCODE_TXB, dst); + break; + case ir_txl: + inst = emit(FS_OPCODE_TXL, dst); + break; + case ir_txd: + inst = emit(FS_OPCODE_TXD, dst); + break; + case ir_txf: + assert(!"GLSL 1.30 features unsupported"); + break; + } + inst->base_mrf = base_mrf; + inst->mlen = mlen; + inst->header_present = true; + + if (simd16) { + for (int i = 0; i < 4; i++) { + emit(BRW_OPCODE_MOV, orig_dst, dst); + orig_dst.reg_offset++; + dst.reg_offset += 2; + } + } + + return inst; +} + +/* gen5's sampler has slots for u, v, r, array index, then optional + * parameters like shadow comparitor or LOD bias. If optional + * parameters aren't present, those base slots are optional and don't + * need to be included in the message. + * + * We don't fill in the unnecessary slots regardless, which may look + * surprising in the disassembly. + */ +fs_inst * +fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, + int sampler) +{ + int mlen = 0; + int base_mrf = 2; + int reg_width = c->dispatch_width / 8; + bool header_present = false; + + if (ir->offset) { + /* The offsets set up by the ir_texture visitor are in the + * m1 header, so we can't go headerless. + */ + header_present = true; + mlen++; + base_mrf--; + } + + for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + fs_inst *inst = emit(BRW_OPCODE_MOV, + fs_reg(MRF, base_mrf + mlen + i * reg_width), + coordinate); + if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) + inst->saturate = true; + coordinate.reg_offset++; + } + mlen += ir->coordinate->type->vector_elements * reg_width; + + if (ir->shadow_comparitor) { + mlen = MAX2(mlen, header_present + 4 * reg_width); + + this->result = reg_undef; + ir->shadow_comparitor->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); + mlen += reg_width; + } + + fs_inst *inst = NULL; + switch (ir->op) { + case ir_tex: + inst = emit(FS_OPCODE_TEX, dst); + break; + case ir_txb: + this->result = reg_undef; + ir->lod_info.bias->accept(this); + mlen = MAX2(mlen, header_present + 4 * reg_width); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); + mlen += reg_width; + + inst = emit(FS_OPCODE_TXB, dst); + + break; + case ir_txl: + this->result = reg_undef; + ir->lod_info.lod->accept(this); + mlen = MAX2(mlen, header_present + 4 * reg_width); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); + mlen += reg_width; + + inst = emit(FS_OPCODE_TXL, dst); + break; + case ir_txd: + case ir_txf: + assert(!"GLSL 1.30 features unsupported"); + break; + } + inst->base_mrf = base_mrf; + inst->mlen = mlen; + inst->header_present = header_present; + + if (mlen > 11) { + fail("Message length >11 disallowed by hardware\n"); + } + + return inst; +} + +fs_inst * +fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, + int sampler) +{ + int mlen = 0; + int base_mrf = 2; + int reg_width = c->dispatch_width / 8; + bool header_present = false; + + if (ir->offset) { + /* The offsets set up by the ir_texture visitor are in the + * m1 header, so we can't go headerless. + */ + header_present = true; + mlen++; + base_mrf--; + } + + if (ir->shadow_comparitor) { + ir->shadow_comparitor->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); + mlen += reg_width; + } + + /* Set up the LOD info */ + switch (ir->op) { + case ir_tex: + break; + case ir_txb: + ir->lod_info.bias->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); + mlen += reg_width; + break; + case ir_txl: + ir->lod_info.lod->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); + mlen += reg_width; + break; + case ir_txd: + case ir_txf: + assert(!"GLSL 1.30 features unsupported"); + break; + } + + /* Set up the coordinate */ + for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), + coordinate); + if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler)) + inst->saturate = true; + coordinate.reg_offset++; + mlen += reg_width; + } + + /* Generate the SEND */ + fs_inst *inst = NULL; + switch (ir->op) { + case ir_tex: inst = emit(FS_OPCODE_TEX, dst); break; + case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break; + case ir_txl: inst = emit(FS_OPCODE_TXL, dst); break; + case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break; + case ir_txf: assert(!"TXF unsupported."); + } + inst->base_mrf = base_mrf; + inst->mlen = mlen; + inst->header_present = header_present; + + if (mlen > 11) { + fail("Message length >11 disallowed by hardware\n"); + } + + return inst; +} + +void +fs_visitor::visit(ir_texture *ir) +{ + int sampler; + fs_inst *inst = NULL; + + this->result = reg_undef; + ir->coordinate->accept(this); + fs_reg coordinate = this->result; + + if (ir->offset != NULL) { + ir_constant *offset = ir->offset->as_constant(); + assert(offset != NULL); + + signed char offsets[3]; + for (unsigned i = 0; i < ir->offset->type->vector_elements; i++) + offsets[i] = (signed char) offset->value.i[i]; + + /* Combine all three offsets into a single unsigned dword: + * + * bits 11:8 - U Offset (X component) + * bits 7:4 - V Offset (Y component) + * bits 3:0 - R Offset (Z component) + */ + unsigned offset_bits = 0; + for (unsigned i = 0; i < ir->offset->type->vector_elements; i++) { + const unsigned shift = 4 * (2 - i); + offset_bits |= (offsets[i] << shift) & (0xF << shift); + } + + /* Explicitly set up the message header by copying g0 to msg reg m1. */ + emit(BRW_OPCODE_MOV, fs_reg(MRF, 1, BRW_REGISTER_TYPE_UD), + fs_reg(GRF, 0, BRW_REGISTER_TYPE_UD)); + + /* Then set the offset bits in DWord 2 of the message header. */ + emit(BRW_OPCODE_MOV, + fs_reg(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1, 2), + BRW_REGISTER_TYPE_UD)), + fs_reg(brw_imm_uw(offset_bits))); + } + + /* Should be lowered by do_lower_texture_projection */ + assert(!ir->projector); + + sampler = _mesa_get_sampler_uniform_value(ir->sampler, + prog, + &fp->Base); + sampler = fp->Base.SamplerUnits[sampler]; + + /* The 965 requires the EU to do the normalization of GL rectangle + * texture coordinates. We use the program parameter state + * tracking to get the scaling factor. + */ + if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { + struct gl_program_parameter_list *params = c->fp->program.Base.Parameters; + int tokens[STATE_LENGTH] = { + STATE_INTERNAL, + STATE_TEXRECT_SCALE, + sampler, + 0, + 0 + }; + + if (c->dispatch_width == 16) { + fail("rectangle scale uniform setup not supported on 16-wide\n"); + this->result = fs_reg(this, ir->type); + return; + } + + c->prog_data.param_convert[c->prog_data.nr_params] = + PARAM_NO_CONVERT; + c->prog_data.param_convert[c->prog_data.nr_params + 1] = + PARAM_NO_CONVERT; + + fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params); + fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1); + GLuint index = _mesa_add_state_reference(params, + (gl_state_index *)tokens); + + this->param_index[c->prog_data.nr_params] = index; + this->param_offset[c->prog_data.nr_params] = 0; + c->prog_data.nr_params++; + this->param_index[c->prog_data.nr_params] = index; + this->param_offset[c->prog_data.nr_params] = 1; + c->prog_data.nr_params++; + + fs_reg dst = fs_reg(this, ir->coordinate->type); + fs_reg src = coordinate; + coordinate = dst; + + emit(BRW_OPCODE_MUL, dst, src, scale_x); + dst.reg_offset++; + src.reg_offset++; + emit(BRW_OPCODE_MUL, dst, src, scale_y); + } + + /* Writemasking doesn't eliminate channels on SIMD8 texture + * samples, so don't worry about them. + */ + fs_reg dst = fs_reg(this, glsl_type::vec4_type); + + if (intel->gen >= 7) { + inst = emit_texture_gen7(ir, dst, coordinate, sampler); + } else if (intel->gen >= 5) { + inst = emit_texture_gen5(ir, dst, coordinate, sampler); + } else { + inst = emit_texture_gen4(ir, dst, coordinate, sampler); + } + + /* If there's an offset, we already set up m1. To avoid the implied move, + * use the null register. Otherwise, we want an implied move from g0. + */ + if (ir->offset != NULL || !inst->header_present) + inst->src[0] = reg_undef; + else + inst->src[0] = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); + + inst->sampler = sampler; + + this->result = dst; + + if (ir->shadow_comparitor) + inst->shadow_compare = true; + + if (ir->type == glsl_type::float_type) { + /* Ignore DEPTH_TEXTURE_MODE swizzling. */ + assert(ir->sampler->type->sampler_shadow); + } else if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) { + fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type); + + for (int i = 0; i < 4; i++) { + int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i); + fs_reg l = swizzle_dst; + l.reg_offset += i; + + if (swiz == SWIZZLE_ZERO) { + emit(BRW_OPCODE_MOV, l, fs_reg(0.0f)); + } else if (swiz == SWIZZLE_ONE) { + emit(BRW_OPCODE_MOV, l, fs_reg(1.0f)); + } else { + fs_reg r = dst; + r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i); + emit(BRW_OPCODE_MOV, l, r); + } + } + this->result = swizzle_dst; + } +} + +void +fs_visitor::visit(ir_swizzle *ir) +{ + this->result = reg_undef; + ir->val->accept(this); + fs_reg val = this->result; + + if (ir->type->vector_elements == 1) { + this->result.reg_offset += ir->mask.x; + return; + } + + fs_reg result = fs_reg(this, ir->type); + this->result = result; + + for (unsigned int i = 0; i < ir->type->vector_elements; i++) { + fs_reg channel = val; + int swiz = 0; + + switch (i) { + case 0: + swiz = ir->mask.x; + break; + case 1: + swiz = ir->mask.y; + break; + case 2: + swiz = ir->mask.z; + break; + case 3: + swiz = ir->mask.w; + break; + } + + channel.reg_offset += swiz; + emit(BRW_OPCODE_MOV, result, channel); + result.reg_offset++; + } +} + +void +fs_visitor::visit(ir_discard *ir) +{ + assert(ir->condition == NULL); /* FINISHME */ + + emit(FS_OPCODE_DISCARD); + kill_emitted = true; +} + +void +fs_visitor::visit(ir_constant *ir) +{ + /* Set this->result to reg at the bottom of the function because some code + * paths will cause this visitor to be applied to other fields. This will + * cause the value stored in this->result to be modified. + * + * Make reg constant so that it doesn't get accidentally modified along the + * way. Yes, I actually had this problem. :( + */ + const fs_reg reg(this, ir->type); + fs_reg dst_reg = reg; + + if (ir->type->is_array()) { + const unsigned size = type_size(ir->type->fields.array); + + for (unsigned i = 0; i < ir->type->length; i++) { + this->result = reg_undef; + ir->array_elements[i]->accept(this); + fs_reg src_reg = this->result; + + dst_reg.type = src_reg.type; + for (unsigned j = 0; j < size; j++) { + emit(BRW_OPCODE_MOV, dst_reg, src_reg); + src_reg.reg_offset++; + dst_reg.reg_offset++; + } + } + } else if (ir->type->is_record()) { + foreach_list(node, &ir->components) { + ir_instruction *const field = (ir_instruction *) node; + const unsigned size = type_size(field->type); + + this->result = reg_undef; + field->accept(this); + fs_reg src_reg = this->result; + + dst_reg.type = src_reg.type; + for (unsigned j = 0; j < size; j++) { + emit(BRW_OPCODE_MOV, dst_reg, src_reg); + src_reg.reg_offset++; + dst_reg.reg_offset++; + } + } + } else { + const unsigned size = type_size(ir->type); + + for (unsigned i = 0; i < size; i++) { + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i])); + break; + case GLSL_TYPE_UINT: + emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i])); + break; + case GLSL_TYPE_INT: + emit(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i])); + break; + case GLSL_TYPE_BOOL: + emit(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i])); + break; + default: + assert(!"Non-float/uint/int/bool constant"); + } + dst_reg.reg_offset++; + } + } + + this->result = reg; +} + +void +fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) +{ + ir_expression *expr = ir->as_expression(); + + if (expr) { + fs_reg op[2]; + fs_inst *inst; + + assert(expr->get_num_operands() <= 2); + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + assert(expr->operands[i]->type->is_scalar()); + + this->result = reg_undef; + expr->operands[i]->accept(this); + op[i] = this->result; + } + + switch (expr->operation) { + case ir_unop_logic_not: + inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], fs_reg(1)); + inst->conditional_mod = BRW_CONDITIONAL_Z; + break; + + case ir_binop_logic_xor: + inst = emit(BRW_OPCODE_XOR, reg_null_d, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_logic_or: + inst = emit(BRW_OPCODE_OR, reg_null_d, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_logic_and: + inst = emit(BRW_OPCODE_AND, reg_null_d, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_unop_f2b: + if (intel->gen >= 6) { + inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0.0f)); + } else { + inst = emit(BRW_OPCODE_MOV, reg_null_f, op[0]); + } + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_unop_i2b: + if (intel->gen >= 6) { + inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0)); + } else { + inst = emit(BRW_OPCODE_MOV, reg_null_d, op[0]); + } + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_equal: + case ir_binop_all_equal: + case ir_binop_nequal: + case ir_binop_any_nequal: + inst = emit(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]); + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); + break; + + default: + assert(!"not reached"); + fail("bad cond code\n"); + break; + } + return; + } + + this->result = reg_undef; + ir->accept(this); + + if (intel->gen >= 6) { + fs_inst *inst = emit(BRW_OPCODE_AND, reg_null_d, this->result, fs_reg(1)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } else { + fs_inst *inst = emit(BRW_OPCODE_MOV, reg_null_d, this->result); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } +} + +/** + * Emit a gen6 IF statement with the comparison folded into the IF + * instruction. + */ +void +fs_visitor::emit_if_gen6(ir_if *ir) +{ + ir_expression *expr = ir->condition->as_expression(); + + if (expr) { + fs_reg op[2]; + fs_inst *inst; + fs_reg temp; + + assert(expr->get_num_operands() <= 2); + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + assert(expr->operands[i]->type->is_scalar()); + + this->result = reg_undef; + expr->operands[i]->accept(this); + op[i] = this->result; + } + + switch (expr->operation) { + case ir_unop_logic_not: + inst = emit(BRW_OPCODE_IF, temp, op[0], fs_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_Z; + return; + + case ir_binop_logic_xor: + inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_logic_or: + temp = fs_reg(this, glsl_type::bool_type); + emit(BRW_OPCODE_OR, temp, op[0], op[1]); + inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_logic_and: + temp = fs_reg(this, glsl_type::bool_type); + emit(BRW_OPCODE_AND, temp, op[0], op[1]); + inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_unop_f2b: + inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_unop_i2b: + inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_equal: + case ir_binop_all_equal: + case ir_binop_nequal: + case ir_binop_any_nequal: + inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]); + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); + return; + default: + assert(!"not reached"); + inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + fail("bad condition\n"); + return; + } + return; + } + + this->result = reg_undef; + ir->condition->accept(this); + + fs_inst *inst = emit(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; +} + +void +fs_visitor::visit(ir_if *ir) +{ + fs_inst *inst; + + if (intel->gen != 6 && c->dispatch_width == 16) { + fail("Can't support (non-uniform) control flow on 16-wide\n"); + } + + /* Don't point the annotation at the if statement, because then it plus + * the then and else blocks get printed. + */ + this->base_ir = ir->condition; + + if (intel->gen == 6) { + emit_if_gen6(ir); + } else { + emit_bool_to_cond_code(ir->condition); + + inst = emit(BRW_OPCODE_IF); + inst->predicated = true; + } + + foreach_iter(exec_list_iterator, iter, ir->then_instructions) { + ir_instruction *ir = (ir_instruction *)iter.get(); + this->base_ir = ir; + this->result = reg_undef; + ir->accept(this); + } + + if (!ir->else_instructions.is_empty()) { + emit(BRW_OPCODE_ELSE); + + foreach_iter(exec_list_iterator, iter, ir->else_instructions) { + ir_instruction *ir = (ir_instruction *)iter.get(); + this->base_ir = ir; + this->result = reg_undef; + ir->accept(this); + } + } + + emit(BRW_OPCODE_ENDIF); +} + +void +fs_visitor::visit(ir_loop *ir) +{ + fs_reg counter = reg_undef; + + if (c->dispatch_width == 16) { + fail("Can't support (non-uniform) control flow on 16-wide\n"); + } + + if (ir->counter) { + this->base_ir = ir->counter; + ir->counter->accept(this); + counter = *(variable_storage(ir->counter)); + + if (ir->from) { + this->result = counter; + + this->base_ir = ir->from; + this->result = counter; + ir->from->accept(this); + + if (!this->result.equals(&counter)) + emit(BRW_OPCODE_MOV, counter, this->result); + } + } + + emit(BRW_OPCODE_DO); + + if (ir->to) { + this->base_ir = ir->to; + this->result = reg_undef; + ir->to->accept(this); + + fs_inst *inst = emit(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result); + inst->conditional_mod = brw_conditional_for_comparison(ir->cmp); + + inst = emit(BRW_OPCODE_BREAK); + inst->predicated = true; + } + + foreach_iter(exec_list_iterator, iter, ir->body_instructions) { + ir_instruction *ir = (ir_instruction *)iter.get(); + + this->base_ir = ir; + this->result = reg_undef; + ir->accept(this); + } + + if (ir->increment) { + this->base_ir = ir->increment; + this->result = reg_undef; + ir->increment->accept(this); + emit(BRW_OPCODE_ADD, counter, counter, this->result); + } + + emit(BRW_OPCODE_WHILE); +} + +void +fs_visitor::visit(ir_loop_jump *ir) +{ + switch (ir->mode) { + case ir_loop_jump::jump_break: + emit(BRW_OPCODE_BREAK); + break; + case ir_loop_jump::jump_continue: + emit(BRW_OPCODE_CONTINUE); + break; + } +} + +void +fs_visitor::visit(ir_call *ir) +{ + assert(!"FINISHME"); +} + +void +fs_visitor::visit(ir_return *ir) +{ + assert(!"FINISHME"); +} + +void +fs_visitor::visit(ir_function *ir) +{ + /* Ignore function bodies other than main() -- we shouldn't see calls to + * them since they should all be inlined before we get to ir_to_mesa. + */ + if (strcmp(ir->name, "main") == 0) { + const ir_function_signature *sig; + exec_list empty; + + sig = ir->matching_signature(&empty); + + assert(sig); + + foreach_iter(exec_list_iterator, iter, sig->body) { + ir_instruction *ir = (ir_instruction *)iter.get(); + this->base_ir = ir; + this->result = reg_undef; + ir->accept(this); + } + } +} + +void +fs_visitor::visit(ir_function_signature *ir) +{ + assert(!"not reached"); + (void)ir; +} + +fs_inst * +fs_visitor::emit(fs_inst inst) +{ + fs_inst *list_inst = new(mem_ctx) fs_inst; + *list_inst = inst; + + if (force_uncompressed_stack > 0) + list_inst->force_uncompressed = true; + else if (force_sechalf_stack > 0) + list_inst->force_sechalf = true; + + list_inst->annotation = this->current_annotation; + list_inst->ir = this->base_ir; + + this->instructions.push_tail(list_inst); + + return list_inst; +} + +/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ +void +fs_visitor::emit_dummy_fs() +{ + /* Everyone's favorite color. */ + emit(BRW_OPCODE_MOV, fs_reg(MRF, 2), fs_reg(1.0f)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, 3), fs_reg(0.0f)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, 4), fs_reg(1.0f)); + emit(BRW_OPCODE_MOV, fs_reg(MRF, 5), fs_reg(0.0f)); + + fs_inst *write; + write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0)); + write->base_mrf = 0; +} + +/* The register location here is relative to the start of the URB + * data. It will get adjusted to be a real location before + * generate_code() time. + */ +struct brw_reg +fs_visitor::interp_reg(int location, int channel) +{ + int regnr = urb_setup[location] * 2 + channel / 2; + int stride = (channel & 1) * 4; + + assert(urb_setup[location] != -1); + + return brw_vec1_grf(regnr, stride); +} + +/** Emits the interpolation for the varying inputs. */ +void +fs_visitor::emit_interpolation_setup_gen4() +{ + this->current_annotation = "compute pixel centers"; + this->pixel_x = fs_reg(this, glsl_type::uint_type); + this->pixel_y = fs_reg(this, glsl_type::uint_type); + this->pixel_x.type = BRW_REGISTER_TYPE_UW; + this->pixel_y.type = BRW_REGISTER_TYPE_UW; + + emit(FS_OPCODE_PIXEL_X, this->pixel_x); + emit(FS_OPCODE_PIXEL_Y, this->pixel_y); + + this->current_annotation = "compute pixel deltas from v0"; + if (brw->has_pln) { + this->delta_x = fs_reg(this, glsl_type::vec2_type); + this->delta_y = this->delta_x; + this->delta_y.reg_offset++; + } else { + this->delta_x = fs_reg(this, glsl_type::float_type); + this->delta_y = fs_reg(this, glsl_type::float_type); + } + emit(BRW_OPCODE_ADD, this->delta_x, + this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0)))); + emit(BRW_OPCODE_ADD, this->delta_y, + this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))); + + this->current_annotation = "compute pos.w and 1/pos.w"; + /* Compute wpos.w. It's always in our setup, since it's needed to + * interpolate the other attributes. + */ + this->wpos_w = fs_reg(this, glsl_type::float_type); + emit(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y, + interp_reg(FRAG_ATTRIB_WPOS, 3)); + /* Compute the pixel 1/W value from wpos.w. */ + this->pixel_w = fs_reg(this, glsl_type::float_type); + emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); + this->current_annotation = NULL; +} + +/** Emits the interpolation for the varying inputs. */ +void +fs_visitor::emit_interpolation_setup_gen6() +{ + struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); + + /* If the pixel centers end up used, the setup is the same as for gen4. */ + this->current_annotation = "compute pixel centers"; + fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type); + fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type); + int_pixel_x.type = BRW_REGISTER_TYPE_UW; + int_pixel_y.type = BRW_REGISTER_TYPE_UW; + emit(BRW_OPCODE_ADD, + int_pixel_x, + fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), + fs_reg(brw_imm_v(0x10101010))); + emit(BRW_OPCODE_ADD, + int_pixel_y, + fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), + fs_reg(brw_imm_v(0x11001100))); + + /* As of gen6, we can no longer mix float and int sources. We have + * to turn the integer pixel centers into floats for their actual + * use. + */ + this->pixel_x = fs_reg(this, glsl_type::float_type); + this->pixel_y = fs_reg(this, glsl_type::float_type); + emit(BRW_OPCODE_MOV, this->pixel_x, int_pixel_x); + emit(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y); + + this->current_annotation = "compute pos.w"; + this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); + this->wpos_w = fs_reg(this, glsl_type::float_type); + emit_math(FS_OPCODE_RCP, this->wpos_w, this->pixel_w); + + this->delta_x = fs_reg(brw_vec8_grf(2, 0)); + this->delta_y = fs_reg(brw_vec8_grf(3, 0)); + + this->current_annotation = NULL; +} + +void +fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) +{ + int reg_width = c->dispatch_width / 8; + + if (c->dispatch_width == 8 || intel->gen == 6) { + /* SIMD8 write looks like: + * m + 0: r0 + * m + 1: r1 + * m + 2: g0 + * m + 3: g1 + * + * gen6 SIMD16 DP write looks like: + * m + 0: r0 + * m + 1: r1 + * m + 2: g0 + * m + 3: g1 + * m + 4: b0 + * m + 5: b1 + * m + 6: a0 + * m + 7: a1 + */ + emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index * reg_width), + color); + } else { + /* pre-gen6 SIMD16 single source DP write looks like: + * m + 0: r0 + * m + 1: g0 + * m + 2: b0 + * m + 3: a0 + * m + 4: r1 + * m + 5: g1 + * m + 6: b1 + * m + 7: a1 + */ + if (brw->has_compr4) { + /* By setting the high bit of the MRF register number, we + * indicate that we want COMPR4 mode - instead of doing the + * usual destination + 1 for the second half we get + * destination + 4. + */ + emit(BRW_OPCODE_MOV, + fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), color); + } else { + push_force_uncompressed(); + emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color); + pop_force_uncompressed(); + + push_force_sechalf(); + color.sechalf = true; + emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color); + pop_force_sechalf(); + color.sechalf = false; + } + } +} + +void +fs_visitor::emit_fb_writes() +{ + this->current_annotation = "FB write header"; + GLboolean header_present = GL_TRUE; + int nr = 0; + int reg_width = c->dispatch_width / 8; + + if (intel->gen >= 6 && + !this->kill_emitted && + c->key.nr_color_regions == 1) { + header_present = false; + } + + if (header_present) { + /* m0, m1 header */ + nr += 2; + } + + if (c->aa_dest_stencil_reg) { + push_force_uncompressed(); + emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++), + fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0))); + pop_force_uncompressed(); + } + + /* Reserve space for color. It'll be filled in per MRT below. */ + int color_mrf = nr; + nr += 4 * reg_width; + + if (c->source_depth_to_render_target) { + if (intel->gen == 6 && c->dispatch_width == 16) { + /* For outputting oDepth on gen6, SIMD8 writes have to be + * used. This would require 8-wide moves of each half to + * message regs, kind of like pre-gen5 SIMD16 FB writes. + * Just bail on doing so for now. + */ + fail("Missing support for simd16 depth writes on gen6\n"); + } + + if (c->computes_depth) { + /* Hand over gl_FragDepth. */ + assert(this->frag_depth); + fs_reg depth = *(variable_storage(this->frag_depth)); + + emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), depth); + } else { + /* Pass through the payload depth. */ + emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), + fs_reg(brw_vec8_grf(c->source_depth_reg, 0))); + } + nr += reg_width; + } + + if (c->dest_depth_reg) { + emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), + fs_reg(brw_vec8_grf(c->dest_depth_reg, 0))); + nr += reg_width; + } + + fs_reg color = reg_undef; + if (this->frag_color) + color = *(variable_storage(this->frag_color)); + else if (this->frag_data) { + color = *(variable_storage(this->frag_data)); + color.type = BRW_REGISTER_TYPE_F; + } + + for (int target = 0; target < c->key.nr_color_regions; target++) { + this->current_annotation = ralloc_asprintf(this->mem_ctx, + "FB write target %d", + target); + if (this->frag_color || this->frag_data) { + for (int i = 0; i < 4; i++) { + emit_color_write(i, color_mrf, color); + color.reg_offset++; + } + } + + if (this->frag_color) + color.reg_offset -= 4; + + fs_inst *inst = emit(FS_OPCODE_FB_WRITE); + inst->target = target; + inst->base_mrf = 0; + inst->mlen = nr; + if (target == c->key.nr_color_regions - 1) + inst->eot = true; + inst->header_present = header_present; + } + + if (c->key.nr_color_regions == 0) { + if (c->key.alpha_test && (this->frag_color || this->frag_data)) { + /* If the alpha test is enabled but there's no color buffer, + * we still need to send alpha out the pipeline to our null + * renderbuffer. + */ + color.reg_offset += 3; + emit_color_write(3, color_mrf, color); + } + + fs_inst *inst = emit(FS_OPCODE_FB_WRITE); + inst->base_mrf = 0; + inst->mlen = nr; + inst->eot = true; + inst->header_present = header_present; + } + + this->current_annotation = NULL; +} diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 9b82fe159f9..c2227777cfb 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -181,9 +181,6 @@ static void upload_sf_prog(struct brw_context *brw) key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); - /* _NEW_HINT */ - key.linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); - /* _NEW_POLYGON */ if (key.do_twoside_color) { /* If we're rendering to a FBO, we have to invert the polygon diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h index e525c730d3f..be32085c697 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.h +++ b/src/mesa/drivers/dri/i965/brw_sf.h @@ -52,7 +52,6 @@ struct brw_sf_prog_key { GLuint do_flat_shading:1; GLuint frontface_ccw:1; GLuint do_point_sprite:1; - GLuint linear_color:1; /**< linear interp vs. perspective interp */ GLuint sprite_origin_lower_left:1; GLuint pad:24; }; diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index 4b2e26cbed7..52a3fb3893d 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -316,7 +316,7 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, GLbitfield64 persp_mask; GLbitfield64 linear_mask; - if (c->key.do_flat_shading || c->key.linear_color) + if (c->key.do_flat_shading) persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS | FRAG_BIT_COL0 | FRAG_BIT_COL1); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp new file mode 100644 index 00000000000..9471883fb2b --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -0,0 +1,194 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +extern "C" { +#include "main/macros.h" +#include "brw_context.h" +} +#include "brw_fs.h" +#include "../glsl/ir_optimization.h" +#include "../glsl/ir_print_visitor.h" + +struct gl_shader * +brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) +{ + struct brw_shader *shader; + + shader = rzalloc(NULL, struct brw_shader); + if (shader) { + shader->base.Type = type; + shader->base.Name = name; + _mesa_init_shader(ctx, &shader->base); + } + + return &shader->base; +} + +struct gl_shader_program * +brw_new_shader_program(struct gl_context *ctx, GLuint name) +{ + struct brw_shader_program *prog; + prog = rzalloc(NULL, struct brw_shader_program); + if (prog) { + prog->base.Name = name; + _mesa_init_shader_program(ctx, &prog->base); + } + return &prog->base; +} + +/** + * Performs a compile of the shader stages even when we don't know + * what non-orthogonal state will be set, in the hope that it reflects + * the eventual NOS used, and thus allows us to produce link failures. + */ +bool +brw_shader_precompile(struct gl_context *ctx, struct gl_shader_program *prog) +{ + if (!brw_fs_precompile(ctx, prog)) + return false; + + return true; +} + +GLboolean +brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) +{ + struct brw_context *brw = brw_context(ctx); + struct intel_context *intel = &brw->intel; + + struct brw_shader *shader = + (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; + if (shader != NULL) { + void *mem_ctx = ralloc_context(NULL); + bool progress; + + if (shader->ir) + ralloc_free(shader->ir); + shader->ir = new(shader) exec_list; + clone_ir_list(mem_ctx, shader->ir, shader->base.ir); + + do_mat_op_to_vec(shader->ir); + lower_instructions(shader->ir, + MOD_TO_FRACT | + DIV_TO_MUL_RCP | + SUB_TO_ADD_NEG | + EXP_TO_EXP2 | + LOG_TO_LOG2); + + /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, + * if-statements need to be flattened. + */ + if (intel->gen < 6) + lower_if_to_cond_assign(shader->ir, 16); + + do_lower_texture_projection(shader->ir); + do_vec_index_to_cond_assign(shader->ir); + brw_do_cubemap_normalize(shader->ir); + lower_noise(shader->ir); + lower_quadop_vector(shader->ir, false); + lower_variable_index_to_cond_assign(shader->ir, + GL_TRUE, /* input */ + GL_TRUE, /* output */ + GL_TRUE, /* temp */ + GL_TRUE /* uniform */ + ); + + do { + progress = false; + + brw_do_channel_expressions(shader->ir); + brw_do_vector_splitting(shader->ir); + + progress = do_lower_jumps(shader->ir, true, true, + true, /* main return */ + false, /* continue */ + false /* loops */ + ) || progress; + + progress = do_common_optimization(shader->ir, true, 32) || progress; + } while (progress); + + validate_ir_tree(shader->ir); + + reparent_ir(shader->ir, shader->ir); + ralloc_free(mem_ctx); + } + + if (!_mesa_ir_link_shader(ctx, prog)) + return GL_FALSE; + + if (!brw_shader_precompile(ctx, prog)) + return GL_FALSE; + + return GL_TRUE; +} + + +int +brw_type_for_base_type(const struct glsl_type *type) +{ + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + return BRW_REGISTER_TYPE_F; + case GLSL_TYPE_INT: + case GLSL_TYPE_BOOL: + return BRW_REGISTER_TYPE_D; + case GLSL_TYPE_UINT: + return BRW_REGISTER_TYPE_UD; + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_SAMPLER: + /* These should be overridden with the type of the member when + * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely + * way to trip up if we don't. + */ + return BRW_REGISTER_TYPE_UD; + default: + assert(!"not reached"); + return BRW_REGISTER_TYPE_F; + } +} + +uint32_t +brw_conditional_for_comparison(unsigned int op) +{ + switch (op) { + case ir_binop_less: + return BRW_CONDITIONAL_L; + case ir_binop_greater: + return BRW_CONDITIONAL_G; + case ir_binop_lequal: + return BRW_CONDITIONAL_LE; + case ir_binop_gequal: + return BRW_CONDITIONAL_GE; + case ir_binop_equal: + case ir_binop_all_equal: /* same as equal for scalars */ + return BRW_CONDITIONAL_Z; + case ir_binop_nequal: + case ir_binop_any_nequal: /* same as nequal for scalars */ + return BRW_CONDITIONAL_NZ; + default: + assert(!"not reached: bad operation for comparison"); + return BRW_CONDITIONAL_NZ; + } +} diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h new file mode 100644 index 00000000000..4c568a26caa --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -0,0 +1,25 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +int brw_type_for_base_type(const struct glsl_type *type); +uint32_t brw_conditional_for_comparison(unsigned int op); diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 3a3aa8c0346..ef58619702d 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -110,7 +110,7 @@ static void dump_wm_surface_state(struct brw_context *brw) for (i = 0; i < brw->wm.nr_surfaces; i++) { unsigned int surfoff; - struct brw_surface_state *surf; + uint32_t *surf; char name[20]; if (brw->wm.surf_offset[i] == 0) { @@ -118,21 +118,25 @@ static void dump_wm_surface_state(struct brw_context *brw) continue; } surfoff = bo->offset + brw->wm.surf_offset[i]; - surf = (struct brw_surface_state *)(base + brw->wm.surf_offset[i]); + surf = (uint32_t *)(base + brw->wm.surf_offset[i]); sprintf(name, "WM SURF%d", i); state_out(name, surf, surfoff, 0, "%s %s\n", - get_965_surfacetype(surf->ss0.surface_type), - get_965_surface_format(surf->ss0.surface_format)); + get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), + get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT))); state_out(name, surf, surfoff, 1, "offset\n"); state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", - surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count); - state_out(name, surf, surfoff, 3, "pitch %d, %stiled\n", - surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not "); + GET_FIELD(surf[2], BRW_SURFACE_WIDTH) + 1, + GET_FIELD(surf[2], BRW_SURFACE_HEIGHT) + 1); + state_out(name, surf, surfoff, 3, "pitch %d, %s tiled\n", + GET_FIELD(surf[3], BRW_SURFACE_PITCH) + 1, + (surf[3] & BRW_SURFACE_TILED) ? + ((surf[3] & BRW_SURFACE_TILED_Y) ? "Y" : "X") : "not"); state_out(name, surf, surfoff, 4, "mip base %d\n", - surf->ss4.min_lod); + GET_FIELD(surf[4], BRW_SURFACE_MIN_LOD)); state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", - surf->ss5.x_offset, surf->ss5.y_offset); + GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET), + GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET)); } drm_intel_bo_unmap(bo); } diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index ad31222e9ec..7b9cdba4cbf 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -1225,80 +1225,6 @@ struct gen7_sf_clip_viewport { GLfloat pad1[4]; }; -/* Documented in the subsystem/shared-functions/sampler chapter... - * - * vol5c Shared Functions - 1.13.4.1.1 - */ -struct brw_surface_state -{ - struct { - GLuint cube_pos_z:1; - GLuint cube_neg_z:1; - GLuint cube_pos_y:1; - GLuint cube_neg_y:1; - GLuint cube_pos_x:1; - GLuint cube_neg_x:1; - GLuint pad:2; - /* Required on gen6 for surfaces accessed through render cache messages. - */ - GLuint render_cache_read_write:1; - /* Ironlake and newer: instead of replicating one of the texels */ - GLuint cube_corner_average:1; - GLuint mipmap_layout_mode:1; - GLuint vert_line_stride_ofs:1; - GLuint vert_line_stride:1; - GLuint color_blend:1; - GLuint writedisable_blue:1; - GLuint writedisable_green:1; - GLuint writedisable_red:1; - GLuint writedisable_alpha:1; - GLuint surface_format:9; /**< BRW_SURFACEFORMAT_x */ - GLuint data_return_format:1; - GLuint pad0:1; - GLuint surface_type:3; /**< BRW_SURFACE_1D/2D/3D/CUBE */ - } ss0; - - struct { - GLuint base_addr; - } ss1; - - struct { - GLuint pad:2; - GLuint mip_count:4; - GLuint width:13; - GLuint height:13; - } ss2; - - struct { - GLuint tile_walk:1; - GLuint tiled_surface:1; - GLuint pad:1; - GLuint pitch:18; - GLuint depth:11; - } ss3; - - struct { - GLuint multisample_position_palette_index:3; - GLuint pad1:1; - GLuint num_multisamples:3; - GLuint pad0:1; - GLuint render_target_view_extent:9; - GLuint min_array_elt:11; - GLuint min_lod:4; - } ss4; - - struct { - GLuint pad1:16; - GLuint cache_control:2; - GLuint gfdt:1; - GLuint encrypt:1; - GLuint y_offset:4; - GLuint pad0:1; - GLuint x_offset:7; - } ss5; /* New in G4X */ - -}; - /* volume 5c Shared Functions - 1.13.4.1.2 */ struct gen7_surface_state { diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 9ac0713a1d3..4a3a2bfada2 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -61,7 +61,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel, */ h0 = ALIGN(mt->height0, align_h); h1 = ALIGN(minify(h0), align_h); - qpitch = (h0 + h1 + 11 * align_h); + qpitch = (h0 + h1 + (intel->gen >= 7 ? 12 : 11) * align_h); if (mt->compressed) qpitch /= 4; @@ -152,9 +152,6 @@ GLboolean brw_miptree_layout(struct intel_context *intel, * in the texture surfaces run, so they may be "vertical" through * memory. As a result, the docs say in Surface Padding Requirements: * Sampling Engine Surfaces that two extra rows of padding are required. - * We don't know of similar requirements for pre-965, but given that - * those docs are silent on padding requirements in general, let's play - * it safe. */ if (mt->target == GL_TEXTURE_CUBE_MAP) mt->total_height += 2; diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c index 47cc0a7da7a..9fdfebe9f76 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_constval.c +++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c @@ -194,11 +194,19 @@ static void calc_wm_input_sizes( struct brw_context *brw ) /* BRW_NEW_VERTEX_PROGRAM */ const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); + /* BRW_NEW_FRAGMENT_PROGRAM */ + struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram; /* BRW_NEW_INPUT_DIMENSIONS */ struct tracker t; GLuint insn; GLuint i; + /* If we're going to go through brw_fs.cpp, we don't end up using + * brw->wm.input_size_masks. + */ + if (prog && prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) + return; + memset(&t, 0, sizeof(t)); /* _NEW_LIGHT */ @@ -238,7 +246,9 @@ static void calc_wm_input_sizes( struct brw_context *brw ) const struct brw_tracked_state brw_wm_input_sizes = { .dirty = { .mesa = _NEW_LIGHT, - .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS, + .brw = (BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_INPUT_DIMENSIONS), .cache = 0 }, .prepare = calc_wm_input_sizes diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 9f99ef57214..69650e1df77 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -136,6 +136,16 @@ static void brw_invalidate_state( struct intel_context *intel, GLuint new_state /* nothing */ } +/** + * \see intel_context.vtbl.is_hiz_depth_format + */ +static bool brw_is_hiz_depth_format(struct intel_context *intel, + gl_format format) +{ + /* In the future, this will support Z_FLOAT32. */ + return intel->has_hiz && (format == MESA_FORMAT_X8_Z24); +} + void brwInitVtbl( struct brw_context *brw ) { @@ -152,4 +162,5 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.set_draw_region = brw_set_draw_region; brw->intel.vtbl.debug_batch = brw_debug_batch; brw->intel.vtbl.render_target_supported = brw_render_target_supported; + brw->intel.vtbl.is_hiz_depth_format = brw_is_hiz_depth_format; } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 907976295ab..1aebd12df49 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -34,6 +34,7 @@ #include "brw_state.h" #include "main/formats.h" #include "main/samplerobj.h" +#include "program/prog_parameter.h" #include "../glsl/ralloc.h" @@ -115,7 +116,7 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) brw_wm_pass2(c); /* how many general-purpose registers are used */ - c->prog_data.total_grf = c->max_wm_grf; + c->prog_data.reg_blocks = brw_register_blocks(c->max_wm_grf); /* Emit GEN4 code. */ @@ -184,9 +185,10 @@ brw_wm_payload_setup(struct brw_context *brw, * Depending on the instructions used (i.e. flow control instructions) * we'll use one of two code generators. */ -static void do_wm_prog( struct brw_context *brw, - struct brw_fragment_program *fp, - struct brw_wm_prog_key *key) +bool do_wm_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key) { struct intel_context *intel = &brw->intel; struct brw_wm_compile *c; @@ -202,7 +204,7 @@ static void do_wm_prog( struct brw_context *brw, * without triggering a segfault, no way to signal, * so just return. */ - return; + return false; } c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN); c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN); @@ -226,7 +228,10 @@ static void do_wm_prog( struct brw_context *brw, brw_init_compile(brw, &c->func, c); - if (!brw_wm_fs_emit(brw, c)) { + if (prog && prog->FragmentProgram) { + if (!brw_wm_fs_emit(brw, c, prog)) + return false; + } else { /* Fallback for fixed function and ARB_fp shaders. */ c->dispatch_width = 16; brw_wm_payload_setup(brw, c); @@ -274,6 +279,8 @@ static void do_wm_prog( struct brw_context *brw, program, program_size, &c->prog_data, sizeof(c->prog_data), &brw->wm.prog_data); + + return true; } @@ -355,9 +362,6 @@ static void brw_wm_populate_key( struct brw_context *brw, /* _NEW_LIGHT */ key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); - /* _NEW_HINT */ - key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); - /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */ key->clamp_fragment_color = ctx->Color._ClampFragmentColor; @@ -426,9 +430,6 @@ static void brw_wm_populate_key( struct brw_context *brw, } } - /* Shadow */ - key->shadowtex_mask = fp->program.Base.ShadowSamplers; - /* _NEW_BUFFERS */ /* * Include the draw buffer origin and height so that we can calculate @@ -468,6 +469,8 @@ static void brw_wm_populate_key( struct brw_context *brw, static void brw_prepare_wm_prog(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; struct brw_wm_prog_key key; struct brw_fragment_program *fp = (struct brw_fragment_program *) brw->fragment_program; @@ -480,8 +483,11 @@ static void brw_prepare_wm_prog(struct brw_context *brw) brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG, &key, sizeof(key), &brw->wm.prog_data); - if (brw->wm.prog_bo == NULL) - do_wm_prog(brw, fp, &key); + if (brw->wm.prog_bo == NULL) { + bool success = do_wm_prog(brw, ctx->Shader.CurrentFragmentProgram, fp, + &key); + assert(success); + } } @@ -489,7 +495,6 @@ const struct brw_tracked_state brw_wm_prog = { .dirty = { .mesa = (_NEW_COLOR | _NEW_DEPTH | - _NEW_HINT | _NEW_STENCIL | _NEW_POLYGON | _NEW_LINE | diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 8ab531bdf87..e244b55a083 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -59,16 +59,16 @@ #define AA_ALWAYS 2 struct brw_wm_prog_key { + uint8_t iz_lookup; GLuint stats_wm:1; GLuint flat_shade:1; - GLuint linear_color:1; /**< linear interpolation vs perspective interp */ GLuint nr_color_regions:5; GLuint render_to_fbo:1; GLuint alpha_test:1; GLuint clamp_fragment_color:1; + GLuint line_aa:2; GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ - GLuint shadowtex_mask:16; GLuint yuvtex_mask:16; GLuint yuvtex_swap_mask:16; /* UV swaped */ uint16_t gl_clamp_mask[3]; @@ -76,8 +76,6 @@ struct brw_wm_prog_key { GLushort tex_swizzles[BRW_MAX_TEX_UNIT]; GLushort drawable_height; GLbitfield64 vp_outputs_written; - GLuint iz_lookup; - GLuint line_aa; GLuint program_string_id:32; }; @@ -314,7 +312,8 @@ void brw_wm_print_program( struct brw_wm_compile *c, void brw_wm_lookup_iz(struct intel_context *intel, struct brw_wm_compile *c); -bool brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c); +bool brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, + struct gl_shader_program *prog); /* brw_wm_emit.c */ void emit_alu1(struct brw_compile *p, @@ -476,5 +475,9 @@ bool brw_color_buffer_write_enabled(struct brw_context *brw); bool brw_render_target_supported(gl_format format); void brw_wm_payload_setup(struct brw_context *brw, struct brw_wm_compile *c); +bool do_wm_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key); #endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 9ddbee2edf4..59dcda7b414 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -417,25 +417,14 @@ static void emit_interp( struct brw_wm_compile *c, src_undef()); } else { - if (c->key.linear_color) { - emit_op(c, - WM_LINTERP, - dst, - 0, - interp, - deltas, - src_undef()); - } - else { - /* perspective-corrected color interpolation */ - emit_op(c, - WM_PINTERP, - dst, - 0, - interp, - deltas, - get_pixel_w(c)); - } + /* perspective-corrected color interpolation */ + emit_op(c, + WM_PINTERP, + dst, + 0, + interp, + deltas, + get_pixel_w(c)); } break; case FRAG_ATTRIB_FOGC: diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index a356711470a..ef98f8126dc 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -91,8 +91,8 @@ brw_prepare_wm_unit(struct brw_context *brw) } /* CACHE_NEW_WM_PROG */ - wm->thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1; - wm->wm9.grf_reg_count_2 = ALIGN(brw->wm.prog_data->total_grf_16, 16) / 16 - 1; + wm->thread0.grf_reg_count = brw->wm.prog_data->reg_blocks; + wm->wm9.grf_reg_count_2 = brw->wm.prog_data->reg_blocks_16; wm->thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ /* reloc */ wm->wm9.kernel_start_pointer_2 = (brw->wm.prog_bo->offset + diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index de1953ed600..6c1eba69d4b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -200,22 +200,16 @@ translate_tex_format(gl_format mesa_format, } } -static void -brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) +static uint32_t +brw_get_surface_tiling_bits(uint32_t tiling) { switch (tiling) { - case I915_TILING_NONE: - surf->ss3.tiled_surface = 0; - surf->ss3.tile_walk = 0; - break; case I915_TILING_X: - surf->ss3.tiled_surface = 1; - surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR; - break; + return BRW_SURFACE_TILED; case I915_TILING_Y: - surf->ss3.tiled_surface = 1; - surf->ss3.tile_walk = BRW_TILEWALK_YMAJOR; - break; + return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y; + default: + return 0; } } @@ -228,46 +222,36 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel]; struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); const GLuint surf_index = SURF_INDEX_TEXTURE(unit); - struct brw_surface_state *surf; + uint32_t *surf; - surf = brw_state_batch(brw, sizeof(*surf), 32, - &brw->wm.surf_offset[surf_index]); - memset(surf, 0, sizeof(*surf)); + surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[surf_index]); - surf->ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - surf->ss0.surface_type = translate_tex_target(tObj->Target); - surf->ss0.surface_format = translate_tex_format(firstImage->TexFormat, - firstImage->InternalFormat, - sampler->DepthMode, - sampler->sRGBDecode); + surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT | + BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | + BRW_SURFACE_CUBEFACE_ENABLES | + (translate_tex_format(firstImage->TexFormat, + firstImage->InternalFormat, + sampler->DepthMode, + sampler->sRGBDecode) << + BRW_SURFACE_FORMAT_SHIFT)); - /* This is ok for all textures with channel width 8bit or less: - */ -/* surf->ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ - surf->ss1.base_addr = intelObj->mt->region->buffer->offset; /* reloc */ + surf[1] = intelObj->mt->region->buffer->offset; /* reloc */ - surf->ss2.mip_count = intelObj->_MaxLevel - tObj->BaseLevel; - surf->ss2.width = firstImage->Width - 1; - surf->ss2.height = firstImage->Height - 1; - brw_set_surface_tiling(surf, intelObj->mt->region->tiling); - surf->ss3.pitch = (intelObj->mt->region->pitch * intelObj->mt->cpp) - 1; - surf->ss3.depth = firstImage->Depth - 1; + surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT | + (firstImage->Width - 1) << BRW_SURFACE_WIDTH_SHIFT | + (firstImage->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT); - surf->ss4.min_lod = 0; - - if (tObj->Target == GL_TEXTURE_CUBE_MAP) { - surf->ss0.cube_pos_x = 1; - surf->ss0.cube_pos_y = 1; - surf->ss0.cube_pos_z = 1; - surf->ss0.cube_neg_x = 1; - surf->ss0.cube_neg_y = 1; - surf->ss0.cube_neg_z = 1; - } + surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) | + (firstImage->Depth - 1) << BRW_SURFACE_DEPTH_SHIFT | + ((intelObj->mt->region->pitch * intelObj->mt->cpp) - 1) << + BRW_SURFACE_PITCH_SHIFT); + + surf[4] = 0; + surf[5] = 0; /* Emit relocation to surface contents */ drm_intel_bo_emit_reloc(brw->intel.batch.bo, - brw->wm.surf_offset[surf_index] + - offsetof(struct brw_surface_state, ss1), + brw->wm.surf_offset[surf_index] + 4, intelObj->mt->region->buffer, 0, I915_GEM_DOMAIN_SAMPLER, 0); } @@ -284,34 +268,34 @@ brw_create_constant_surface(struct brw_context *brw, { struct intel_context *intel = &brw->intel; const GLint w = width - 1; - struct brw_surface_state *surf; + uint32_t *surf; - surf = brw_state_batch(brw, sizeof(*surf), 32, out_offset); - memset(surf, 0, sizeof(*surf)); + surf = brw_state_batch(brw, 6 * 4, 32, out_offset); - surf->ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - surf->ss0.surface_type = BRW_SURFACE_BUFFER; - surf->ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT | + BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | + BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT); if (intel->gen >= 6) - surf->ss0.render_cache_read_write = 1; + surf[0] |= BRW_SURFACE_RC_READ_WRITE; + + surf[1] = bo->offset; /* reloc */ - assert(bo); - surf->ss1.base_addr = bo->offset; /* reloc */ + surf[2] = (((w & 0x7f) - 1) << BRW_SURFACE_WIDTH_SHIFT | + (((w >> 7) & 0x1fff) - 1) << BRW_SURFACE_HEIGHT_SHIFT); - surf->ss2.width = w & 0x7f; /* bits 6:0 of size or width */ - surf->ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ - surf->ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */ - surf->ss3.pitch = (width * 16) - 1; /* ignored?? */ - brw_set_surface_tiling(surf, I915_TILING_NONE); /* tiling now allowed */ + surf[3] = ((((w >> 20) & 0x7f) - 1) << BRW_SURFACE_DEPTH_SHIFT | + (width * 16 - 1) << BRW_SURFACE_PITCH_SHIFT); + + surf[4] = 0; + surf[5] = 0; /* Emit relocation to surface contents. Section 5.1.1 of the gen4 * bspec ("Data Cache") says that the data cache does not exist as * a separate cache and is just the sampler cache. */ drm_intel_bo_emit_reloc(brw->intel.batch.bo, - (*out_offset + - offsetof(struct brw_surface_state, ss1)), + *out_offset + 4, bo, 0, I915_GEM_DOMAIN_SAMPLER, 0); } @@ -416,23 +400,23 @@ static void brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) { struct intel_context *intel = &brw->intel; - struct brw_surface_state *surf; - - surf = brw_state_batch(brw, sizeof(*surf), 32, - &brw->wm.surf_offset[unit]); - memset(surf, 0, sizeof(*surf)); + uint32_t *surf; - surf->ss0.surface_type = BRW_SURFACE_NULL; - surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[unit]); + surf[0] = (BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT | + BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT); if (intel->gen < 6) { - /* _NEW_COLOR */ - surf->ss0.color_blend = 0; - surf->ss0.writedisable_red = 1; - surf->ss0.writedisable_green = 1; - surf->ss0.writedisable_blue = 1; - surf->ss0.writedisable_alpha = 1; + surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT | + 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT | + 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT | + 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT); } + surf[1] = 0; + surf[2] = 0; + surf[3] = 0; + surf[4] = 0; + surf[5] = 0; } /** @@ -449,12 +433,11 @@ brw_update_renderbuffer_surface(struct brw_context *brw, struct gl_context *ctx = &intel->ctx; struct intel_renderbuffer *irb = intel_renderbuffer(rb); struct intel_region *region = irb->region; - struct brw_surface_state *surf; + uint32_t *surf; uint32_t tile_x, tile_y; + uint32_t format = 0; - surf = brw_state_batch(brw, sizeof(*surf), 32, - &brw->wm.surf_offset[unit]); - memset(surf, 0, sizeof(*surf)); + surf = brw_state_batch(brw, 6 * 4, 32, &brw->wm.surf_offset[unit]); switch (irb->Base.Format) { case MESA_FORMAT_XRGB8888: @@ -465,7 +448,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, * cases where GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is * used. */ - surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; break; case MESA_FORMAT_INTENSITY_FLOAT32: case MESA_FORMAT_LUMINANCE_FLOAT32: @@ -473,25 +456,35 @@ brw_update_renderbuffer_surface(struct brw_context *brw, * channel into R, which is to say that we just treat them as * GL_RED. */ - surf->ss0.surface_format = BRW_SURFACEFORMAT_R32_FLOAT; + format = BRW_SURFACEFORMAT_R32_FLOAT; break; case MESA_FORMAT_SARGB8: /* without GL_EXT_framebuffer_sRGB we shouldn't bind sRGB surfaces to the blend/update as sRGB */ if (ctx->Color.sRGBEnabled) - surf->ss0.surface_format = brw_format_for_mesa_format(irb->Base.Format); + format = brw_format_for_mesa_format(irb->Base.Format); else - surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; break; default: assert(brw_render_target_supported(irb->Base.Format)); - surf->ss0.surface_format = brw_format_for_mesa_format(irb->Base.Format); + format = brw_format_for_mesa_format(irb->Base.Format); } - surf->ss0.surface_type = BRW_SURFACE_2D; + surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | + format << BRW_SURFACE_FORMAT_SHIFT); + /* reloc */ - surf->ss1.base_addr = intel_region_tile_offsets(region, &tile_x, &tile_y); - surf->ss1.base_addr += region->buffer->offset; /* reloc */ + surf[1] = (intel_region_tile_offsets(region, &tile_x, &tile_y) + + region->buffer->offset); + + surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT | + (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT); + + surf[3] = (brw_get_surface_tiling_bits(region->tiling) | + ((region->pitch * region->cpp) - 1) << BRW_SURFACE_PITCH_SHIFT); + + surf[4] = 0; assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0)); /* Note that the low bits of these fields are missing, so @@ -499,35 +492,35 @@ brw_update_renderbuffer_surface(struct brw_context *brw, */ assert(tile_x % 4 == 0); assert(tile_y % 2 == 0); - surf->ss5.x_offset = tile_x / 4; - surf->ss5.y_offset = tile_y / 2; - - surf->ss2.width = rb->Width - 1; - surf->ss2.height = rb->Height - 1; - brw_set_surface_tiling(surf, region->tiling); - surf->ss3.pitch = (region->pitch * region->cpp) - 1; + surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT | + (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT); if (intel->gen < 6) { /* _NEW_COLOR */ - surf->ss0.color_blend = (!ctx->Color._LogicOpEnabled && - (ctx->Color.BlendEnabled & (1 << unit))); - surf->ss0.writedisable_red = !ctx->Color.ColorMask[unit][0]; - surf->ss0.writedisable_green = !ctx->Color.ColorMask[unit][1]; - surf->ss0.writedisable_blue = !ctx->Color.ColorMask[unit][2]; + if (!ctx->Color._LogicOpEnabled && + (ctx->Color.BlendEnabled & (1 << unit))) + surf[0] |= BRW_SURFACE_BLEND_ENABLED; + + if (!ctx->Color.ColorMask[unit][0]) + surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT; + if (!ctx->Color.ColorMask[unit][1]) + surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT; + if (!ctx->Color.ColorMask[unit][2]) + surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT; + /* As mentioned above, disable writes to the alpha component when the * renderbuffer is XRGB. */ - if (ctx->DrawBuffer->Visual.alphaBits == 0) - surf->ss0.writedisable_alpha = 1; - else - surf->ss0.writedisable_alpha = !ctx->Color.ColorMask[unit][3]; + if (ctx->DrawBuffer->Visual.alphaBits == 0 || + !ctx->Color.ColorMask[unit][3]) { + surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT; + } } drm_intel_bo_emit_reloc(brw->intel.batch.bo, - brw->wm.surf_offset[unit] + - offsetof(struct brw_surface_state, ss1), + brw->wm.surf_offset[unit] + 4, region->buffer, - surf->ss1.base_addr - region->buffer->offset, + surf[1] - region->buffer->offset, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); } @@ -539,16 +532,14 @@ prepare_wm_surfaces(struct brw_context *brw) int i; int nr_surfaces = 0; - if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { - for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { - struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; - struct intel_renderbuffer *irb = intel_renderbuffer(rb); - struct intel_region *region = irb ? irb->region : NULL; + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + struct intel_region *region = irb ? irb->region : NULL; - if (region) - brw_add_validated_bo(brw, region->buffer); - nr_surfaces = SURF_INDEX_DRAW(i) + 1; - } + if (region) + brw_add_validated_bo(brw, region->buffer); + nr_surfaces = SURF_INDEX_DRAW(i) + 1; } if (brw->wm.const_bo) { @@ -558,10 +549,11 @@ prepare_wm_surfaces(struct brw_context *brw) for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; - struct gl_texture_object *tObj = texUnit->_Current; - struct intel_texture_object *intelObj = intel_texture_object(tObj); if (texUnit->_ReallyEnabled) { + struct gl_texture_object *tObj = texUnit->_Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + brw_add_validated_bo(brw, intelObj->mt->region->buffer); nr_surfaces = SURF_INDEX_TEXTURE(i) + 1; } diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index 6a7add8e562..ae7a1d6c35c 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -33,7 +33,6 @@ static void upload_vs_state(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - struct gl_context *ctx = &intel->ctx; BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_VS << 16 | (2 - 2)); diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c index ee551ef60d4..7eb50edc6b4 100644 --- a/src/mesa/drivers/dri/intel/intel_buffers.c +++ b/src/mesa/drivers/dri/intel/intel_buffers.c @@ -93,6 +93,7 @@ intel_draw_buffer(struct gl_context * ctx, struct gl_framebuffer *fb) struct intel_context *intel = intel_context(ctx); struct intel_region *colorRegions[MAX_DRAW_BUFFERS], *depthRegion = NULL; struct intel_renderbuffer *irbDepth = NULL, *irbStencil = NULL; + bool fb_has_hiz = intel_framebuffer_has_hiz(fb); if (!fb) { /* this can happen during the initial context initialization */ @@ -166,11 +167,11 @@ intel_draw_buffer(struct gl_context * ctx, struct gl_framebuffer *fb) /*** *** Get depth buffer region and check if we need a software fallback. - *** Note that the depth buffer is usually a DEPTH_STENCIL buffer. ***/ if (fb->_DepthBuffer && fb->_DepthBuffer->Wrapped) { irbDepth = intel_renderbuffer(fb->_DepthBuffer->Wrapped); if (irbDepth && irbDepth->region) { + assert(!fb_has_hiz || irbDepth->Base.Format != MESA_FORMAT_S8_Z24); FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_FALSE); depthRegion = irbDepth->region; } @@ -187,13 +188,16 @@ intel_draw_buffer(struct gl_context * ctx, struct gl_framebuffer *fb) /*** *** Stencil buffer - *** This can only be hardware accelerated if we're using a - *** combined DEPTH_STENCIL buffer. ***/ if (fb->_StencilBuffer && fb->_StencilBuffer->Wrapped) { irbStencil = intel_renderbuffer(fb->_StencilBuffer->Wrapped); if (irbStencil && irbStencil->region) { - ASSERT(irbStencil->Base.Format == MESA_FORMAT_S8_Z24); + if (!intel->has_separate_stencil) + assert(irbStencil->Base.Format == MESA_FORMAT_S8_Z24); + if (fb_has_hiz || intel->must_use_separate_stencil) + assert(irbStencil->Base.Format == MESA_FORMAT_S8); + if (irbStencil->Base.Format == MESA_FORMAT_S8) + assert(intel->has_separate_stencil); FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE); } else { @@ -208,8 +212,10 @@ intel_draw_buffer(struct gl_context * ctx, struct gl_framebuffer *fb) /* If we have a (packed) stencil buffer attached but no depth buffer, * we still need to set up the shared depth/stencil state so we can use it. */ - if (depthRegion == NULL && irbStencil && irbStencil->region) + if (depthRegion == NULL && irbStencil && irbStencil->region + && irbStencil->Base.Format == MESA_FORMAT_S8_Z24) { depthRegion = irbStencil->region; + } /* * Update depth and stencil test state @@ -302,18 +308,6 @@ intelReadBuffer(struct gl_context * ctx, GLenum mode) if (!was_front_buffer_reading && intel->is_front_buffer_reading) dri2InvalidateDrawable(intel->driContext->driReadablePriv); } - - if (ctx->ReadBuffer == ctx->DrawBuffer) { - /* This will update FBO completeness status. - * A framebuffer will be incomplete if the GL_READ_BUFFER setting - * refers to a missing renderbuffer. Calling glReadBuffer can set - * that straight and can make the drawing buffer complete. - */ - intel_draw_buffer(ctx, ctx->DrawBuffer); - } - /* Generally, functions which read pixels (glReadPixels, glCopyPixels, etc) - * reference ctx->ReadBuffer and do appropriate state checks. - */ } diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 4516db20ffc..2ea52c26106 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -620,6 +620,53 @@ intelInitDriverFunctions(struct dd_function_table *functions) intel_init_syncobj_functions(functions); } +/** + * Override intel->has_hiz with environment variable INTEL_HIZ. + * + * Valid values for INTEL_HIZ are "0" and "1". If an invalid valid value is + * encountered, a warning is emitted and INTEL_HIZ is ignored. + */ +static void +intel_override_hiz(struct intel_context *intel) +{ + const char *s = getenv("INTEL_HIZ"); + if (!s) { + return; + } else if (!strncmp("0", s, 2)) { + intel->has_hiz = false; + } else if (!strncmp("1", s, 2)) { + intel->has_hiz = true; + } else { + _mesa_warning(&intel->ctx, + "env variable INTEL_HIZ=\"%s\" has invalid value and " + "is ignored", s); + } +} + +/** + * Override intel->has_separate_stencil with environment variable + * INTEL_SEPARATE_STENCIL. + * + * Valid values for INTEL_SEPARATE_STENCIL are "0" and "1". If an invalid + * value is encountered, a warning is emitted and INTEL_SEPARATE_STENCIL is + * ignored. + */ +static void +intel_override_separate_stencil(struct intel_context *intel) +{ + const char *s = getenv("INTEL_SEPARATE_STENCIL"); + if (!s) { + return; + } else if (!strncmp("0", s, 2)) { + intel->has_separate_stencil = false; + } else if (!strncmp("1", s, 2)) { + intel->has_separate_stencil = true; + } else { + _mesa_warning(&intel->ctx, + "env variable INTEL_SEPARATE_STENCIL=\"%s\" has invalid " + "value and is ignored", s); + } +} GLboolean intelInitContext(struct intel_context *intel, @@ -667,9 +714,14 @@ intelInitContext(struct intel_context *intel, if (IS_GEN7(intel->intelScreen->deviceID)) { intel->needs_ff_sync = GL_TRUE; intel->has_luminance_srgb = GL_TRUE; + /* FINISHME: Enable intel->has_separate_stencil on Gen7. */ + /* FINISHME: Enable intel->must_use_separate_stencil on Gen7. */ + /* FINISHME: Enable intel->has_hiz on Gen7. */ } else if (IS_GEN6(intel->intelScreen->deviceID)) { intel->needs_ff_sync = GL_TRUE; intel->has_luminance_srgb = GL_TRUE; + /* FINISHME: Enable intel->has_separate_stencil on Gen6. */ + /* FINISHME: Enable intel->has_hiz on Gen6. */ } else if (IS_GEN5(intel->intelScreen->deviceID)) { intel->needs_ff_sync = GL_TRUE; intel->has_luminance_srgb = GL_TRUE; @@ -689,6 +741,9 @@ intelInitContext(struct intel_context *intel, } } + intel_override_hiz(intel); + intel_override_separate_stencil(intel); + memset(&ctx->TextureFormatSupported, 0, sizeof(ctx->TextureFormatSupported)); ctx->TextureFormatSupported[MESA_FORMAT_ARGB8888] = GL_TRUE; @@ -703,7 +758,12 @@ intelInitContext(struct intel_context *intel, ctx->TextureFormatSupported[MESA_FORMAT_AL88] = GL_TRUE; if (intel->gen >= 4) ctx->TextureFormatSupported[MESA_FORMAT_AL1616] = GL_TRUE; - ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = GL_TRUE; + + /* Depth and stencil */ + ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = !intel->must_use_separate_stencil; + ctx->TextureFormatSupported[MESA_FORMAT_X8_Z24] = intel->has_separate_stencil; + ctx->TextureFormatSupported[MESA_FORMAT_S8] = intel->has_separate_stencil; + /* * This was disabled in initial FBO enabling to avoid combinations * of depth+stencil that wouldn't work together. We since decided diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index d3a8a659caa..f599861cba8 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -149,6 +149,10 @@ struct intel_context void (*debug_batch)(struct intel_context *intel); bool (*render_target_supported)(gl_format format); + + /** Can HiZ be enabled on a depthbuffer of the given format? */ + bool (*is_hiz_depth_format)(struct intel_context *intel, + gl_format format); } vtbl; GLbitfield Fallback; /**< mask of INTEL_FALLBACK_x bits */ @@ -166,6 +170,9 @@ struct intel_context GLboolean is_945; GLboolean has_luminance_srgb; GLboolean has_xrgb_textures; + GLboolean has_separate_stencil; + GLboolean must_use_separate_stencil; + GLboolean has_hiz; int urb_size; diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index ad2468a3237..7434e0efff6 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -79,6 +79,9 @@ intel_delete_renderbuffer(struct gl_renderbuffer *rb) if (intel && irb->region) { intel_region_release(&irb->region); } + if (intel && irb->hiz_region) { + intel_region_release(&irb->hiz_region); + } free(irb); } @@ -129,7 +132,12 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer case GL_STENCIL_INDEX8_EXT: case GL_STENCIL_INDEX16_EXT: /* These aren't actual texture formats, so force them here. */ - rb->Format = MESA_FORMAT_S8_Z24; + if (intel->has_separate_stencil) { + rb->Format = MESA_FORMAT_S8; + } else { + assert(!intel->must_use_separate_stencil); + rb->Format = MESA_FORMAT_S8_Z24; + } break; } @@ -143,6 +151,9 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer if (irb->region) { intel_region_release(&irb->region); } + if (irb->hiz_region) { + intel_region_release(&irb->hiz_region); + } /* allocate new memory region/renderbuffer */ @@ -154,19 +165,54 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer GLenum base_format = _mesa_get_format_base_format(rb->Format); if (intel->gen >= 4 && (base_format == GL_DEPTH_COMPONENT || + base_format == GL_STENCIL_INDEX || base_format == GL_DEPTH_STENCIL)) tiling = I915_TILING_Y; else tiling = I915_TILING_X; } - irb->region = intel_region_alloc(intel->intelScreen, tiling, cpp, - width, height, GL_TRUE); + if (irb->Base.Format == MESA_FORMAT_S8) { + /* + * The stencil buffer has quirky pitch requirements. From Vol 2a, + * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch": + * The pitch must be set to 2x the value computed based on width, as + * the stencil buffer is stored with two rows interleaved. + * To accomplish this, we resort to the nasty hack of doubling the drm + * region's cpp and halving its height. + * + * If we neglect to double the pitch, then drm_intel_gem_bo_map_gtt() + * maps the memory incorrectly. + */ + irb->region = intel_region_alloc(intel->intelScreen, + I915_TILING_Y, + cpp * 2, + width, + height / 2, + GL_TRUE); + } else { + irb->region = intel_region_alloc(intel->intelScreen, tiling, cpp, + width, height, GL_TRUE); + } + if (!irb->region) return GL_FALSE; /* out of memory? */ ASSERT(irb->region->buffer); + if (intel->vtbl.is_hiz_depth_format(intel, rb->Format)) { + irb->hiz_region = intel_region_alloc(intel->intelScreen, + I915_TILING_Y, + irb->region->cpp, + irb->region->width, + irb->region->height, + GL_TRUE); + if (!irb->hiz_region) { + intel_region_release(&irb->region); + return GL_FALSE; + } + } + rb->Width = width; rb->Height = height; @@ -374,6 +420,9 @@ static GLboolean intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb, struct gl_texture_image *texImage) { + struct intel_context *intel = intel_context(ctx); + struct intel_texture_image *intel_image = intel_texture_image(texImage); + if (!intel_span_supports_format(texImage->TexFormat)) { DBG("Render to texture BAD FORMAT %s\n", _mesa_get_format_name(texImage->TexFormat)); @@ -392,6 +441,32 @@ intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb, irb->Base.Delete = intel_delete_renderbuffer; irb->Base.AllocStorage = intel_nop_alloc_storage; + /* Point the renderbuffer's region to the texture's region. */ + if (irb->region != intel_image->mt->region) { + intel_region_release(&irb->region); + intel_region_reference(&irb->region, intel_image->mt->region); + } + + /* Allocate the texture's hiz region if necessary. */ + if (intel->vtbl.is_hiz_depth_format(intel, texImage->TexFormat) + && !intel_image->mt->hiz_region) { + intel_image->mt->hiz_region = + intel_region_alloc(intel->intelScreen, + I915_TILING_Y, + _mesa_get_format_bytes(texImage->TexFormat), + texImage->Width, + texImage->Height, + GL_TRUE); + if (!intel_image->mt->hiz_region) + return GL_FALSE; + } + + /* Point the renderbuffer's hiz region to the texture's hiz region. */ + if (irb->hiz_region != intel_image->mt->hiz_region) { + intel_region_release(&irb->hiz_region); + intel_region_reference(&irb->hiz_region, intel_image->mt->hiz_region); + } + return GL_TRUE; } @@ -497,13 +572,6 @@ intel_render_texture(struct gl_context * ctx, att->Texture->Name, newImage->Width, newImage->Height, irb->Base.RefCount); - /* point the renderbufer's region to the texture image region */ - if (irb->region != intel_image->mt->region) { - if (irb->region) - intel_region_release(&irb->region); - intel_region_reference(&irb->region, intel_image->mt->region); - } - intel_set_draw_offset_for_image(intel_image, att->Zoffset); intel_image->used_as_render_target = GL_TRUE; @@ -597,21 +665,33 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) intel_get_renderbuffer(fb, BUFFER_STENCIL); int i; - if (depthRb && stencilRb && stencilRb != depthRb) { - if (fb->Attachment[BUFFER_DEPTH].Type == GL_TEXTURE && - fb->Attachment[BUFFER_STENCIL].Type == GL_TEXTURE && - (fb->Attachment[BUFFER_DEPTH].Texture->Name == - fb->Attachment[BUFFER_STENCIL].Texture->Name)) { - /* OK */ - } else { - /* we only support combined depth/stencil buffers, not separate - * stencil buffers. - */ - DBG("Only supports combined depth/stencil (found %s, %s)\n", - depthRb ? _mesa_get_format_name(depthRb->Base.Format): "NULL", - stencilRb ? _mesa_get_format_name(stencilRb->Base.Format): "NULL"); - fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; - } + /* + * The depth and stencil renderbuffers are the same renderbuffer or wrap + * the same texture. + */ + bool depth_stencil_are_same; + if (depthRb && stencilRb && depthRb == stencilRb) + depth_stencil_are_same = true; + else if (depthRb && stencilRb && depthRb != stencilRb + && (fb->Attachment[BUFFER_DEPTH].Type == GL_TEXTURE) + && (fb->Attachment[BUFFER_STENCIL].Type == GL_TEXTURE) + && (fb->Attachment[BUFFER_DEPTH].Texture->Name + == fb->Attachment[BUFFER_STENCIL].Texture->Name)) + depth_stencil_are_same = true; + else + depth_stencil_are_same = false; + + bool fb_has_combined_depth_stencil_format = + (depthRb && depthRb->Base.Format == MESA_FORMAT_S8_Z24) || + (stencilRb && stencilRb->Base.Format == MESA_FORMAT_S8_Z24); + + bool fb_has_hiz = intel_framebuffer_has_hiz(fb); + + if ((intel->must_use_separate_stencil || fb_has_hiz) + && (depth_stencil_are_same || fb_has_combined_depth_stencil_format)) { + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; + } else if (!intel->has_separate_stencil && depthRb && stencilRb && !depth_stencil_are_same) { + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; } for (i = 0; i < Elements(fb->Attachment); i++) { diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index 028f657d12d..212dd9aadc8 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -28,6 +28,7 @@ #ifndef INTEL_FBO_H #define INTEL_FBO_H +#include <stdbool.h> #include "main/formats.h" #include "intel_screen.h" @@ -40,6 +41,9 @@ struct intel_renderbuffer { struct gl_renderbuffer Base; struct intel_region *region; + + /** Only used by depth renderbuffers for which HiZ is enabled. */ + struct intel_region *hiz_region; }; @@ -80,6 +84,29 @@ intel_get_renderbuffer(struct gl_framebuffer *fb, int attIndex) return NULL; } +/** + * If the framebuffer has a depth buffer attached, then return its HiZ region. + * The HiZ region may be null. + */ +static INLINE struct intel_region* +intel_framebuffer_get_hiz_region(struct gl_framebuffer *fb) +{ + struct intel_renderbuffer *rb = NULL; + if (fb) + rb = intel_get_renderbuffer(fb, BUFFER_DEPTH); + + if (rb) + return rb->hiz_region; + else + return NULL; +} + +static INLINE bool +intel_framebuffer_has_hiz(struct gl_framebuffer *fb) +{ + return intel_framebuffer_get_hiz_region(fb) != NULL; +} + extern void intel_renderbuffer_set_region(struct intel_context *intel, diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index a3409274fb7..e62905de7c3 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -200,6 +200,7 @@ intel_miptree_release(struct intel_context *intel, DBG("%s deleting %p\n", __FUNCTION__, *mt); intel_region_release(&((*mt)->region)); + intel_region_release(&((*mt)->hiz_region)); for (i = 0; i < MAX_TEXTURE_LEVELS; i++) { free((*mt)->level[i].x_offset); diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h index 760a8bce601..325e3916981 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h @@ -113,6 +113,20 @@ struct intel_mipmap_tree */ struct intel_region *region; + /** + * This points to an auxillary hiz region if all of the following hold: + * 1. The texture has been attached to an FBO as a depthbuffer. + * 2. The texture format is hiz compatible. + * 3. The intel context supports hiz. + * + * When a texture is attached to multiple FBO's, a separate renderbuffer + * wrapper is created for each attachment. This necessitates storing the + * hiz region in the texture itself instead of the renderbuffer wrapper. + * + * \see intel_fbo.c:intel_wrap_texture() + */ + struct intel_region *hiz_region; + /* These are also refcounted: */ GLuint refcount; diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 16bce20317e..5290342c3e1 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -70,9 +70,6 @@ intel_set_span_functions(struct intel_context *intel, #define HW_UNLOCK() -/* Convenience macros to avoid typing the address argument over and over */ -#define NO_TILE(_X, _Y) (((_Y) * irb->region->pitch + (_X)) * irb->region->cpp) - /* r5g6b5 color span and pixel functions */ #define SPANTMP_PIXEL_FMT GL_RGB #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c index befa615d1e6..6890a690ab1 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_format.c +++ b/src/mesa/drivers/dri/intel/intel_tex_format.c @@ -22,6 +22,7 @@ intel_mesa_format_to_rb_datatype(gl_format format) case MESA_FORMAT_RGB565: case MESA_FORMAT_ARGB1555: case MESA_FORMAT_ARGB4444: + case MESA_FORMAT_S8: return GL_UNSIGNED_BYTE; case MESA_FORMAT_R16: case MESA_FORMAT_RG1616: diff --git a/src/mesa/drivers/dri/mach64/Makefile b/src/mesa/drivers/dri/mach64/Makefile index c20fdece297..0474c1a165f 100644 --- a/src/mesa/drivers/dri/mach64/Makefile +++ b/src/mesa/drivers/dri/mach64/Makefile @@ -5,6 +5,8 @@ include $(TOP)/configs/current LIBNAME = mach64_dri.so +include ../Makefile.defines + DRIVER_SOURCES = \ mach64_context.c \ mach64_ioctl.c \ @@ -25,5 +27,5 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../Makefile.targets diff --git a/src/mesa/drivers/dri/mga/Makefile b/src/mesa/drivers/dri/mga/Makefile index 92533bccc29..9948ee767ac 100644 --- a/src/mesa/drivers/dri/mga/Makefile +++ b/src/mesa/drivers/dri/mga/Makefile @@ -5,6 +5,8 @@ include $(TOP)/configs/current LIBNAME = mga_dri.so +include ../Makefile.defines + DRIVER_SOURCES = \ mgadd.c \ mgaioctl.c \ @@ -27,5 +29,5 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../Makefile.targets diff --git a/src/mesa/drivers/dri/nouveau/Makefile b/src/mesa/drivers/dri/nouveau/Makefile index 3b506a91ffa..e485a98761f 100644 --- a/src/mesa/drivers/dri/nouveau/Makefile +++ b/src/mesa/drivers/dri/nouveau/Makefile @@ -3,11 +3,10 @@ TOP = ../../../../.. include $(TOP)/configs/current -CFLAGS += $(shell pkg-config libdrm libdrm_nouveau --cflags) -DRI_LIB_DEPS += $(shell pkg-config libdrm_nouveau --libs) - LIBNAME = nouveau_vieux_dri.so +include ../Makefile.defines + DRIVER_SOURCES = \ nouveau_screen.c \ nouveau_context.c \ @@ -51,7 +50,9 @@ C_SOURCES = \ ASM_SOURCES = +INCLUDES += $(NOUVEAU_CFLAGS) +DRI_LIB_DEPS += $(NOUVEAU_LIBS) -include ../Makefile.template +include ../Makefile.targets symlinks: diff --git a/src/mesa/drivers/dri/r128/Makefile b/src/mesa/drivers/dri/r128/Makefile index 8144c9b43ff..8b23ccc8cbe 100644 --- a/src/mesa/drivers/dri/r128/Makefile +++ b/src/mesa/drivers/dri/r128/Makefile @@ -5,6 +5,8 @@ include $(TOP)/configs/current LIBNAME = r128_dri.so +include ../Makefile.defines + DRIVER_SOURCES = \ r128_context.c \ r128_lock.c \ @@ -25,5 +27,5 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../Makefile.targets diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile index 8013768e9fb..4547f7e2ee0 100644 --- a/src/mesa/drivers/dri/r200/Makefile +++ b/src/mesa/drivers/dri/r200/Makefile @@ -3,10 +3,10 @@ TOP = ../../../../.. include $(TOP)/configs/current -CFLAGS += $(RADEON_CFLAGS) - LIBNAME = r200_dri.so +include ../Makefile.defines + ifeq ($(RADEON_LDFLAGS),) CS_SOURCES = radeon_cs_space_drm.c radeon_bo.c radeon_cs.c endif @@ -55,12 +55,13 @@ X86_SOURCES = DRIVER_DEFINES = -DRADEON_R200 +INCLUDES += $(RADEON_CFLAGS) DRI_LIB_DEPS += $(RADEON_LDFLAGS) ##### TARGETS ##### -include ../Makefile.template +include ../Makefile.targets #INCLUDES += -I../radeon/server diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 2245998c952..9f23a8496aa 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -3,17 +3,17 @@ TOP = ../../../../.. include $(TOP)/configs/current -CFLAGS += $(RADEON_CFLAGS) - LIBNAME = r300_dri.so +include ../Makefile.defines + ifeq ($(RADEON_LDFLAGS),) CS_SOURCES = radeon_cs_space_drm.c radeon_bo.c radeon_cs.c endif COMMON_SOURCES = \ ../../common/driverfuncs.c \ - ../common/mm.c \ + ../common/drirenderbuffer.c \ ../common/utils.c \ ../common/texmem.c \ ../common/vblank.c \ @@ -64,6 +64,7 @@ C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES) DRIVER_DEFINES = -DRADEON_R300 # -DRADEON_BO_TRACK \ +INCLUDES += $(RADEON_CFLAGS) DRI_LIB_DEPS += $(RADEON_LDFLAGS) SUBDIRS = compiler @@ -73,5 +74,5 @@ EXTRA_MODULES = compiler/libr300compiler.a ##### TARGETS ##### -include ../Makefile.template +include ../Makefile.targets diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index 4bedfacd632..5aa13329ac2 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -38,7 +38,9 @@ C_SOURCES = \ r3xx_vertprog.c \ r3xx_vertprog_dump.c \ \ - memory_pool.c + memory_pool.c \ + $(TOP)/src/glsl/ralloc.c \ + $(TOP)/src/mesa/program/register_allocate.c ### Basic defines ### @@ -52,6 +54,7 @@ INCLUDES = \ -I$(TOP)/include \ -I$(TOP)/src/mesa \ -I$(TOP)/src/glsl \ + -I$(TOP)/src/mapi ##### TARGETS ##### diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript index 9931537492e..2c748b6e214 100755 --- a/src/mesa/drivers/dri/r300/compiler/SConscript +++ b/src/mesa/drivers/dri/r300/compiler/SConscript @@ -4,6 +4,7 @@ env = env.Clone() env.Append(CPPPATH = '#/include') env.Append(CPPPATH = '#/src/mesa') env.Append(CPPPATH = '#/src/glsl') +env.Append(CPPPATH = '#/src/mapi') # temporary fix env['CFLAGS'] = str(env['CFLAGS']).replace('-Werror=declaration-after-statement', '') @@ -43,6 +44,8 @@ r300compiler = env.ConvenienceLibrary( 'r3xx_vertprog.c', 'r3xx_vertprog_dump.c', 'memory_pool.c', + '#/src/glsl/ralloc.c', + '#/src/mesa/program/register_allocate.c' ]) Return('r300compiler') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index da6c8b602e1..93a0dd168f4 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1619,7 +1619,6 @@ static void r500SetupRSUnit(struct gl_context * ctx) WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); } -#define MIN3(a, b, c) ((a) < (b) ? MIN2(a, c) : MIN2(b, c)) void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_count, GLuint temp_count) diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile index 200bc20f647..bec0b5a53fe 100644 --- a/src/mesa/drivers/dri/r600/Makefile +++ b/src/mesa/drivers/dri/r600/Makefile @@ -3,17 +3,17 @@ TOP = ../../../../.. include $(TOP)/configs/current -CFLAGS += $(RADEON_CFLAGS) - LIBNAME = r600_dri.so +include ../Makefile.defines + ifeq ($(RADEON_LDFLAGS),) CS_SOURCES = radeon_cs_space_drm.c radeon_bo.c radeon_cs.c endif COMMON_SOURCES = \ ../../common/driverfuncs.c \ - ../common/mm.c \ + ../common/drirenderbuffer.c \ ../common/utils.c \ ../common/texmem.c \ ../common/vblank.c \ @@ -78,9 +78,10 @@ C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES) DRIVER_DEFINES = -DRADEON_R600 # -DRADEON_BO_TRACK \ +INCLUDES += $(RADEON_CFLAGS) DRI_LIB_DEPS += $(RADEON_LDFLAGS) ##### TARGETS ##### -include ../Makefile.template +include ../Makefile.targets diff --git a/src/mesa/drivers/dri/r600/evergreen_blit.c b/src/mesa/drivers/dri/r600/evergreen_blit.c index 0e4da5499ba..95ac4aee5ce 100644 --- a/src/mesa/drivers/dri/r600/evergreen_blit.c +++ b/src/mesa/drivers/dri/r600/evergreen_blit.c @@ -1525,6 +1525,48 @@ eg_set_default_state(context_t *context) num_hs_stack_entries = 42; num_ls_stack_entries = 42; break; + case CHIP_FAMILY_SUMO: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 96; + num_vs_threads = 25; + num_gs_threads = 25; + num_es_threads = 25; + num_hs_threads = 25; + num_ls_threads = 25; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_FAMILY_SUMO2: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 96; + num_vs_threads = 25; + num_gs_threads = 25; + num_es_threads = 25; + num_hs_threads = 25; + num_ls_threads = 25; + num_ps_stack_entries = 85; + num_vs_stack_entries = 85; + num_gs_stack_entries = 85; + num_es_stack_entries = 85; + num_hs_stack_entries = 85; + num_ls_stack_entries = 85; + break; case CHIP_FAMILY_BARTS: num_ps_gprs = 93; num_vs_gprs = 46; @@ -1592,6 +1634,8 @@ eg_set_default_state(context_t *context) if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_CEDAR) || (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_PALM) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_SUMO) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_SUMO2) || (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_CAICOS)) CLEARbit(sq_config, EG_SQ_CONFIG__VC_ENABLE_bit); else diff --git a/src/mesa/drivers/dri/r600/evergreen_chip.c b/src/mesa/drivers/dri/r600/evergreen_chip.c index 42566e537a5..388a96ff067 100644 --- a/src/mesa/drivers/dri/r600/evergreen_chip.c +++ b/src/mesa/drivers/dri/r600/evergreen_chip.c @@ -287,7 +287,9 @@ static void evergreenSetupVTXConstants(struct gl_context * ctx, return; if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_CEDAR) || - (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_PALM)) + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_PALM) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_SUMO) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_SUMO2)) r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit); else r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit); diff --git a/src/mesa/drivers/dri/r600/evergreen_state.c b/src/mesa/drivers/dri/r600/evergreen_state.c index 309c93fe088..81bf1172dc3 100644 --- a/src/mesa/drivers/dri/r600/evergreen_state.c +++ b/src/mesa/drivers/dri/r600/evergreen_state.c @@ -1470,6 +1470,22 @@ static void evergreenInitSQConfig(struct gl_context * ctx) uMaxThreads = 192; uMaxStackEntries = 256; break; + case CHIP_FAMILY_SUMO: + uSqNumCfInsts = 2; + bVC_ENABLE = GL_FALSE; + uMaxGPRs = 256; + uPSThreadCount = 96; + uMaxThreads = 248; + uMaxStackEntries = 256; + break; + case CHIP_FAMILY_SUMO2: + uSqNumCfInsts = 2; + bVC_ENABLE = GL_FALSE; + uMaxGPRs = 256; + uPSThreadCount = 96; + uMaxThreads = 248; + uMaxStackEntries = 512; + break; case CHIP_FAMILY_BARTS: uSqNumCfInsts = 2; bVC_ENABLE = GL_TRUE; diff --git a/src/mesa/drivers/dri/radeon/Makefile b/src/mesa/drivers/dri/radeon/Makefile index 93219e40afd..6b5d3335452 100644 --- a/src/mesa/drivers/dri/radeon/Makefile +++ b/src/mesa/drivers/dri/radeon/Makefile @@ -4,10 +4,10 @@ TOP = ../../../../.. include $(TOP)/configs/current -CFLAGS += $(RADEON_CFLAGS) - LIBNAME = radeon_dri.so +include ../Makefile.defines + ifeq ($(RADEON_LDFLAGS),) CS_SOURCES = radeon_cs_space_drm.c radeon_bo.c radeon_cs.c endif @@ -53,9 +53,10 @@ C_SOURCES = \ DRIVER_DEFINES = -DRADEON_R100 +INCLUDES += $(RADEON_CFLAGS) DRI_LIB_DEPS += $(RADEON_LDFLAGS) X86_SOURCES = -include ../Makefile.template +include ../Makefile.targets diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index 9145023826e..bd236625122 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -450,6 +450,18 @@ #define PCI_CHIP_PALM_9806 0x9806 #define PCI_CHIP_PALM_9807 0x9807 +#define PCI_CHIP_SUMO_9640 0x9640 +#define PCI_CHIP_SUMO_9641 0x9641 +#define PCI_CHIP_SUMO2_9642 0x9642 +#define PCI_CHIP_SUMO2_9643 0x9643 +#define PCI_CHIP_SUMO2_9644 0x9644 +#define PCI_CHIP_SUMO2_9645 0x9645 +#define PCI_CHIP_SUMO_9647 0x9647 +#define PCI_CHIP_SUMO_9648 0x9648 +#define PCI_CHIP_SUMO_964A 0x964a +#define PCI_CHIP_SUMO_964E 0x964e +#define PCI_CHIP_SUMO_964F 0x964f + #define PCI_CHIP_BARTS_6720 0x6720 #define PCI_CHIP_BARTS_6721 0x6721 #define PCI_CHIP_BARTS_6722 0x6722 @@ -534,6 +546,8 @@ enum { CHIP_FAMILY_CYPRESS, CHIP_FAMILY_HEMLOCK, CHIP_FAMILY_PALM, + CHIP_FAMILY_SUMO, + CHIP_FAMILY_SUMO2, CHIP_FAMILY_BARTS, CHIP_FAMILY_TURKS, CHIP_FAMILY_CAICOS, diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 5d7b3973d57..bf8925f61d0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -98,6 +98,8 @@ static const char* get_chip_family_name(int chip_family) case CHIP_FAMILY_CYPRESS: return "CYPRESS"; case CHIP_FAMILY_HEMLOCK: return "HEMLOCK"; case CHIP_FAMILY_PALM: return "PALM"; + case CHIP_FAMILY_SUMO: return "SUMO"; + case CHIP_FAMILY_SUMO2: return "SUMO2"; case CHIP_FAMILY_BARTS: return "BARTS"; case CHIP_FAMILY_TURKS: return "TURKS"; case CHIP_FAMILY_CAICOS: return "CAICOS"; diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 6449229e088..6cf843406f9 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -1168,6 +1168,25 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) screen->chip_flags = RADEON_CHIPSET_TCL; break; + case PCI_CHIP_SUMO_9640: + case PCI_CHIP_SUMO_9641: + case PCI_CHIP_SUMO_9647: + case PCI_CHIP_SUMO_9648: + case PCI_CHIP_SUMO_964A: + case PCI_CHIP_SUMO_964E: + case PCI_CHIP_SUMO_964F: + screen->chip_family = CHIP_FAMILY_SUMO; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_SUMO2_9642: + case PCI_CHIP_SUMO2_9643: + case PCI_CHIP_SUMO2_9644: + case PCI_CHIP_SUMO2_9645: + screen->chip_family = CHIP_FAMILY_SUMO2; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + case PCI_CHIP_BARTS_6720: case PCI_CHIP_BARTS_6721: case PCI_CHIP_BARTS_6722: diff --git a/src/mesa/drivers/dri/savage/Makefile b/src/mesa/drivers/dri/savage/Makefile index 53511552c6d..03be3468da9 100644 --- a/src/mesa/drivers/dri/savage/Makefile +++ b/src/mesa/drivers/dri/savage/Makefile @@ -5,6 +5,8 @@ include $(TOP)/configs/current LIBNAME = savage_dri.so +include ../Makefile.defines + DRIVER_SOURCES = \ savage_xmesa.c \ savagedd.c \ @@ -22,5 +24,5 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../Makefile.targets diff --git a/src/mesa/drivers/dri/sis/Makefile b/src/mesa/drivers/dri/sis/Makefile index 6b4f938bab3..0e0bf0d4eac 100644 --- a/src/mesa/drivers/dri/sis/Makefile +++ b/src/mesa/drivers/dri/sis/Makefile @@ -5,6 +5,8 @@ include $(TOP)/configs/current LIBNAME = sis_dri.so +include ../Makefile.defines + DRIVER_SOURCES = \ sis6326_state.c \ sis6326_clear.c \ @@ -28,5 +30,5 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../Makefile.targets diff --git a/src/mesa/drivers/dri/swrast/Makefile b/src/mesa/drivers/dri/swrast/Makefile index d2cf6dbc55b..4cb99fd0eb7 100644 --- a/src/mesa/drivers/dri/swrast/Makefile +++ b/src/mesa/drivers/dri/swrast/Makefile @@ -5,6 +5,8 @@ include $(TOP)/configs/current LIBNAME = swrast_dri.so +include ../Makefile.defines + DRIVER_DEFINES = -D__NOT_HAVE_DRM_H DRIVER_SOURCES = \ @@ -22,5 +24,5 @@ SWRAST_COMMON_SOURCES = \ ../common/utils.c \ ../common/drisw_util.c -include ../Makefile.template +include ../Makefile.targets diff --git a/src/mesa/drivers/dri/tdfx/Makefile b/src/mesa/drivers/dri/tdfx/Makefile index 96bd8f8202f..ed84df20925 100644 --- a/src/mesa/drivers/dri/tdfx/Makefile +++ b/src/mesa/drivers/dri/tdfx/Makefile @@ -5,6 +5,8 @@ include $(TOP)/configs/current LIBNAME = tdfx_dri.so +include ../Makefile.defines + DRIVER_SOURCES = \ tdfx_context.c \ tdfx_dd.c \ @@ -27,6 +29,6 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../Makefile.targets diff --git a/src/mesa/drivers/dri/unichrome/Makefile b/src/mesa/drivers/dri/unichrome/Makefile index 14cf9f30386..373da6016e4 100644 --- a/src/mesa/drivers/dri/unichrome/Makefile +++ b/src/mesa/drivers/dri/unichrome/Makefile @@ -5,6 +5,8 @@ include $(TOP)/configs/current LIBNAME = unichrome_dri.so +include ../Makefile.defines + DRIVER_SOURCES = \ via_context.c \ via_fb.c \ @@ -25,5 +27,5 @@ C_SOURCES = \ ASM_SOURCES = -include ../Makefile.template +include ../Makefile.targets |