author    | Stéphane Marchesin <[email protected]> | 2011-08-26 17:37:25 -0700
committer | Stéphane Marchesin <[email protected]> | 2011-08-26 17:37:25 -0700
commit    | f8e6d19f3f40931be741b44d3edf210c38e13f0f (patch)
tree      | e99e4c619901412ac6448534b0f57ce1c4295c6b /src/mesa/drivers/dri
parent    | 974c49ed176de55aadb335a2956ef5dfec774a23 (diff)
parent    | e3b0e3776646d0367206e4544229622eb22fe9f8 (diff)
Merge branch 'master' of git://anongit.freedesktop.org/mesa/mesa
Diffstat (limited to 'src/mesa/drivers/dri')
99 files changed, 5556 insertions, 1192 deletions
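The most self-contained addition in this merge is the set of driconf post-processing options (pp_celshade, pp_nored, pp_nogreen, pp_noblue, pp_jimenezmlaa, pp_jimenezmlaa_color) added to xmlpool/options.h and xmlpool/t_options.h in the diff below. As a rough sketch of how these DRI_CONF_* macros stringify into the driconf XML a driver advertises — note that the DRI_CONF_OPT_BEGIN_V and DRI_CONF_DESC bodies here are simplified stand-ins, since only DRI_CONF_OPT_BEGIN and DRI_CONF_OPT_BEGIN_Q appear in the xmlpool.h hunk of this diff:

```c
/* Minimal sketch (not part of the merge): how a driconf option macro
 * expands into the XML fragment a driver exports.  The OPT_BEGIN_V and
 * DESC bodies below are simplified stand-ins for the real xmlpool.h ones. */
#include <stdio.h>

#define DRI_CONF_OPT_BEGIN_V(name, type, def, range) \
   "<option name=\"" #name "\" type=\"" #type "\" default=\"" #def "\" valid=\"" range "\">\n"
#define DRI_CONF_DESC(lang, text) \
   "  <description lang=\"" #lang "\" text=\"" text "\"/>\n"
#define DRI_CONF_OPT_END "</option>\n"

/* Same shape as the new pp_celshade option added in xmlpool/options.h below. */
#define DRI_CONF_PP_CELSHADE(def) \
   DRI_CONF_OPT_BEGIN_V(pp_celshade, enum, def, "0:1") \
   DRI_CONF_DESC(en, "A post-processing filter to cel-shade the output") \
   DRI_CONF_OPT_END

int main(void)
{
   /* A driver's option table is just one long concatenated string literal. */
   fputs(DRI_CONF_PP_CELSHADE(0), stdout);
   return 0;
}
```

Compiled and run, this prints the <option> element for pp_celshade roughly as a driver's option string would contain it (modulo the simplified description markup).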
diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c index 77967ac2a43..12dd31bb162 100644 --- a/src/mesa/drivers/dri/common/xmlconfig.c +++ b/src/mesa/drivers/dri/common/xmlconfig.c @@ -567,7 +567,7 @@ static void parseOptInfoAttr (struct OptInfoData *data, const XML_Char **attr) { } else defaultVal = attrVal[OA_DEFAULT]; if (!parseValue (&cache->values[opt], cache->info[opt].type, defaultVal)) - XML_FATAL ("illegal default value: %s.", defaultVal); + XML_FATAL ("illegal default value for %s: %s.", cache->info[opt].name, defaultVal); if (attrVal[OA_VALID]) { if (cache->info[opt].type == DRI_BOOL) diff --git a/src/mesa/drivers/dri/common/xmlpool.h b/src/mesa/drivers/dri/common/xmlpool.h index 587517ea10a..ffea430024d 100644 --- a/src/mesa/drivers/dri/common/xmlpool.h +++ b/src/mesa/drivers/dri/common/xmlpool.h @@ -60,7 +60,7 @@ #define DRI_CONF_OPT_BEGIN(name,type,def) \ "<option name=\""#name"\" type=\""#type"\" default=\""#def"\">\n" -/** \brief Begin an option definition with qouted default value */ +/** \brief Begin an option definition with quoted default value */ #define DRI_CONF_OPT_BEGIN_Q(name,type,def) \ "<option name=\""#name"\" type=\""#type"\" default="#def">\n" diff --git a/src/mesa/drivers/dri/common/xmlpool/options.h b/src/mesa/drivers/dri/common/xmlpool/options.h index d76595578c7..1e584ba086a 100644 --- a/src/mesa/drivers/dri/common/xmlpool/options.h +++ b/src/mesa/drivers/dri/common/xmlpool/options.h @@ -425,6 +425,66 @@ DRI_CONF_OPT_BEGIN(hyperz,bool,def) \ DRI_CONF_DESC(sv,"Använd HyperZ för att maximera prestandan") \ DRI_CONF_OPT_END +#define DRI_CONF_PP_CELSHADE(def) \ +DRI_CONF_OPT_BEGIN_V(pp_celshade,enum,def,"0:1") \ + DRI_CONF_DESC(en,"A post-processing filter to cel-shade the output") \ + DRI_CONF_DESC(de,"A post-processing filter to cel-shade the output") \ + DRI_CONF_DESC(es,"A post-processing filter to cel-shade the output") \ + DRI_CONF_DESC(nl,"A post-processing filter to cel-shade the output") \ + DRI_CONF_DESC(fr,"A post-processing filter to cel-shade the output") \ + DRI_CONF_DESC(sv,"A post-processing filter to cel-shade the output") \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_NORED(def) \ +DRI_CONF_OPT_BEGIN_V(pp_nored,enum,def,"0:1") \ + DRI_CONF_DESC(en,"A post-processing filter to remove the red channel") \ + DRI_CONF_DESC(de,"A post-processing filter to remove the red channel") \ + DRI_CONF_DESC(es,"A post-processing filter to remove the red channel") \ + DRI_CONF_DESC(nl,"A post-processing filter to remove the red channel") \ + DRI_CONF_DESC(fr,"A post-processing filter to remove the red channel") \ + DRI_CONF_DESC(sv,"A post-processing filter to remove the red channel") \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_NOGREEN(def) \ +DRI_CONF_OPT_BEGIN_V(pp_nogreen,enum,def,"0:1") \ + DRI_CONF_DESC(en,"A post-processing filter to remove the green channel") \ + DRI_CONF_DESC(de,"A post-processing filter to remove the green channel") \ + DRI_CONF_DESC(es,"A post-processing filter to remove the green channel") \ + DRI_CONF_DESC(nl,"A post-processing filter to remove the green channel") \ + DRI_CONF_DESC(fr,"A post-processing filter to remove the green channel") \ + DRI_CONF_DESC(sv,"A post-processing filter to remove the green channel") \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_NOBLUE(def) \ +DRI_CONF_OPT_BEGIN_V(pp_noblue,enum,def,"0:1") \ + DRI_CONF_DESC(en,"A post-processing filter to remove the blue channel") \ + DRI_CONF_DESC(de,"A post-processing filter to remove the blue channel") \ + DRI_CONF_DESC(es,"A 
post-processing filter to remove the blue channel") \ + DRI_CONF_DESC(nl,"A post-processing filter to remove the blue channel") \ + DRI_CONF_DESC(fr,"A post-processing filter to remove the blue channel") \ + DRI_CONF_DESC(sv,"A post-processing filter to remove the blue channel") \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_JIMENEZMLAA(def,min,max) \ +DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa,int,def, # min ":" # max ) \ + DRI_CONF_DESC(en,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \ + DRI_CONF_DESC(de,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \ + DRI_CONF_DESC(es,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \ + DRI_CONF_DESC(nl,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \ + DRI_CONF_DESC(fr,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \ + DRI_CONF_DESC(sv,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_JIMENEZMLAA_COLOR(def,min,max) \ +DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa_color,int,def, # min ":" # max ) \ + DRI_CONF_DESC(en,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \ + DRI_CONF_DESC(de,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \ + DRI_CONF_DESC(es,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \ + DRI_CONF_DESC(nl,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \ + DRI_CONF_DESC(fr,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \ + DRI_CONF_DESC(sv,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. 
Color version, usable with 2d GL apps") \ +DRI_CONF_OPT_END + #define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \ DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \ DRI_CONF_DESC(en,"Number of texture units used") \ diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h b/src/mesa/drivers/dri/common/xmlpool/t_options.h index 5fd6ec65bf8..2427aa77f5b 100644 --- a/src/mesa/drivers/dri/common/xmlpool/t_options.h +++ b/src/mesa/drivers/dri/common/xmlpool/t_options.h @@ -191,6 +191,36 @@ DRI_CONF_OPT_BEGIN(hyperz,bool,def) \ DRI_CONF_DESC(en,gettext("Use HyperZ to boost performance")) \ DRI_CONF_OPT_END +#define DRI_CONF_PP_CELSHADE(def) \ +DRI_CONF_OPT_BEGIN_V(pp_celshade,enum,def,"0:1") \ + DRI_CONF_DESC(en,gettext("A post-processing filter to cel-shade the output")) \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_NORED(def) \ +DRI_CONF_OPT_BEGIN_V(pp_nored,enum,def,"0:1") \ + DRI_CONF_DESC(en,gettext("A post-processing filter to remove the red channel")) \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_NOGREEN(def) \ +DRI_CONF_OPT_BEGIN_V(pp_nogreen,enum,def,"0:1") \ + DRI_CONF_DESC(en,gettext("A post-processing filter to remove the green channel")) \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_NOBLUE(def) \ +DRI_CONF_OPT_BEGIN_V(pp_noblue,enum,def,"0:1") \ + DRI_CONF_DESC(en,gettext("A post-processing filter to remove the blue channel")) \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_JIMENEZMLAA(def,min,max) \ +DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa,int,def, # min ":" # max ) \ + DRI_CONF_DESC(en,gettext("Morphological anti-aliasing based on Jimenez\\\' MLAA. 0 to disable, 8 for default quality")) \ +DRI_CONF_OPT_END + +#define DRI_CONF_PP_JIMENEZMLAA_COLOR(def,min,max) \ +DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa_color,int,def, # min ":" # max ) \ + DRI_CONF_DESC(en,gettext("Morphological anti-aliasing based on Jimenez\\\' MLAA. 0 to disable, 8 for default quality. 
Color version, usable with 2d GL apps")) \ +DRI_CONF_OPT_END + #define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \ DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \ DRI_CONF_DESC(en,gettext("Number of texture units used")) \ diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 6d43726beb1..ed5286fd7d9 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -881,6 +881,12 @@ i830_invalidate_state(struct intel_context *intel, GLuint new_state) i830_update_provoking_vertex(&intel->ctx); } +static bool +i830_is_hiz_depth_format(struct intel_context *intel, gl_format format) +{ + return false; +} + void i830InitVtbl(struct i830_context *i830) { @@ -898,4 +904,5 @@ i830InitVtbl(struct i830_context *i830) i830->intel.vtbl.finish_batch = intel_finish_vb; i830->intel.vtbl.invalidate_state = i830_invalidate_state; i830->intel.vtbl.render_target_supported = i830_render_target_supported; + i830->intel.vtbl.is_hiz_depth_format = i830_is_hiz_depth_format; } diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 6e1d7092237..d155b85ffca 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -175,10 +175,8 @@ src_vector(struct i915_fragment_program *p, case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: case PROGRAM_UNIFORM: - src = - i915_emit_param4fv(p, - program->Base.Parameters->ParameterValues[source-> - Index]); + src = i915_emit_param4fv(p, + &program->Base.Parameters->ParameterValues[source->Index][0].f); break; default: @@ -303,7 +301,7 @@ do { \ /* * TODO: consider moving this into core */ -static void calc_live_regs( struct i915_fragment_program *p ) +static bool calc_live_regs( struct i915_fragment_program *p ) { const struct gl_fragment_program *program = &p->FragProg; GLuint regsUsed = 0xffff0000; @@ -317,6 +315,9 @@ static void calc_live_regs( struct i915_fragment_program *p ) /* Register is written to: unmark as live for this and preceeding ops */ if (inst->DstReg.File == PROGRAM_TEMPORARY) { + if (inst->DstReg.Index > 16) + return false; + live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask; if (live_components[inst->DstReg.Index] == 0) regsUsed &= ~(1 << inst->DstReg.Index); @@ -327,6 +328,9 @@ static void calc_live_regs( struct i915_fragment_program *p ) if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) { unsigned c; + if (inst->SrcReg[a].Index > 16) + return false; + regsUsed |= 1 << inst->SrcReg[a].Index; for (c = 0; c < 4; c++) { @@ -340,6 +344,8 @@ static void calc_live_regs( struct i915_fragment_program *p ) p->usedRegs[i] = regsUsed; } + + return true; } static GLuint get_live_regs( struct i915_fragment_program *p, @@ -394,7 +400,10 @@ upload_program(struct i915_fragment_program *p) /* Not always needed: */ - calc_live_regs(p); + if (!calc_live_regs(p)) { + i915_program_error(p, "Could not allocate registers"); + return; + } while (1) { GLuint src0, src1, src2, flags; diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c index ca1949b223e..0a600d30bef 100644 --- a/src/mesa/drivers/dri/i915/i915_program.c +++ b/src/mesa/drivers/dri/i915/i915_program.c @@ -442,14 +442,16 @@ i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values) void i915_program_error(struct i915_fragment_program *p, const char *fmt, ...) 
{ - va_list args; + if (unlikely((INTEL_DEBUG & (DEBUG_WM | DEBUG_FALLBACKS)) != 0)) { + va_list args; - fprintf(stderr, "i915_program_error: "); - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args); + fprintf(stderr, "i915_program_error: "); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); - fprintf(stderr, "\n"); + fprintf(stderr, "\n"); + } p->error = 1; } diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 44f28cd9d15..d9c885da65b 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -124,7 +124,11 @@ CXX_SOURCES = \ brw_fs_reg_allocate.cpp \ brw_fs_schedule_instructions.cpp \ brw_fs_vector_splitting.cpp \ - brw_shader.cpp + brw_shader.cpp \ + brw_vec4.cpp \ + brw_vec4_emit.cpp \ + brw_vec4_reg_allocate.cpp \ + brw_vec4_visitor.cpp ASM_SOURCES = diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 471015cf9d0..df63fe1d52c 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -212,6 +212,7 @@ enum state_struct_type { AUB_TRACE_BINDING_TABLE = 0x101, AUB_TRACE_SURFACE_STATE = 0x102, AUB_TRACE_VS_CONSTANTS = 0x103, + AUB_TRACE_WM_CONSTANTS = 0x104, }; /** Subclass of Mesa vertex program */ @@ -247,6 +248,7 @@ enum param_conversion { PARAM_CONVERT_F2I, PARAM_CONVERT_F2U, PARAM_CONVERT_F2B, + PARAM_CONVERT_ZERO, }; /* Data about a particular attempt to compile a program. Note that @@ -310,12 +312,20 @@ struct brw_vs_prog_data { GLuint total_grf; GLbitfield64 outputs_written; GLuint nr_params; /**< number of float params/constants */ + GLuint total_scratch; GLuint inputs_read; /* Used for calculating urb partitions: */ GLuint urb_entry_size; + + const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */ + enum param_conversion param_convert[MAX_UNIFORMS * 4]; + const float *pull_param[MAX_UNIFORMS * 4]; + enum param_conversion pull_param_convert[MAX_UNIFORMS * 4]; + + bool uses_new_param_layout; }; @@ -528,7 +538,7 @@ struct brw_context * the CURBE, the depth buffer, and a query BO. */ drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16]; - int validated_bo_count; + unsigned int validated_bo_count; } state; struct brw_cache cache; @@ -662,6 +672,7 @@ struct brw_context struct brw_vs_prog_data *prog_data; int8_t *constant_map; /* variable array following prog_data */ + drm_intel_bo *scratch_bo; drm_intel_bo *const_bo; /** Offset in the program cache to the VS program */ uint32_t prog_offset; @@ -674,6 +685,23 @@ struct brw_context uint32_t push_const_offset; /* Offset in the batchbuffer */ int push_const_size; /* in 256-bit register increments */ + + /** @{ register allocator */ + + struct ra_regs *regs; + + /** + * Array of the ra classes for the unaligned contiguous register + * block sizes used. + */ + int *classes; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + /** @} */ } vs; struct { @@ -726,7 +754,6 @@ struct brw_context GLuint render_surf; GLuint nr_surfaces; - GLuint max_threads; drm_intel_bo *scratch_bo; GLuint sampler_count; @@ -747,6 +774,29 @@ struct brw_context * Pre-gen6, push constants live in the CURBE. */ uint32_t push_const_offset; + + /** @{ register allocator */ + + struct ra_regs *regs; + + /** Array of the ra classes for the unaligned contiguous + * register block sizes used. 
+ */ + int *classes; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + + /** + * ra class for the aligned pairs we use for PLN, which doesn't + * appear in *classes. + */ + int aligned_pairs_class; + + /** @} */ } wm; @@ -827,6 +877,10 @@ void brw_validate_textures( struct brw_context *brw ); */ void brwInitFragProgFuncs( struct dd_function_table *functions ); +int brw_get_scratch_size(int size); +void brw_get_scratch_bo(struct intel_context *intel, + drm_intel_bo **scratch_bo, int size); + /* brw_urb.c */ @@ -874,7 +928,7 @@ brw_fragment_program_const(const struct gl_fragment_program *p) } static inline -float convert_param(enum param_conversion conversion, float param) +float convert_param(enum param_conversion conversion, const float *param) { union { float f; @@ -884,21 +938,23 @@ float convert_param(enum param_conversion conversion, float param) switch (conversion) { case PARAM_NO_CONVERT: - return param; + return *param; case PARAM_CONVERT_F2I: - fi.i = param; + fi.i = *param; return fi.f; case PARAM_CONVERT_F2U: - fi.u = param; + fi.u = *param; return fi.f; case PARAM_CONVERT_F2B: - if (param != 0.0) + if (*param != 0.0) fi.i = 1; else fi.i = 0; return fi.f; + case PARAM_CONVERT_ZERO: + return 0.0; default: - return param; + return *param; } } diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index ae11c487a2c..960be10006e 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -203,7 +203,7 @@ static void prepare_constant_buffer(struct brw_context *brw) /* copy float constants */ for (i = 0; i < brw->wm.prog_data->nr_params; i++) { buf[offset + i] = convert_param(brw->wm.prog_data->param_convert[i], - *brw->wm.prog_data->param[i]); + brw->wm.prog_data->param[i]); } } @@ -244,15 +244,22 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; - /* Load the subset of push constants that will get used when - * we also have a pull constant buffer. - */ - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - assert(brw->vs.constant_map[i] <= nr); - memcpy(buf + offset + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); + if (brw->vs.prog_data->uses_new_param_layout) { + for (i = 0; i < brw->vs.prog_data->nr_params; i++) { + buf[offset + i] = convert_param(brw->vs.prog_data->param_convert[i], + brw->vs.prog_data->param[i]); + } + } else { + /* Load the subset of push constants that will get used when + * we also have a pull constant buffer. 
+ */ + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + assert(brw->vs.constant_map[i] <= nr); + memcpy(buf + offset + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + } } } } diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 0a3027d04ad..d1799c0ab4f 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -557,58 +557,93 @@ #define BRW_WE_ALL 1 /** @} */ -#define BRW_OPCODE_MOV 1 -#define BRW_OPCODE_SEL 2 -#define BRW_OPCODE_NOT 4 -#define BRW_OPCODE_AND 5 -#define BRW_OPCODE_OR 6 -#define BRW_OPCODE_XOR 7 -#define BRW_OPCODE_SHR 8 -#define BRW_OPCODE_SHL 9 -#define BRW_OPCODE_RSR 10 -#define BRW_OPCODE_RSL 11 -#define BRW_OPCODE_ASR 12 -#define BRW_OPCODE_CMP 16 -#define BRW_OPCODE_CMPN 17 -#define BRW_OPCODE_JMPI 32 -#define BRW_OPCODE_IF 34 -#define BRW_OPCODE_IFF 35 -#define BRW_OPCODE_ELSE 36 -#define BRW_OPCODE_ENDIF 37 -#define BRW_OPCODE_DO 38 -#define BRW_OPCODE_WHILE 39 -#define BRW_OPCODE_BREAK 40 -#define BRW_OPCODE_CONTINUE 41 -#define BRW_OPCODE_HALT 42 -#define BRW_OPCODE_MSAVE 44 -#define BRW_OPCODE_MRESTORE 45 -#define BRW_OPCODE_PUSH 46 -#define BRW_OPCODE_POP 47 -#define BRW_OPCODE_WAIT 48 -#define BRW_OPCODE_SEND 49 -#define BRW_OPCODE_SENDC 50 -#define BRW_OPCODE_MATH 56 -#define BRW_OPCODE_ADD 64 -#define BRW_OPCODE_MUL 65 -#define BRW_OPCODE_AVG 66 -#define BRW_OPCODE_FRC 67 -#define BRW_OPCODE_RNDU 68 -#define BRW_OPCODE_RNDD 69 -#define BRW_OPCODE_RNDE 70 -#define BRW_OPCODE_RNDZ 71 -#define BRW_OPCODE_MAC 72 -#define BRW_OPCODE_MACH 73 -#define BRW_OPCODE_LZD 74 -#define BRW_OPCODE_SAD2 80 -#define BRW_OPCODE_SADA2 81 -#define BRW_OPCODE_DP4 84 -#define BRW_OPCODE_DPH 85 -#define BRW_OPCODE_DP3 86 -#define BRW_OPCODE_DP2 87 -#define BRW_OPCODE_DPA2 88 -#define BRW_OPCODE_LINE 89 -#define BRW_OPCODE_PLN 90 -#define BRW_OPCODE_NOP 126 +enum opcode { + /* These are the actual hardware opcodes. */ + BRW_OPCODE_MOV = 1, + BRW_OPCODE_SEL = 2, + BRW_OPCODE_NOT = 4, + BRW_OPCODE_AND = 5, + BRW_OPCODE_OR = 6, + BRW_OPCODE_XOR = 7, + BRW_OPCODE_SHR = 8, + BRW_OPCODE_SHL = 9, + BRW_OPCODE_RSR = 10, + BRW_OPCODE_RSL = 11, + BRW_OPCODE_ASR = 12, + BRW_OPCODE_CMP = 16, + BRW_OPCODE_CMPN = 17, + BRW_OPCODE_JMPI = 32, + BRW_OPCODE_IF = 34, + BRW_OPCODE_IFF = 35, + BRW_OPCODE_ELSE = 36, + BRW_OPCODE_ENDIF = 37, + BRW_OPCODE_DO = 38, + BRW_OPCODE_WHILE = 39, + BRW_OPCODE_BREAK = 40, + BRW_OPCODE_CONTINUE = 41, + BRW_OPCODE_HALT = 42, + BRW_OPCODE_MSAVE = 44, + BRW_OPCODE_MRESTORE = 45, + BRW_OPCODE_PUSH = 46, + BRW_OPCODE_POP = 47, + BRW_OPCODE_WAIT = 48, + BRW_OPCODE_SEND = 49, + BRW_OPCODE_SENDC = 50, + BRW_OPCODE_MATH = 56, + BRW_OPCODE_ADD = 64, + BRW_OPCODE_MUL = 65, + BRW_OPCODE_AVG = 66, + BRW_OPCODE_FRC = 67, + BRW_OPCODE_RNDU = 68, + BRW_OPCODE_RNDD = 69, + BRW_OPCODE_RNDE = 70, + BRW_OPCODE_RNDZ = 71, + BRW_OPCODE_MAC = 72, + BRW_OPCODE_MACH = 73, + BRW_OPCODE_LZD = 74, + BRW_OPCODE_SAD2 = 80, + BRW_OPCODE_SADA2 = 81, + BRW_OPCODE_DP4 = 84, + BRW_OPCODE_DPH = 85, + BRW_OPCODE_DP3 = 86, + BRW_OPCODE_DP2 = 87, + BRW_OPCODE_DPA2 = 88, + BRW_OPCODE_LINE = 89, + BRW_OPCODE_PLN = 90, + BRW_OPCODE_NOP = 126, + + /* These are compiler backend opcodes that get translated into other + * instructions. 
+ */ + FS_OPCODE_FB_WRITE = 128, + SHADER_OPCODE_RCP, + SHADER_OPCODE_RSQ, + SHADER_OPCODE_SQRT, + SHADER_OPCODE_EXP2, + SHADER_OPCODE_LOG2, + SHADER_OPCODE_POW, + SHADER_OPCODE_SIN, + SHADER_OPCODE_COS, + FS_OPCODE_DDX, + FS_OPCODE_DDY, + FS_OPCODE_PIXEL_X, + FS_OPCODE_PIXEL_Y, + FS_OPCODE_CINTERP, + FS_OPCODE_LINTERP, + FS_OPCODE_TEX, + FS_OPCODE_TXB, + FS_OPCODE_TXD, + FS_OPCODE_TXL, + FS_OPCODE_TXS, + FS_OPCODE_DISCARD, + FS_OPCODE_SPILL, + FS_OPCODE_UNSPILL, + FS_OPCODE_PULL_CONSTANT_LOAD, + + VS_OPCODE_URB_WRITE, + VS_OPCODE_SCRATCH_READ, + VS_OPCODE_SCRATCH_WRITE, +}; #define BRW_PREDICATE_NONE 0 #define BRW_PREDICATE_NORMAL 1 @@ -734,7 +769,6 @@ #define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0 #define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1 #define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 -#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2 #define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 #define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 #define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 @@ -747,6 +781,7 @@ #define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4 #define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5 #define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10 /* for GEN5 only */ #define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index af41c848308..927b0b4acc9 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -309,6 +309,35 @@ char *target_function[16] = { [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner" }; +char *target_function_gen6[16] = { + [BRW_MESSAGE_TARGET_NULL] = "null", + [BRW_MESSAGE_TARGET_MATH] = "math", + [BRW_MESSAGE_TARGET_SAMPLER] = "sampler", + [BRW_MESSAGE_TARGET_GATEWAY] = "gateway", + [GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE] = "sampler", + [GEN6_MESSAGE_TARGET_DP_RENDER_CACHE] = "render", + [GEN6_MESSAGE_TARGET_DP_CONST_CACHE] = "const", + [BRW_MESSAGE_TARGET_URB] = "urb", + [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner" +}; + +char *dp_rc_msg_type_gen6[16] = { + [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read", + [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read", + [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read", + [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read", + [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write", + [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write", + [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write", + [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write", + [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write", + [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write", +}; + char *math_function[16] = { [BRW_MATH_FUNCTION_INV] = "inv", [BRW_MATH_FUNCTION_LOG] = "log", @@ -927,8 +956,14 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) newline (file); pad (file, 16); space = 0; - err |= control (file, "target function", target_function, - target, &space); + + if (gen >= 6) { + err |= control (file, "target function", target_function_gen6, + target, &space); + 
} else { + err |= control (file, "target function", target_function, + target, &space); + } switch (target) { case BRW_MESSAGE_TARGET_MATH: @@ -985,9 +1020,16 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) inst->bits3.dp_read.msg_type); } break; + case BRW_MESSAGE_TARGET_DATAPORT_WRITE: if (gen >= 6) { - format (file, " (%d, %d, %d, %d, %d, %d)", + format (file, " ("); + + err |= control (file, "DP rc message type", + dp_rc_msg_type_gen6, + inst->bits3.gen6_dp.msg_type, &space); + + format (file, ", %d, %d, %d, %d, %d, %d)", inst->bits3.gen6_dp.binding_table_index, inst->bits3.gen6_dp.msg_control, inst->bits3.gen6_dp.msg_type, @@ -1003,6 +1045,7 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) inst->bits3.dp_write.send_commit_msg); } break; + case BRW_MESSAGE_TARGET_URB: if (gen >= 5) { format (file, " %d", inst->bits3.urb_gen5.offset); diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 56a46ced6e3..7bc69c612e3 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -689,17 +689,17 @@ static void brw_prepare_indices(struct brw_context *brw) * rebase it into a temporary. */ if ((get_size(index_buffer->type) - 1) & offset) { - GLubyte *map = ctx->Driver.MapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER_ARB, - GL_DYNAMIC_DRAW_ARB, - bufferobj); - map += offset; + GLubyte *map = ctx->Driver.MapBufferRange(ctx, + offset, + ib_size, + GL_MAP_WRITE_BIT, + bufferobj); intel_upload_data(&brw->intel, map, ib_size, ib_type_size, &bo, &offset); brw->ib.start_vertex_offset = offset / ib_type_size; - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj); + ctx->Driver.UnmapBuffer(ctx, bufferobj); } else { /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading * the index buffer state when we're just moving the start index diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 72d50eadbce..af50305fc2b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -44,6 +44,9 @@ #define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) #define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) #define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) +#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1) +#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2) +#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3) #define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) @@ -798,6 +801,12 @@ void brw_init_compile(struct brw_context *, struct brw_compile *p, void *mem_ctx); const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz ); +struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode); +void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg dest); +void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg reg); + /* Helpers for regular instructions: */ @@ -852,6 +861,27 @@ ROUND(RNDE) /* Helpers for SEND instruction: */ +void brw_set_dp_read_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint target_cache, + GLuint msg_length, + GLuint response_length); + +void brw_set_dp_write_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint msg_length, + GLboolean header_present, + GLuint pixel_scoreboard_clear, + GLuint response_length, + GLuint end_of_thread, + GLuint 
send_commit_msg); + void brw_urb_WRITE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index e7370f36064..c5013de7ec1 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -89,9 +89,9 @@ gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) } -static void brw_set_dest(struct brw_compile *p, - struct brw_instruction *insn, - struct brw_reg dest) +void +brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg dest) { if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && dest.file != BRW_MESSAGE_REGISTER_FILE) @@ -221,9 +221,9 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg) /* 10. Check destination issues. */ } -static void brw_set_src0(struct brw_compile *p, - struct brw_instruction *insn, - struct brw_reg reg) +void +brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg reg) { if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) assert(reg.nr < 128); @@ -504,17 +504,18 @@ static void brw_set_urb_message( struct brw_compile *p, } } -static void brw_set_dp_write_message( struct brw_compile *p, - struct brw_instruction *insn, - GLuint binding_table_index, - GLuint msg_control, - GLuint msg_type, - GLuint msg_length, - GLboolean header_present, - GLuint pixel_scoreboard_clear, - GLuint response_length, - GLuint end_of_thread, - GLuint send_commit_msg) +void +brw_set_dp_write_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint msg_length, + GLboolean header_present, + GLuint pixel_scoreboard_clear, + GLuint response_length, + GLuint end_of_thread, + GLuint send_commit_msg) { struct brw_context *brw = p->brw; struct intel_context *intel = &brw->intel; @@ -570,7 +571,7 @@ static void brw_set_dp_write_message( struct brw_compile *p, } } -static void +void brw_set_dp_read_message(struct brw_compile *p, struct brw_instruction *insn, GLuint binding_table_index, @@ -709,9 +710,9 @@ static void brw_set_sampler_message(struct brw_compile *p, } - -static struct brw_instruction *next_insn( struct brw_compile *p, - GLuint opcode ) +#define next_insn brw_next_insn +struct brw_instruction * +brw_next_insn(struct brw_compile *p, GLuint opcode) { struct brw_instruction *insn; @@ -732,7 +733,6 @@ static struct brw_instruction *next_insn( struct brw_compile *p, return insn; } - static struct brw_instruction *brw_alu1( struct brw_compile *p, GLuint opcode, struct brw_reg dest, @@ -1341,8 +1341,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, brw_set_src1(p, insn, brw_imm_ud(0)); insn->bits3.break_cont.jip = br * (do_insn - insn); - insn->header.execution_size = do_insn->header.execution_size; - assert(insn->header.execution_size == BRW_EXECUTE_8); + insn->header.execution_size = BRW_EXECUTE_8; } else if (intel->gen == 6) { insn = next_insn(p, BRW_OPCODE_WHILE); @@ -1351,8 +1350,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - insn->header.execution_size = do_insn->header.execution_size; - assert(insn->header.execution_size == BRW_EXECUTE_8); + insn->header.execution_size = BRW_EXECUTE_8; } else { if (p->single_program_flow) { insn = next_insn(p, BRW_OPCODE_ADD); @@ -2246,10 +2244,13 @@ void brw_urb_WRITE(struct brw_compile *p, if 
(intel->gen == 7) { /* Enable Channel Masks in the URB_WRITE_HWORD message header */ + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5), BRW_REGISTER_TYPE_UD), retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD), brw_imm_ud(0xff00)); + brw_pop_insn_state(p); } insn = next_insn(p, BRW_OPCODE_SEND); @@ -2311,7 +2312,7 @@ brw_find_loop_end(struct brw_compile *p, int start) if (insn->header.opcode == BRW_OPCODE_WHILE) { int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count : insn->bits3.break_cont.jip; - if (ip + jip / br < start) + if (ip + jip / br <= start) return ip; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b5ea943387d..0b0445ea142 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -143,20 +143,21 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) return 0; switch (inst->opcode) { - case FS_OPCODE_RCP: - case FS_OPCODE_RSQ: - case FS_OPCODE_SQRT: - case FS_OPCODE_EXP2: - case FS_OPCODE_LOG2: - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: return 1 * c->dispatch_width / 8; - case FS_OPCODE_POW: + case SHADER_OPCODE_POW: return 2 * c->dispatch_width / 8; case FS_OPCODE_TEX: case FS_OPCODE_TXB: case FS_OPCODE_TXD: case FS_OPCODE_TXL: + case FS_OPCODE_TXS: return 1; case FS_OPCODE_FB_WRITE: return 2; @@ -181,29 +182,26 @@ fs_visitor::virtual_grf_alloc(int size) virtual_grf_array_size *= 2; virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, virtual_grf_array_size); - - /* This slot is always unused. */ - virtual_grf_sizes[0] = 0; } virtual_grf_sizes[virtual_grf_next] = size; return virtual_grf_next++; } /** Fixed HW reg constructor. */ -fs_reg::fs_reg(enum register_file file, int hw_reg) +fs_reg::fs_reg(enum register_file file, int reg) { init(); this->file = file; - this->hw_reg = hw_reg; + this->reg = reg; this->type = BRW_REGISTER_TYPE_F; } /** Fixed HW reg constructor. 
*/ -fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type) +fs_reg::fs_reg(enum register_file file, int reg, uint32_t type) { init(); this->file = file; - this->hw_reg = hw_reg; + this->reg = reg; this->type = type; } @@ -242,11 +240,12 @@ import_uniforms_callback(const void *key, * This brings in those uniform definitions */ void -fs_visitor::import_uniforms(struct hash_table *src_variable_ht) +fs_visitor::import_uniforms(fs_visitor *v) { - hash_table_call_foreach(src_variable_ht, + hash_table_call_foreach(v->variable_ht, import_uniforms_callback, variable_ht); + this->params_remap = v->params_remap; } /* Our support for uniforms is piggy-backed on the struct @@ -281,23 +280,27 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type) assert(param < ARRAY_SIZE(c->prog_data.param)); - switch (type->base_type) { - case GLSL_TYPE_FLOAT: + if (ctx->Const.NativeIntegers) { c->prog_data.param_convert[param] = PARAM_NO_CONVERT; - break; - case GLSL_TYPE_UINT: - c->prog_data.param_convert[param] = PARAM_CONVERT_F2U; - break; - case GLSL_TYPE_INT: - c->prog_data.param_convert[param] = PARAM_CONVERT_F2I; - break; - case GLSL_TYPE_BOOL: - c->prog_data.param_convert[param] = PARAM_CONVERT_F2B; - break; - default: - assert(!"not reached"); - c->prog_data.param_convert[param] = PARAM_NO_CONVERT; - break; + } else { + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + c->prog_data.param_convert[param] = PARAM_NO_CONVERT; + break; + case GLSL_TYPE_UINT: + c->prog_data.param_convert[param] = PARAM_CONVERT_F2U; + break; + case GLSL_TYPE_INT: + c->prog_data.param_convert[param] = PARAM_CONVERT_F2I; + break; + case GLSL_TYPE_BOOL: + c->prog_data.param_convert[param] = PARAM_CONVERT_F2B; + break; + default: + assert(!"not reached"); + c->prog_data.param_convert[param] = PARAM_NO_CONVERT; + break; + } } this->param_index[param] = loc; this->param_offset[param] = i; @@ -463,9 +466,21 @@ fs_visitor::emit_general_interpolation(ir_variable *ir) } else { /* Perspective interpolation case. */ for (unsigned int k = 0; k < type->vector_elements; k++) { - struct brw_reg interp = interp_reg(location, k); - emit(FS_OPCODE_LINTERP, attr, - this->delta_x, this->delta_y, fs_reg(interp)); + /* FINISHME: At some point we probably want to push + * this farther by giving similar treatment to the + * other potentially constant components of the + * attribute, as well as making brw_vs_constval.c + * handle varyings other than gl_TexCoord. 
+ */ + if (location >= FRAG_ATTRIB_TEX0 && + location <= FRAG_ATTRIB_TEX7 && + k == 3 && !(c->key.proj_attrib_mask & (1 << location))) { + emit(BRW_OPCODE_MOV, attr, fs_reg(1.0f)); + } else { + struct brw_reg interp = interp_reg(location, k); + emit(FS_OPCODE_LINTERP, attr, + this->delta_x, this->delta_y, fs_reg(interp)); + } attr.reg_offset++; } @@ -512,16 +527,16 @@ fs_visitor::emit_frontfacing_interpolation(ir_variable *ir) } fs_inst * -fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) +fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src) { switch (opcode) { - case FS_OPCODE_RCP: - case FS_OPCODE_RSQ: - case FS_OPCODE_SQRT: - case FS_OPCODE_EXP2: - case FS_OPCODE_LOG2: - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: break; default: assert(!"not reached: bad math opcode"); @@ -555,12 +570,12 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) } fs_inst * -fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) +fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) { int base_mrf = 2; fs_inst *inst; - assert(opcode == FS_OPCODE_POW); + assert(opcode == SHADER_OPCODE_POW); if (intel->gen >= 6) { /* Can't do hstride == 0 args to gen6 math, so expand it out. @@ -605,7 +620,7 @@ fs_visitor::setup_paramvalues_refs() /* Set up the pointers to ParamValues now that that array is finalized. */ for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { c->prog_data.param[i] = - fp->Base.Parameters->ParameterValues[this->param_index[i]] + + (const float *)fp->Base.Parameters->ParameterValues[this->param_index[i]] + this->param_offset[i]; } } @@ -621,12 +636,12 @@ fs_visitor::assign_curb_setup() } /* Map the offsets in the UNIFORM file to fixed HW regs. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == UNIFORM) { - int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs + constant_nr / 8, constant_nr % 8); @@ -684,8 +699,8 @@ fs_visitor::assign_urb_setup() /* Offset all the urb_setup[] index by the actual position of the * setup regs, now that the location of the constants has been chosen. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->opcode == FS_OPCODE_LINTERP) { assert(inst->src[2].file == FIXED_HW_REG); @@ -739,8 +754,8 @@ fs_visitor::split_virtual_grfs() split_grf[this->delta_x.reg] = false; } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; /* Texturing produces 4 contiguous registers, so no splitting. 
*/ if (inst->is_tex()) { @@ -763,8 +778,8 @@ fs_visitor::split_virtual_grfs() } } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->dst.file == GRF && split_grf[inst->dst.reg] && @@ -786,6 +801,86 @@ fs_visitor::split_virtual_grfs() this->live_intervals_valid = false; } +bool +fs_visitor::remove_dead_constants() +{ + if (c->dispatch_width == 8) { + this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params); + + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) + this->params_remap[i] = -1; + + /* Find which params are still in use. */ + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + for (int i = 0; i < 3; i++) { + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; + + if (inst->src[i].file != UNIFORM) + continue; + + assert(constant_nr < (int)c->prog_data.nr_params); + + /* For now, set this to non-negative. We'll give it the + * actual new number in a moment, in order to keep the + * register numbers nicely ordered. + */ + this->params_remap[constant_nr] = 0; + } + } + + /* Figure out what the new numbers for the params will be. At some + * point when we're doing uniform array access, we're going to want + * to keep the distinction between .reg and .reg_offset, but for + * now we don't care. + */ + unsigned int new_nr_params = 0; + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { + if (this->params_remap[i] != -1) { + this->params_remap[i] = new_nr_params++; + } + } + + /* Update the list of params to be uploaded to match our new numbering. */ + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { + int remapped = this->params_remap[i]; + + if (remapped == -1) + continue; + + /* We've already done setup_paramvalues_refs() so no need to worry + * about param_index and param_offset. + */ + c->prog_data.param[remapped] = c->prog_data.param[i]; + c->prog_data.param_convert[remapped] = c->prog_data.param_convert[i]; + } + + c->prog_data.nr_params = new_nr_params; + } else { + /* This should have been generated in the 8-wide pass already. */ + assert(this->params_remap); + } + + /* Now do the renumbering of the shader to remove unused params. */ + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + for (int i = 0; i < 3; i++) { + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; + + if (inst->src[i].file != UNIFORM) + continue; + + assert(this->params_remap[constant_nr] != -1); + inst->src[i].reg = this->params_remap[constant_nr]; + inst->src[i].reg_offset = 0; + } + } + + return true; +} + /** * Choose accesses from the UNIFORM file to demote to using the pull * constant buffer. 
@@ -815,14 +910,14 @@ fs_visitor::setup_pull_constants() int pull_uniform_base = max_uniform_components; int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (int i = 0; i < 3; i++) { if (inst->src[i].file != UNIFORM) continue; - int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset; if (uniform_nr < pull_uniform_base) continue; @@ -871,8 +966,8 @@ fs_visitor::calculate_live_intervals() } int ip = 0; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->opcode == BRW_OPCODE_DO) { if (loop_depth++ == 0) @@ -892,7 +987,7 @@ fs_visitor::calculate_live_intervals() } } else { for (unsigned int i = 0; i < 3; i++) { - if (inst->src[i].file == GRF && inst->src[i].reg != 0) { + if (inst->src[i].file == GRF) { int reg = inst->src[i].reg; if (!loop_depth) { @@ -908,7 +1003,7 @@ fs_visitor::calculate_live_intervals() } } } - if (inst->dst.file == GRF && inst->dst.reg != 0) { + if (inst->dst.file == GRF) { int reg = inst->dst.reg; if (!loop_depth) { @@ -945,8 +1040,8 @@ fs_visitor::propagate_constants() calculate_live_intervals(); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->opcode != BRW_OPCODE_MOV || inst->predicated || @@ -965,11 +1060,9 @@ fs_visitor::propagate_constants() /* Found a move of a constant to a GRF. Find anything else using the GRF * before it's written, and replace it with the constant if we can. */ - exec_list_iterator scan_iter = iter; - scan_iter.next(); - for (; scan_iter.has_next(); scan_iter.next()) { - fs_inst *scan_inst = (fs_inst *)scan_iter.get(); - + for (fs_inst *scan_inst = (fs_inst *)inst->next; + !scan_inst->is_tail_sentinel(); + scan_inst = (fs_inst *)scan_inst->next) { if (scan_inst->opcode == BRW_OPCODE_DO || scan_inst->opcode == BRW_OPCODE_WHILE || scan_inst->opcode == BRW_OPCODE_ELSE || @@ -1046,6 +1139,24 @@ fs_visitor::propagate_constants() progress = true; } break; + + case SHADER_OPCODE_RCP: + /* The hardware doesn't do math on immediate values + * (because why are you doing that, seriously?), but + * the correct answer is to just constant fold it + * anyway. + */ + assert(i == 0); + if (inst->src[0].imm.f != 0.0f) { + scan_inst->opcode = BRW_OPCODE_MOV; + scan_inst->src[0] = inst->src[0]; + scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f; + progress = true; + } + break; + + default: + break; } } @@ -1063,6 +1174,49 @@ fs_visitor::propagate_constants() return progress; } + + +/** + * Attempts to move immediate constants into the immediate + * constant slot of following instructions. 
+ * + * Immediate constants are a bit tricky -- they have to be in the last + * operand slot, you can't do abs/negate on them, + */ + +bool +fs_visitor::opt_algebraic() +{ + bool progress = false; + + calculate_live_intervals(); + + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + switch (inst->opcode) { + case BRW_OPCODE_MUL: + if (inst->src[1].file != IMM) + continue; + + /* a * 1.0 = a */ + if (inst->src[1].type == BRW_REGISTER_TYPE_F && + inst->src[1].imm.f == 1.0) { + inst->opcode = BRW_OPCODE_MOV; + inst->src[1] = reg_undef; + progress = true; + break; + } + + break; + default: + break; + } + } + + return progress; +} + /** * Must be called after calculate_live_intervales() to remove unused * writes to registers -- register allocation will fail otherwise @@ -1077,8 +1231,8 @@ fs_visitor::dead_code_eliminate() calculate_live_intervals(); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) { inst->remove(); @@ -1101,8 +1255,8 @@ fs_visitor::register_coalesce() int if_depth = 0; int loop_depth = 0; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; /* Make sure that we dominate the instructions we're going to * scan for interfering with our coalescing, or we won't have @@ -1123,6 +1277,8 @@ fs_visitor::register_coalesce() case BRW_OPCODE_ENDIF: if_depth--; break; + default: + break; } if (loop_depth || if_depth) continue; @@ -1130,7 +1286,8 @@ fs_visitor::register_coalesce() if (inst->opcode != BRW_OPCODE_MOV || inst->predicated || inst->saturate || - inst->dst.file != GRF || inst->src[0].file != GRF || + inst->dst.file != GRF || (inst->src[0].file != GRF && + inst->src[0].file != UNIFORM)|| inst->dst.type != inst->src[0].type) continue; @@ -1141,11 +1298,10 @@ fs_visitor::register_coalesce() * program. */ bool interfered = false; - exec_list_iterator scan_iter = iter; - scan_iter.next(); - for (; scan_iter.has_next(); scan_iter.next()) { - fs_inst *scan_inst = (fs_inst *)scan_iter.get(); + for (fs_inst *scan_inst = (fs_inst *)inst->next; + !scan_inst->is_tail_sentinel(); + scan_inst = (fs_inst *)scan_inst->next) { if (scan_inst->dst.file == GRF) { if (scan_inst->dst.reg == inst->dst.reg && (scan_inst->dst.reg_offset == inst->dst.reg_offset || @@ -1153,7 +1309,8 @@ fs_visitor::register_coalesce() interfered = true; break; } - if (scan_inst->dst.reg == inst->src[0].reg && + if (inst->src[0].file == GRF && + scan_inst->dst.reg == inst->src[0].reg && (scan_inst->dst.reg_offset == inst->src[0].reg_offset || scan_inst->is_tex())) { interfered = true; @@ -1161,10 +1318,13 @@ fs_visitor::register_coalesce() } } - /* The gen6 MATH instruction can't handle source modifiers, so avoid - * coalescing those for now. We should do something more specific. + /* The gen6 MATH instruction can't handle source modifiers or + * unusual register regions, so avoid coalescing those for + * now. We should do something more specific. 
*/ - if (intel->gen >= 6 && scan_inst->is_math() && has_source_modifiers) { + if (intel->gen >= 6 && + scan_inst->is_math() && + (has_source_modifiers || inst->src[0].file == UNIFORM)) { interfered = true; break; } @@ -1176,19 +1336,17 @@ fs_visitor::register_coalesce() /* Rewrite the later usage to point at the source of the move to * be removed. */ - for (exec_list_iterator scan_iter = iter; scan_iter.has_next(); - scan_iter.next()) { - fs_inst *scan_inst = (fs_inst *)scan_iter.get(); - + for (fs_inst *scan_inst = inst; + !scan_inst->is_tail_sentinel(); + scan_inst = (fs_inst *)scan_inst->next) { for (int i = 0; i < 3; i++) { if (scan_inst->src[i].file == GRF && scan_inst->src[i].reg == inst->dst.reg && scan_inst->src[i].reg_offset == inst->dst.reg_offset) { - scan_inst->src[i].reg = inst->src[0].reg; - scan_inst->src[i].reg_offset = inst->src[0].reg_offset; - scan_inst->src[i].abs |= inst->src[0].abs; - scan_inst->src[i].negate ^= inst->src[0].negate; - scan_inst->src[i].smear = inst->src[0].smear; + fs_reg new_src = inst->src[0]; + new_src.negate ^= scan_inst->src[i].negate; + new_src.abs |= scan_inst->src[i].abs; + scan_inst->src[i] = new_src; } } } @@ -1212,8 +1370,8 @@ fs_visitor::compute_to_mrf() calculate_live_intervals(); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; int ip = next_ip; next_ip++; @@ -1228,9 +1386,9 @@ fs_visitor::compute_to_mrf() /* Work out which hardware MRF registers are written by this * instruction. */ - int mrf_low = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4; int mrf_high; - if (inst->dst.hw_reg & BRW_MRF_COMPR4) { + if (inst->dst.reg & BRW_MRF_COMPR4) { mrf_high = mrf_low + 4; } else if (c->dispatch_width == 16 && (!inst->force_uncompressed && !inst->force_sechalf)) { @@ -1297,7 +1455,7 @@ fs_visitor::compute_to_mrf() if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) { /* Found the creator of our MRF's source value. */ scan_inst->dst.file = MRF; - scan_inst->dst.hw_reg = inst->dst.hw_reg; + scan_inst->dst.reg = inst->dst.reg; scan_inst->saturate |= inst->saturate; inst->remove(); progress = true; @@ -1334,10 +1492,10 @@ fs_visitor::compute_to_mrf() /* If somebody else writes our MRF here, we can't * compute-to-MRF before that. */ - int scan_mrf_low = scan_inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4; int scan_mrf_high; - if (scan_inst->dst.hw_reg & BRW_MRF_COMPR4) { + if (scan_inst->dst.reg & BRW_MRF_COMPR4) { scan_mrf_high = scan_mrf_low + 4; } else if (c->dispatch_width == 16 && (!scan_inst->force_uncompressed && @@ -1392,8 +1550,8 @@ fs_visitor::remove_duplicate_mrf_writes() memset(last_mrf_move, 0, sizeof(last_mrf_move)); - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list_safe(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; switch (inst->opcode) { case BRW_OPCODE_DO: @@ -1409,7 +1567,7 @@ fs_visitor::remove_duplicate_mrf_writes() if (inst->opcode == BRW_OPCODE_MOV && inst->dst.file == MRF) { - fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg]; + fs_inst *prev_inst = last_mrf_move[inst->dst.reg]; if (prev_inst && inst->equals(prev_inst)) { inst->remove(); progress = true; @@ -1419,7 +1577,7 @@ fs_visitor::remove_duplicate_mrf_writes() /* Clear out the last-write records for MRFs that were overwritten. 
*/ if (inst->dst.file == MRF) { - last_mrf_move[inst->dst.hw_reg] = NULL; + last_mrf_move[inst->dst.reg] = NULL; } if (inst->mlen > 0) { @@ -1445,7 +1603,7 @@ fs_visitor::remove_duplicate_mrf_writes() inst->dst.file == MRF && inst->src[0].file == GRF && !inst->predicated) { - last_mrf_move[inst->dst.hw_reg] = inst; + last_mrf_move[inst->dst.reg] = inst; } } @@ -1527,8 +1685,8 @@ fs_visitor::run() /* Generate FS IR for main(). (the visitor only descends into * functions called "main"). */ - foreach_iter(exec_list_iterator, iter, *shader->ir) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &*shader->ir) { + ir_instruction *ir = (ir_instruction *)node; base_ir = ir; this->result = reg_undef; ir->accept(this); @@ -1550,11 +1708,14 @@ fs_visitor::run() progress = remove_duplicate_mrf_writes() || progress; progress = propagate_constants() || progress; + progress = opt_algebraic() || progress; progress = register_coalesce() || progress; progress = compute_to_mrf() || progress; progress = dead_code_eliminate() || progress; } while (progress); + remove_dead_constants(); + schedule_instructions(); assign_curb_setup(); @@ -1563,7 +1724,7 @@ fs_visitor::run() if (0) { /* Debug of register spilling: Go spill everything. */ int virtual_grf_count = virtual_grf_next; - for (int i = 1; i < virtual_grf_count; i++) { + for (int i = 0; i < virtual_grf_count; i++) { spill_reg(i); } } @@ -1625,7 +1786,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, fs_visitor v(c, prog, shader); if (!v.run()) { prog->LinkStatus = GL_FALSE; - prog->InfoLog = ralloc_strdup(prog, v.fail_msg); + ralloc_strcat(&prog->InfoLog, v.fail_msg); return false; } @@ -1633,7 +1794,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) { c->dispatch_width = 16; fs_visitor v2(c, prog, shader); - v2.import_uniforms(v.variable_ht); + v2.import_uniforms(&v); v2.run(); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 2bf850e5dea..10f45f30fe9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -25,6 +25,8 @@ * */ +#include "brw_shader.h" + extern "C" { #include <sys/types.h> @@ -51,37 +53,10 @@ enum register_file { MRF = BRW_MESSAGE_REGISTER_FILE, IMM = BRW_IMMEDIATE_VALUE, FIXED_HW_REG, /* a struct brw_reg */ - UNIFORM, /* prog_data->params[hw_reg] */ + UNIFORM, /* prog_data->params[reg] */ BAD_FILE }; -enum fs_opcodes { - FS_OPCODE_FB_WRITE = 256, - FS_OPCODE_RCP, - FS_OPCODE_RSQ, - FS_OPCODE_SQRT, - FS_OPCODE_EXP2, - FS_OPCODE_LOG2, - FS_OPCODE_POW, - FS_OPCODE_SIN, - FS_OPCODE_COS, - FS_OPCODE_DDX, - FS_OPCODE_DDY, - FS_OPCODE_PIXEL_X, - FS_OPCODE_PIXEL_Y, - FS_OPCODE_CINTERP, - FS_OPCODE_LINTERP, - FS_OPCODE_TEX, - FS_OPCODE_TXB, - FS_OPCODE_TXD, - FS_OPCODE_TXL, - FS_OPCODE_DISCARD, - FS_OPCODE_SPILL, - FS_OPCODE_UNSPILL, - FS_OPCODE_PULL_CONSTANT_LOAD, -}; - - class fs_reg { public: /* Callers of this ralloc-based new need not call delete. 
It's @@ -99,7 +74,6 @@ public: void init() { memset(this, 0, sizeof(*this)); - this->hw_reg = -1; this->smear = -1; } @@ -146,8 +120,8 @@ public: this->type = fixed_hw_reg.type; } - fs_reg(enum register_file file, int hw_reg); - fs_reg(enum register_file file, int hw_reg, uint32_t type); + fs_reg(enum register_file file, int reg); + fs_reg(enum register_file file, int reg, uint32_t type); fs_reg(class fs_visitor *v, const struct glsl_type *type); bool equals(fs_reg *r) @@ -155,7 +129,6 @@ public: return (file == r->file && reg == r->reg && reg_offset == r->reg_offset && - hw_reg == r->hw_reg && type == r->type && negate == r->negate && abs == r->abs && @@ -167,12 +140,17 @@ public: /** Register file: ARF, GRF, MRF, IMM. */ enum register_file file; - /** virtual register number. 0 = fixed hw reg */ + /** + * Register number. For ARF/MRF, it's the hardware register. For + * GRF, it's a virtual register number until register allocation + */ int reg; - /** Offset within the virtual register. */ + /** + * For virtual registers, this is a hardware register offset from + * the start of the register block (for example, a constant index + * in an array access). + */ int reg_offset; - /** HW register number. Generally unset until register allocation. */ - int hw_reg; /** Register type. BRW_REGISTER_TYPE_* */ int type; bool negate; @@ -224,13 +202,13 @@ public: init(); } - fs_inst(int opcode) + fs_inst(enum opcode opcode) { init(); this->opcode = opcode; } - fs_inst(int opcode, fs_reg dst) + fs_inst(enum opcode opcode, fs_reg dst) { init(); this->opcode = opcode; @@ -240,7 +218,7 @@ public: assert(dst.reg_offset >= 0); } - fs_inst(int opcode, fs_reg dst, fs_reg src0) + fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0) { init(); this->opcode = opcode; @@ -253,7 +231,7 @@ public: assert(src[0].reg_offset >= 0); } - fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) + fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) { init(); this->opcode = opcode; @@ -269,7 +247,7 @@ public: assert(src[1].reg_offset >= 0); } - fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) + fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) { init(); this->opcode = opcode; @@ -313,22 +291,23 @@ public: return (opcode == FS_OPCODE_TEX || opcode == FS_OPCODE_TXB || opcode == FS_OPCODE_TXD || - opcode == FS_OPCODE_TXL); + opcode == FS_OPCODE_TXL || + opcode == FS_OPCODE_TXS); } bool is_math() { - return (opcode == FS_OPCODE_RCP || - opcode == FS_OPCODE_RSQ || - opcode == FS_OPCODE_SQRT || - opcode == FS_OPCODE_EXP2 || - opcode == FS_OPCODE_LOG2 || - opcode == FS_OPCODE_SIN || - opcode == FS_OPCODE_COS || - opcode == FS_OPCODE_POW); + return (opcode == SHADER_OPCODE_RCP || + opcode == SHADER_OPCODE_RSQ || + opcode == SHADER_OPCODE_SQRT || + opcode == SHADER_OPCODE_EXP2 || + opcode == SHADER_OPCODE_LOG2 || + opcode == SHADER_OPCODE_SIN || + opcode == SHADER_OPCODE_COS || + opcode == SHADER_OPCODE_POW); } - int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ + enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ fs_reg dst; fs_reg src[3]; bool saturate; @@ -402,7 +381,7 @@ public: this->base_ir = NULL; this->virtual_grf_sizes = NULL; - this->virtual_grf_next = 1; + this->virtual_grf_next = 0; this->virtual_grf_array_size = 0; this->virtual_grf_def = NULL; this->virtual_grf_use = NULL; @@ -421,7 +400,7 @@ public: fs_reg *variable_storage(ir_variable *var); int virtual_grf_alloc(int size); - void import_uniforms(struct hash_table *src_variable_ht); + void 
import_uniforms(fs_visitor *v); void visit(ir_variable *ir); void visit(ir_assignment *ir); @@ -445,27 +424,28 @@ public: fs_inst *emit(fs_inst inst); - fs_inst *emit(int opcode) + fs_inst *emit(enum opcode opcode) { return emit(fs_inst(opcode)); } - fs_inst *emit(int opcode, fs_reg dst) + fs_inst *emit(enum opcode opcode, fs_reg dst) { return emit(fs_inst(opcode, dst)); } - fs_inst *emit(int opcode, fs_reg dst, fs_reg src0) + fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0) { return emit(fs_inst(opcode, dst, src0)); } - fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) + fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) { return emit(fs_inst(opcode, dst, src0, src1)); } - fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) + fs_inst *emit(enum opcode opcode, fs_reg dst, + fs_reg src0, fs_reg src1, fs_reg src2) { return emit(fs_inst(opcode, dst, src0, src1, src2)); } @@ -485,9 +465,11 @@ public: void setup_pull_constants(); void calculate_live_intervals(); bool propagate_constants(); + bool opt_algebraic(); bool register_coalesce(); bool compute_to_mrf(); bool dead_code_eliminate(); + bool remove_dead_constants(); bool remove_duplicate_mrf_writes(); bool virtual_grf_interferes(int a, int b); void schedule_instructions(); @@ -524,8 +506,8 @@ public: int sampler); fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, int sampler); - fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0); - fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1); + fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); + fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); bool try_emit_saturate(ir_expression *ir); void emit_bool_to_cond_code(ir_rvalue *condition); void emit_if_gen6(ir_if *ir); @@ -565,6 +547,13 @@ public: int *virtual_grf_use; bool live_intervals_valid; + /* This is the map from UNIFORM hw_reg + reg_offset as generated by + * the visitor to the packed uniform number after + * remove_dead_constants() that represents the actual uploaded + * uniform index. + */ + int *params_remap; + struct hash_table *variable_ht; ir_variable *frag_color, *frag_data, *frag_depth; int first_non_payload_grf; diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 1d89b8f1d11..28efbd3605f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -59,7 +59,8 @@ fs_visitor::generate_fb_write(fs_inst *inst) if (inst->target > 0) { /* Set the render target index for choosing BLEND_STATE. 
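The params_remap comment added to brw_fs.h above describes a map from the uniform indices the visitor hands out to the packed indices that remain after remove_dead_constants(). The body of that pass is not shown in this hunk; the sketch below, in plain C with invented liveness flags and a local params_remap array, only illustrates the kind of dense repacking the comment describes.

/* Illustrative repacking only; remove_dead_constants() itself is not
 * reproduced here, and the liveness flags below are invented. */
#include <assert.h>
#include <stdbool.h>

int
main(void)
{
   bool param_is_live[6] = { true, false, true, true, false, true };
   int params_remap[6];
   int nr_packed = 0;

   for (int i = 0; i < 6; i++) {
      if (param_is_live[i])
         params_remap[i] = nr_packed++;   /* next uploaded slot */
      else
         params_remap[i] = -1;            /* never uploaded */
   }

   /* Uniforms 0, 2, 3 and 5 survive and are uploaded as slots 0..3. */
   assert(nr_packed == 4);
   assert(params_remap[2] == 1 && params_remap[5] == 3);
   assert(params_remap[1] == -1);
   return 0;
}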
*/ - brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2), + brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + inst->base_mrf, 2), BRW_REGISTER_TYPE_UD), brw_imm_ud(inst->target)); } @@ -145,43 +146,12 @@ void fs_visitor::generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src) { - int op; - - switch (inst->opcode) { - case FS_OPCODE_RCP: - op = BRW_MATH_FUNCTION_INV; - break; - case FS_OPCODE_RSQ: - op = BRW_MATH_FUNCTION_RSQ; - break; - case FS_OPCODE_SQRT: - op = BRW_MATH_FUNCTION_SQRT; - break; - case FS_OPCODE_EXP2: - op = BRW_MATH_FUNCTION_EXP; - break; - case FS_OPCODE_LOG2: - op = BRW_MATH_FUNCTION_LOG; - break; - case FS_OPCODE_POW: - op = BRW_MATH_FUNCTION_POW; - break; - case FS_OPCODE_SIN: - op = BRW_MATH_FUNCTION_SIN; - break; - case FS_OPCODE_COS: - op = BRW_MATH_FUNCTION_COS; - break; - default: - assert(!"not reached: unknown math function"); - op = 0; - break; - } + int op = brw_math_function(inst->opcode); if (intel->gen >= 6) { assert(inst->mlen == 0); - if (inst->opcode == FS_OPCODE_POW) { + if (inst->opcode == SHADER_OPCODE_POW) { brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math2(p, dst, op, src[0], src[1]); @@ -272,10 +242,16 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; } break; + case FS_OPCODE_TXS: + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; + break; case FS_OPCODE_TXD: /* There is no sample_d_c message; comparisons are done manually */ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; break; + default: + assert(!"not reached"); + break; } } else { switch (inst->opcode) { @@ -316,6 +292,14 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) assert(inst->mlen == 7 || inst->mlen == 10); msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS; break; + case FS_OPCODE_TXS: + assert(inst->mlen == 3); + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO; + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; + break; + default: + assert(!"not reached"); + break; } } assert(msg_type != -1); @@ -537,11 +521,9 @@ brw_reg_from_fs_reg(fs_reg *reg) case ARF: case MRF: if (reg->smear == -1) { - brw_reg = brw_vec8_reg(reg->file, - reg->hw_reg, 0); + brw_reg = brw_vec8_reg(reg->file, reg->reg, 0); } else { - brw_reg = brw_vec1_reg(reg->file, - reg->hw_reg, reg->smear); + brw_reg = brw_vec1_reg(reg->file, reg->reg, reg->smear); } brw_reg = retype(brw_reg, reg->type); if (reg->sechalf) @@ -608,8 +590,8 @@ fs_visitor::generate_code() prog->Name, c->dispatch_width); } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; struct brw_reg src[3], dst; if (unlikely(INTEL_DEBUG & DEBUG_WM)) { @@ -656,6 +638,11 @@ fs_visitor::generate_code() case BRW_OPCODE_MUL: brw_MUL(p, dst, src[0], src[1]); break; + case BRW_OPCODE_MACH: + brw_set_acc_write_control(p, 1); + brw_MACH(p, dst, src[0], src[1]); + brw_set_acc_write_control(p, 0); + break; case BRW_OPCODE_FRC: brw_FRC(p, dst, src[0]); @@ -770,14 +757,14 @@ fs_visitor::generate_code() } break; - case FS_OPCODE_RCP: - case FS_OPCODE_RSQ: - case FS_OPCODE_SQRT: - case FS_OPCODE_EXP2: - case FS_OPCODE_LOG2: - case FS_OPCODE_POW: - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_POW: + case SHADER_OPCODE_SIN: + case 
SHADER_OPCODE_COS: generate_math(inst, dst, src); break; case FS_OPCODE_PIXEL_X: @@ -796,6 +783,7 @@ fs_visitor::generate_code() case FS_OPCODE_TXB: case FS_OPCODE_TXD: case FS_OPCODE_TXL: + case FS_OPCODE_TXS: generate_tex(inst, dst, src[0]); break; case FS_OPCODE_DISCARD: diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index b4689d2c293..7c5414ac26c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -25,23 +25,6 @@ * */ -extern "C" { - -#include <sys/types.h> - -#include "main/macros.h" -#include "main/shaderobj.h" -#include "main/uniforms.h" -#include "program/prog_parameter.h" -#include "program/prog_print.h" -#include "program/prog_optimize.h" -#include "program/register_allocate.h" -#include "program/sampler.h" -#include "program/hash_table.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" -} #include "brw_fs.h" #include "../glsl/glsl_types.h" #include "../glsl/ir_optimization.h" @@ -50,45 +33,115 @@ extern "C" { static void assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width) { - if (reg->file == GRF && reg->reg != 0) { + if (reg->file == GRF) { assert(reg->reg_offset >= 0); - reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width; - reg->reg = 0; + reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width; + reg->reg_offset = 0; } } void fs_visitor::assign_regs_trivial() { - int last_grf = 0; - int hw_reg_mapping[this->virtual_grf_next]; + int hw_reg_mapping[this->virtual_grf_next + 1]; int i; int reg_width = c->dispatch_width / 8; - hw_reg_mapping[0] = 0; /* Note that compressed instructions require alignment to 2 registers. */ - hw_reg_mapping[1] = ALIGN(this->first_non_payload_grf, reg_width); - for (i = 2; i < this->virtual_grf_next; i++) { + hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width); + for (i = 1; i <= this->virtual_grf_next; i++) { hw_reg_mapping[i] = (hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1] * reg_width); } - last_grf = hw_reg_mapping[i - 1] + (this->virtual_grf_sizes[i - 1] * - reg_width); + this->grf_used = hw_reg_mapping[this->virtual_grf_next]; - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; assign_reg(hw_reg_mapping, &inst->dst, reg_width); assign_reg(hw_reg_mapping, &inst->src[0], reg_width); assign_reg(hw_reg_mapping, &inst->src[1], reg_width); } - if (last_grf >= BRW_MAX_GRF) { + if (this->grf_used >= BRW_MAX_GRF) { fail("Ran out of regs on trivial allocator (%d/%d)\n", - last_grf, BRW_MAX_GRF); + this->grf_used, BRW_MAX_GRF); + } + +} + +static void +brw_alloc_reg_set_for_classes(struct brw_context *brw, + int *class_sizes, + int class_count, + int reg_width, + int base_reg_count) +{ + struct intel_context *intel = &brw->intel; + + /* Compute the total number of registers across all classes. 
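The assign_regs_trivial()/assign_reg() pair above packs virtual GRFs back to back, scaled by reg_width = dispatch_width / 8, and then folds reg_offset into the final hardware register number. Below is a standalone sketch of that arithmetic; the payload size, dispatch width and virtual GRF sizes are invented for illustration and are not taken from the patch.

/* Sketch of the trivial-allocation arithmetic; every number below is
 * invented for illustration. */
#include <assert.h>
#include <stdio.h>

#define ALIGN(value, alignment) (((value) + (alignment) - 1) & ~((alignment) - 1))

int
main(void)
{
   int dispatch_width = 16;                /* SIMD16 compile */
   int reg_width = dispatch_width / 8;     /* two GRFs per allocation unit */
   int first_non_payload_grf = 7;          /* invented payload size */
   int virtual_grf_sizes[3] = { 1, 2, 1 }; /* three invented virtual GRFs */
   int virtual_grf_next = 3;
   int hw_reg_mapping[3 + 1];

   hw_reg_mapping[0] = ALIGN(first_non_payload_grf, reg_width);
   for (int i = 1; i <= virtual_grf_next; i++) {
      hw_reg_mapping[i] = hw_reg_mapping[i - 1] +
                          virtual_grf_sizes[i - 1] * reg_width;
   }

   /* vgrf0 -> g8, vgrf1 -> g10, vgrf2 -> g14; 16 GRFs used in total. */
   assert(hw_reg_mapping[0] == 8);
   assert(hw_reg_mapping[1] == 10);
   assert(hw_reg_mapping[2] == 14);
   assert(hw_reg_mapping[3] == 16);

   /* assign_reg(): a read of vgrf1 at reg_offset 1 lands at g10 + 1 * 2 = g12. */
   printf("vgrf1+1 -> g%d\n", hw_reg_mapping[1] + 1 * reg_width);
   return 0;
}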
*/ + int ra_reg_count = 0; + for (int i = 0; i < class_count; i++) { + ra_reg_count += base_reg_count - (class_sizes[i] - 1); + } + + ralloc_free(brw->wm.ra_reg_to_grf); + brw->wm.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count); + ralloc_free(brw->wm.regs); + brw->wm.regs = ra_alloc_reg_set(ra_reg_count); + ralloc_free(brw->wm.classes); + brw->wm.classes = ralloc_array(brw, int, class_count + 1); + + brw->wm.aligned_pairs_class = -1; + + /* Now, add the registers to their classes, and add the conflicts + * between them and the base GRF registers (and also each other). + */ + int reg = 0; + int pairs_base_reg = 0; + int pairs_reg_count = 0; + for (int i = 0; i < class_count; i++) { + int class_reg_count = base_reg_count - (class_sizes[i] - 1); + brw->wm.classes[i] = ra_alloc_reg_class(brw->wm.regs); + + /* Save this off for the aligned pair class at the end. */ + if (class_sizes[i] == 2) { + pairs_base_reg = reg; + pairs_reg_count = class_reg_count; + } + + for (int j = 0; j < class_reg_count; j++) { + ra_class_add_reg(brw->wm.regs, brw->wm.classes[i], reg); + + brw->wm.ra_reg_to_grf[reg] = j; + + for (int base_reg = j; + base_reg < j + class_sizes[i]; + base_reg++) { + ra_add_transitive_reg_conflict(brw->wm.regs, base_reg, reg); + } + + reg++; + } + } + assert(reg == ra_reg_count); + + /* Add a special class for aligned pairs, which we'll put delta_x/y + * in on gen5 so that we can do PLN. + */ + if (brw->has_pln && reg_width == 1 && intel->gen < 6) { + brw->wm.aligned_pairs_class = ra_alloc_reg_class(brw->wm.regs); + + for (int i = 0; i < pairs_reg_count; i++) { + if ((brw->wm.ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) { + ra_class_add_reg(brw->wm.regs, brw->wm.aligned_pairs_class, + pairs_base_reg + i); + } + } + class_count++; } - this->grf_used = last_grf + reg_width; + ra_set_finalize(brw->wm.regs); } bool @@ -101,12 +154,11 @@ fs_visitor::assign_regs() * for reg_width == 2. */ int reg_width = c->dispatch_width / 8; - int hw_reg_mapping[this->virtual_grf_next + 1]; + int hw_reg_mapping[this->virtual_grf_next]; int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width); int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width; int class_sizes[base_reg_count]; int class_count = 0; - int aligned_pair_class = -1; calculate_live_intervals(); @@ -125,7 +177,7 @@ fs_visitor::assign_regs() */ class_sizes[class_count++] = 2; } - for (int r = 1; r < this->virtual_grf_next; r++) { + for (int r = 0; r < this->virtual_grf_next; r++) { int i; for (i = 0; i < class_count; i++) { @@ -141,94 +193,26 @@ fs_visitor::assign_regs() } } - int ra_reg_count = 0; - int class_base_reg[class_count]; - int class_reg_count[class_count]; - int classes[class_count + 1]; - - for (int i = 0; i < class_count; i++) { - class_base_reg[i] = ra_reg_count; - class_reg_count[i] = base_reg_count - (class_sizes[i] - 1); - ra_reg_count += class_reg_count[i]; - } - - struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count); - for (int i = 0; i < class_count; i++) { - classes[i] = ra_alloc_reg_class(regs); - - for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { - ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r); - } + brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, + reg_width, base_reg_count); - /* Add conflicts between our contiguous registers aliasing - * base regs and other register classes' contiguous registers - * that alias base regs, or the base regs themselves for classes[0]. 
- */ - for (int c = 0; c <= i; c++) { - for (int i_r = 0; i_r < class_reg_count[i]; i_r++) { - for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1)); - c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]); - c_r++) { - - if (0) { - printf("%d/%d conflicts %d/%d\n", - class_sizes[i], first_assigned_grf + i_r, - class_sizes[c], first_assigned_grf + c_r); - } - - ra_add_reg_conflict(regs, - class_base_reg[i] + i_r, - class_base_reg[c] + c_r); - } - } - } - } - - /* Add a special class for aligned pairs, which we'll put delta_x/y - * in on gen5 so that we can do PLN. - */ - if (brw->has_pln && reg_width == 1 && intel->gen < 6) { - int reg_count = (base_reg_count - 1) / 2; - int unaligned_pair_class = 1; - assert(class_sizes[unaligned_pair_class] == 2); - - aligned_pair_class = class_count; - classes[aligned_pair_class] = ra_alloc_reg_class(regs); - class_sizes[aligned_pair_class] = 2; - class_base_reg[aligned_pair_class] = 0; - class_reg_count[aligned_pair_class] = 0; - int start = (first_assigned_grf & 1) ? 1 : 0; - - for (int i = 0; i < reg_count; i++) { - ra_class_add_reg(regs, classes[aligned_pair_class], - class_base_reg[unaligned_pair_class] + i * 2 + start); - } - class_count++; - } - - ra_set_finalize(regs); - - struct ra_graph *g = ra_alloc_interference_graph(regs, + struct ra_graph *g = ra_alloc_interference_graph(brw->wm.regs, this->virtual_grf_next); - /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1 - * with nodes. - */ - ra_set_node_class(g, 0, classes[0]); - for (int i = 1; i < this->virtual_grf_next; i++) { + for (int i = 0; i < this->virtual_grf_next; i++) { for (int c = 0; c < class_count; c++) { if (class_sizes[c] == this->virtual_grf_sizes[i]) { - if (aligned_pair_class >= 0 && + if (brw->wm.aligned_pairs_class >= 0 && this->delta_x.reg == i) { - ra_set_node_class(g, i, classes[aligned_pair_class]); + ra_set_node_class(g, i, brw->wm.aligned_pairs_class); } else { - ra_set_node_class(g, i, classes[c]); + ra_set_node_class(g, i, brw->wm.classes[c]); } break; } } - for (int j = 1; j < i; j++) { + for (int j = 0; j < i; j++) { if (virtual_grf_interferes(i, j)) { ra_add_node_interference(g, i, j); } @@ -253,7 +237,6 @@ fs_visitor::assign_regs() ralloc_free(g); - ralloc_free(regs); return false; } @@ -263,28 +246,18 @@ fs_visitor::assign_regs() * numbers. */ this->grf_used = first_assigned_grf; - hw_reg_mapping[0] = 0; /* unused */ - for (int i = 1; i < this->virtual_grf_next; i++) { + for (int i = 0; i < this->virtual_grf_next; i++) { int reg = ra_get_node_reg(g, i); - int hw_reg = -1; - - for (int c = 0; c < class_count; c++) { - if (reg >= class_base_reg[c] && - reg < class_base_reg[c] + class_reg_count[c]) { - hw_reg = reg - class_base_reg[c]; - break; - } - } - assert(hw_reg >= 0); - hw_reg_mapping[i] = first_assigned_grf + hw_reg * reg_width; + hw_reg_mapping[i] = (first_assigned_grf + + brw->wm.ra_reg_to_grf[reg] * reg_width); this->grf_used = MAX2(this->grf_used, hw_reg_mapping[i] + this->virtual_grf_sizes[i] * reg_width); } - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; assign_reg(hw_reg_mapping, &inst->dst, reg_width); assign_reg(hw_reg_mapping, &inst->src[0], reg_width); @@ -292,7 +265,6 @@ fs_visitor::assign_regs() } ralloc_free(g); - ralloc_free(regs); return true; } @@ -336,8 +308,8 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) * spill/unspill we'll have to do, and guess that the insides of * loops run 10 times. 
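A note on the register-set sizing in brw_alloc_reg_set_for_classes() above: a class whose registers span s contiguous GRFs has base_reg_count - (s - 1) possible starting positions, so that is how many allocator registers the class contributes. A minimal standalone check of that counting, with invented numbers:

/* Illustration only; base_reg_count and the class sizes are invented. */
#include <assert.h>

int
main(void)
{
   int base_reg_count = 8;           /* invented count of free GRFs */
   int class_sizes[3] = { 1, 2, 4 }; /* contiguous-register class sizes */
   int class_count = 3;
   int ra_reg_count = 0;

   for (int i = 0; i < class_count; i++)
      ra_reg_count += base_reg_count - (class_sizes[i] - 1);

   /* 8 single registers + 7 pairs + 5 quads = 20 allocator registers; each
    * one later gets conflicts with the base GRFs it covers. */
   assert(ra_reg_count == 8 + 7 + 5);
   return 0;
}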
*/ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == GRF) { @@ -370,6 +342,9 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) if (inst->dst.file == GRF) no_spill[inst->dst.reg] = true; break; + + default: + break; } } @@ -394,8 +369,8 @@ fs_visitor::spill_reg(int spill_reg) * virtual grf of the same size. For most instructions, though, we * could just spill/unspill the GRF being accessed. */ - foreach_iter(exec_list_iterator, iter, this->instructions) { - fs_inst *inst = (fs_inst *)iter.get(); + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == GRF && diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index d8218c26edb..0ea4e5c36f0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -25,21 +25,6 @@ * */ -extern "C" { - -#include <sys/types.h> - -#include "main/macros.h" -#include "main/shaderobj.h" -#include "main/uniforms.h" -#include "program/prog_optimize.h" -#include "program/register_allocate.h" -#include "program/sampler.h" -#include "program/hash_table.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" -} #include "brw_fs.h" #include "../glsl/glsl_types.h" #include "../glsl/ir_optimization.h" @@ -84,26 +69,26 @@ public: int math_latency = 22; switch (inst->opcode) { - case FS_OPCODE_RCP: + case SHADER_OPCODE_RCP: this->latency = 1 * chans * math_latency; break; - case FS_OPCODE_RSQ: + case SHADER_OPCODE_RSQ: this->latency = 2 * chans * math_latency; break; - case FS_OPCODE_SQRT: - case FS_OPCODE_LOG2: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_LOG2: /* full precision log. partial is 2. */ this->latency = 3 * chans * math_latency; break; - case FS_OPCODE_EXP2: + case SHADER_OPCODE_EXP2: /* full precision. partial is 3, same throughput. */ this->latency = 4 * chans * math_latency; break; - case FS_OPCODE_POW: + case SHADER_OPCODE_POW: this->latency = 8 * chans * math_latency; break; - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: /* minimum latency, max is 12 rounds. */ this->latency = 5 * chans * math_latency; break; @@ -283,8 +268,8 @@ instruction_scheduler::calculate_deps() memset(last_mrf_write, 0, sizeof(last_mrf_write)); /* top-to-bottom dependencies: RAW and WAW. */ - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; fs_inst *inst = n->inst; /* read-after-write deps. 
*/ @@ -321,12 +306,12 @@ instruction_scheduler::calculate_deps() add_dep(last_grf_write[inst->dst.reg], n); last_grf_write[inst->dst.reg] = n; } else if (inst->dst.file == MRF) { - int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int reg = inst->dst.reg & ~BRW_MRF_COMPR4; add_dep(last_mrf_write[reg], n); last_mrf_write[reg] = n; if (is_compressed(inst)) { - if (inst->dst.hw_reg & BRW_MRF_COMPR4) + if (inst->dst.reg & BRW_MRF_COMPR4) reg += 4; else reg++; @@ -401,12 +386,12 @@ instruction_scheduler::calculate_deps() if (inst->dst.file == GRF) { last_grf_write[inst->dst.reg] = n; } else if (inst->dst.file == MRF) { - int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int reg = inst->dst.reg & ~BRW_MRF_COMPR4; last_mrf_write[reg] = n; if (is_compressed(inst)) { - if (inst->dst.hw_reg & BRW_MRF_COMPR4) + if (inst->dst.reg & BRW_MRF_COMPR4) reg += 4; else reg++; @@ -437,8 +422,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) int time = 0; /* Remove non-DAG heads from the list. */ - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list_safe(node, &instructions) { + schedule_node *n = (schedule_node *)node; if (n->parent_count != 0) n->remove(); } @@ -447,8 +432,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) schedule_node *chosen = NULL; int chosen_time = 0; - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; if (!chosen || n->unblocked_time < chosen_time) { chosen = n; @@ -490,8 +475,8 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header) * progress until the first is done. */ if (chosen->inst->is_math()) { - foreach_iter(exec_list_iterator, iter, instructions) { - schedule_node *n = (schedule_node *)iter.get(); + foreach_list(node, &instructions) { + schedule_node *n = (schedule_node *)node; if (n->inst->is_math()) n->unblocked_time = MAX2(n->unblocked_time, diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp index 530ffa26580..a9a60c2fd8a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp @@ -122,8 +122,8 @@ ir_vector_reference_visitor::get_variable_entry(ir_variable *var) break; } - foreach_iter(exec_list_iterator, iter, this->variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list(node, &this->variable_list) { + variable_entry *entry = (variable_entry *)node; if (entry->var == var) return entry; } @@ -222,8 +222,8 @@ ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var) if (!var->type->is_vector()) return NULL; - foreach_iter(exec_list_iterator, iter, *this->variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list(node, &*this->variable_list) { + variable_entry *entry = (variable_entry *)node; if (entry->var == var) { return entry; } @@ -341,8 +341,8 @@ brw_do_vector_splitting(exec_list *instructions) visit_list_elements(&refs, instructions); /* Trim out variables we can't split. 
*/ - foreach_iter(exec_list_iterator, iter, refs.variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list_safe(node, &refs.variable_list) { + variable_entry *entry = (variable_entry *)node; if (debug) { printf("vector %s@%p: decl %d, whole_access %d\n", @@ -363,8 +363,8 @@ brw_do_vector_splitting(exec_list *instructions) /* Replace the decls of the vectors to be split with their split * components. */ - foreach_iter(exec_list_iterator, iter, refs.variable_list) { - variable_entry *entry = (variable_entry *)iter.get(); + foreach_list(node, &refs.variable_list) { + variable_entry *entry = (variable_entry *)node; const struct glsl_type *type; type = glsl_type::get_instance(entry->var->type->base_type, 1, 1); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index cbe5cf428c5..cdaf543c88b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -142,9 +142,7 @@ fs_visitor::visit(ir_dereference_array *ir) this->result.type = brw_type_for_base_type(ir->type); if (index) { - assert(this->result.file == UNIFORM || - (this->result.file == GRF && - this->result.reg != 0)); + assert(this->result.file == UNIFORM || this->result.file == GRF); this->result.reg_offset += index->value.i[0] * element_size; } else { assert(!"FINISHME: non-constant array element"); @@ -252,14 +250,14 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_rcp: - emit_math(FS_OPCODE_RCP, this->result, op[0]); + emit_math(SHADER_OPCODE_RCP, this->result, op[0]); break; case ir_unop_exp2: - emit_math(FS_OPCODE_EXP2, this->result, op[0]); + emit_math(SHADER_OPCODE_EXP2, this->result, op[0]); break; case ir_unop_log2: - emit_math(FS_OPCODE_LOG2, this->result, op[0]); + emit_math(SHADER_OPCODE_LOG2, this->result, op[0]); break; case ir_unop_exp: case ir_unop_log: @@ -267,11 +265,11 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_sin: case ir_unop_sin_reduced: - emit_math(FS_OPCODE_SIN, this->result, op[0]); + emit_math(SHADER_OPCODE_SIN, this->result, op[0]); break; case ir_unop_cos: case ir_unop_cos_reduced: - emit_math(FS_OPCODE_COS, this->result, op[0]); + emit_math(SHADER_OPCODE_COS, this->result, op[0]); break; case ir_unop_dFdx: @@ -289,7 +287,23 @@ fs_visitor::visit(ir_expression *ir) break; case ir_binop_mul: - emit(BRW_OPCODE_MUL, this->result, op[0], op[1]); + if (ir->type->is_integer()) { + /* For integer multiplication, the MUL uses the low 16 bits + * of one of the operands (src0 on gen6, src1 on gen7). The + * MACH accumulates in the contribution of the upper 16 bits + * of that operand. + * + * FINISHME: Emit just the MUL if we know an operand is small + * enough. 
+ */ + struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D); + + emit(BRW_OPCODE_MUL, acc, op[0], op[1]); + emit(BRW_OPCODE_MACH, reg_null_d, op[0], op[1]); + emit(BRW_OPCODE_MOV, this->result, fs_reg(acc)); + } else { + emit(BRW_OPCODE_MUL, this->result, op[0], op[1]); + } break; case ir_binop_div: assert(!"not reached: should be handled by ir_div_to_mul_rcp"); @@ -342,11 +356,11 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_sqrt: - emit_math(FS_OPCODE_SQRT, this->result, op[0]); + emit_math(SHADER_OPCODE_SQRT, this->result, op[0]); break; case ir_unop_rsq: - emit_math(FS_OPCODE_RSQ, this->result, op[0]); + emit_math(SHADER_OPCODE_RSQ, this->result, op[0]); break; case ir_unop_i2u: @@ -425,7 +439,7 @@ fs_visitor::visit(ir_expression *ir) break; case ir_binop_pow: - emit_math(FS_OPCODE_POW, this->result, op[0], op[1]); + emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]); break; case ir_unop_bit_not: @@ -496,7 +510,7 @@ fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, void fs_visitor::visit(ir_assignment *ir) { - struct fs_reg l, r; + fs_reg l, r; fs_inst *inst; /* FINISHME: arrays on the lhs */ @@ -603,9 +617,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ mlen += 3; } else if (ir->op == ir_txd) { + this->result = reg_undef; ir->lod_info.grad.dPdx->accept(this); fs_reg dPdx = this->result; + this->result = reg_undef; ir->lod_info.grad.dPdy->accept(this); fs_reg dPdy = this->result; @@ -620,6 +636,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, * dPdx = dudx, dvdx, drdx * dPdy = dudy, dvdy, drdy * + * 1-arg: Does not exist. + * * 2-arg: dudx dvdx dudy dvdy * dPdx.x dPdx.y dPdy.x dPdy.y * m4 m5 m6 m7 @@ -631,18 +649,26 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) { emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdx); dPdx.reg_offset++; - mlen++; } + mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2); for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) { emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), dPdy); dPdy.reg_offset++; - mlen++; } + mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2); + } else if (ir->op == ir_txs) { + /* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */ + simd16 = true; + this->result = reg_undef; + ir->lod_info.lod->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); + mlen += 2; } else { /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod * instructions. We'll need to do SIMD16 here. */ + simd16 = true; assert(ir->op == ir_txb || ir->op == ir_txl); for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { @@ -671,16 +697,19 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, /* The unused upper half. */ mlen++; + } + if (simd16) { /* Now, since we're doing simd16, the return is 2 interleaved * vec4s where the odd-indexed ones are junk. We'll need to move * this weirdness around to the expected layout. */ - simd16 = true; orig_dst = dst; - dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, - 2)); - dst.type = BRW_REGISTER_TYPE_F; + const glsl_type *vec_type = + glsl_type::get_instance(ir->type->base_type, 4, 1); + dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2)); + dst.type = intel->is_g4x ? 
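Earlier in this hunk, ir_binop_mul for integers is lowered to a MUL that only sees the low 16 bits of one operand plus a MACH that adds in the contribution of the upper 16 bits, as the FINISHME comment explains. Which operand gets truncated differs between gen6 and gen7, so the standalone sketch below simply fixes one operand and checks the underlying mod-2^32 identity in plain C; split_mul is a local name and this is not the emitted hardware sequence.

/* Standalone check of the split-multiply identity only. */
#include <assert.h>
#include <stdint.h>

static uint32_t
split_mul(uint32_t a, uint32_t b)
{
   uint32_t low  = (uint32_t)(a * (b & 0xffffu));          /* the MUL's part */
   uint32_t high = ((a * (b >> 16)) & 0xffffu) << 16;      /* MACH's contribution */
   return (uint32_t)(low + high);                          /* wraps mod 2^32 */
}

int
main(void)
{
   assert(split_mul(3u, 5u) == 15u);
   assert(split_mul(0x12345678u, 0x9abcdef0u) ==
          (uint32_t)(0x12345678u * 0x9abcdef0u));
   assert(split_mul(0xffffffffu, 2u) == 0xfffffffeu);
   return 0;
}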
brw_type_for_base_type(ir->type) + : BRW_REGISTER_TYPE_F; } fs_inst *inst = NULL; @@ -697,6 +726,9 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break; + case ir_txs: + inst = emit(FS_OPCODE_TXS, dst); + break; case ir_txf: assert(!"GLSL 1.30 features unsupported"); break; @@ -732,6 +764,8 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, int base_mrf = 2; int reg_width = c->dispatch_width / 8; bool header_present = false; + const int vector_elements = + ir->coordinate ? ir->coordinate->type->vector_elements : 0; if (ir->offset) { /* The offsets set up by the ir_texture visitor are in the @@ -742,7 +776,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, base_mrf--; } - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + for (int i = 0; i < vector_elements; i++) { fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * reg_width), coordinate); @@ -750,7 +784,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, inst->saturate = true; coordinate.reg_offset++; } - mlen += ir->coordinate->type->vector_elements * reg_width; + mlen += vector_elements * reg_width; if (ir->shadow_comparitor && ir->op != ir_txd) { mlen = MAX2(mlen, header_present + 4 * reg_width); @@ -786,9 +820,11 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, inst = emit(FS_OPCODE_TXL, dst); break; case ir_txd: { + this->result = reg_undef; ir->lod_info.grad.dPdx->accept(this); fs_reg dPdx = this->result; + this->result = reg_undef; ir->lod_info.grad.dPdy->accept(this); fs_reg dPdy = this->result; @@ -816,6 +852,13 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, inst = emit(FS_OPCODE_TXD, dst); break; } + case ir_txs: + this->result = reg_undef; + ir->lod_info.lod->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); + mlen += reg_width; + inst = emit(FS_OPCODE_TXS, dst); + break; case ir_txf: assert(!"GLSL 1.30 features unsupported"); break; @@ -850,6 +893,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, } if (ir->shadow_comparitor && ir->op != ir_txd) { + this->result = reg_undef; ir->shadow_comparitor->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; @@ -860,11 +904,13 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_tex: break; case ir_txb: + this->result = reg_undef; ir->lod_info.bias->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; break; case ir_txl: + this->result = reg_undef; ir->lod_info.lod->accept(this); emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result); mlen += reg_width; @@ -873,9 +919,11 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, if (c->dispatch_width == 16) fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); + this->result = reg_undef; ir->lod_info.grad.dPdx->accept(this); fs_reg dPdx = this->result; + this->result = reg_undef; ir->lod_info.grad.dPdy->accept(this); fs_reg dPdy = this->result; @@ -900,13 +948,19 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, } break; } + case ir_txs: + this->result = reg_undef; + ir->lod_info.lod->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); + 
mlen += reg_width; + break; case ir_txf: assert(!"GLSL 1.30 features unsupported"); break; } /* Set up the coordinate (except for TXD where it was done earlier) */ - if (ir->op != ir_txd) { + if (ir->op != ir_txd && ir->op != ir_txs) { for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate); @@ -924,7 +978,8 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break; case ir_txl: inst = emit(FS_OPCODE_TXL, dst); break; case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break; - case ir_txf: assert(!"TXF unsupported."); + case ir_txf: assert(!"TXF unsupported."); break; + case ir_txs: inst = emit(FS_OPCODE_TXS, dst); break; } inst->base_mrf = base_mrf; inst->mlen = mlen; @@ -959,7 +1014,8 @@ fs_visitor::visit(ir_texture *ir) } this->result = reg_undef; - ir->coordinate->accept(this); + if (ir->coordinate) + ir->coordinate->accept(this); fs_reg coordinate = this->result; if (ir->offset != NULL) { @@ -1000,7 +1056,8 @@ fs_visitor::visit(ir_texture *ir) * texture coordinates. We use the program parameter state * tracking to get the scaling factor. */ - if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { + if (intel->gen < 6 && + ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_RECT) { struct gl_program_parameter_list *params = c->fp->program.Base.Parameters; int tokens[STATE_LENGTH] = { STATE_INTERNAL, @@ -1046,7 +1103,7 @@ fs_visitor::visit(ir_texture *ir) /* Writemasking doesn't eliminate channels on SIMD8 texture * samples, so don't worry about them. */ - fs_reg dst = fs_reg(this, glsl_type::vec4_type); + fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1)); if (intel->gen >= 7) { inst = emit_texture_gen7(ir, dst, coordinate, sampler); @@ -1070,6 +1127,7 @@ fs_visitor::visit(ir_texture *ir) if (hw_compare_supported) { inst->shadow_compare = true; } else { + this->result = reg_undef; ir->shadow_comparitor->accept(this); fs_reg ref = this->result; @@ -1465,8 +1523,8 @@ fs_visitor::visit(ir_if *ir) inst->predicated = true; } - foreach_iter(exec_list_iterator, iter, ir->then_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &ir->then_instructions) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; ir->accept(this); @@ -1475,8 +1533,8 @@ fs_visitor::visit(ir_if *ir) if (!ir->else_instructions.is_empty()) { emit(BRW_OPCODE_ELSE); - foreach_iter(exec_list_iterator, iter, ir->else_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &ir->else_instructions) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; ir->accept(this); @@ -1526,8 +1584,8 @@ fs_visitor::visit(ir_loop *ir) inst->predicated = true; } - foreach_iter(exec_list_iterator, iter, ir->body_instructions) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &ir->body_instructions) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; @@ -1583,8 +1641,8 @@ fs_visitor::visit(ir_function *ir) assert(sig); - foreach_iter(exec_list_iterator, iter, sig->body) { - ir_instruction *ir = (ir_instruction *)iter.get(); + foreach_list(node, &sig->body) { + ir_instruction *ir = (ir_instruction *)node; this->base_ir = ir; this->result = reg_undef; ir->accept(this); @@ -1684,7 +1742,7 @@ 
fs_visitor::emit_interpolation_setup_gen4() interp_reg(FRAG_ATTRIB_WPOS, 3)); /* Compute the pixel 1/W value from wpos.w. */ this->pixel_w = fs_reg(this, glsl_type::float_type); - emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); + emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w); this->current_annotation = NULL; } @@ -1721,7 +1779,7 @@ fs_visitor::emit_interpolation_setup_gen6() this->current_annotation = "compute pos.w"; this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); this->wpos_w = fs_reg(this, glsl_type::float_type); - emit_math(FS_OPCODE_RCP, this->wpos_w, this->pixel_w); + emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); this->delta_x = fs_reg(brw_vec8_grf(2, 0)); this->delta_y = fs_reg(brw_vec8_grf(3, 0)); @@ -1733,6 +1791,7 @@ void fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) { int reg_width = c->dispatch_width / 8; + fs_inst *inst; if (c->dispatch_width == 8 || intel->gen == 6) { /* SIMD8 write looks like: @@ -1751,8 +1810,10 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) * m + 6: a0 * m + 7: a1 */ - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index * reg_width), - color); + inst = emit(BRW_OPCODE_MOV, + fs_reg(MRF, first_color_mrf + index * reg_width), + color); + inst->saturate = c->key.clamp_fragment_color; } else { /* pre-gen6 SIMD16 single source DP write looks like: * m + 0: r0 @@ -1770,16 +1831,22 @@ fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color) * usual destination + 1 for the second half we get * destination + 4. */ - emit(BRW_OPCODE_MOV, - fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), color); + inst = emit(BRW_OPCODE_MOV, + fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), + color); + inst->saturate = c->key.clamp_fragment_color; } else { push_force_uncompressed(); - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color); + inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), + color); + inst->saturate = c->key.clamp_fragment_color; pop_force_uncompressed(); push_force_sechalf(); color.sechalf = true; - emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color); + inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), + color); + inst->saturate = c->key.clamp_fragment_color; pop_force_sechalf(); color.sechalf = false; } diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 03cebbb824b..f7e6e7c81d1 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -46,7 +46,7 @@ static void upload_drawing_rect(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE); + OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); OUT_BATCH(0); /* xmin, ymin */ OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) | ((ctx->DrawBuffer->Height - 1) << 16)); diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 6674f1640c8..09b5be4c96e 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -226,6 +226,34 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx, return GL_TRUE; } +/* Per-thread scratch space is a power-of-two multiple of 1KB. 
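The brw_program.c comment above states the scratch sizing rule: per-thread scratch space is a power-of-two multiple of 1 KB, and the helper that follows in the next hunk rounds a request up accordingly. The sketch below restates that rounding under a local, hypothetical name (scratch_size_pow2) with a few spot checks; it is an illustration, not the driver function.

/* Hedged restatement of the rounding rule only. */
#include <assert.h>

static int
scratch_size_pow2(int size)
{
   int i;

   for (i = 1024; i < size; i *= 2)
      ;

   return i;
}

int
main(void)
{
   assert(scratch_size_pow2(1) == 1024);      /* 1 KB floor */
   assert(scratch_size_pow2(1024) == 1024);   /* exact powers stay put */
   assert(scratch_size_pow2(1500) == 2048);   /* otherwise round up */
   assert(scratch_size_pow2(4097) == 8192);
   return 0;
}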
*/ +int +brw_get_scratch_size(int size) +{ + int i; + + for (i = 1024; i < size; i *= 2) + ; + + return i; +} + +void +brw_get_scratch_bo(struct intel_context *intel, + drm_intel_bo **scratch_bo, int size) +{ + drm_intel_bo *old_bo = *scratch_bo; + + if (old_bo && old_bo->size < size) { + drm_intel_bo_unreference(old_bo); + old_bo = NULL; + } + + if (!old_bo) { + *scratch_bo = drm_intel_bo_alloc(intel->bufmgr, "scratch bo", size, 4096); + } +} + void brwInitFragProgFuncs( struct dd_function_table *functions ) { assert(functions->ProgramStringNotify == _tnl_program_string); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 9471883fb2b..3ff6bbaed47 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -24,6 +24,7 @@ extern "C" { #include "main/macros.h" #include "brw_context.h" +#include "brw_vs.h" } #include "brw_fs.h" #include "../glsl/ir_optimization.h" @@ -67,6 +68,9 @@ brw_shader_precompile(struct gl_context *ctx, struct gl_shader_program *prog) if (!brw_fs_precompile(ctx, prog)) return false; + if (!brw_vs_precompile(ctx, prog)) + return false; + return true; } @@ -75,10 +79,15 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) { struct brw_context *brw = brw_context(ctx); struct intel_context *intel = &brw->intel; + unsigned int stage; + + for (stage = 0; stage < ARRAY_SIZE(prog->_LinkedShaders); stage++) { + struct brw_shader *shader = + (struct brw_shader *)prog->_LinkedShaders[stage]; + + if (!shader) + continue; - struct brw_shader *shader = - (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; - if (shader != NULL) { void *mem_ctx = ralloc_context(NULL); bool progress; @@ -106,18 +115,22 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) brw_do_cubemap_normalize(shader->ir); lower_noise(shader->ir); lower_quadop_vector(shader->ir, false); + + bool input = true; + bool output = stage == MESA_SHADER_FRAGMENT; + bool temp = stage == MESA_SHADER_FRAGMENT; + bool uniform = true; + lower_variable_index_to_cond_assign(shader->ir, - GL_TRUE, /* input */ - GL_TRUE, /* output */ - GL_TRUE, /* temp */ - GL_TRUE /* uniform */ - ); + input, output, temp, uniform); do { progress = false; - brw_do_channel_expressions(shader->ir); - brw_do_vector_splitting(shader->ir); + if (stage == MESA_SHADER_FRAGMENT) { + brw_do_channel_expressions(shader->ir); + brw_do_vector_splitting(shader->ir); + } progress = do_lower_jumps(shader->ir, true, true, true, /* main return */ @@ -192,3 +205,29 @@ brw_conditional_for_comparison(unsigned int op) return BRW_CONDITIONAL_NZ; } } + +uint32_t +brw_math_function(enum opcode op) +{ + switch (op) { + case SHADER_OPCODE_RCP: + return BRW_MATH_FUNCTION_INV; + case SHADER_OPCODE_RSQ: + return BRW_MATH_FUNCTION_RSQ; + case SHADER_OPCODE_SQRT: + return BRW_MATH_FUNCTION_SQRT; + case SHADER_OPCODE_EXP2: + return BRW_MATH_FUNCTION_EXP; + case SHADER_OPCODE_LOG2: + return BRW_MATH_FUNCTION_LOG; + case SHADER_OPCODE_POW: + return BRW_MATH_FUNCTION_POW; + case SHADER_OPCODE_SIN: + return BRW_MATH_FUNCTION_SIN; + case SHADER_OPCODE_COS: + return BRW_MATH_FUNCTION_COS; + default: + assert(!"not reached: unknown math function"); + return 0; + } +} diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 4c568a26caa..1054d7a589e 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -21,5 +21,11 @@ * IN THE SOFTWARE. 
*/ +#include <stdint.h> +#include "brw_defines.h" + +#pragma once + int brw_type_for_base_type(const struct glsl_type *type); uint32_t brw_conditional_for_comparison(unsigned int op); +uint32_t brw_math_function(enum opcode op); diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index b9e5cc1a534..cb7a3ef73d3 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -455,6 +455,23 @@ dump_vs_constants(struct brw_context *brw, uint32_t offset, uint32_t size) } } +static void +dump_wm_constants(struct brw_context *brw, uint32_t offset, uint32_t size) +{ + const char *name = "WM_CONST"; + struct intel_context *intel = &brw->intel; + uint32_t *as_uint = intel->batch.bo->virtual + offset; + float *as_float = intel->batch.bo->virtual + offset; + int i; + + for (i = 0; i < size / 4; i += 4) { + batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n", + i / 4, + as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3], + as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]); + } +} + static void dump_binding_table(struct brw_context *brw, uint32_t offset, uint32_t size) { @@ -602,6 +619,9 @@ dump_state_batch(struct brw_context *brw) case AUB_TRACE_VS_CONSTANTS: dump_vs_constants(brw, offset, size); break; + case AUB_TRACE_WM_CONSTANTS: + dump_wm_constants(brw, offset, size); + break; default: break; } diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index f462f32b19a..46a417a08ed 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -60,7 +60,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel, * given in Volume 1 of the BSpec. */ h0 = ALIGN(mt->height0, align_h); - h1 = ALIGN(minify(h0), align_h); + h1 = ALIGN(minify(mt->height0), align_h); qpitch = (h0 + h1 + (intel->gen >= 7 ? 12 : 11) * align_h); if (mt->compressed) qpitch /= 4; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp new file mode 100644 index 00000000000..760bc1f7acd --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -0,0 +1,161 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "brw_vec4.h" +extern "C" { +#include "main/macros.h" +#include "program/prog_parameter.h" +} + +#define MAX_INSTRUCTION (1 << 30) + +namespace brw { + +void +vec4_visitor::calculate_live_intervals() +{ + int *def = ralloc_array(mem_ctx, int, virtual_grf_count); + int *use = ralloc_array(mem_ctx, int, virtual_grf_count); + int loop_depth = 0; + int loop_start = 0; + + if (this->live_intervals_valid) + return; + + for (int i = 0; i < virtual_grf_count; i++) { + def[i] = MAX_INSTRUCTION; + use[i] = -1; + } + + int ip = 0; + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + if (inst->opcode == BRW_OPCODE_DO) { + if (loop_depth++ == 0) + loop_start = ip; + } else if (inst->opcode == BRW_OPCODE_WHILE) { + loop_depth--; + + if (loop_depth == 0) { + /* Patches up the use of vars marked for being live across + * the whole loop. + */ + for (int i = 0; i < virtual_grf_count; i++) { + if (use[i] == loop_start) { + use[i] = ip; + } + } + } + } else { + for (unsigned int i = 0; i < 3; i++) { + if (inst->src[i].file == GRF) { + int reg = inst->src[i].reg; + + if (!loop_depth) { + use[reg] = ip; + } else { + def[reg] = MIN2(loop_start, def[reg]); + use[reg] = loop_start; + + /* Nobody else is going to go smash our start to + * later in the loop now, because def[reg] now + * points before the bb header. + */ + } + } + } + if (inst->dst.file == GRF) { + int reg = inst->dst.reg; + + if (!loop_depth) { + def[reg] = MIN2(def[reg], ip); + } else { + def[reg] = MIN2(def[reg], loop_start); + } + } + } + + ip++; + } + + ralloc_free(this->virtual_grf_def); + ralloc_free(this->virtual_grf_use); + this->virtual_grf_def = def; + this->virtual_grf_use = use; + + this->live_intervals_valid = true; +} + +bool +vec4_visitor::virtual_grf_interferes(int a, int b) +{ + int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]); + int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]); + + /* We can't handle dead register writes here, without iterating + * over the whole instruction stream to find every single dead + * write to that register to compare to the live interval of the + * other register. Just assert that dead_code_eliminate() has been + * called. + */ + assert((this->virtual_grf_use[a] != -1 || + this->virtual_grf_def[a] == MAX_INSTRUCTION) && + (this->virtual_grf_use[b] != -1 || + this->virtual_grf_def[b] == MAX_INSTRUCTION)); + + return start < end; +} + +/** + * Must be called after calculate_live_intervales() to remove unused + * writes to registers -- register allocation will fail otherwise + * because something deffed but not used won't be considered to + * interfere with other regs. 
+ */ +bool +vec4_visitor::dead_code_eliminate() +{ + bool progress = false; + int pc = 0; + + calculate_live_intervals(); + + foreach_list_safe(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) { + inst->remove(); + progress = true; + } + + pc++; + } + + if (progress) + live_intervals_valid = false; + + return progress; +} + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h new file mode 100644 index 00000000000..1db910e2b99 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -0,0 +1,489 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BRW_VEC4_H +#define BRW_VEC4_H + +#include <stdint.h> +#include "brw_shader.h" +#include "main/compiler.h" +#include "program/hash_table.h" + +extern "C" { +#include "brw_vs.h" +#include "brw_context.h" +#include "brw_eu.h" +}; + +#include "../glsl/ir.h" + +namespace brw { + +class dst_reg; + +/** + * Common helper for constructing swizzles. When only a subset of + * channels of a vec4 are used, we don't want to reference the other + * channels, as that will tell optimization passes that those other + * channels are used. + */ +static int +swizzle_for_size(int size) +{ + int size_swizzles[4] = { + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), + BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), + }; + + assert((size >= 1) && (size <= 4)); + return size_swizzles[size - 1]; +} + +enum register_file { + ARF = BRW_ARCHITECTURE_REGISTER_FILE, + GRF = BRW_GENERAL_REGISTER_FILE, + MRF = BRW_MESSAGE_REGISTER_FILE, + IMM = BRW_IMMEDIATE_VALUE, + HW_REG, /* a struct brw_reg */ + ATTR, + UNIFORM, /* prog_data->params[hw_reg] */ + BAD_FILE +}; + +class reg +{ +public: + /** Register file: ARF, GRF, MRF, IMM. */ + enum register_file file; + /** virtual register number. 0 = fixed hw reg */ + int reg; + /** Offset within the virtual register. */ + int reg_offset; + /** Register type. BRW_REGISTER_TYPE_* */ + int type; + bool sechalf; + struct brw_reg fixed_hw_reg; + int smear; /* -1, or a channel of the reg to smear to all channels. 
*/ + + /** Value for file == BRW_IMMMEDIATE_FILE */ + union { + int32_t i; + uint32_t u; + float f; + } imm; +}; + +class src_reg : public reg +{ +public: + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = ralloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + void init() + { + memset(this, 0, sizeof(*this)); + + this->file = BAD_FILE; + } + + src_reg(register_file file, int reg, const glsl_type *type) + { + init(); + + this->file = file; + this->reg = reg; + if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) + this->swizzle = swizzle_for_size(type->vector_elements); + else + this->swizzle = SWIZZLE_XYZW; + } + + /** Generic unset register constructor. */ + src_reg() + { + init(); + } + + src_reg(float f) + { + init(); + + this->file = IMM; + this->type = BRW_REGISTER_TYPE_F; + this->imm.f = f; + } + + src_reg(uint32_t u) + { + init(); + + this->file = IMM; + this->type = BRW_REGISTER_TYPE_UD; + this->imm.f = u; + } + + src_reg(int32_t i) + { + init(); + + this->file = IMM; + this->type = BRW_REGISTER_TYPE_D; + this->imm.i = i; + } + + src_reg(class vec4_visitor *v, const struct glsl_type *type); + + explicit src_reg(dst_reg reg); + + GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */ + bool negate; + bool abs; + + src_reg *reladdr; +}; + +class dst_reg : public reg +{ +public: + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = ralloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + void init() + { + memset(this, 0, sizeof(*this)); + this->file = BAD_FILE; + this->writemask = WRITEMASK_XYZW; + } + + dst_reg() + { + init(); + } + + dst_reg(register_file file, int reg) + { + init(); + + this->file = file; + this->reg = reg; + } + + dst_reg(struct brw_reg reg) + { + init(); + + this->file = HW_REG; + this->fixed_hw_reg = reg; + } + + dst_reg(class vec4_visitor *v, const struct glsl_type *type); + + explicit dst_reg(src_reg reg); + + int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ + + src_reg *reladdr; +}; + +class vec4_instruction : public exec_node { +public: + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = rzalloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + struct brw_reg get_dst(void); + struct brw_reg get_src(int i); + + enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ + dst_reg dst; + src_reg src[3]; + + bool saturate; + bool predicate_inverse; + uint32_t predicate; + + int conditional_mod; /**< BRW_CONDITIONAL_* */ + + int sampler; + int target; /**< MRT target. */ + bool shadow_compare; + + bool eot; + bool header_present; + int mlen; /**< SEND message length */ + int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ + + uint32_t offset; /* spill/unspill offset */ + /** @{ + * Annotation for the generated IR. One of the two can be set. 
+ */ + ir_instruction *ir; + const char *annotation; +}; + +class vec4_visitor : public ir_visitor +{ +public: + vec4_visitor(struct brw_vs_compile *c, + struct gl_shader_program *prog, struct brw_shader *shader); + ~vec4_visitor(); + + dst_reg dst_null_f() + { + return dst_reg(brw_null_reg()); + } + + dst_reg dst_null_d() + { + return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + } + + dst_reg dst_null_cmp() + { + if (intel->gen > 4) + return dst_null_d(); + else + return dst_null_f(); + } + + struct brw_context *brw; + const struct gl_vertex_program *vp; + struct intel_context *intel; + struct gl_context *ctx; + struct brw_vs_compile *c; + struct brw_vs_prog_data *prog_data; + struct brw_compile *p; + struct brw_shader *shader; + struct gl_shader_program *prog; + void *mem_ctx; + exec_list instructions; + + char *fail_msg; + bool failed; + + /** + * GLSL IR currently being processed, which is associated with our + * driver IR instructions for debugging purposes. + */ + ir_instruction *base_ir; + const char *current_annotation; + + int *virtual_grf_sizes; + int virtual_grf_count; + int virtual_grf_array_size; + int first_non_payload_grf; + int *virtual_grf_def; + int *virtual_grf_use; + bool live_intervals_valid; + + dst_reg *variable_storage(ir_variable *var); + + void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); + + src_reg src_reg_for_float(float val); + + /** + * \name Visit methods + * + * As typical for the visitor pattern, there must be one \c visit method for + * each concrete subclass of \c ir_instruction. Virtual base classes within + * the hierarchy should not have \c visit methods. + */ + /*@{*/ + virtual void visit(ir_variable *); + virtual void visit(ir_loop *); + virtual void visit(ir_loop_jump *); + virtual void visit(ir_function_signature *); + virtual void visit(ir_function *); + virtual void visit(ir_expression *); + virtual void visit(ir_swizzle *); + virtual void visit(ir_dereference_variable *); + virtual void visit(ir_dereference_array *); + virtual void visit(ir_dereference_record *); + virtual void visit(ir_assignment *); + virtual void visit(ir_constant *); + virtual void visit(ir_call *); + virtual void visit(ir_return *); + virtual void visit(ir_discard *); + virtual void visit(ir_texture *); + virtual void visit(ir_if *); + /*@}*/ + + src_reg result; + + /* Regs for vertex results. Generated at ir_variable visiting time + * for the ir->location's used. 
+ */ + dst_reg output_reg[VERT_RESULT_MAX]; + int uniform_size[MAX_UNIFORMS]; + int uniforms; + + struct hash_table *variable_ht; + + bool run(void); + void fail(const char *msg, ...); + + int virtual_grf_alloc(int size); + int setup_uniform_values(int loc, const glsl_type *type); + void setup_builtin_uniform_values(ir_variable *ir); + int setup_attributes(int payload_reg); + int setup_uniforms(int payload_reg); + void setup_payload(); + void reg_allocate_trivial(); + void reg_allocate(); + void move_grf_array_access_to_scratch(); + void calculate_live_intervals(); + bool dead_code_eliminate(); + bool virtual_grf_interferes(int a, int b); + + vec4_instruction *emit(enum opcode opcode); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1); + + vec4_instruction *emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1, src_reg src2); + + bool try_rewrite_rhs_to_dst(ir_assignment *ir, + dst_reg dst, + src_reg src, + vec4_instruction *pre_rhs_inst, + vec4_instruction *last_rhs_inst); + + /** Walks an exec_list of ir_instruction and sends it through this visitor. */ + void visit_instructions(const exec_list *list); + + void emit_bool_to_cond_code(ir_rvalue *ir); + void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1); + void emit_if_gen6(ir_if *ir); + + void emit_block_move(dst_reg *dst, src_reg *src, + const struct glsl_type *type, bool predicated); + + void emit_constant_values(dst_reg *dst, ir_constant *value); + + /** + * Emit the correct dot-product instruction for the type of arguments + */ + void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements); + + void emit_scalar(ir_instruction *ir, enum prog_opcode op, + dst_reg dst, src_reg src0); + + void emit_scalar(ir_instruction *ir, enum prog_opcode op, + dst_reg dst, src_reg src0, src_reg src1); + + void emit_scs(ir_instruction *ir, enum prog_opcode op, + dst_reg dst, const src_reg &src); + + void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src); + void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src); + void emit_math(enum opcode opcode, dst_reg dst, src_reg src); + void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); + void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); + void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1); + + int emit_vue_header_gen6(int header_mrf); + int emit_vue_header_gen4(int header_mrf); + void emit_urb_writes(void); + + src_reg get_scratch_offset(vec4_instruction *inst, + src_reg *reladdr, int reg_offset); + void emit_scratch_read(vec4_instruction *inst, + dst_reg dst, + src_reg orig_src, + int base_offset); + void emit_scratch_write(vec4_instruction *inst, + src_reg temp, + dst_reg orig_dst, + int base_offset); + + GLboolean try_emit_sat(ir_expression *ir); + + bool process_move_condition(ir_rvalue *ir); + + void generate_code(); + void generate_vs_instruction(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg *src); + + void generate_math1_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src); + void generate_math1_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src); + void generate_math2_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1); + void generate_math2_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + 
struct brw_reg src1); + + void generate_urb_write(vec4_instruction *inst); + void generate_oword_dual_block_offsets(struct brw_reg m1, + struct brw_reg index); + void generate_scratch_write(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg index); + void generate_scratch_read(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg index); +}; + +} /* namespace brw */ + +#endif /* BRW_VEC4_H */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp new file mode 100644 index 00000000000..65ac7d9dc09 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -0,0 +1,854 @@ +/* Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_vec4.h" +#include "../glsl/ir_print_visitor.h" + +extern "C" { +#include "brw_eu.h" +}; + +using namespace brw; + +namespace brw { + +int +vec4_visitor::setup_attributes(int payload_reg) +{ + int nr_attributes; + int attribute_map[VERT_ATTRIB_MAX]; + + nr_attributes = 0; + for (int i = 0; i < VERT_ATTRIB_MAX; i++) { + if (prog_data->inputs_read & BITFIELD64_BIT(i)) { + attribute_map[i] = payload_reg + nr_attributes; + nr_attributes++; + + /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED + * attributes come in as floating point conversions of the + * integer values. + */ + if (c->key.gl_fixed_input_size[i] != 0) { + struct brw_reg reg = brw_vec8_grf(attribute_map[i], 0); + + brw_MUL(p, + brw_writemask(reg, (1 << c->key.gl_fixed_input_size[i]) - 1), + reg, brw_imm_f(1.0 / 65536.0)); + } + } + } + + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + for (int i = 0; i < 3; i++) { + if (inst->src[i].file != ATTR) + continue; + + int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset]; + + struct brw_reg reg = brw_vec8_grf(grf, 0); + reg.dw1.bits.swizzle = inst->src[i].swizzle; + if (inst->src[i].abs) + reg = brw_abs(reg); + if (inst->src[i].negate) + reg = negate(reg); + + inst->src[i].file = HW_REG; + inst->src[i].fixed_hw_reg = reg; + } + } + + /* The BSpec says we always have to read at least one thing from + * the VF, and it appears that the hardware wedges otherwise. 
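The GL_FIXED path in setup_attributes() above rescales attributes that arrive as floating-point conversions of raw 16.16 fixed-point values: one MUL by 1/65536 recovers the intended value, and the (1 << size) - 1 writemask restricts the scaling to the components the attribute actually supplies. The same arithmetic in plain C++, as an illustrative sketch (the array and size parameter are made up for the example):

/* Rescale a GLES2 GL_FIXED attribute: the VF delivered float(raw_16_16),
 * so multiplying by 1/2^16 yields the real value. Only the first 'size'
 * components are touched, mirroring the (1 << size) - 1 writemask above.
 */
static void rescale_gl_fixed(float attr[4], int size)
{
   const float scale = 1.0f / 65536.0f;
   for (int c = 0; c < size; c++)
      attr[c] *= scale;
}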
+ */ + if (nr_attributes == 0) + nr_attributes = 1; + + prog_data->urb_read_length = (nr_attributes + 1) / 2; + + return payload_reg + nr_attributes; +} + +int +vec4_visitor::setup_uniforms(int reg) +{ + /* User clip planes from curbe: + */ + if (c->key.nr_userclip) { + if (intel->gen >= 6) { + for (int i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride(brw_vec4_grf(reg + i / 2, + (i % 2) * 4), 0, 4, 1); + } + reg += ALIGN(c->key.nr_userclip, 2) / 2; + } else { + for (int i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2, + (i % 2) * 4), 0, 4, 1); + } + reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2; + } + } + + /* The pre-gen6 VS requires that some push constants get loaded no + * matter what, or the GPU would hang. + */ + if (intel->gen < 6 && this->uniforms == 0) { + this->uniform_size[this->uniforms] = 1; + + for (unsigned int i = 0; i < 4; i++) { + unsigned int slot = this->uniforms * 4 + i; + + c->prog_data.param[slot] = NULL; + c->prog_data.param_convert[slot] = PARAM_CONVERT_ZERO; + } + + this->uniforms++; + reg++; + } else { + reg += ALIGN(uniforms, 2) / 2; + } + + /* for now, we are not doing any elimination of unused slots, nor + * are we packing our uniforms. + */ + c->prog_data.nr_params = this->uniforms * 4; + + c->prog_data.curb_read_length = reg - 1; + c->prog_data.uses_new_param_layout = true; + + return reg; +} + +void +vec4_visitor::setup_payload(void) +{ + int reg = 0; + + /* The payload always contains important data in g0, which contains + * the URB handles that are passed on to the URB write at the end + * of the thread. So, we always start push constants at g1. + */ + reg++; + + reg = setup_uniforms(reg); + + reg = setup_attributes(reg); + + this->first_non_payload_grf = reg; +} + +struct brw_reg +vec4_instruction::get_dst(void) +{ + struct brw_reg brw_reg; + + switch (dst.file) { + case GRF: + brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0); + brw_reg = retype(brw_reg, dst.type); + brw_reg.dw1.bits.writemask = dst.writemask; + break; + + case HW_REG: + brw_reg = dst.fixed_hw_reg; + break; + + case BAD_FILE: + brw_reg = brw_null_reg(); + break; + + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; + } + return brw_reg; +} + +struct brw_reg +vec4_instruction::get_src(int i) +{ + struct brw_reg brw_reg; + + switch (src[i].file) { + case GRF: + brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0); + brw_reg = retype(brw_reg, src[i].type); + brw_reg.dw1.bits.swizzle = src[i].swizzle; + if (src[i].abs) + brw_reg = brw_abs(brw_reg); + if (src[i].negate) + brw_reg = negate(brw_reg); + break; + + case IMM: + switch (src[i].type) { + case BRW_REGISTER_TYPE_F: + brw_reg = brw_imm_f(src[i].imm.f); + break; + case BRW_REGISTER_TYPE_D: + brw_reg = brw_imm_d(src[i].imm.i); + break; + case BRW_REGISTER_TYPE_UD: + brw_reg = brw_imm_ud(src[i].imm.u); + break; + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; + } + break; + + case UNIFORM: + brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2, + ((src[i].reg + src[i].reg_offset) % 2) * 4), + 0, 4, 1); + brw_reg = retype(brw_reg, src[i].type); + brw_reg.dw1.bits.swizzle = src[i].swizzle; + if (src[i].abs) + brw_reg = brw_abs(brw_reg); + if (src[i].negate) + brw_reg = negate(brw_reg); + break; + + case HW_REG: + brw_reg = src[i].fixed_hw_reg; + break; + + case BAD_FILE: + /* Probably unused. 
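Both user clip-plane layouts in setup_uniforms() above pack two vec4 planes per GRF: plane i lives in register reg + i/2 at float offset (i % 2) * 4, and the pre-gen6 path simply starts six slots further in, as the (6 + i) / 2 addressing shows. A small sketch of just that index math (struct and parameter names are illustrative only):

struct plane_loc { int grf; int subreg_floats; };

/* Two vec4 clip planes fit in one 8-float GRF: even planes at float 0,
 * odd planes at float 4. 'first_slot' is 0 for the gen6+ layout and 6
 * for the older one, matching the addressing in setup_uniforms().
 */
static plane_loc userclip_location(int base_reg, int first_slot, int i)
{
   plane_loc loc;
   loc.grf = base_reg + (first_slot + i) / 2;
   loc.subreg_floats = ((first_slot + i) % 2) * 4;
   return loc;
}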
*/ + brw_reg = brw_null_reg(); + break; + case ATTR: + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; + } + + return brw_reg; +} + +void +vec4_visitor::generate_math1_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src) +{ + brw_math(p, + dst, + brw_math_function(inst->opcode), + BRW_MATH_SATURATE_NONE, + inst->base_mrf, + src, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); +} + +static void +check_gen6_math_src_arg(struct brw_reg src) +{ + /* Source swizzles are ignored. */ + assert(!src.abs); + assert(!src.negate); + assert(src.dw1.bits.swizzle = BRW_SWIZZLE_XYZW); +} + +void +vec4_visitor::generate_math1_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src) +{ + /* Can't do writemask because math can't be align16. */ + assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); + check_gen6_math_src_arg(src); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_math(p, + dst, + brw_math_function(inst->opcode), + BRW_MATH_SATURATE_NONE, + inst->base_mrf, + src, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); + brw_set_access_mode(p, BRW_ALIGN_16); +} + +void +vec4_visitor::generate_math2_gen6(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) +{ + /* Can't do writemask because math can't be align16. */ + assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); + /* Source swizzles are ignored. */ + check_gen6_math_src_arg(src0); + check_gen6_math_src_arg(src1); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_math2(p, + dst, + brw_math_function(inst->opcode), + src0, src1); + brw_set_access_mode(p, BRW_ALIGN_16); +} + +void +vec4_visitor::generate_math2_gen4(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) +{ + /* Can't do writemask because math can't be align16. */ + assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); + + brw_MOV(p, brw_message_reg(inst->base_mrf + 1), src1); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_math(p, + dst, + brw_math_function(inst->opcode), + BRW_MATH_SATURATE_NONE, + inst->base_mrf, + src0, + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + brw_set_access_mode(p, BRW_ALIGN_16); +} + +void +vec4_visitor::generate_urb_write(vec4_instruction *inst) +{ + brw_urb_WRITE(p, + brw_null_reg(), /* dest */ + inst->base_mrf, /* starting mrf reg nr */ + brw_vec8_grf(0, 0), /* src */ + false, /* allocate */ + true, /* used */ + inst->mlen, + 0, /* response len */ + inst->eot, /* eot */ + inst->eot, /* writes complete */ + inst->offset, /* urb destination offset */ + BRW_URB_SWIZZLE_INTERLEAVE); +} + +void +vec4_visitor::generate_oword_dual_block_offsets(struct brw_reg m1, + struct brw_reg index) +{ + int second_vertex_offset; + + if (intel->gen >= 6) + second_vertex_offset = 1; + else + second_vertex_offset = 16; + + m1 = retype(m1, BRW_REGISTER_TYPE_D); + + /* Set up M1 (message payload). Only the block offsets in M1.0 and + * M1.4 are used, and the rest are ignored. 
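The header being assembled here carries one scratch block offset in M1.0 and a second in M1.4, one per vertex of the pair the vec4 thread appears to service (hence the OWORD "dual block" messages), with the stride between the two blocks chosen per generation by the second_vertex_offset selection above. A sketch of only that offset computation, with illustrative names:

/* Block offsets for the two vertices handled by one message.
 * gen6+ advances by one block; earlier parts use the granularity of 16
 * seen in the second_vertex_offset selection above.
 */
static void scratch_block_offsets(int index, bool gen6_or_later,
                                  int *vert0, int *vert1)
{
   int second_vertex_offset = gen6_or_later ? 1 : 16;
   *vert0 = index;
   *vert1 = index + second_vertex_offset;
}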
+ */ + struct brw_reg m1_0 = suboffset(vec1(m1), 0); + struct brw_reg m1_4 = suboffset(vec1(m1), 4); + struct brw_reg index_0 = suboffset(vec1(index), 0); + struct brw_reg index_4 = suboffset(vec1(index), 4); + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_access_mode(p, BRW_ALIGN_1); + + brw_MOV(p, m1_0, index_0); + + brw_set_predicate_inverse(p, true); + if (index.file == BRW_IMMEDIATE_VALUE) { + index_4.dw1.ud++; + brw_MOV(p, m1_4, index_4); + } else { + brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset)); + } + + brw_pop_insn_state(p); +} + +void +vec4_visitor::generate_scratch_read(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg index) +{ + if (intel->gen >= 6) { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, + retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D), + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D)); + brw_pop_insn_state(p); + } + + generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), + index); + + uint32_t msg_type; + + if (intel->gen >= 6) + msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + else if (intel->gen == 5 || intel->is_g4x) + msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + else + msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; + + /* Each of the 8 channel enables is considered for whether each + * dword is written. + */ + struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, send, dst); + brw_set_src0(p, send, brw_message_reg(inst->base_mrf)); + brw_set_dp_read_message(p, send, + 255, /* binding table index: stateless access */ + BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, + msg_type, + BRW_DATAPORT_READ_TARGET_RENDER_CACHE, + 2, /* mlen */ + 1 /* rlen */); +} + +void +vec4_visitor::generate_scratch_write(vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg index) +{ + /* If the instruction is predicated, we'll predicate the send, not + * the header setup. + */ + brw_set_predicate_control(p, false); + + if (intel->gen >= 6) { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, + retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_D), + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D)); + brw_pop_insn_state(p); + } + + generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), + index); + + brw_MOV(p, + retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D), + retype(src, BRW_REGISTER_TYPE_D)); + + uint32_t msg_type; + + if (intel->gen >= 6) + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; + else + msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; + + brw_set_predicate_control(p, inst->predicate); + + /* Each of the 8 channel enables is considered for whether each + * dword is written. 
+ */ + struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); + brw_set_dest(p, send, dst); + brw_set_src0(p, send, brw_message_reg(inst->base_mrf)); + brw_set_dp_write_message(p, send, + 255, /* binding table index: stateless access */ + BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, + msg_type, + 3, /* mlen */ + true, /* header present */ + false, /* pixel scoreboard */ + 0, /* rlen */ + false, /* eot */ + false /* commit */); +} + +void +vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, + struct brw_reg dst, + struct brw_reg *src) +{ + vec4_instruction *inst = (vec4_instruction *)instruction; + + switch (inst->opcode) { + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: + if (intel->gen >= 6) { + generate_math1_gen6(inst, dst, src[0]); + } else { + generate_math1_gen4(inst, dst, src[0]); + } + break; + + case SHADER_OPCODE_POW: + if (intel->gen >= 6) { + generate_math2_gen6(inst, dst, src[0], src[1]); + } else { + generate_math2_gen4(inst, dst, src[0], src[1]); + } + break; + + case VS_OPCODE_URB_WRITE: + generate_urb_write(inst); + break; + + case VS_OPCODE_SCRATCH_READ: + generate_scratch_read(inst, dst, src[0]); + break; + + case VS_OPCODE_SCRATCH_WRITE: + generate_scratch_write(inst, dst, src[0], src[1]); + break; + + default: + if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) { + fail("unsupported opcode in `%s' in VS\n", + brw_opcodes[inst->opcode].name); + } else { + fail("Unsupported opcode %d in VS", inst->opcode); + } + } +} + +bool +vec4_visitor::run() +{ + /* Generate VS IR for main(). (the visitor only descends into + * functions called "main"). + */ + visit_instructions(shader->ir); + + emit_urb_writes(); + + /* Before any optimization, push array accesses out to scratch + * space where we need them to be. This pass may allocate new + * virtual GRFs, so we want to do it early. It also makes sure + * that we have reladdr computations available for CSE, since we'll + * often do repeated subexpressions for those. 
+ */ + move_grf_array_access_to_scratch(); + + bool progress; + do { + progress = false; + progress = dead_code_eliminate() || progress; + } while (progress); + + if (failed) + return false; + + setup_payload(); + reg_allocate(); + + if (failed) + return false; + + brw_set_access_mode(p, BRW_ALIGN_16); + + generate_code(); + + return !failed; +} + +void +vec4_visitor::generate_code() +{ + int last_native_inst = p->nr_insn; + const char *last_annotation_string = NULL; + ir_instruction *last_annotation_ir = NULL; + + int loop_stack_array_size = 16; + int loop_stack_depth = 0; + brw_instruction **loop_stack = + rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size); + int *if_depth_in_loop = + rzalloc_array(this->mem_ctx, int, loop_stack_array_size); + + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + printf("Native code for vertex shader %d:\n", prog->Name); + } + + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + struct brw_reg src[3], dst; + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + if (last_annotation_ir != inst->ir) { + last_annotation_ir = inst->ir; + if (last_annotation_ir) { + printf(" "); + last_annotation_ir->print(); + printf("\n"); + } + } + if (last_annotation_string != inst->annotation) { + last_annotation_string = inst->annotation; + if (last_annotation_string) + printf(" %s\n", last_annotation_string); + } + } + + for (unsigned int i = 0; i < 3; i++) { + src[i] = inst->get_src(i); + } + dst = inst->get_dst(); + + brw_set_conditionalmod(p, inst->conditional_mod); + brw_set_predicate_control(p, inst->predicate); + brw_set_predicate_inverse(p, inst->predicate_inverse); + brw_set_saturate(p, inst->saturate); + + switch (inst->opcode) { + case BRW_OPCODE_MOV: + brw_MOV(p, dst, src[0]); + break; + case BRW_OPCODE_ADD: + brw_ADD(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_MUL: + brw_MUL(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_MACH: + brw_set_acc_write_control(p, 1); + brw_MACH(p, dst, src[0], src[1]); + brw_set_acc_write_control(p, 0); + break; + + case BRW_OPCODE_FRC: + brw_FRC(p, dst, src[0]); + break; + case BRW_OPCODE_RNDD: + brw_RNDD(p, dst, src[0]); + break; + case BRW_OPCODE_RNDE: + brw_RNDE(p, dst, src[0]); + break; + case BRW_OPCODE_RNDZ: + brw_RNDZ(p, dst, src[0]); + break; + + case BRW_OPCODE_AND: + brw_AND(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_OR: + brw_OR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_XOR: + brw_XOR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_NOT: + brw_NOT(p, dst, src[0]); + break; + case BRW_OPCODE_ASR: + brw_ASR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_SHR: + brw_SHR(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_SHL: + brw_SHL(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_CMP: + brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); + break; + case BRW_OPCODE_SEL: + brw_SEL(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_DP4: + brw_DP4(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_DP3: + brw_DP3(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_DP2: + brw_DP2(p, dst, src[0], src[1]); + break; + + case BRW_OPCODE_IF: + if (inst->src[0].file != BAD_FILE) { + /* The instruction has an embedded compare (only allowed on gen6) */ + assert(intel->gen == 6); + gen6_IF(p, inst->conditional_mod, src[0], src[1]); + } else { + struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8); + brw_inst->header.predicate_control = inst->predicate; + } + if_depth_in_loop[loop_stack_depth]++; + break; + + case 
BRW_OPCODE_ELSE: + brw_ELSE(p); + break; + case BRW_OPCODE_ENDIF: + brw_ENDIF(p); + if_depth_in_loop[loop_stack_depth]--; + break; + + case BRW_OPCODE_DO: + loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); + if (loop_stack_array_size <= loop_stack_depth) { + loop_stack_array_size *= 2; + loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *, + loop_stack_array_size); + if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int, + loop_stack_array_size); + } + if_depth_in_loop[loop_stack_depth] = 0; + break; + + case BRW_OPCODE_BREAK: + brw_BREAK(p, if_depth_in_loop[loop_stack_depth]); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case BRW_OPCODE_CONTINUE: + /* FINISHME: We need to write the loop instruction support still. */ + if (intel->gen >= 6) + gen6_CONT(p, loop_stack[loop_stack_depth - 1]); + else + brw_CONT(p, if_depth_in_loop[loop_stack_depth]); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + + case BRW_OPCODE_WHILE: { + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + if (intel->gen >= 5) + br = 2; + + assert(loop_stack_depth > 0); + loop_stack_depth--; + inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); + if (intel->gen < 6) { + /* patch all the BREAK/CONT instructions from last BGNLOOP */ + while (inst0 > loop_stack[loop_stack_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + } + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + } + } + } + } + break; + + default: + generate_vs_instruction(inst, dst, src); + break; + } + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + for (unsigned int i = last_native_inst; i < p->nr_insn; i++) { + if (0) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)&p->store[i])[3], + ((uint32_t *)&p->store[i])[2], + ((uint32_t *)&p->store[i])[1], + ((uint32_t *)&p->store[i])[0]); + } + brw_disasm(stdout, &p->store[i], intel->gen); + } + } + + last_native_inst = p->nr_insn; + } + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + printf("\n"); + } + + ralloc_free(loop_stack); + ralloc_free(if_depth_in_loop); + + brw_set_uip_jip(p); + + /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS + * emit issues, it doesn't get the jump distances into the output, + * which is often something we want to debug. So this is here in + * case you're doing that. 
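The loop bookkeeping above starts with room for 16 nested loops and doubles loop_stack and if_depth_in_loop with reralloc whenever another level is pushed, which keeps the total cost of deep nesting linear in the number of pushes. A generic sketch of that growth pattern, using plain realloc instead of the driver's ralloc context (error handling omitted for brevity):

#include <cstdlib>

/* Grow a heap array geometrically so that N pushes cost O(N) overall.
 * Returns the (possibly moved) array; 'size' is updated in place.
 */
static int *grow_if_full(int *arr, int used, int *size)
{
   if (used < *size)
      return arr;
   *size *= 2;
   return static_cast<int *>(realloc(arr, *size * sizeof(int)));
}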
+ */ + if (0) { + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + for (unsigned int i = 0; i < p->nr_insn; i++) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)&p->store[i])[3], + ((uint32_t *)&p->store[i])[2], + ((uint32_t *)&p->store[i])[1], + ((uint32_t *)&p->store[i])[0]); + brw_disasm(stdout, &p->store[i], intel->gen); + } + } + } +} + +extern "C" { + +bool +brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c) +{ + if (!prog) + return false; + + struct brw_shader *shader = + (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; + if (!shader) + return false; + + if (unlikely(INTEL_DEBUG & DEBUG_VS)) { + printf("GLSL IR for native vertex shader %d:\n", prog->Name); + _mesa_print_ir(shader->ir, NULL); + printf("\n\n"); + } + + vec4_visitor v(c, prog, shader); + if (!v.run()) { + prog->LinkStatus = GL_FALSE; + ralloc_strcat(&prog->InfoLog, v.fail_msg); + return false; + } + + return true; +} + +} /* extern "C" */ + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp new file mode 100644 index 00000000000..3f052ff64cf --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -0,0 +1,234 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +extern "C" { +#include "main/macros.h" +#include "program/register_allocate.h" +} /* extern "C" */ + +#include "brw_vec4.h" +#include "../glsl/ir_print_visitor.h" + +using namespace brw; + +namespace brw { + +static void +assign(int *reg_hw_locations, reg *reg) +{ + if (reg->file == GRF) { + reg->reg = reg_hw_locations[reg->reg]; + } +} + +void +vec4_visitor::reg_allocate_trivial() +{ + int hw_reg_mapping[this->virtual_grf_count]; + bool virtual_grf_used[this->virtual_grf_count]; + int i; + int next; + + /* Calculate which virtual GRFs are actually in use after whatever + * optimization passes have occurred. 
+ */ + for (int i = 0; i < this->virtual_grf_count; i++) { + virtual_grf_used[i] = false; + } + + foreach_iter(exec_list_iterator, iter, this->instructions) { + vec4_instruction *inst = (vec4_instruction *)iter.get(); + + if (inst->dst.file == GRF) + virtual_grf_used[inst->dst.reg] = true; + + for (int i = 0; i < 3; i++) { + if (inst->src[i].file == GRF) + virtual_grf_used[inst->src[i].reg] = true; + } + } + + hw_reg_mapping[0] = this->first_non_payload_grf; + next = hw_reg_mapping[0] + this->virtual_grf_sizes[0]; + for (i = 1; i < this->virtual_grf_count; i++) { + if (virtual_grf_used[i]) { + hw_reg_mapping[i] = next; + next += this->virtual_grf_sizes[i]; + } + } + prog_data->total_grf = next; + + foreach_iter(exec_list_iterator, iter, this->instructions) { + vec4_instruction *inst = (vec4_instruction *)iter.get(); + + assign(hw_reg_mapping, &inst->dst); + assign(hw_reg_mapping, &inst->src[0]); + assign(hw_reg_mapping, &inst->src[1]); + assign(hw_reg_mapping, &inst->src[2]); + } + + if (prog_data->total_grf > BRW_MAX_GRF) { + fail("Ran out of regs on trivial allocator (%d/%d)\n", + prog_data->total_grf, BRW_MAX_GRF); + } +} + +static void +brw_alloc_reg_set_for_classes(struct brw_context *brw, + int *class_sizes, + int class_count, + int base_reg_count) +{ + /* Compute the total number of registers across all classes. */ + int ra_reg_count = 0; + for (int i = 0; i < class_count; i++) { + ra_reg_count += base_reg_count - (class_sizes[i] - 1); + } + + ralloc_free(brw->vs.ra_reg_to_grf); + brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count); + ralloc_free(brw->vs.regs); + brw->vs.regs = ra_alloc_reg_set(ra_reg_count); + ralloc_free(brw->vs.classes); + brw->vs.classes = ralloc_array(brw, int, class_count + 1); + + /* Now, add the registers to their classes, and add the conflicts + * between them and the base GRF registers (and also each other). + */ + int reg = 0; + for (int i = 0; i < class_count; i++) { + int class_reg_count = base_reg_count - (class_sizes[i] - 1); + brw->vs.classes[i] = ra_alloc_reg_class(brw->vs.regs); + + for (int j = 0; j < class_reg_count; j++) { + ra_class_add_reg(brw->vs.regs, brw->vs.classes[i], reg); + + brw->vs.ra_reg_to_grf[reg] = j; + + for (int base_reg = j; + base_reg < j + class_sizes[i]; + base_reg++) { + ra_add_transitive_reg_conflict(brw->vs.regs, base_reg, reg); + } + + reg++; + } + } + assert(reg == ra_reg_count); + + ra_set_finalize(brw->vs.regs); +} + +void +vec4_visitor::reg_allocate() +{ + int hw_reg_mapping[virtual_grf_count]; + int first_assigned_grf = this->first_non_payload_grf; + int base_reg_count = BRW_MAX_GRF - first_assigned_grf; + int class_sizes[base_reg_count]; + int class_count = 0; + + /* Using the trivial allocator can be useful in debugging undefined + * register access as a result of broken optimization passes. + */ + if (0) { + reg_allocate_trivial(); + return; + } + + calculate_live_intervals(); + + /* Set up the register classes. + * + * The base registers store a vec4. However, we'll need larger + * storage for arrays, structures, and matrices, which will be sets + * of contiguous registers. 
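Each class built above models allocations of class_sizes[i] contiguous GRFs, so a class contributes one allocator register per possible starting GRF, i.e. base_reg_count - (size - 1) placements, and ra_reg_count is the sum of those placements over all classes. A self-contained sketch of the count, with made-up example sizes:

/* A class of 'size' contiguous hardware registers has one candidate
 * placement per legal starting register: base_regs - (size - 1).
 */
static int ra_register_count(const int *class_sizes, int class_count,
                             int base_regs)
{
   int total = 0;
   for (int i = 0; i < class_count; i++)
      total += base_regs - (class_sizes[i] - 1);
   return total;
}

/* Example: with 100 free GRFs and classes of sizes {1, 2, 4}, the set
 * holds 100 + 99 + 97 = 296 allocator registers.
 */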
+ */ + class_sizes[class_count++] = 1; + + for (int r = 0; r < virtual_grf_count; r++) { + int i; + + for (i = 0; i < class_count; i++) { + if (class_sizes[i] == this->virtual_grf_sizes[r]) + break; + } + if (i == class_count) { + if (this->virtual_grf_sizes[r] >= base_reg_count) { + fail("Object too large to register allocate.\n"); + } + + class_sizes[class_count++] = this->virtual_grf_sizes[r]; + } + } + + brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, base_reg_count); + + struct ra_graph *g = ra_alloc_interference_graph(brw->vs.regs, + virtual_grf_count); + + for (int i = 0; i < virtual_grf_count; i++) { + for (int c = 0; c < class_count; c++) { + if (class_sizes[c] == this->virtual_grf_sizes[i]) { + ra_set_node_class(g, i, brw->vs.classes[c]); + break; + } + } + + for (int j = 0; j < i; j++) { + if (virtual_grf_interferes(i, j)) { + ra_add_node_interference(g, i, j); + } + } + } + + if (!ra_allocate_no_spills(g)) { + ralloc_free(g); + fail("No register spilling support yet\n"); + } + + /* Get the chosen virtual registers for each node, and map virtual + * regs in the register classes back down to real hardware reg + * numbers. + */ + prog_data->total_grf = first_assigned_grf; + for (int i = 0; i < virtual_grf_count; i++) { + int reg = ra_get_node_reg(g, i); + + hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg]; + prog_data->total_grf = MAX2(prog_data->total_grf, hw_reg_mapping[i] + 1); + } + + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + assign(hw_reg_mapping, &inst->dst); + assign(hw_reg_mapping, &inst->src[0]); + assign(hw_reg_mapping, &inst->src[1]); + assign(hw_reg_mapping, &inst->src[2]); + } + + ralloc_free(g); +} + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp new file mode 100644 index 00000000000..b3a07bd0539 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -0,0 +1,2156 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "brw_vec4.h" +extern "C" { +#include "main/macros.h" +#include "program/prog_parameter.h" +} + +namespace brw { + +src_reg::src_reg(dst_reg reg) +{ + init(); + + this->file = reg.file; + this->reg = reg.reg; + this->reg_offset = reg.reg_offset; + this->type = reg.type; + this->reladdr = reg.reladdr; + this->fixed_hw_reg = reg.fixed_hw_reg; + + int swizzles[4]; + int next_chan = 0; + int last = 0; + + for (int i = 0; i < 4; i++) { + if (!(reg.writemask & (1 << i))) + continue; + + swizzles[next_chan++] = last = i; + } + + for (; next_chan < 4; next_chan++) { + swizzles[next_chan] = last; + } + + this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], + swizzles[2], swizzles[3]); +} + +dst_reg::dst_reg(src_reg reg) +{ + init(); + + this->file = reg.file; + this->reg = reg.reg; + this->reg_offset = reg.reg_offset; + this->type = reg.type; + this->writemask = WRITEMASK_XYZW; + this->reladdr = reg.reladdr; + this->fixed_hw_reg = reg.fixed_hw_reg; +} + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode, dst_reg dst, + src_reg src0, src_reg src1, src_reg src2) +{ + vec4_instruction *inst = new(mem_ctx) vec4_instruction(); + + inst->opcode = opcode; + inst->dst = dst; + inst->src[0] = src0; + inst->src[1] = src1; + inst->src[2] = src2; + inst->ir = this->base_ir; + inst->annotation = this->current_annotation; + + this->instructions.push_tail(inst); + + return inst; +} + + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1) +{ + return emit(opcode, dst, src0, src1, src_reg()); +} + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0) +{ + assert(dst.writemask != 0); + return emit(opcode, dst, src0, src_reg(), src_reg()); +} + +vec4_instruction * +vec4_visitor::emit(enum opcode opcode) +{ + return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg()); +} + +void +vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements) +{ + static enum opcode dot_opcodes[] = { + BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4 + }; + + emit(dot_opcodes[elements - 2], dst, src0, src1); +} + +void +vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src) +{ + /* The gen6 math instruction ignores the source modifiers -- + * swizzle, abs, negate, and at least some parts of the register + * region description. + */ + src_reg temp_src = src_reg(this, glsl_type::vec4_type); + emit(BRW_OPCODE_MOV, dst_reg(temp_src), src); + + if (dst.writemask != WRITEMASK_XYZW) { + /* The gen6 math instruction must be align1, so we can't do + * writemasks. 
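When a dst_reg is reinterpreted as a src_reg above, its writemask has to become a swizzle: the set channels are listed in order and the last one is replicated into the remaining slots, so a writemask of XZ (0b0101) becomes the channel selection (x, z, z, z). The same loop outside the driver types, as a sketch:

/* Turn a 4-bit writemask into a 4-entry channel selection, replicating
 * the last written channel, e.g. mask 0b0101 (XZ) -> {0, 2, 2, 2}.
 */
static void writemask_to_channels(unsigned mask, int chan[4])
{
   int next = 0, last = 0;
   for (int i = 0; i < 4; i++) {
      if (mask & (1u << i))
         chan[next++] = last = i;
   }
   for (; next < 4; next++)
      chan[next] = last;
}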
+ */ + dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type); + + emit(opcode, temp_dst, temp_src); + + emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst)); + } else { + emit(opcode, dst, temp_src); + } +} + +void +vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src) +{ + vec4_instruction *inst = emit(opcode, dst, src); + inst->base_mrf = 1; + inst->mlen = 1; +} + +void +vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src) +{ + switch (opcode) { + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: + break; + default: + assert(!"not reached: bad math opcode"); + return; + } + + if (intel->gen >= 6) { + return emit_math1_gen6(opcode, dst, src); + } else { + return emit_math1_gen4(opcode, dst, src); + } +} + +void +vec4_visitor::emit_math2_gen6(enum opcode opcode, + dst_reg dst, src_reg src0, src_reg src1) +{ + src_reg expanded; + + /* The gen6 math instruction ignores the source modifiers -- + * swizzle, abs, negate, and at least some parts of the register + * region description. Move the sources to temporaries to make it + * generally work. + */ + + expanded = src_reg(this, glsl_type::vec4_type); + emit(BRW_OPCODE_MOV, dst_reg(expanded), src0); + src0 = expanded; + + expanded = src_reg(this, glsl_type::vec4_type); + emit(BRW_OPCODE_MOV, dst_reg(expanded), src1); + src1 = expanded; + + if (dst.writemask != WRITEMASK_XYZW) { + /* The gen6 math instruction must be align1, so we can't do + * writemasks. + */ + dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type); + + emit(opcode, temp_dst, src0, src1); + + emit(BRW_OPCODE_MOV, dst, src_reg(temp_dst)); + } else { + emit(opcode, dst, src0, src1); + } +} + +void +vec4_visitor::emit_math2_gen4(enum opcode opcode, + dst_reg dst, src_reg src0, src_reg src1) +{ + vec4_instruction *inst = emit(opcode, dst, src0, src1); + inst->base_mrf = 1; + inst->mlen = 2; +} + +void +vec4_visitor::emit_math(enum opcode opcode, + dst_reg dst, src_reg src0, src_reg src1) +{ + assert(opcode == SHADER_OPCODE_POW); + + if (intel->gen >= 6) { + return emit_math2_gen6(opcode, dst, src0, src1); + } else { + return emit_math2_gen4(opcode, dst, src0, src1); + } +} + +void +vec4_visitor::visit_instructions(const exec_list *list) +{ + foreach_list(node, list) { + ir_instruction *ir = (ir_instruction *)node; + + base_ir = ir; + ir->accept(this); + } +} + + +static int +type_size(const struct glsl_type *type) +{ + unsigned int i; + int size; + + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + if (type->is_matrix()) { + return type->matrix_columns; + } else { + /* Regardless of size of vector, it gets a vec4. This is bad + * packing for things like floats, but otherwise arrays become a + * mess. Hopefully a later pass over the code can pack scalars + * down if appropriate. + */ + return 1; + } + case GLSL_TYPE_ARRAY: + assert(type->length > 0); + return type_size(type->fields.array) * type->length; + case GLSL_TYPE_STRUCT: + size = 0; + for (i = 0; i < type->length; i++) { + size += type_size(type->fields.structure[i].type); + } + return size; + case GLSL_TYPE_SAMPLER: + /* Samplers take up one slot in UNIFORMS[], but they're baked in + * at link time. 
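type_size() above measures everything in vec4 slots: any scalar or vector costs one slot, a matrix costs one per column, arrays and structs sum their members, and a sampler reserves one slot even though no data is read from it. A few worked examples, written as comments (per the packing caveat in the code, a lone float really does occupy a whole slot):

/* Worked examples of the vec4-slot accounting in type_size():
 *
 *   float                         -> 1   (scalars are not packed)
 *   vec3                          -> 1
 *   mat4                          -> 4   (one slot per column)
 *   float[10]                     -> 10  (element size 1, length 10)
 *   struct { vec3 a; mat3 b; }    -> 1 + 3 = 4
 *   sampler2D                     -> 1   (slot reserved, baked in at link time)
 */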
+ */ + return 1; + default: + assert(0); + return 0; + } +} + +int +vec4_visitor::virtual_grf_alloc(int size) +{ + if (virtual_grf_array_size <= virtual_grf_count) { + if (virtual_grf_array_size == 0) + virtual_grf_array_size = 16; + else + virtual_grf_array_size *= 2; + virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, + virtual_grf_array_size); + } + virtual_grf_sizes[virtual_grf_count] = size; + return virtual_grf_count++; +} + +src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) +{ + init(); + + this->file = GRF; + this->reg = v->virtual_grf_alloc(type_size(type)); + + if (type->is_array() || type->is_record()) { + this->swizzle = BRW_SWIZZLE_NOOP; + } else { + this->swizzle = swizzle_for_size(type->vector_elements); + } + + this->type = brw_type_for_base_type(type); +} + +dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) +{ + init(); + + this->file = GRF; + this->reg = v->virtual_grf_alloc(type_size(type)); + + if (type->is_array() || type->is_record()) { + this->writemask = WRITEMASK_XYZW; + } else { + this->writemask = (1 << type->vector_elements) - 1; + } + + this->type = brw_type_for_base_type(type); +} + +/* Our support for uniforms is piggy-backed on the struct + * gl_fragment_program, because that's where the values actually + * get stored, rather than in some global gl_shader_program uniform + * store. + */ +int +vec4_visitor::setup_uniform_values(int loc, const glsl_type *type) +{ + unsigned int offset = 0; + float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f; + + if (type->is_matrix()) { + const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, + type->vector_elements, + 1); + + for (unsigned int i = 0; i < type->matrix_columns; i++) { + offset += setup_uniform_values(loc + offset, column); + } + + return offset; + } + + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_BOOL: + for (unsigned int i = 0; i < type->vector_elements; i++) { + int slot = this->uniforms * 4 + i; + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; + break; + case GLSL_TYPE_UINT: + c->prog_data.param_convert[slot] = PARAM_CONVERT_F2U; + break; + case GLSL_TYPE_INT: + c->prog_data.param_convert[slot] = PARAM_CONVERT_F2I; + break; + case GLSL_TYPE_BOOL: + c->prog_data.param_convert[slot] = PARAM_CONVERT_F2B; + break; + default: + assert(!"not reached"); + c->prog_data.param_convert[slot] = PARAM_NO_CONVERT; + break; + } + c->prog_data.param[slot] = &values[i]; + } + + for (unsigned int i = type->vector_elements; i < 4; i++) { + c->prog_data.param_convert[this->uniforms * 4 + i] = + PARAM_CONVERT_ZERO; + c->prog_data.param[this->uniforms * 4 + i] = NULL; + } + + this->uniform_size[this->uniforms] = type->vector_elements; + this->uniforms++; + + return 1; + + case GLSL_TYPE_STRUCT: + for (unsigned int i = 0; i < type->length; i++) { + offset += setup_uniform_values(loc + offset, + type->fields.structure[i].type); + } + return offset; + + case GLSL_TYPE_ARRAY: + for (unsigned int i = 0; i < type->length; i++) { + offset += setup_uniform_values(loc + offset, type->fields.array); + } + return offset; + + case GLSL_TYPE_SAMPLER: + /* The sampler takes up a slot, but we don't use any values from it. */ + return 1; + + default: + assert(!"not reached"); + return 0; + } +} + +/* Our support for builtin uniforms is even scarier than non-builtin. 
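setup_uniform_values() above gives every user uniform a full vec4's worth of param slots: the live components point at parameter storage with the conversion matching their base type, and the leftover components are pinned to zero, so a vec3 uniform still consumes four slots with the fourth zero-filled. A small sketch of that layout rule, tracking only the slot bookkeeping and none of the driver types:

/* Model of the per-uniform slot layout: a uniform with 'components'
 * live channels still advances the uniform counter by one full vec4.
 * Returns the first of the four slots it occupied; dead channels
 * correspond to the CONVERT_ZERO / NULL entries in the real code.
 */
static int assign_uniform_slots(int *next_uniform, int components,
                                bool live_out[4])
{
   int base = *next_uniform * 4;
   for (int i = 0; i < 4; i++)
      live_out[i] = (i < components);
   (*next_uniform)++;
   return base;
}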
+ * It sits on top of the PROG_STATE_VAR parameters that are + * automatically updated from GL context state. + */ +void +vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) +{ + const ir_state_slot *const slots = ir->state_slots; + assert(ir->state_slots != NULL); + + for (unsigned int i = 0; i < ir->num_state_slots; i++) { + /* This state reference has already been setup by ir_to_mesa, + * but we'll get the same index back here. We can reference + * ParameterValues directly, since unlike brw_fs.cpp, we never + * add new state references during compile. + */ + int index = _mesa_add_state_reference(this->vp->Base.Parameters, + (gl_state_index *)slots[i].tokens); + float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f; + + this->uniform_size[this->uniforms] = 0; + /* Add each of the unique swizzled channels of the element. + * This will end up matching the size of the glsl_type of this field. + */ + int last_swiz = -1; + for (unsigned int j = 0; j < 4; j++) { + int swiz = GET_SWZ(slots[i].swizzle, j); + last_swiz = swiz; + + c->prog_data.param[this->uniforms * 4 + j] = &values[swiz]; + c->prog_data.param_convert[this->uniforms * 4 + j] = PARAM_NO_CONVERT; + if (swiz <= last_swiz) + this->uniform_size[this->uniforms]++; + } + this->uniforms++; + } +} + +dst_reg * +vec4_visitor::variable_storage(ir_variable *var) +{ + return (dst_reg *)hash_table_find(this->variable_ht, var); +} + +void +vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir) +{ + ir_expression *expr = ir->as_expression(); + + if (expr) { + src_reg op[2]; + vec4_instruction *inst; + + assert(expr->get_num_operands() <= 2); + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + assert(expr->operands[i]->type->is_scalar()); + + expr->operands[i]->accept(this); + op[i] = this->result; + } + + switch (expr->operation) { + case ir_unop_logic_not: + inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1)); + inst->conditional_mod = BRW_CONDITIONAL_Z; + break; + + case ir_binop_logic_xor: + inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_logic_or: + inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_logic_and: + inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_unop_f2b: + if (intel->gen >= 6) { + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f)); + } else { + inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]); + } + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_unop_i2b: + if (intel->gen >= 6) { + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + } else { + inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]); + } + inst->conditional_mod = BRW_CONDITIONAL_NZ; + break; + + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_equal: + case ir_binop_all_equal: + case ir_binop_nequal: + case ir_binop_any_nequal: + inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); + break; + + default: + assert(!"not reached"); + break; + } + return; + } + + ir->accept(this); + + if (intel->gen >= 6) { + vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(), + this->result, src_reg(1)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } else { + vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), 
this->result); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + } +} + +/** + * Emit a gen6 IF statement with the comparison folded into the IF + * instruction. + */ +void +vec4_visitor::emit_if_gen6(ir_if *ir) +{ + ir_expression *expr = ir->condition->as_expression(); + + if (expr) { + src_reg op[2]; + vec4_instruction *inst; + dst_reg temp; + + assert(expr->get_num_operands() <= 2); + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + expr->operands[i]->accept(this); + op[i] = this->result; + } + + switch (expr->operation) { + case ir_unop_logic_not: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_Z; + return; + + case ir_binop_logic_xor: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_logic_or: + temp = dst_reg(this, glsl_type::bool_type); + emit(BRW_OPCODE_OR, temp, op[0], op[1]); + inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_logic_and: + temp = dst_reg(this, glsl_type::bool_type); + emit(BRW_OPCODE_AND, temp, op[0], op[1]); + inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_unop_f2b: + inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_unop_i2b: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_equal: + case ir_binop_nequal: + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]); + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); + return; + + case ir_binop_all_equal: + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_Z; + + inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; + return; + + case ir_binop_any_nequal: + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + return; + + case ir_unop_any: + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + return; + + default: + assert(!"not reached"); + inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + return; + } + return; + } + + ir->condition->accept(this); + + vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(), + this->result, src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; +} + +void +vec4_visitor::visit(ir_variable *ir) +{ + dst_reg *reg = NULL; + + if (variable_storage(ir)) + return; + + switch (ir->mode) { + case ir_var_in: + reg = new(mem_ctx) dst_reg(ATTR, ir->location); + break; + + case ir_var_out: + reg = new(mem_ctx) dst_reg(this, ir->type); + + for (int i = 0; i < type_size(ir->type); i++) { + output_reg[ir->location + i] = *reg; + output_reg[ir->location + i].reg_offset = i; + output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F; + } + break; + + case ir_var_auto: + case ir_var_temporary: + reg = new(mem_ctx) dst_reg(this, ir->type); + break; + + case ir_var_uniform: + reg = 
new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms); + + if (!strncmp(ir->name, "gl_", 3)) { + setup_builtin_uniform_values(ir); + } else { + setup_uniform_values(ir->location, ir->type); + } + break; + + default: + assert(!"not reached"); + } + + reg->type = brw_type_for_base_type(ir->type); + hash_table_insert(this->variable_ht, reg, ir); +} + +void +vec4_visitor::visit(ir_loop *ir) +{ + dst_reg counter; + + /* We don't want debugging output to print the whole body of the + * loop as the annotation. + */ + this->base_ir = NULL; + + if (ir->counter != NULL) { + this->base_ir = ir->counter; + ir->counter->accept(this); + counter = *(variable_storage(ir->counter)); + + if (ir->from != NULL) { + this->base_ir = ir->from; + ir->from->accept(this); + + emit(BRW_OPCODE_MOV, counter, this->result); + } + } + + emit(BRW_OPCODE_DO); + + if (ir->to) { + this->base_ir = ir->to; + ir->to->accept(this); + + vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst_null_d(), + src_reg(counter), this->result); + inst->conditional_mod = brw_conditional_for_comparison(ir->cmp); + + inst = emit(BRW_OPCODE_BREAK); + inst->predicate = BRW_PREDICATE_NORMAL; + } + + visit_instructions(&ir->body_instructions); + + + if (ir->increment) { + this->base_ir = ir->increment; + ir->increment->accept(this); + emit(BRW_OPCODE_ADD, counter, src_reg(counter), this->result); + } + + emit(BRW_OPCODE_WHILE); +} + +void +vec4_visitor::visit(ir_loop_jump *ir) +{ + switch (ir->mode) { + case ir_loop_jump::jump_break: + emit(BRW_OPCODE_BREAK); + break; + case ir_loop_jump::jump_continue: + emit(BRW_OPCODE_CONTINUE); + break; + } +} + + +void +vec4_visitor::visit(ir_function_signature *ir) +{ + assert(0); + (void)ir; +} + +void +vec4_visitor::visit(ir_function *ir) +{ + /* Ignore function bodies other than main() -- we shouldn't see calls to + * them since they should all be inlined. + */ + if (strcmp(ir->name, "main") == 0) { + const ir_function_signature *sig; + exec_list empty; + + sig = ir->matching_signature(&empty); + + assert(sig); + + visit_instructions(&sig->body); + } +} + +GLboolean +vec4_visitor::try_emit_sat(ir_expression *ir) +{ + ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); + if (!sat_src) + return false; + + sat_src->accept(this); + src_reg src = this->result; + + this->result = src_reg(this, ir->type); + vec4_instruction *inst; + inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src); + inst->saturate = true; + + return true; +} + +void +vec4_visitor::emit_bool_comparison(unsigned int op, + dst_reg dst, src_reg src0, src_reg src1) +{ + /* original gen4 does destination conversion before comparison. */ + if (intel->gen < 5) + dst.type = src0.type; + + vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1); + inst->conditional_mod = brw_conditional_for_comparison(op); + + dst.type = BRW_REGISTER_TYPE_D; + emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1)); +} + +void +vec4_visitor::visit(ir_expression *ir) +{ + unsigned int operand; + src_reg op[Elements(ir->operands)]; + src_reg result_src; + dst_reg result_dst; + vec4_instruction *inst; + + if (try_emit_sat(ir)) + return; + + for (operand = 0; operand < ir->get_num_operands(); operand++) { + this->result.file = BAD_FILE; + ir->operands[operand]->accept(this); + if (this->result.file == BAD_FILE) { + printf("Failed to get tree for expression operand:\n"); + ir->operands[operand]->print(); + exit(1); + } + op[operand] = this->result; + + /* Matrix expression operands should have been broken down to vector + * operations already. 
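visit(ir_loop) above flattens the structured GLSL loop into the unstructured form the EU executes: initialize the counter from ir->from, open a DO, compare the counter against ir->to and emit a predicated BREAK, run the body, bump the counter by ir->increment, and close with WHILE. As an illustrative comment, the emitted shape looks like this:

/* Shape of the code emitted for a counted ir_loop:
 *
 *   MOV   counter, <ir->from>           // only when a counter exists
 *   DO
 *     CMP.<ir->cmp> null, counter, <ir->to>
 *     (+pred) BREAK                     // leave once the comparison holds
 *     ... loop body ...
 *     ADD   counter, counter, <ir->increment>
 *   WHILE
 */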
+ */ + assert(!ir->operands[operand]->type->is_matrix()); + } + + int vector_elements = ir->operands[0]->type->vector_elements; + if (ir->operands[1]) { + vector_elements = MAX2(vector_elements, + ir->operands[1]->type->vector_elements); + } + + this->result.file = BAD_FILE; + + /* Storage for our result. Ideally for an assignment we'd be using + * the actual storage for the result here, instead. + */ + result_src = src_reg(this, ir->type); + /* convenience for the emit functions below. */ + result_dst = dst_reg(result_src); + /* If nothing special happens, this is the result. */ + this->result = result_src; + /* Limit writes to the channels that will be used by result_src later. + * This does limit this temp's use as a temporary for multi-instruction + * sequences. + */ + result_dst.writemask = (1 << ir->type->vector_elements) - 1; + + switch (ir->operation) { + case ir_unop_logic_not: + /* Note that BRW_OPCODE_NOT is not appropriate here, since it is + * ones complement of the whole register, not just bit 0. + */ + emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1)); + break; + case ir_unop_neg: + op[0].negate = !op[0].negate; + this->result = op[0]; + break; + case ir_unop_abs: + op[0].abs = true; + op[0].negate = false; + this->result = op[0]; + break; + + case ir_unop_sign: + emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f)); + + inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_G; + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f)); + inst->predicate = BRW_PREDICATE_NORMAL; + + inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_L; + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f)); + inst->predicate = BRW_PREDICATE_NORMAL; + + break; + + case ir_unop_rcp: + emit_math(SHADER_OPCODE_RCP, result_dst, op[0]); + break; + + case ir_unop_exp2: + emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]); + break; + case ir_unop_log2: + emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]); + break; + case ir_unop_exp: + case ir_unop_log: + assert(!"not reached: should be handled by ir_explog_to_explog2"); + break; + case ir_unop_sin: + case ir_unop_sin_reduced: + emit_math(SHADER_OPCODE_SIN, result_dst, op[0]); + break; + case ir_unop_cos: + case ir_unop_cos_reduced: + emit_math(SHADER_OPCODE_COS, result_dst, op[0]); + break; + + case ir_unop_dFdx: + case ir_unop_dFdy: + assert(!"derivatives not valid in vertex shader"); + break; + + case ir_unop_noise: + assert(!"not reached: should be handled by lower_noise"); + break; + + case ir_binop_add: + emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]); + break; + case ir_binop_sub: + assert(!"not reached: should be handled by ir_sub_to_add_neg"); + break; + + case ir_binop_mul: + if (ir->type->is_integer()) { + /* For integer multiplication, the MUL uses the low 16 bits + * of one of the operands (src0 on gen6, src1 on gen7). The + * MACH accumulates in the contribution of the upper 16 bits + * of that operand. + * + * FINISHME: Emit just the MUL if we know an operand is small + * enough. 
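The sign() lowering earlier in this switch is a branch-free select: start from zero, then two compare-plus-predicated-MOV pairs overwrite the result with 1.0 where the operand is greater than zero and with -1.0 where it is less. A scalar model of that sequence in plain C++:

/* Scalar model of the emitted sign() sequence: a zero default plus two
 * predicated overwrites. NaN fails both tests and falls through as 0.0f,
 * just as it would leave the predicated MOVs unexecuted.
 */
static float sign_like(float x)
{
   float result = 0.0f;
   if (x > 0.0f)
      result = 1.0f;
   if (x < 0.0f)
      result = -1.0f;
   return result;
}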
+ */ + struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D); + + emit(BRW_OPCODE_MUL, acc, op[0], op[1]); + emit(BRW_OPCODE_MACH, dst_null_d(), op[0], op[1]); + emit(BRW_OPCODE_MOV, result_dst, src_reg(acc)); + } else { + emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]); + } + break; + case ir_binop_div: + assert(!"not reached: should be handled by ir_div_to_mul_rcp"); + case ir_binop_mod: + assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); + break; + + case ir_binop_less: + case ir_binop_greater: + case ir_binop_lequal: + case ir_binop_gequal: + case ir_binop_equal: + case ir_binop_nequal: { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); + inst->conditional_mod = brw_conditional_for_comparison(ir->operation); + emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1)); + break; + } + + case ir_binop_all_equal: + /* "==" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_Z; + + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; + } else { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_Z; + emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); + } + break; + case ir_binop_any_nequal: + /* "!=" operator producing a scalar boolean. */ + if (ir->operands[0]->type->is_vector() || + ir->operands[1]->type->is_vector()) { + inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + } else { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. 
*/ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1)); + } + break; + + case ir_unop_any: + inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + + emit(BRW_OPCODE_MOV, result_dst, src_reg(0)); + + inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1)); + inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; + break; + + case ir_binop_logic_xor: + emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); + break; + + case ir_binop_logic_or: + emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); + break; + + case ir_binop_logic_and: + emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); + break; + + case ir_binop_dot: + assert(ir->operands[0]->type->is_vector()); + assert(ir->operands[0]->type == ir->operands[1]->type); + emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements); + break; + + case ir_unop_sqrt: + emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]); + break; + case ir_unop_rsq: + emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]); + break; + case ir_unop_i2f: + case ir_unop_i2u: + case ir_unop_u2i: + case ir_unop_u2f: + case ir_unop_b2f: + case ir_unop_b2i: + case ir_unop_f2i: + emit(BRW_OPCODE_MOV, result_dst, op[0]); + break; + case ir_unop_f2b: + case ir_unop_i2b: { + dst_reg temp = result_dst; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_NZ; + inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1)); + break; + } + + case ir_unop_trunc: + emit(BRW_OPCODE_RNDZ, result_dst, op[0]); + break; + case ir_unop_ceil: + op[0].negate = !op[0].negate; + inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); + this->result.negate = true; + break; + case ir_unop_floor: + inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]); + break; + case ir_unop_fract: + inst = emit(BRW_OPCODE_FRC, result_dst, op[0]); + break; + case ir_unop_round_even: + emit(BRW_OPCODE_RNDE, result_dst, op[0]); + break; + + case ir_binop_min: + inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_L; + + inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + case ir_binop_max: + inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]); + inst->conditional_mod = BRW_CONDITIONAL_G; + + inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + + case ir_binop_pow: + emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]); + break; + + case ir_unop_bit_not: + inst = emit(BRW_OPCODE_NOT, result_dst, op[0]); + break; + case ir_binop_bit_and: + inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]); + break; + case ir_binop_bit_xor: + inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]); + break; + case ir_binop_bit_or: + inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]); + break; + + case ir_binop_lshift: + case ir_binop_rshift: + assert(!"GLSL 1.30 features unsupported"); + break; + + case ir_quadop_vector: + assert(!"not reached: should be handled by lower_quadop_vector"); + break; + } +} + + +void +vec4_visitor::visit(ir_swizzle *ir) +{ + src_reg src; + int i = 0; + int swizzle[4]; + + /* Note that this is only swizzles in expressions, not those on the left + * hand side of an assignment, which do write masking. 
See ir_assignment + * for that. + */ + + ir->val->accept(this); + src = this->result; + assert(src.file != BAD_FILE); + + for (i = 0; i < ir->type->vector_elements; i++) { + switch (i) { + case 0: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x); + break; + case 1: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y); + break; + case 2: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z); + break; + case 3: + swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w); + break; + } + } + for (; i < 4; i++) { + /* Replicate the last channel out. */ + swizzle[i] = swizzle[ir->type->vector_elements - 1]; + } + + src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); + + this->result = src; +} + +void +vec4_visitor::visit(ir_dereference_variable *ir) +{ + const struct glsl_type *type = ir->type; + dst_reg *reg = variable_storage(ir->var); + + if (!reg) { + fail("Failed to find variable storage for %s\n", ir->var->name); + this->result = src_reg(brw_null_reg()); + return; + } + + this->result = src_reg(*reg); + + if (type->is_scalar() || type->is_vector() || type->is_matrix()) + this->result.swizzle = swizzle_for_size(type->vector_elements); +} + +void +vec4_visitor::visit(ir_dereference_array *ir) +{ + ir_constant *constant_index; + src_reg src; + int element_size = type_size(ir->type); + + constant_index = ir->array_index->constant_expression_value(); + + ir->array->accept(this); + src = this->result; + + if (constant_index) { + src.reg_offset += constant_index->value.i[0] * element_size; + } else { + /* Variable index array dereference. It eats the "vec4" of the + * base of the array and an index that offsets the Mesa register + * index. + */ + ir->array_index->accept(this); + + src_reg index_reg; + + if (element_size == 1) { + index_reg = this->result; + } else { + index_reg = src_reg(this, glsl_type::int_type); + + emit(BRW_OPCODE_MUL, dst_reg(index_reg), + this->result, src_reg(element_size)); + } + + if (src.reladdr) { + src_reg temp = src_reg(this, glsl_type::int_type); + + emit(BRW_OPCODE_ADD, dst_reg(temp), *src.reladdr, index_reg); + + index_reg = temp; + } + + src.reladdr = ralloc(mem_ctx, src_reg); + memcpy(src.reladdr, &index_reg, sizeof(index_reg)); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + src.swizzle = swizzle_for_size(ir->type->vector_elements); + else + src.swizzle = BRW_SWIZZLE_NOOP; + src.type = brw_type_for_base_type(ir->type); + + this->result = src; +} + +void +vec4_visitor::visit(ir_dereference_record *ir) +{ + unsigned int i; + const glsl_type *struct_type = ir->record->type; + int offset = 0; + + ir->record->accept(this); + + for (i = 0; i < struct_type->length; i++) { + if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) + break; + offset += type_size(struct_type->fields.structure[i].type); + } + + /* If the type is smaller than a vec4, replicate the last channel out. */ + if (ir->type->is_scalar() || ir->type->is_vector()) + this->result.swizzle = swizzle_for_size(ir->type->vector_elements); + else + this->result.swizzle = BRW_SWIZZLE_NOOP; + this->result.type = brw_type_for_base_type(ir->type); + + this->result.reg_offset += offset; +} + +/** + * We want to be careful in assignment setup to hit the actual storage + * instead of potentially using a temporary like we might with the + * ir_dereference handler. + */ +static dst_reg +get_assignment_lhs(ir_dereference *ir, vec4_visitor *v) +{ + /* The LHS must be a dereference. 
If the LHS is a variable indexed array
+    * access of a vector, it must be separated into a series of conditional
+    * moves before reaching this point (see ir_vec_index_to_cond_assign).
+    */
+   assert(ir->as_dereference());
+   ir_dereference_array *deref_array = ir->as_dereference_array();
+   if (deref_array) {
+      assert(!deref_array->array->type->is_vector());
+   }
+
+   /* Use the rvalue deref handler for the most part.  We'll ignore
+    * swizzles in it and write swizzles using writemask, though.
+    */
+   ir->accept(v);
+   return dst_reg(v->result);
+}
+
+void
+vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
+                              const struct glsl_type *type, bool predicated)
+{
+   if (type->base_type == GLSL_TYPE_STRUCT) {
+      for (unsigned int i = 0; i < type->length; i++) {
+         emit_block_move(dst, src, type->fields.structure[i].type, predicated);
+      }
+      return;
+   }
+
+   if (type->is_array()) {
+      for (unsigned int i = 0; i < type->length; i++) {
+         emit_block_move(dst, src, type->fields.array, predicated);
+      }
+      return;
+   }
+
+   if (type->is_matrix()) {
+      const struct glsl_type *vec_type;
+
+      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+                                         type->vector_elements, 1);
+
+      for (int i = 0; i < type->matrix_columns; i++) {
+         emit_block_move(dst, src, vec_type, predicated);
+      }
+      return;
+   }
+
+   assert(type->is_scalar() || type->is_vector());
+
+   dst->type = brw_type_for_base_type(type);
+   src->type = dst->type;
+
+   dst->writemask = (1 << type->vector_elements) - 1;
+
+   /* Do we need to worry about swizzling a swizzle? */
+   assert(src->swizzle == BRW_SWIZZLE_NOOP ||
+          src->swizzle == swizzle_for_size(type->vector_elements));
+   src->swizzle = swizzle_for_size(type->vector_elements);
+
+   vec4_instruction *inst = emit(BRW_OPCODE_MOV, *dst, *src);
+   if (predicated)
+      inst->predicate = BRW_PREDICATE_NORMAL;
+
+   dst->reg_offset++;
+   src->reg_offset++;
+}
+
+
+/* If the RHS processing resulted in an instruction generating a
+ * temporary value, and it would be easy to rewrite the instruction to
+ * generate its result right into the LHS instead, do so.  This ends
+ * up reliably removing instructions where it can be tricky to do so
+ * later without real UD chain information.
+ */
+bool
+vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
+                                     dst_reg dst,
+                                     src_reg src,
+                                     vec4_instruction *pre_rhs_inst,
+                                     vec4_instruction *last_rhs_inst)
+{
+   /* This could be supported, but it would take more smarts. */
+   if (ir->condition)
+      return false;
+
+   if (pre_rhs_inst == last_rhs_inst)
+      return false; /* No instructions generated to work with. */
+
+   /* Make sure the last instruction generated our source reg. */
+   if (src.file != GRF ||
+       src.file != last_rhs_inst->dst.file ||
+       src.reg != last_rhs_inst->dst.reg ||
+       src.reg_offset != last_rhs_inst->dst.reg_offset ||
+       src.reladdr ||
+       src.abs ||
+       src.negate ||
+       last_rhs_inst->predicate != BRW_PREDICATE_NONE)
+      return false;
+
+   /* Check that the last instruction fully initialized the channels
+    * we want to use, in the order we want to use them.  We could
+    * potentially reswizzle the operands of many instructions so that
+    * we could handle out of order channels, but don't yet.
+    */
+   for (int i = 0; i < 4; i++) {
+      if (dst.writemask & (1 << i)) {
+         if (!(last_rhs_inst->dst.writemask & (1 << i)))
+            return false;
+
+         if (BRW_GET_SWZ(src.swizzle, i) != i)
+            return false;
+      }
+   }
+
+   /* Success!  Rewrite the instruction.
*/ + last_rhs_inst->dst.file = dst.file; + last_rhs_inst->dst.reg = dst.reg; + last_rhs_inst->dst.reg_offset = dst.reg_offset; + last_rhs_inst->dst.reladdr = dst.reladdr; + last_rhs_inst->dst.writemask &= dst.writemask; + + return true; +} + +void +vec4_visitor::visit(ir_assignment *ir) +{ + dst_reg dst = get_assignment_lhs(ir->lhs, this); + + if (!ir->lhs->type->is_scalar() && + !ir->lhs->type->is_vector()) { + ir->rhs->accept(this); + src_reg src = this->result; + + if (ir->condition) { + emit_bool_to_cond_code(ir->condition); + } + + emit_block_move(&dst, &src, ir->rhs->type, ir->condition != NULL); + return; + } + + /* Now we're down to just a scalar/vector with writemasks. */ + int i; + + vec4_instruction *pre_rhs_inst, *last_rhs_inst; + pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail(); + + ir->rhs->accept(this); + + last_rhs_inst = (vec4_instruction *)this->instructions.get_tail(); + + src_reg src = this->result; + + int swizzles[4]; + int first_enabled_chan = 0; + int src_chan = 0; + + assert(ir->lhs->type->is_vector() || + ir->lhs->type->is_scalar()); + dst.writemask = ir->write_mask; + + for (int i = 0; i < 4; i++) { + if (dst.writemask & (1 << i)) { + first_enabled_chan = BRW_GET_SWZ(src.swizzle, i); + break; + } + } + + /* Swizzle a small RHS vector into the channels being written. + * + * glsl ir treats write_mask as dictating how many channels are + * present on the RHS while in our instructions we need to make + * those channels appear in the slots of the vec4 they're written to. + */ + for (int i = 0; i < 4; i++) { + if (dst.writemask & (1 << i)) + swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++); + else + swizzles[i] = first_enabled_chan; + } + src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1], + swizzles[2], swizzles[3]); + + if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) { + return; + } + + if (ir->condition) { + emit_bool_to_cond_code(ir->condition); + } + + for (i = 0; i < type_size(ir->lhs->type); i++) { + vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src); + + if (ir->condition) + inst->predicate = BRW_PREDICATE_NORMAL; + + dst.reg_offset++; + src.reg_offset++; + } +} + +void +vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir) +{ + if (ir->type->base_type == GLSL_TYPE_STRUCT) { + foreach_list(node, &ir->components) { + ir_constant *field_value = (ir_constant *)node; + + emit_constant_values(dst, field_value); + } + return; + } + + if (ir->type->is_array()) { + for (unsigned int i = 0; i < ir->type->length; i++) { + emit_constant_values(dst, ir->array_elements[i]); + } + return; + } + + if (ir->type->is_matrix()) { + for (int i = 0; i < ir->type->matrix_columns; i++) { + for (int j = 0; j < ir->type->vector_elements; j++) { + dst->writemask = 1 << j; + dst->type = BRW_REGISTER_TYPE_F; + + emit(BRW_OPCODE_MOV, *dst, + src_reg(ir->value.f[i * ir->type->vector_elements + j])); + } + dst->reg_offset++; + } + return; + } + + for (int i = 0; i < ir->type->vector_elements; i++) { + dst->writemask = 1 << i; + dst->type = brw_type_for_base_type(ir->type); + + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.f[i])); + break; + case GLSL_TYPE_INT: + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.i[i])); + break; + case GLSL_TYPE_UINT: + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.u[i])); + break; + case GLSL_TYPE_BOOL: + emit(BRW_OPCODE_MOV, *dst, src_reg(ir->value.b[i])); + break; + default: + assert(!"Non-float/uint/int/bool constant"); + break; + } + } + 
dst->reg_offset++; +} + +void +vec4_visitor::visit(ir_constant *ir) +{ + dst_reg dst = dst_reg(this, ir->type); + this->result = src_reg(dst); + + emit_constant_values(&dst, ir); +} + +void +vec4_visitor::visit(ir_call *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_texture *ir) +{ + /* FINISHME: Implement vertex texturing. + * + * With 0 vertex samplers available, the linker will reject + * programs that do vertex texturing, but after our visitor has + * run. + */ +} + +void +vec4_visitor::visit(ir_return *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_discard *ir) +{ + assert(!"not reached"); +} + +void +vec4_visitor::visit(ir_if *ir) +{ + /* Don't point the annotation at the if statement, because then it plus + * the then and else blocks get printed. + */ + this->base_ir = ir->condition; + + if (intel->gen == 6) { + emit_if_gen6(ir); + } else { + emit_bool_to_cond_code(ir->condition); + vec4_instruction *inst = emit(BRW_OPCODE_IF); + inst->predicate = BRW_PREDICATE_NORMAL; + } + + visit_instructions(&ir->then_instructions); + + if (!ir->else_instructions.is_empty()) { + this->base_ir = ir->condition; + emit(BRW_OPCODE_ELSE); + + visit_instructions(&ir->else_instructions); + } + + this->base_ir = ir->condition; + emit(BRW_OPCODE_ENDIF); +} + +int +vec4_visitor::emit_vue_header_gen4(int header_mrf) +{ + /* Get the position */ + src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]); + + /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */ + dst_reg ndc = dst_reg(this, glsl_type::vec4_type); + + current_annotation = "NDC"; + dst_reg ndc_w = ndc; + ndc_w.writemask = WRITEMASK_W; + src_reg pos_w = pos; + pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W); + emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w); + + dst_reg ndc_xyz = ndc; + ndc_xyz.writemask = WRITEMASK_XYZ; + + emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w)); + + if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || + c->key.nr_userclip || brw->has_negative_rhw_bug) { + dst_reg header1 = dst_reg(this, glsl_type::uvec4_type); + GLuint i; + + emit(BRW_OPCODE_MOV, header1, 0u); + + if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { + assert(!"finishme: psiz"); + src_reg psiz; + + header1.writemask = WRITEMASK_W; + emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11); + emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8); + } + + for (i = 0; i < c->key.nr_userclip; i++) { + vec4_instruction *inst; + + inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()), + pos, src_reg(c->userplane[i])); + inst->conditional_mod = BRW_CONDITIONAL_L; + + emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i); + inst->predicate = BRW_PREDICATE_NORMAL; + } + + /* i965 clipping workaround: + * 1) Test for -ve rhw + * 2) If set, + * set ndc = (0,0,0,0) + * set ucp[6] = 1 + * + * Later, clipping will detect ucp[6] and ensure the primitive is + * clipped against all fixed planes. 
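 *
 * The vec4 backend's version of this is still the FINISHME below; one
 * possible shape for it, reusing the emit() helpers from this file
 * (a sketch only, not the committed workaround), would be:
 *
 *    src_reg ndc_w_src = src_reg(ndc);
 *    ndc_w_src.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W,
 *                                     SWIZZLE_W, SWIZZLE_W);
 *    inst = emit(BRW_OPCODE_CMP, dst_null_f(), ndc_w_src, src_reg(0.0f));
 *    inst->conditional_mod = BRW_CONDITIONAL_L;
 *
 *    inst = emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << 6);
 *    inst->predicate = BRW_PREDICATE_NORMAL;
 *    inst = emit(BRW_OPCODE_MOV, ndc, src_reg(0.0f));
 *    inst->predicate = BRW_PREDICATE_NORMAL;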
+ */ + if (brw->has_negative_rhw_bug) { +#if 0 + /* FINISHME */ + brw_CMP(p, + vec8(brw_null_reg()), + BRW_CONDITIONAL_L, + brw_swizzle1(ndc, 3), + brw_imm_f(0)); + + brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); + brw_MOV(p, ndc, brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +#endif + } + + header1.writemask = WRITEMASK_XYZW; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1)); + } else { + emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++), + BRW_REGISTER_TYPE_UD), 0u); + } + + if (intel->gen == 5) { + /* There are 20 DWs (D0-D19) in VUE header on Ironlake: + * dword 0-3 (m1) of the header is indices, point width, clip flags. + * dword 4-7 (m2) is the ndc position (set above) + * dword 8-11 (m3) of the vertex header is the 4D space position + * dword 12-19 (m4,m5) of the vertex header is the user clip distance. + * m6 is a pad so that the vertex element data is aligned + * m7 is the first vertex data we fill. + */ + current_annotation = "NDC"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); + + current_annotation = "gl_Position"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); + + /* user clip distance. */ + header_mrf += 2; + + /* Pad so that vertex element data is aligned. */ + header_mrf++; + } else { + /* There are 8 dwords in VUE header pre-Ironlake: + * dword 0-3 (m1) is indices, point width, clip flags. + * dword 4-7 (m2) is ndc position (set above) + * + * dword 8-11 (m3) is the first vertex data. + */ + current_annotation = "NDC"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc)); + + current_annotation = "gl_Position"; + emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos); + } + + return header_mrf; +} + +int +vec4_visitor::emit_vue_header_gen6(int header_mrf) +{ + struct brw_reg reg; + + /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge: + * dword 0-3 (m2) of the header is indices, point width, clip flags. + * dword 4-7 (m3) is the 4D space position + * dword 8-15 (m4,m5) of the vertex header is the user clip distance if + * enabled. + * + * m4 or 6 is the first vertex element data we fill. + */ + + current_annotation = "indices, point width, clip flags"; + reg = brw_message_reg(header_mrf++); + emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)); + if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { + emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W), + src_reg(output_reg[VERT_RESULT_PSIZ])); + } + + current_annotation = "gl_Position"; + emit(BRW_OPCODE_MOV, + brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS])); + + current_annotation = "user clip distances"; + if (c->key.nr_userclip) { + for (int i = 0; i < c->key.nr_userclip; i++) { + struct brw_reg m; + if (i < 4) + m = brw_message_reg(header_mrf); + else + m = brw_message_reg(header_mrf + 1); + + emit(BRW_OPCODE_DP4, + dst_reg(brw_writemask(m, 1 << (i & 3))), + src_reg(c->userplane[i])); + } + header_mrf += 2; + } + + current_annotation = NULL; + + return header_mrf; +} + +static int +align_interleaved_urb_mlen(struct brw_context *brw, int mlen) +{ + struct intel_context *intel = &brw->intel; + + if (intel->gen >= 6) { + /* URB data written (does not include the message header reg) must + * be a multiple of 256 bits, or 2 VS registers. See vol5c.5, + * section 5.4.3.2.2: URB_INTERLEAVED. 
+ * + * URB entries are allocated on a multiple of 1024 bits, so an + * extra 128 bits written here to make the end align to 256 is + * no problem. + */ + if ((mlen % 2) != 1) + mlen++; + } + + return mlen; +} + +/** + * Generates the VUE payload plus the 1 or 2 URB write instructions to + * complete the VS thread. + * + * The VUE layout is documented in Volume 2a. + */ +void +vec4_visitor::emit_urb_writes() +{ + /* MRF 0 is reserved for the debugger, so start with message header + * in MRF 1. + */ + int base_mrf = 1; + int mrf = base_mrf; + int urb_entry_size; + uint64_t outputs_remaining = c->prog_data.outputs_written; + /* In the process of generating our URB write message contents, we + * may need to unspill a register or load from an array. Those + * reads would use MRFs 14-15. + */ + int max_usable_mrf = 13; + + /* FINISHME: edgeflag */ + + /* First mrf is the g0-based message header containing URB handles and such, + * which is implied in VS_OPCODE_URB_WRITE. + */ + mrf++; + + if (intel->gen >= 6) { + mrf = emit_vue_header_gen6(mrf); + } else { + mrf = emit_vue_header_gen4(mrf); + } + + /* Set up the VUE data for the first URB write */ + int attr; + for (attr = 0; attr < VERT_RESULT_MAX; attr++) { + if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) + continue; + + outputs_remaining &= ~BITFIELD64_BIT(attr); + + /* This is set up in the VUE header. */ + if (attr == VERT_RESULT_HPOS) + continue; + + /* This is loaded into the VUE header, and thus doesn't occupy + * an attribute slot. + */ + if (attr == VERT_RESULT_PSIZ) + continue; + + vec4_instruction *inst = emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), + src_reg(output_reg[attr])); + + if ((attr == VERT_RESULT_COL0 || + attr == VERT_RESULT_COL1 || + attr == VERT_RESULT_BFC0 || + attr == VERT_RESULT_BFC1) && + c->key.clamp_vertex_color) { + inst->saturate = true; + } + + /* If this was MRF 15, we can't fit anything more into this URB + * WRITE. Note that base_mrf of 1 means that MRF 15 is an + * even-numbered amount of URB write data, which will meet + * gen6's requirements for length alignment. + */ + if (mrf > max_usable_mrf) { + attr++; + break; + } + } + + vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); + inst->base_mrf = base_mrf; + inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); + inst->eot = !outputs_remaining; + + urb_entry_size = mrf - base_mrf; + + /* Optional second URB write */ + if (outputs_remaining) { + mrf = base_mrf + 1; + + for (; attr < VERT_RESULT_MAX; attr++) { + if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr))) + continue; + + assert(mrf < max_usable_mrf); + + emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr])); + } + + inst = emit(VS_OPCODE_URB_WRITE); + inst->base_mrf = base_mrf; + inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); + inst->eot = true; + /* URB destination offset. In the previous write, we got MRFs + * 2-13 minus the one header MRF, so 12 regs. URB offset is in + * URB row increments, and each of our MRFs is half of one of + * those, since we're doing interleaved writes. 
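 *
 * Worked through with the values used here: base_mrf is 1 and
 * max_usable_mrf is 13, so the first write carried 12 data MRFs,
 * and at two MRFs per URB row the second write starts at
 * (13 - 1) / 2 = 6 rows into the URB entry.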
+ */ + inst->offset = (max_usable_mrf - base_mrf) / 2; + + urb_entry_size += mrf - base_mrf; + } + + if (intel->gen == 6) + c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8; + else + c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4; +} + +src_reg +vec4_visitor::get_scratch_offset(vec4_instruction *inst, + src_reg *reladdr, int reg_offset) +{ + /* Because we store the values to scratch interleaved like our + * vertex data, we need to scale the vec4 index by 2. + */ + int message_header_scale = 2; + + /* Pre-gen6, the message header uses byte offsets instead of vec4 + * (16-byte) offset units. + */ + if (intel->gen < 6) + message_header_scale *= 16; + + if (reladdr) { + src_reg index = src_reg(this, glsl_type::int_type); + + vec4_instruction *add = emit(BRW_OPCODE_ADD, + dst_reg(index), + *reladdr, + src_reg(reg_offset)); + /* Move our new instruction from the tail to its correct place. */ + add->remove(); + inst->insert_before(add); + + vec4_instruction *mul = emit(BRW_OPCODE_MUL, dst_reg(index), + index, src_reg(message_header_scale)); + mul->remove(); + inst->insert_before(mul); + + return index; + } else { + return src_reg(reg_offset * message_header_scale); + } +} + +/** + * Emits an instruction before @inst to load the value named by @orig_src + * from scratch space at @base_offset to @temp. + */ +void +vec4_visitor::emit_scratch_read(vec4_instruction *inst, + dst_reg temp, src_reg orig_src, + int base_offset) +{ + int reg_offset = base_offset + orig_src.reg_offset; + src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset); + + vec4_instruction *scratch_read_inst = emit(VS_OPCODE_SCRATCH_READ, + temp, index); + + scratch_read_inst->base_mrf = 14; + scratch_read_inst->mlen = 1; + /* Move our instruction from the tail to its correct place. */ + scratch_read_inst->remove(); + inst->insert_before(scratch_read_inst); +} + +/** + * Emits an instruction after @inst to store the value to be written + * to @orig_dst to scratch space at @base_offset, from @temp. + */ +void +vec4_visitor::emit_scratch_write(vec4_instruction *inst, + src_reg temp, dst_reg orig_dst, + int base_offset) +{ + int reg_offset = base_offset + orig_dst.reg_offset; + src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset); + + dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0), + orig_dst.writemask)); + vec4_instruction *scratch_write_inst = emit(VS_OPCODE_SCRATCH_WRITE, + dst, temp, index); + scratch_write_inst->base_mrf = 13; + scratch_write_inst->mlen = 2; + scratch_write_inst->predicate = inst->predicate; + /* Move our instruction from the tail to its correct place. */ + scratch_write_inst->remove(); + inst->insert_after(scratch_write_inst); +} + +/** + * We can't generally support array access in GRF space, because a + * single instruction's destination can only span 2 contiguous + * registers. So, we send all GRF arrays that get variable index + * access to scratch space. + */ +void +vec4_visitor::move_grf_array_access_to_scratch() +{ + int scratch_loc[this->virtual_grf_count]; + + for (int i = 0; i < this->virtual_grf_count; i++) { + scratch_loc[i] = -1; + } + + /* First, calculate the set of virtual GRFs that need to be punted + * to scratch due to having any array access on them, and where in + * scratch. 
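 *
 * A minimal sketch of the bookkeeping, assuming 16-byte vec4 slots
 * stored interleaved (which is also why get_scratch_offset() above
 * scales the vec4 index by 2); names are illustrative:
 *
 *    scratch_bytes_per_vec4 = 2 * 16;                        (32 bytes)
 *    scratch_size = virtual_grf_sizes[reg] * scratch_bytes_per_vec4;
 *    message_offset = reg_offset * 2 * (intel->gen < 6 ? 16 : 1);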
+ */ + foreach_list(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + if (inst->dst.file == GRF && inst->dst.reladdr && + scratch_loc[inst->dst.reg] == -1) { + scratch_loc[inst->dst.reg] = c->last_scratch; + c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4; + } + + for (int i = 0 ; i < 3; i++) { + src_reg *src = &inst->src[i]; + + if (src->file == GRF && src->reladdr && + scratch_loc[src->reg] == -1) { + scratch_loc[src->reg] = c->last_scratch; + c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4; + } + } + } + + /* Now, for anything that will be accessed through scratch, rewrite + * it to load/store. Note that this is a _safe list walk, because + * we may generate a new scratch_write instruction after the one + * we're processing. + */ + foreach_list_safe(node, &this->instructions) { + vec4_instruction *inst = (vec4_instruction *)node; + + /* Set up the annotation tracking for new generated instructions. */ + base_ir = inst->ir; + current_annotation = inst->annotation; + + if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) { + src_reg temp = src_reg(this, glsl_type::vec4_type); + + emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]); + + inst->dst.file = temp.file; + inst->dst.reg = temp.reg; + inst->dst.reg_offset = temp.reg_offset; + inst->dst.reladdr = NULL; + } + + for (int i = 0 ; i < 3; i++) { + if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1) + continue; + + dst_reg temp = dst_reg(this, glsl_type::vec4_type); + + emit_scratch_read(inst, temp, inst->src[i], + scratch_loc[inst->src[i].reg]); + + inst->src[i].file = temp.file; + inst->src[i].reg = temp.reg; + inst->src[i].reg_offset = temp.reg_offset; + inst->src[i].reladdr = NULL; + } + } +} + + +vec4_visitor::vec4_visitor(struct brw_vs_compile *c, + struct gl_shader_program *prog, + struct brw_shader *shader) +{ + this->c = c; + this->p = &c->func; + this->brw = p->brw; + this->intel = &brw->intel; + this->ctx = &intel->ctx; + this->prog = prog; + this->shader = shader; + + this->mem_ctx = ralloc_context(NULL); + this->failed = false; + + this->base_ir = NULL; + this->current_annotation = NULL; + + this->c = c; + this->vp = prog->VertexProgram; + this->prog_data = &c->prog_data; + + this->variable_ht = hash_table_ctor(0, + hash_table_pointer_hash, + hash_table_pointer_compare); + + this->virtual_grf_def = NULL; + this->virtual_grf_use = NULL; + this->virtual_grf_sizes = NULL; + this->virtual_grf_count = 0; + this->virtual_grf_array_size = 0; + this->live_intervals_valid = false; + + this->uniforms = 0; + + this->variable_ht = hash_table_ctor(0, + hash_table_pointer_hash, + hash_table_pointer_compare); +} + +vec4_visitor::~vec4_visitor() +{ + ralloc_free(this->mem_ctx); + hash_table_dtor(this->variable_ht); +} + + +void +vec4_visitor::fail(const char *format, ...) 
+{ + va_list va; + char *msg; + + if (failed) + return; + + failed = true; + + va_start(va, format); + msg = ralloc_vasprintf(mem_ctx, format, va); + va_end(va); + msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg); + + this->fail_msg = msg; + + if (INTEL_DEBUG & DEBUG_VS) { + fprintf(stderr, "%s", msg); + } +} + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index a9ad5311fe3..3373e707d98 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -30,6 +30,7 @@ */ +#include "main/compiler.h" #include "brw_context.h" #include "brw_vs.h" #include "brw_util.h" @@ -39,17 +40,21 @@ #include "../glsl/ralloc.h" -static void do_vs_prog( struct brw_context *brw, - struct brw_vertex_program *vp, - struct brw_vs_prog_key *key ) +static bool +do_vs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key) { struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; GLuint program_size; const GLuint *program; struct brw_vs_compile c; void *mem_ctx; int aux_size; int i; + static int new_vs = -1; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); @@ -85,7 +90,25 @@ static void do_vs_prog( struct brw_context *brw, /* Emit GEN4 code. */ - brw_vs_emit(&c); + if (new_vs == -1) + new_vs = getenv("INTEL_NEW_VS") != NULL; + + if (new_vs && prog) { + if (!brw_vs_emit(prog, &c)) { + ralloc_free(mem_ctx); + return false; + } + } else { + brw_old_vs_emit(&c); + } + + /* Scratch space is used for register spilling */ + if (c.last_scratch) { + c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch); + + brw_get_scratch_bo(intel, &brw->vs.scratch_bo, + c.prog_data.total_scratch * brw->vs_max_threads); + } /* get the program */ @@ -111,6 +134,8 @@ static void do_vs_prog( struct brw_context *brw, &c.prog_data, aux_size, &brw->vs.prog_offset, &brw->vs.prog_data); ralloc_free(mem_ctx); + + return true; } @@ -155,13 +180,15 @@ static void brw_upload_vs_prog(struct brw_context *brw) if (!brw_search_cache(&brw->cache, BRW_VS_PROG, &key, sizeof(key), &brw->vs.prog_offset, &brw->vs.prog_data)) { - do_vs_prog(brw, vp, &key); + bool success = do_vs_prog(brw, ctx->Shader.CurrentVertexProgram, + vp, &key); + + assert(success); } brw->vs.constant_map = ((int8_t *)brw->vs.prog_data + sizeof(*brw->vs.prog_data)); } - /* See brw_vs.c: */ const struct brw_tracked_state brw_vs_prog = { @@ -174,3 +201,30 @@ const struct brw_tracked_state brw_vs_prog = { }, .prepare = brw_upload_vs_prog }; + +bool +brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_vs_prog_key key; + struct gl_vertex_program *vp = prog->VertexProgram; + struct brw_vertex_program *bvp = brw_vertex_program(vp); + uint32_t old_prog_offset = brw->vs.prog_offset; + struct brw_vs_prog_data *old_prog_data = brw->vs.prog_data; + bool success; + + if (!vp) + return true; + + memset(&key, 0, sizeof(key)); + + key.program_string_id = bvp->id; + key.clamp_vertex_color = true; + + success = do_vs_prog(brw, prog, bvp, &key); + + brw->vs.prog_offset = old_prog_offset; + brw->vs.prog_data = old_prog_data; + + return success; +} diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 432994a8534..beccb381ee2 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -66,6 +66,7 @@ struct brw_vs_compile { GLuint 
first_output; GLuint nr_outputs; GLuint first_overflow_output; /**< VERT_ATTRIB_x */ + GLuint last_scratch; GLuint first_tmp; GLuint last_tmp; @@ -92,6 +93,8 @@ struct brw_vs_compile { GLboolean needs_stack; }; -void brw_vs_emit( struct brw_vs_compile *c ); +bool brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c); +void brw_old_vs_emit(struct brw_vs_compile *c); +bool brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog); #endif diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c index 9fdfebe9f76..47cc0a7da7a 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_constval.c +++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c @@ -194,19 +194,11 @@ static void calc_wm_input_sizes( struct brw_context *brw ) /* BRW_NEW_VERTEX_PROGRAM */ const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); - /* BRW_NEW_FRAGMENT_PROGRAM */ - struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram; /* BRW_NEW_INPUT_DIMENSIONS */ struct tracker t; GLuint insn; GLuint i; - /* If we're going to go through brw_fs.cpp, we don't end up using - * brw->wm.input_size_masks. - */ - if (prog && prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) - return; - memset(&t, 0, sizeof(t)); /* _NEW_LIGHT */ @@ -246,9 +238,7 @@ static void calc_wm_input_sizes( struct brw_context *brw ) const struct brw_tracked_state brw_wm_input_sizes = { .dirty = { .mesa = _NEW_LIGHT, - .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_INPUT_DIMENSIONS), + .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS, .cache = 0 }, .prepare = calc_wm_input_sizes diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 9d733344a26..bfee811e13d 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1096,31 +1096,6 @@ static void emit_lrp_noalias(struct brw_vs_compile *c, brw_MAC(p, dst, arg0, arg1); } -/** 3 or 4-component vector normalization */ -static void emit_nrm( struct brw_vs_compile *c, - struct brw_reg dst, - struct brw_reg arg0, - int num_comps) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp = get_tmp(c); - - /* tmp = dot(arg0, arg0) */ - if (num_comps == 3) - brw_DP3(p, tmp, arg0, arg0); - else - brw_DP4(p, tmp, arg0, arg0); - - /* tmp = 1 / sqrt(tmp) */ - emit_math1(c, BRW_MATH_FUNCTION_RSQ, tmp, tmp, BRW_MATH_PRECISION_FULL); - - /* dst = arg0 * tmp */ - brw_MUL(p, dst, arg0, tmp); - - release_tmp(c, tmp); -} - - static struct brw_reg get_constant(struct brw_vs_compile *c, const struct prog_instruction *inst, @@ -1359,7 +1334,7 @@ get_src_reg( struct brw_vs_compile *c, if (component >= 0) { params = c->vp->program.Base.Parameters; - f = params->ParameterValues[src->Index][component]; + f = params->ParameterValues[src->Index][component].f; if (src->Abs) f = fabs(f); @@ -1821,6 +1796,9 @@ accumulator_contains(struct brw_vs_compile *c, struct brw_reg val) if (val.address_mode != BRW_ADDRESS_DIRECT) return GL_FALSE; + if (val.negate || val.abs) + return GL_FALSE; + switch (prev_insn->header.opcode) { case BRW_OPCODE_MOV: case BRW_OPCODE_MAC: @@ -1900,7 +1878,7 @@ brw_vs_rescale_gl_fixed(struct brw_vs_compile *c) /* Emit the vertex program instructions here. 
*/ -void brw_vs_emit(struct brw_vs_compile *c ) +void brw_old_vs_emit(struct brw_vs_compile *c ) { #define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32 @@ -1980,9 +1958,22 @@ void brw_vs_emit(struct brw_vs_compile *c ) const struct prog_src_register *src = &inst->SrcReg[i]; index = src->Index; file = src->File; - if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) - args[i] = c->output_regs[index].reg; - else + if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) { + /* Can't just make get_arg "do the right thing" here because + * other callers of get_arg and get_src_reg don't expect any + * special behavior for the c->output_regs[index].used_in_src + * case. + */ + args[i] = c->output_regs[index].reg; + args[i].dw1.bits.swizzle = + BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0), + GET_SWZ(src->Swizzle, 1), + GET_SWZ(src->Swizzle, 2), + GET_SWZ(src->Swizzle, 3)); + + /* Note this is ok for non-swizzle ARB_vp instructions */ + args[i].negate = src->Negate ? 1 : 0; + } else args[i] = get_arg(c, inst, i); } @@ -1993,7 +1984,11 @@ void brw_vs_emit(struct brw_vs_compile *c ) index = inst->DstReg.Index; file = inst->DstReg.File; if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) - dst = c->output_regs[index].reg; + /* Can't just make get_dst "do the right thing" here because other + * callers of get_dst don't expect any special behavior for the + * c->output_regs[index].used_in_src case. + */ + dst = brw_writemask(c->output_regs[index].reg, inst->DstReg.WriteMask); else dst = get_dst(c, inst->DstReg); @@ -2025,12 +2020,6 @@ void brw_vs_emit(struct brw_vs_compile *c ) case OPCODE_DPH: brw_DPH(p, dst, args[0], args[1]); break; - case OPCODE_NRM3: - emit_nrm(c, dst, args[0], 3); - break; - case OPCODE_NRM4: - emit_nrm(c, dst, args[0], 4); - break; case OPCODE_DST: unalias2(c, dst, args[0], args[1], emit_dst_noalias); break; diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index fc4373ab311..29b3e47ab0c 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -77,6 +77,16 @@ brw_prepare_vs_unit(struct brw_context *brw) else vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces; + if (brw->vs.prog_data->total_scratch != 0) { + vs->thread2.scratch_space_base_pointer = + brw->vs.scratch_bo->offset >> 10; /* reloc */ + vs->thread2.per_thread_scratch_space = + ffs(brw->vs.prog_data->total_scratch) - 11; + } else { + vs->thread2.scratch_space_base_pointer = 0; + vs->thread2.per_thread_scratch_space = 0; + } + vs->thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; vs->thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; vs->thread3.dispatch_grf_start_reg = 1; diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 55dbd4fa8b0..40360b23fff 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -213,6 +213,7 @@ static void brw_new_batch( struct intel_context *intel ) brw->state_batch_count = 0; brw->vb.nr_current_buffers = 0; + brw->ib.type = -1; /* Mark that the current program cache BO has been used by the GPU. 
* It will be reallocated if we need to put new programs in for the diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index b0dfdd536aa..e76832515fe 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -206,10 +206,6 @@ bool do_wm_prog(struct brw_context *brw, */ return false; } - c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN); - c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN); - c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG); - c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF); } else { void *instruction = c->instruction; void *prog_instructions = c->prog_instructions; @@ -232,6 +228,13 @@ bool do_wm_prog(struct brw_context *brw, if (!brw_wm_fs_emit(brw, c, prog)) return false; } else { + if (!c->instruction) { + c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN); + c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN); + c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG); + c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF); + } + /* Fallback for fixed function and ARB_fp shaders. */ c->dispatch_width = 16; brw_wm_payload_setup(brw, c); @@ -241,29 +244,10 @@ bool do_wm_prog(struct brw_context *brw, /* Scratch space is used for register spilling */ if (c->last_scratch) { - uint32_t total_scratch; - - /* Per-thread scratch space is power-of-two sized. */ - for (c->prog_data.total_scratch = 1024; - c->prog_data.total_scratch <= c->last_scratch; - c->prog_data.total_scratch *= 2) { - /* empty */ - } - total_scratch = c->prog_data.total_scratch * brw->wm_max_threads; + c->prog_data.total_scratch = brw_get_scratch_size(c->last_scratch); - if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) { - drm_intel_bo_unreference(brw->wm.scratch_bo); - brw->wm.scratch_bo = NULL; - } - if (brw->wm.scratch_bo == NULL) { - brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr, - "wm scratch", - total_scratch, - 4096); - } - } - else { - c->prog_data.total_scratch = 0; + brw_get_scratch_bo(intel, &brw->wm.scratch_bo, + c->prog_data.total_scratch * brw->wm_max_threads); } if (unlikely(INTEL_DEBUG & DEBUG_WM)) diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index f61757a8cac..6ea4a7d6e50 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1094,9 +1094,16 @@ void emit_tex(struct brw_wm_compile *c, if (intel->gen < 5 && c->dispatch_width == 8) nr_texcoords = 3; - /* For shadow comparisons, we have to supply u,v,r. */ - if (shadow) - nr_texcoords = 3; + if (shadow) { + if (intel->gen < 7) { + /* For shadow comparisons, we have to supply u,v,r. */ + nr_texcoords = 3; + } else { + /* On Ivybridge, the shadow comparitor comes first. Just load it. */ + brw_MOV(p, brw_message_reg(cur_mrf), arg[2]); + cur_mrf += mrf_per_channel; + } + } /* Emit the texcoords. */ for (i = 0; i < nr_texcoords; i++) { @@ -1113,7 +1120,7 @@ void emit_tex(struct brw_wm_compile *c, } /* Fill in the shadow comparison reference value. */ - if (shadow) { + if (shadow && intel->gen < 7) { if (intel->gen >= 5) { /* Fill in the cube map array index value. 
*/ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 7cd3edad235..bd46bd8de43 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -535,15 +535,15 @@ static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, GLfloat s3) { struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; - GLfloat values[4]; + gl_constant_value values[4]; GLuint idx; GLuint swizzle; struct prog_src_register reg; - values[0] = s0; - values[1] = s1; - values[2] = s2; - values[3] = s3; + values[0].f = s0; + values[1].f = s1; + values[2].f = s2; + values[3].f = s3; idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle ); reg = src_reg(PROGRAM_STATE_VAR, idx); @@ -664,6 +664,8 @@ static void precalc_lit( struct brw_wm_compile *c, static void precalc_tex( struct brw_wm_compile *c, const struct prog_instruction *inst ) { + struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; struct prog_src_register coord; struct prog_dst_register tmpcoord = { 0 }; const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; @@ -727,7 +729,7 @@ static void precalc_tex( struct brw_wm_compile *c, release_temp(c, tmp0); release_temp(c, tmp1); } - else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) { + else if (intel->gen < 6 && inst->TexSrcTarget == TEXTURE_RECT_INDEX) { struct prog_src_register scale = search_or_add_param5( c, STATE_INTERNAL, diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index f78bdc31866..ccf9dc2bc18 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -205,14 +205,14 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, case PROGRAM_CONSTANT: /* These are invarient: */ - ref = get_const_ref(c, &plist->ParameterValues[idx][component]); + ref = get_const_ref(c, &plist->ParameterValues[idx][component].f); break; case PROGRAM_STATE_VAR: case PROGRAM_UNIFORM: /* These may change from run to run: */ - ref = get_param_ref(c, &plist->ParameterValues[idx][component] ); + ref = get_param_ref(c, &plist->ParameterValues[idx][component].f ); break; default: diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 98146136703..6834ebad780 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -289,6 +289,13 @@ static void brw_update_sampler_state(struct brw_context *brw, sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 6); sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 6); + /* On Gen6+, the sampler can handle non-normalized texture + * rectangle coordinates natively + */ + if (intel->gen >= 6 && texObj->Target == GL_TEXTURE_RECTANGLE) { + sampler->ss3.non_normalized_coord = 1; + } + upload_default_color(brw, gl_sampler, unit); if (intel->gen >= 6) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index fb4fb146f8d..ad909789d82 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -342,7 +342,7 @@ prepare_wm_pull_constants(struct brw_context *brw) constants = brw->wm.const_bo->virtual; for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) { constants[i] = 
convert_param(brw->wm.prog_data->pull_param_convert[i], - *brw->wm.prog_data->pull_param[i]); + brw->wm.prog_data->pull_param[i]); } drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo); diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index fb4cdbaadf9..b94121e8437 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -81,12 +81,21 @@ gen6_prepare_vs_push_constants(struct brw_context *brw) params_uploaded++; } - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - memcpy(param + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); - params_uploaded++; + if (brw->vs.prog_data->uses_new_param_layout) { + for (i = 0; i < brw->vs.prog_data->nr_params; i++) { + *param = convert_param(brw->vs.prog_data->param_convert[i], + brw->vs.prog_data->param[i]); + param++; + } + params_uploaded += brw->vs.prog_data->nr_params / 4; + } else { + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + memcpy(param + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + params_uploaded++; + } } } @@ -151,7 +160,15 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | GEN6_VS_FLOATING_POINT_MODE_ALT | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - OUT_BATCH(0); /* scratch space base offset */ + + if (brw->vs.prog_data->total_scratch) { + OUT_RELOC(brw->vs.scratch_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + ffs(brw->vs.prog_data->total_scratch) - 11); + } else { + OUT_BATCH(0); + } + OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); @@ -160,6 +177,32 @@ upload_vs_state(struct brw_context *brw) GEN6_VS_STATISTICS_ENABLE | GEN6_VS_ENABLE); ADVANCE_BATCH(); + + /* Based on my reading of the simulator, the VS constants don't get + * pulled into the VS FF unit until an appropriate pipeline flush + * happens, and instead the 3DSTATE_CONSTANT_VS packet just adds + * references to them into a little FIFO. The flushes are common, + * but don't reliably happen between this and a 3DPRIMITIVE, causing + * the primitive to use the wrong constants. Then the FIFO + * containing the constant setup gets added to again on the next + * constants change, and eventually when a flush does happen the + * unit is overwhelmed by constant changes and dies. + * + * To avoid this, send a PIPE_CONTROL down the line that will + * update the unit immediately loading the constants. The flush + * type bits here were those set by the STATE_BASE_ADDRESS whose + * move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the + * bug reports that led to this workaround, and may be more than + * what is strictly required to avoid the issue. 
+ */ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_INSTRUCTION_FLUSH | + PIPE_CONTROL_STATE_CACHE_INVALIDATE); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); } const struct brw_tracked_state gen6_vs_state = { diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 185da9c355f..07e9995f53b 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -54,14 +54,14 @@ gen6_prepare_wm_push_constants(struct brw_context *brw) float *constants; unsigned int i; - constants = brw_state_batch(brw, AUB_TRACE_NO_TYPE, + constants = brw_state_batch(brw, AUB_TRACE_WM_CONSTANTS, brw->wm.prog_data->nr_params * sizeof(float), 32, &brw->wm.push_const_offset); for (i = 0; i < brw->wm.prog_data->nr_params; i++) { constants[i] = convert_param(brw->wm.prog_data->param_convert[i], - *brw->wm.prog_data->param[i]); + brw->wm.prog_data->param[i]); } if (0) { diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c index e787c21f4d1..aee67c87472 100644 --- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c @@ -157,6 +157,13 @@ gen7_update_sampler_state(struct brw_context *brw, int unit, sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 8); sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 8); + /* The sampler can handle non-normalized texture rectangle coordinates + * natively + */ + if (texObj->Target == GL_TEXTURE_RECTANGLE) { + sampler->ss3.non_normalized_coord = 1; + } + upload_default_color(brw, gl_sampler, unit); sampler->ss2.default_color_pointer = brw->wm.sdc_offset[unit] >> 5; diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index 0fad3d2fb68..f3cd5d15bf0 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -71,7 +71,15 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | GEN6_VS_FLOATING_POINT_MODE_ALT | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - OUT_BATCH(0); /* scratch space base offset */ + + if (brw->vs.prog_data->total_scratch) { + OUT_RELOC(brw->vs.scratch_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + ffs(brw->vs.prog_data->total_scratch) - 11); + } else { + OUT_BATCH(0); + } + OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index a102ca772b3..55a603e887a 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -58,7 +58,7 @@ gen7_prepare_wm_constants(struct brw_context *brw) for (i = 0; i < brw->wm.prog_data->nr_params; i++) { constants[i] = convert_param(brw->wm.prog_data->param_convert[i], - *brw->wm.prog_data->param[i]); + brw->wm.prog_data->param[i]); } if (0) { @@ -228,7 +228,13 @@ upload_ps_state(struct brw_context *brw) OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); OUT_BATCH(brw->wm.prog_offset); OUT_BATCH(dw2); - OUT_BATCH(0); /* scratch space base offset */ + if (brw->wm.prog_data->total_scratch) { + OUT_RELOC(brw->wm.scratch_bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + ffs(brw->wm.prog_data->total_scratch) - 11); + } else { + 
OUT_BATCH(0); + } OUT_BATCH(dw4); OUT_BATCH(dw5); OUT_BATCH(0); /* kernel 1 pointer */ diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index b61a2ffef19..db4343be10c 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -308,12 +308,29 @@ emit: * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. * - * XXX: There is also a workaround that would appear to apply to this - * workaround, but it doesn't appear to be necessary so far: + * And the workaround for these two requires this workaround first: * - * Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent * BEFORE the pipe-control with a post-sync op and no write-cache * flushes. + * + * And this last workaround is tricky because of the requirements on + * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM + * volume 2 part 1: + * + * "1 of the following must also be set: + * - Render Target Cache Flush Enable ([12] of DW1) + * - Depth Cache Flush Enable ([0] of DW1) + * - Stall at Pixel Scoreboard ([1] of DW1) + * - Depth Stall ([13] of DW1) + * - Post-Sync Operation ([13] of DW1) + * - Notify Enable ([8] of DW1)" + * + * The cache flushes require the workaround flush that triggered this + * one, so we can't use it. Depth stall would trigger the same. + * Post-sync nonzero is what triggered this second workaround, so we + * can't use that one either. Notify enable is IRQs, which aren't + * really our business. That leaves only stall at scoreboard. */ void intel_emit_post_sync_nonzero_flush(struct intel_context *intel) @@ -323,9 +340,17 @@ intel_emit_post_sync_nonzero_flush(struct intel_context *intel) BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL); OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); OUT_RELOC(intel->batch.workaround_bo, - I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT, 0); + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); OUT_BATCH(0); /* write data */ ADVANCE_BATCH(); @@ -365,6 +390,7 @@ intel_batchbuffer_emit_mi_flush(struct intel_context *intel) OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH | PIPE_CONTROL_WRITE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_TC_FLUSH | PIPE_CONTROL_NO_WRITE); OUT_BATCH(0); /* write address */ OUT_BATCH(0); /* write data */ diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 30be1b9382f..b18dd2922d9 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -541,8 +541,8 @@ intel_set_teximage_alpha_to_one(struct gl_context *ctx, /* get dest x/y in destination texture */ intel_miptree_get_image_offset(intel_image->mt, - intel_image->level, - intel_image->face, + intel_image->base.Level, + intel_image->base.Face, 0, &image_x, &image_y); diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 439d6fc8247..d908975fc87 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -41,8 +41,7 @@ #include "intel_regions.h" static GLboolean -intel_bufferobj_unmap(struct gl_context * 
ctx, - GLenum target, struct gl_buffer_object *obj); +intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj); /** Allocates a new drm_intel_bo to store the data for the buffer object. */ static void @@ -122,7 +121,7 @@ intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj) * (though it does if you call glDeleteBuffers) */ if (obj->Pointer) - intel_bufferobj_unmap(ctx, 0, obj); + intel_bufferobj_unmap(ctx, obj); free(intel_obj->sys_buffer); if (intel_obj->region) { @@ -203,7 +202,6 @@ intel_bufferobj_data(struct gl_context * ctx, */ static void intel_bufferobj_subdata(struct gl_context * ctx, - GLenum target, GLintptrARB offset, GLsizeiptrARB size, const GLvoid * data, struct gl_buffer_object *obj) @@ -276,82 +274,28 @@ intel_bufferobj_subdata(struct gl_context * ctx, */ static void intel_bufferobj_get_subdata(struct gl_context * ctx, - GLenum target, GLintptrARB offset, GLsizeiptrARB size, GLvoid * data, struct gl_buffer_object *obj) { struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + struct intel_context *intel = intel_context(ctx); assert(intel_obj); if (intel_obj->sys_buffer) memcpy(data, (char *)intel_obj->sys_buffer + offset, size); - else - drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data); -} - - - -/** - * Called via glMapBufferARB(). - */ -static void * -intel_bufferobj_map(struct gl_context * ctx, - GLenum target, - GLenum access, struct gl_buffer_object *obj) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - GLboolean read_only = (access == GL_READ_ONLY_ARB); - GLboolean write_only = (access == GL_WRITE_ONLY_ARB); - - assert(intel_obj); - - if (intel_obj->sys_buffer) { - if (!read_only && intel_obj->source) { - release_buffer(intel_obj); - } - - if (!intel_obj->buffer || intel_obj->source) { - obj->Pointer = intel_obj->sys_buffer; - obj->Length = obj->Size; - obj->Offset = 0; - return obj->Pointer; + else { + if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) { + intel_batchbuffer_flush(intel); } - - free(intel_obj->sys_buffer); - intel_obj->sys_buffer = NULL; - } - - /* Flush any existing batchbuffer that might reference this data. */ - if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) - intel_flush(ctx); - - if (intel_obj->region) - intel_bufferobj_cow(intel, intel_obj); - - if (intel_obj->buffer == NULL) { - obj->Pointer = NULL; - return NULL; - } - - if (write_only) { - drm_intel_gem_bo_map_gtt(intel_obj->buffer); - intel_obj->mapped_gtt = GL_TRUE; - } else { - drm_intel_bo_map(intel_obj->buffer, !read_only); - intel_obj->mapped_gtt = GL_FALSE; + drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data); } +} - obj->Pointer = intel_obj->buffer->virtual; - obj->Length = obj->Size; - obj->Offset = 0; - return obj->Pointer; -} /** - * Called via glMapBufferRange(). + * Called via glMapBufferRange and glMapBuffer * * The goal of this extension is to allow apps to accumulate their rendering * at the same time as they accumulate their buffer object. 
Without it, @@ -368,12 +312,11 @@ intel_bufferobj_map(struct gl_context * ctx, */ static void * intel_bufferobj_map_range(struct gl_context * ctx, - GLenum target, GLintptr offset, GLsizeiptr length, + GLintptr offset, GLsizeiptr length, GLbitfield access, struct gl_buffer_object *obj) { struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - GLboolean read_only = (access == GL_READ_ONLY_ARB); assert(intel_obj); @@ -385,6 +328,9 @@ intel_bufferobj_map_range(struct gl_context * ctx, obj->AccessFlags = access; if (intel_obj->sys_buffer) { + const bool read_only = + (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_READ_BIT; + if (!read_only && intel_obj->source) release_buffer(intel_obj); @@ -468,7 +414,7 @@ intel_bufferobj_map_range(struct gl_context * ctx, * would defeat the point. */ static void -intel_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target, +intel_bufferobj_flush_mapped_range(struct gl_context *ctx, GLintptr offset, GLsizeiptr length, struct gl_buffer_object *obj) { @@ -502,8 +448,7 @@ intel_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target, * Called via glUnmapBuffer(). */ static GLboolean -intel_bufferobj_unmap(struct gl_context * ctx, - GLenum target, struct gl_buffer_object *obj) +intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj) { struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); @@ -758,23 +703,23 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx, * not overlap. */ if (src == dst) { - char *ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER, - GL_READ_WRITE, dst); + char *ptr = intel_bufferobj_map_range(ctx, 0, dst->Size, + GL_MAP_READ_BIT, dst); memmove(ptr + write_offset, ptr + read_offset, size); - intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst); + intel_bufferobj_unmap(ctx, dst); } else { const char *src_ptr; char *dst_ptr; - src_ptr = intel_bufferobj_map(ctx, GL_COPY_READ_BUFFER, - GL_READ_ONLY, src); - dst_ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER, - GL_WRITE_ONLY, dst); + src_ptr = intel_bufferobj_map_range(ctx, 0, src->Size, + GL_MAP_READ_BIT, src); + dst_ptr = intel_bufferobj_map_range(ctx, 0, dst->Size, + GL_MAP_WRITE_BIT, dst); memcpy(dst_ptr + write_offset, src_ptr + read_offset, size); - intel_bufferobj_unmap(ctx, GL_COPY_READ_BUFFER, src); - intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst); + intel_bufferobj_unmap(ctx, src); + intel_bufferobj_unmap(ctx, dst); } return; } @@ -924,7 +869,6 @@ intelInitBufferObjectFuncs(struct dd_function_table *functions) functions->BufferData = intel_bufferobj_data; functions->BufferSubData = intel_bufferobj_subdata; functions->GetBufferSubData = intel_bufferobj_get_subdata; - functions->MapBuffer = intel_bufferobj_map; functions->MapBufferRange = intel_bufferobj_map_range; functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range; functions->UnmapBuffer = intel_bufferobj_unmap; diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index dfca03c14bf..76d33f9b37e 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -116,13 +116,13 @@ intelClear(struct gl_context *ctx, GLbitfield mask) } /* HW color buffers (front, back, aux, generic FBO, etc) */ - if (colorMask == ~0) { + if (intel->gen < 6 && colorMask == ~0) { /* clear all R,G,B,A */ blit_mask |= (mask & BUFFER_BITS_COLOR); } else { /* glColorMask 
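/*
 * Sketch (assumed helper, not in the patch) restating the clear-path choice
 * made in intelClear() above: the blitter writes all channels at once, so a
 * blit clear is only usable when the color mask is wide open, and on gen6+
 * color clears take the tri/meta path regardless.
 */
static GLbitfield
color_bits_clearable_by_blit(const struct intel_context *intel,
                             GLbitfield mask, GLuint colorMask)
{
   if (intel->gen < 6 && colorMask == ~0u)
      return mask & BUFFER_BITS_COLOR;

   return 0;   /* everything else is cleared with triangles */
}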
in effect */ - tri_mask |= (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT)); + tri_mask |= (mask & BUFFER_BITS_COLOR); } /* Make sure we have up to date buffers before we start looking at @@ -143,6 +143,12 @@ intelClear(struct gl_context *ctx, GLbitfield mask) */ tri_mask |= BUFFER_BIT_STENCIL; } + else if (intel->has_separate_stencil && + stencilRegion->tiling == I915_TILING_NONE) { + /* The stencil buffer is actually W tiled, which the hardware + * cannot blit to. */ + tri_mask |= BUFFER_BIT_STENCIL; + } else { /* clearing all stencil bits, use blitting */ blit_mask |= BUFFER_BIT_STENCIL; @@ -182,7 +188,10 @@ intelClear(struct gl_context *ctx, GLbitfield mask) if (tri_mask) { debug_mask("tri", tri_mask); - _mesa_meta_Clear(&intel->ctx, tri_mask); + if (ctx->Extensions.ARB_fragment_shader) + _mesa_meta_glsl_Clear(&intel->ctx, tri_mask); + else + _mesa_meta_Clear(&intel->ctx, tri_mask); } } diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 2ba13632569..14342ef6246 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -1439,7 +1439,12 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, assert(stencil_rb->Base.Format == MESA_FORMAT_S8); assert(depth_rb && depth_rb->Base.Format == MESA_FORMAT_X8_Z24); - if (stencil_rb->region->tiling == I915_TILING_Y) { + if (stencil_rb->region->tiling == I915_TILING_NONE) { + /* + * The stencil buffer is actually W tiled. The region's tiling is + * I915_TILING_NONE, however, because the GTT is incapable of W + * fencing. + */ intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_TRUE; return; } else { @@ -1449,6 +1454,13 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, * a combined depth/stencil buffer. Discard the hiz buffer too. */ intel->intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_FALSE; + if (intel->must_use_separate_stencil) { + _mesa_problem(&intel->ctx, + "intel_context requires separate stencil, but the " + "DRIscreen does not support it. You may need to " + "upgrade the Intel X driver to 2.16.0"); + abort(); + } /* 1. Discard depth and stencil renderbuffers. */ _mesa_remove_renderbuffer(fb, BUFFER_DEPTH); @@ -1527,7 +1539,7 @@ intel_verify_dri2_has_hiz(struct intel_context *intel, * Presently, however, no verification or clean up is necessary, and * execution should not reach here. If the framebuffer still has a hiz * region, then we have already set dri2_has_hiz to true after - * confirming above that the stencil buffer is Y tiled. + * confirming above that the stencil buffer is W tiled. */ assert(0); } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 55bcc757873..754f9f202d1 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -173,6 +173,9 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer if (irb->Base.Format == MESA_FORMAT_S8) { /* + * The stencil buffer is W tiled. However, we request from the kernel a + * non-tiled buffer because the GTT is incapable of W fencing. + * * The stencil buffer has quirky pitch requirements. From Vol 2a, * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch": * The pitch must be set to 2x the value computed based on width, as @@ -180,14 +183,13 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer * To accomplish this, we resort to the nasty hack of doubling the drm * region's cpp and halving its height. 
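/*
 * Sketch (hypothetical helper) of the separate-stencil special case added to
 * intelClear() above: the stencil buffer is W tiled, but because the GTT
 * cannot W-fence it the region is allocated and reported as
 * I915_TILING_NONE, and the BLT engine cannot address W tiles, so such
 * clears must take the tri path.
 */
static bool
stencil_clear_needs_tri_path(const struct intel_context *intel,
                             const struct intel_region *stencil_region)
{
   return intel->has_separate_stencil &&
          stencil_region->tiling == I915_TILING_NONE;
}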
* - * If we neglect to double the pitch, then drm_intel_gem_bo_map_gtt() - * maps the memory incorrectly. + * If we neglect to double the pitch, then render corruption occurs. */ irb->region = intel_region_alloc(intel->intelScreen, - I915_TILING_Y, + I915_TILING_NONE, cpp * 2, - width, - height / 2, + ALIGN(width, 64), + ALIGN((height + 1) / 2, 64), GL_TRUE); if (!irb->region) return false; @@ -594,17 +596,15 @@ intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb, struct intel_texture_image *intel_image, int zoffset) { - struct intel_mipmap_tree *mt = intel_image->mt; unsigned int dst_x, dst_y; /* compute offset of the particular 2D image within the texture region */ intel_miptree_get_image_offset(intel_image->mt, - intel_image->level, - intel_image->face, + intel_image->base.Level, + intel_image->base.Face, zoffset, &dst_x, &dst_y); - irb->draw_offset = (dst_y * mt->region->pitch + dst_x) * mt->cpp; irb->draw_x = dst_x; irb->draw_y = dst_y; } @@ -645,6 +645,22 @@ intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb, } } +#ifndef I915 +static bool +need_tile_offset_workaround(struct brw_context *brw, + struct intel_renderbuffer *irb) +{ + uint32_t tile_x, tile_y; + + if (brw->has_surface_tile_offset) + return false; + + intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y); + + return tile_x != 0 || tile_y != 0; +} +#endif + /** * Called by glFramebufferTexture[123]DEXT() (and other places) to * prepare for rendering into texture memory. This might be called @@ -698,8 +714,7 @@ intel_render_texture(struct gl_context * ctx, intel_image->used_as_render_target = GL_TRUE; #ifndef I915 - if (!brw_context(ctx)->has_surface_tile_offset && - (irb->draw_offset & 4095) != 0) { + if (need_tile_offset_workaround(brw_context(ctx), irb)) { /* Original gen4 hardware couldn't draw to a non-tile-aligned * destination in a miptree unless you actually setup your * renderbuffer as a miptree and used the fragile @@ -713,8 +728,8 @@ intel_render_texture(struct gl_context * ctx, new_mt = intel_miptree_create(intel, image->TexObject->Target, intel_image->base.TexFormat, - intel_image->level, - intel_image->level, + intel_image->base.Level, + intel_image->base.Level, intel_image->base.Width, intel_image->base.Height, intel_image->base.Depth, @@ -722,8 +737,8 @@ intel_render_texture(struct gl_context * ctx, intel_miptree_image_copy(intel, new_mt, - intel_image->face, - intel_image->level, + intel_image->base.Face, + intel_image->base.Level, old_mt); intel_miptree_release(intel, &intel_image->mt); diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index f7f99a4f00c..2487994fde5 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -58,7 +58,6 @@ struct intel_renderbuffer /** \} */ - GLuint draw_offset; /**< Offset of drawing address within the region */ GLuint draw_x, draw_y; /**< Offset of drawing within the region */ }; diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index 4e711de1ce1..f36240d7f1d 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -227,7 +227,7 @@ intel_miptree_match_image(struct intel_mipmap_tree *mt, struct gl_texture_image *image) { struct intel_texture_image *intelImage = intel_texture_image(image); - GLuint level = intelImage->level; + GLuint level = intelImage->base.Level; /* Images with borders are never pulled into mipmap trees. 
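/*
 * Worked example (standalone, illustrative only) of the S8 allocation math
 * used in intel_alloc_renderbuffer_storage() above: the W-tiled stencil
 * buffer is carried in an untiled region with doubled cpp and halved height,
 * with both dimensions rounded up to multiples of 64 as in the hunk above.
 */
#include <stdio.h>

#define ALIGN(value, alignment) (((value) + (alignment) - 1) & ~((alignment) - 1))

int
main(void)
{
   unsigned width = 100, height = 100, cpp = 1;       /* a 100x100 S8 renderbuffer */

   unsigned region_cpp    = cpp * 2;                      /* 2   */
   unsigned region_width  = ALIGN(width, 64);             /* 128 */
   unsigned region_height = ALIGN((height + 1) / 2, 64);  /* 64  */

   printf("region: cpp=%u width=%u height=%u\n",
          region_cpp, region_width, region_height);
   return 0;
}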
*/ if (image->Border) diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index 86d0ef2d748..d9873a303ee 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -74,9 +74,9 @@ static const GLubyte *map_pbo( struct gl_context *ctx, return NULL; } - buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - GL_READ_ONLY_ARB, - unpack->BufferObj); + buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size, + GL_MAP_READ_BIT, + unpack->BufferObj); if (!buf) { _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)"); return NULL; @@ -292,8 +292,7 @@ out: if (_mesa_is_bufferobj(unpack->BufferObj)) { /* done with PBO so unmap it now */ - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - unpack->BufferObj); + ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj); } intel_check_front_buffer_rendering(intel); diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h index 5aa629150cf..a98a669af21 100644 --- a/src/mesa/drivers/dri/intel/intel_reg.h +++ b/src/mesa/drivers/dri/intel/intel_reg.h @@ -75,6 +75,7 @@ #define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4) #define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3) #define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2) +#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) #define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) #define PIPE_CONTROL_PPGTT_WRITE (0 << 2) #define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2) diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h index b2013af1a29..9dd6a525566 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.h +++ b/src/mesa/drivers/dri/intel/intel_screen.h @@ -63,9 +63,12 @@ * x8_z24 and s8). * * Eventually, intel_update_renderbuffers() makes a DRI2 request for - * DRI2BufferStencil and DRI2BufferHiz. If the returned buffers are Y tiled, - * then we joyfully set intel_screen.dri2_has_hiz to true and continue as if - * nothing happend. + * DRI2BufferStencil and DRI2BufferHiz. If the stencil buffer's tiling is + * I915_TILING_NONE [1], then we joyfully set intel_screen.dri2_has_hiz to + * true and continue as if nothing happend. + * + * [1] The stencil buffer is actually W tiled. However, we request from the + * kernel a non-tiled buffer because the GTT is incapable of W fencing. * * If the buffers are X tiled, however, the handshake has failed and we must * clean up. diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 153803fba09..2e1c80c4766 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -131,38 +131,84 @@ intel_set_span_functions(struct intel_context *intel, int miny = 0; \ int maxx = rb->Width; \ int maxy = rb->Height; \ - int stride = rb->RowStride; \ - uint8_t *buf = rb->Data; \ + \ + /* \ + * Here we ignore rb->Data and rb->RowStride as set by \ + * intelSpanRenderStart. Since intel_offset_S8 decodes the W tile \ + * manually, the region's *real* base address and stride is \ + * required. \ + */ \ + struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ + uint8_t *buf = irb->region->buffer->virtual; \ + unsigned stride = irb->region->pitch; \ + unsigned height = 2 * irb->region->height; \ + bool flip = rb->Name == 0; \ + int y_scale = flip ? -1 : 1; \ + int y_bias = flip ? (height - 1) : 0; \ -/* Don't flip y. 
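/*
 * Sketch of the Y_FLIP arithmetic introduced in intel_span.c below:
 * window-system framebuffers (rb->Name == 0) are y-flipped, user FBOs are
 * not, and the flip is folded into a scale and bias computed once at span
 * setup.  Note that the span code computes height as 2 * region->height
 * because the S8 region was allocated with halved height.
 */
static inline int
flip_y(int y, int height, int is_winsys_fb)
{
   int y_scale = is_winsys_fb ? -1 : 1;
   int y_bias  = is_winsys_fb ? height - 1 : 0;

   return y_scale * y + y_bias;   /* e.g. height 64: y=0 -> 63, y=63 -> 0 */
}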
*/ #undef Y_FLIP -#define Y_FLIP(y) y +#define Y_FLIP(y) (y_scale * (y) + y_bias) /** * \brief Get pointer offset into stencil buffer. * - * The stencil buffer interleaves two rows into one. Yay for crazy hardware. - * The table below demonstrates how the pointer arithmetic behaves for a buffer - * with positive stride (s=stride). - * - * x | y | byte offset - * -------------------------- - * 0 | 0 | 0 - * 0 | 1 | 1 - * 1 | 0 | 2 - * 1 | 1 | 3 - * ... | ... | ... - * 0 | 2 | s - * 0 | 3 | s + 1 - * 1 | 2 | s + 2 - * 1 | 3 | s + 3 + * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we + * must decode the tile's layout in software. * + * See + * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile + * Format. + * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm * + * Even though the returned offset is always positive, the return type is + * signed due to + * commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137 + * mesa: Fix return type of _mesa_get_format_bytes() (#37351) */ static inline intptr_t -intel_offset_S8(int stride, GLint x, GLint y) +intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y) { - return 2 * ((y / 2) * stride + x) + y % 2; + uint32_t tile_size = 4096; + uint32_t tile_width = 64; + uint32_t tile_height = 64; + uint32_t row_size = 64 * stride; + + uint32_t tile_x = x / tile_width; + uint32_t tile_y = y / tile_height; + + /* The byte's address relative to the tile's base addres. */ + uint32_t byte_x = x % tile_width; + uint32_t byte_y = y % tile_height; + + uintptr_t u = tile_y * row_size + + tile_x * tile_size + + 512 * (byte_x / 8) + + 64 * (byte_y / 8) + + 32 * ((byte_y / 4) % 2) + + 16 * ((byte_x / 4) % 2) + + 8 * ((byte_y / 2) % 2) + + 4 * ((byte_x / 2) % 2) + + 2 * (byte_y % 2) + + 1 * (byte_x % 2); + + /* + * Errata for Gen5: + * + * An additional offset is needed which is not documented in the PRM. + * + * if ((byte_x / 8) % 2 == 1) { + * if ((byte_y / 8) % 2) == 0) { + * u += 64; + * } else { + * u -= 64; + * } + * } + * + * The offset is expressed more tersely as + * u += ((int) x & 0x8) * (8 - (((int) y & 0x8) << 1)); + */ + + return u; } #define WRITE_STENCIL(x, y, src) buf[intel_offset_S8(stride, x, y)] = src; diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c index 21c4a1dddba..ee0cd252375 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.c +++ b/src/mesa/drivers/dri/intel/intel_tex.c @@ -95,17 +95,12 @@ intelGenerateMipmap(struct gl_context *ctx, GLenum target, if (!_mesa_is_format_compressed(first_image->TexFormat)) { GLuint nr_faces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; GLuint face, i; - /* Update the level information in our private data in the new images, - * since it didn't get set as part of a normal TexImage path. - */ for (face = 0; face < nr_faces; face++) { for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) { struct intel_texture_image *intelImage = intel_texture_image(texObj->Image[face][i]); if (!intelImage) break; - intelImage->level = i; - intelImage->face = face; /* Unreference the miptree to signal that the new Data is a * bare pointer from mesa. 
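/*
 * Standalone sanity check (illustrative, not part of the driver) for the
 * W-tile address swizzle implemented by intel_offset_S8() above.  The
 * expected values below follow directly from that formula: within a tile,
 * bytes are interleaved in 2x2 blocks, so (x,y) = (1,0) lands at offset 1,
 * (0,1) at offset 2, and stepping 8 pixels in x jumps by 512 bytes.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static intptr_t
w_tile_offset_s8(uint32_t stride, uint32_t x, uint32_t y)
{
   uint32_t tile_size = 4096, tile_width = 64, tile_height = 64;
   uint32_t row_size = 64 * stride;
   uint32_t tile_x = x / tile_width, tile_y = y / tile_height;
   uint32_t byte_x = x % tile_width, byte_y = y % tile_height;

   return (intptr_t)(tile_y * row_size + tile_x * tile_size +
                     512 * (byte_x / 8) + 64 * (byte_y / 8) +
                     32 * ((byte_y / 4) % 2) + 16 * ((byte_x / 4) % 2) +
                     8 * ((byte_y / 2) % 2) + 4 * ((byte_x / 2) % 2) +
                     2 * (byte_y % 2) + 1 * (byte_x % 2));
}

int
main(void)
{
   uint32_t stride = 128;   /* arbitrary pitch in bytes */

   assert(w_tile_offset_s8(stride, 0, 0) == 0);
   assert(w_tile_offset_s8(stride, 1, 0) == 1);
   assert(w_tile_offset_s8(stride, 0, 1) == 2);
   assert(w_tile_offset_s8(stride, 1, 1) == 3);
   assert(w_tile_offset_s8(stride, 2, 0) == 4);
   assert(w_tile_offset_s8(stride, 8, 0) == 512);
   assert(w_tile_offset_s8(stride, 0, 8) == 64);
   assert(w_tile_offset_s8(stride, 64, 0) == 4096);        /* next tile in x */
   assert(w_tile_offset_s8(stride, 0, 64) == 64 * stride); /* next tile row  */

   printf("W-tile offsets OK\n");
   return 0;
}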
*/ diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 1a3643da593..600bd1251e0 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -118,8 +118,8 @@ intel_copy_texsubimage(struct intel_context *intel, /* get dest x/y in destination texture */ intel_miptree_get_image_offset(intelImage->mt, - intelImage->level, - intelImage->face, + intelImage->base.Level, + intelImage->base.Face, 0, &image_x, &image_y); @@ -164,101 +164,6 @@ intel_copy_texsubimage(struct intel_context *intel, static void -intelCopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLint border) -{ - struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - int srcx, srcy, dstx, dsty, height; - - if (border) - goto fail; - - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage1D(ctx, target, level, internalFormat, - width, border, - GL_RGBA, CHAN_TYPE, NULL, - &ctx->DefaultPacking, texObj, texImage); - srcx = x; - srcy = y; - dstx = 0; - dsty = 0; - height = 1; - if (!_mesa_clip_copytexsubimage(ctx, - &dstx, &dsty, - &srcx, &srcy, - &width, &height)) - return; - - if (!intel_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, 0, 0, x, y, width, height)) - goto fail; - - return; - - fail: - fallback_debug("%s - fallback to swrast\n", __FUNCTION__); - _mesa_meta_CopyTexImage1D(ctx, target, level, internalFormat, x, y, - width, border); -} - - -static void -intelCopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border) -{ - struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - int srcx, srcy, dstx, dsty; - - if (border) - goto fail; - - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. 
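/*
 * Condensed sketch of the sequence the removed intelCopyTexImage2D performed
 * (see the deleted hunk below): define the destination image without
 * populating it, clip against the read framebuffer, then blit into it.  With
 * the CopyTexImage hooks dropped, glCopyTexImage is handled outside the
 * driver, while the CopyTexSubImage hooks below remain accelerated.
 */
static void
copy_tex_image_2d_sequence(struct gl_context *ctx, GLenum target, GLint level,
                           GLenum internalFormat,
                           GLint x, GLint y, GLsizei width, GLsizei height)
{
   struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
   struct gl_texture_object *texObj =
      _mesa_select_tex_object(ctx, texUnit, target);
   struct gl_texture_image *texImage =
      _mesa_select_tex_image(ctx, texObj, target, level);
   int srcx = x, srcy = y, dstx = 0, dsty = 0;

   /* 1. Define (but do not populate) the destination image. */
   ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
                          width, height, 0,
                          GL_RGBA, GL_UNSIGNED_BYTE, NULL,
                          &ctx->DefaultPacking, texObj, texImage);

   /* 2. Clip against the read framebuffer. */
   if (!_mesa_clip_copytexsubimage(ctx, &dstx, &dsty, &srcx, &srcy,
                                   &width, &height))
      return;

   /* 3. Blit from the framebuffer into the new image. */
   intel_copy_texsubimage(intel_context(ctx), target,
                          intel_texture_image(texImage),
                          internalFormat, 0, 0, x, y, width, height);
}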
- */ - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, - GL_RGBA, GL_UNSIGNED_BYTE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - srcx = x; - srcy = y; - dstx = 0; - dsty = 0; - if (!_mesa_clip_copytexsubimage(ctx, - &dstx, &dsty, - &srcx, &srcy, - &width, &height)) - return; - - if (!intel_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, 0, 0, x, y, width, height)) - goto fail; - - return; - - fail: - fallback_debug("%s - fallback to swrast\n", __FUNCTION__); - _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y, - width, height, border); -} - - -static void intelCopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width) { @@ -312,8 +217,6 @@ intelCopyTexSubImage2D(struct gl_context * ctx, GLenum target, GLint level, void intelInitTextureCopyImageFuncs(struct dd_function_table *functions) { - functions->CopyTexImage1D = intelCopyTexImage1D; - functions->CopyTexImage2D = intelCopyTexImage2D; functions->CopyTexSubImage1D = intelCopyTexSubImage1D; functions->CopyTexSubImage2D = intelCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 1f8b885bbec..4ee66847255 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -63,7 +63,7 @@ intel_miptree_create_for_teximage(struct intel_context *intel, if (intelImage->base.Border) return NULL; - if (intelImage->level > intelObj->base.BaseLevel && + if (intelImage->base.Level > intelObj->base.BaseLevel && (intelImage->base.Width == 1 || (intelObj->base.Target != GL_TEXTURE_1D && intelImage->base.Height == 1) || @@ -74,19 +74,19 @@ intel_miptree_create_for_teximage(struct intel_context *intel, * likely base level width/height/depth for a full mipmap stack * from this info, so just allocate this one level. */ - firstLevel = intelImage->level; - lastLevel = intelImage->level; + firstLevel = intelImage->base.Level; + lastLevel = intelImage->base.Level; } else { /* If this image disrespects BaseLevel, allocate from level zero. * Usually BaseLevel == 0, so it's unlikely to happen. */ - if (intelImage->level < intelObj->base.BaseLevel) + if (intelImage->base.Level < intelObj->base.BaseLevel) firstLevel = 0; else firstLevel = intelObj->base.BaseLevel; /* Figure out image dimensions at start level. 
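/*
 * Standalone illustration of the level-range logic in
 * intel_miptree_create_for_teximage() below: for a non-top-level image, the
 * base-level size is reconstructed by doubling the image's dimensions from
 * its level down to firstLevel.
 */
#include <stdio.h>

int
main(void)
{
   unsigned base_level = 0;                 /* intelObj->base.BaseLevel */
   unsigned level = 2;                      /* intelImage->base.Level   */
   unsigned width = 16, height = 16;        /* dims of the level-2 image */

   unsigned first_level = (level < base_level) ? 0 : base_level;

   for (unsigned i = level; i > first_level; i--) {
      width <<= 1;
      if (height != 1)
         height <<= 1;
   }

   /* A 16x16 image at level 2 implies a 64x64 level 0. */
   printf("level %u is 16x16 -> level %u is %ux%u\n",
          level, first_level, width, height);
   return 0;
}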
*/ - for (i = intelImage->level; i > firstLevel; i--) { + for (i = intelImage->base.Level; i > firstLevel; i--) { width <<= 1; if (height != 1) height <<= 1; @@ -101,7 +101,7 @@ intel_miptree_create_for_teximage(struct intel_context *intel, */ if ((intelObj->base.Sampler.MinFilter == GL_NEAREST || intelObj->base.Sampler.MinFilter == GL_LINEAR) && - intelImage->level == firstLevel && + intelImage->base.Level == firstLevel && (intel->gen < 4 || firstLevel == 0)) { lastLevel = firstLevel; } else { @@ -186,8 +186,8 @@ try_pbo_upload(struct intel_context *intel, else src_stride = width; - intel_miptree_get_image_offset(intelImage->mt, intelImage->level, - intelImage->face, 0, + intel_miptree_get_image_offset(intelImage->mt, intelImage->base.Level, + intelImage->base.Face, 0, &dst_x, &dst_y); dst_stride = intelImage->mt->region->pitch; @@ -243,8 +243,8 @@ try_pbo_zcopy(struct intel_context *intel, else src_stride = width; - intel_miptree_get_image_offset(intelImage->mt, intelImage->level, - intelImage->face, 0, + intel_miptree_get_image_offset(intelImage->mt, intelImage->base.Level, + intelImage->base.Face, 0, &dst_x, &dst_y); dst_stride = intelImage->mt->region->pitch; @@ -407,9 +407,6 @@ intelTexImage(struct gl_context * ctx, DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), level, width, height, depth, border); - intelImage->face = _mesa_tex_target_to_face(target); - intelImage->level = level; - if (_mesa_is_format_compressed(texImage->TexFormat)) { texelBytes = 0; } @@ -514,8 +511,8 @@ intelTexImage(struct gl_context * ctx, } texImage->Data = intel_miptree_image_map(intel, intelImage->mt, - intelImage->face, - intelImage->level, + intelImage->base.Face, + intelImage->base.Level, &dstRowStride, intelImage->base.ImageOffsets); } @@ -684,8 +681,8 @@ intel_get_tex_image(struct gl_context * ctx, GLenum target, GLint level, intelImage->base.Data = intel_miptree_image_map(intel, intelImage->mt, - intelImage->face, - intelImage->level, + intelImage->base.Face, + intelImage->base.Level, &intelImage->base.RowStride, intelImage->base.ImageOffsets); intelImage->base.RowStride /= intelImage->mt->cpp; @@ -816,8 +813,6 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, rb->region->width, rb->region->height, 1, 0, internalFormat, texFormat); - intelImage->face = _mesa_tex_target_to_face(target); - intelImage->level = level; texImage->RowStride = rb->region->pitch; intel_miptree_reference(&intelImage->mt, intelObj->mt); @@ -874,8 +869,6 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, image->region->width, image->region->height, 1, 0, image->internal_format, image->format); - intelImage->face = _mesa_tex_target_to_face(target); - intelImage->level = 0; texImage->RowStride = image->region->pitch; intel_miptree_reference(&intelImage->mt, intelObj->mt); diff --git a/src/mesa/drivers/dri/intel/intel_tex_obj.h b/src/mesa/drivers/dri/intel/intel_tex_obj.h index a9ae2ec5429..e7a4318b8d8 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_obj.h +++ b/src/mesa/drivers/dri/intel/intel_tex_obj.h @@ -52,11 +52,6 @@ struct intel_texture_image { struct gl_texture_image base; - /* These aren't stored in gl_texture_image - */ - GLuint level; - GLuint face; - /* If intelImage->mt != NULL, image data is stored here. * Else if intelImage->base.Data != NULL, image is stored there. * Else there is no image data. 
diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c index 8b43c406cf9..5fd2cc36234 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c @@ -113,7 +113,7 @@ intelTexSubimage(struct gl_context * ctx, dstRowStride = pitch; intel_miptree_get_image_offset(intelImage->mt, level, - intelImage->face, 0, + intelImage->base.Face, 0, &blit_x, &blit_y); blit_x += xoffset; blit_y += yoffset; @@ -122,8 +122,8 @@ intelTexSubimage(struct gl_context * ctx, } else { texImage->Data = intel_miptree_image_map(intel, intelImage->mt, - intelImage->face, - intelImage->level, + intelImage->base.Face, + intelImage->base.Level, &dstRowStride, texImage->ImageOffsets); } diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index 7135a6276fe..31ac689ad77 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -42,8 +42,8 @@ copy_image_data_to_tree(struct intel_context *intel, */ intel_miptree_image_copy(intel, intelObj->mt, - intelImage->face, - intelImage->level, intelImage->mt); + intelImage->base.Face, + intelImage->base.Level, intelImage->mt); intel_miptree_release(intel, &intelImage->mt); } @@ -54,8 +54,8 @@ copy_image_data_to_tree(struct intel_context *intel, */ intel_miptree_image_data(intel, intelObj->mt, - intelImage->face, - intelImage->level, + intelImage->base.Face, + intelImage->base.Level, intelImage->base.Data, intelImage->base.RowStride, intelImage->base.RowStride * @@ -177,8 +177,8 @@ intel_tex_map_level_images(struct intel_context *intel, intelImage->base.Data = intel_miptree_image_map(intel, intelImage->mt, - intelImage->face, - intelImage->level, + intelImage->base.Face, + intelImage->base.Level, &intelImage->base.RowStride, intelImage->base.ImageOffsets); /* convert stride to texels, not bytes */ diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c index e60b91f64be..433590c4181 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c @@ -107,7 +107,7 @@ nouveau_bufferobj_data(struct gl_context *ctx, GLenum target, GLsizeiptrARB size } static void -nouveau_bufferobj_subdata(struct gl_context *ctx, GLenum target, GLintptrARB offset, +nouveau_bufferobj_subdata(struct gl_context *ctx, GLintptrARB offset, GLsizeiptrARB size, const GLvoid *data, struct gl_buffer_object *obj) { @@ -115,7 +115,7 @@ nouveau_bufferobj_subdata(struct gl_context *ctx, GLenum target, GLintptrARB off } static void -nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLenum target, GLintptrARB offset, +nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLintptrARB offset, GLsizeiptrARB size, GLvoid *data, struct gl_buffer_object *obj) { @@ -123,23 +123,6 @@ nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLenum target, GLintptrARB } static void * -nouveau_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access, - struct gl_buffer_object *obj) -{ - unsigned flags = 0; - - if (access == GL_READ_ONLY_ARB || - access == GL_READ_WRITE_ARB) - flags |= GL_MAP_READ_BIT; - if (access == GL_WRITE_ONLY_ARB || - access == GL_READ_WRITE_ARB) - flags |= GL_MAP_WRITE_BIT; - - return ctx->Driver.MapBufferRange(ctx, target, 0, obj->Size, flags, - obj); -} - -static void * nouveau_bufferobj_map_range(struct gl_context *ctx, GLenum target, GLintptr offset, GLsizeiptr 
length, GLbitfield access, struct gl_buffer_object *obj) @@ -169,7 +152,7 @@ nouveau_bufferobj_map_range(struct gl_context *ctx, GLenum target, GLintptr offs } static GLboolean -nouveau_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj) +nouveau_bufferobj_unmap(struct gl_context *ctx, struct gl_buffer_object *obj) { assert(obj->Pointer); @@ -189,7 +172,6 @@ nouveau_bufferobj_functions_init(struct dd_function_table *functions) functions->BufferData = nouveau_bufferobj_data; functions->BufferSubData = nouveau_bufferobj_subdata; functions->GetBufferSubData = nouveau_bufferobj_get_subdata; - functions->MapBuffer = nouveau_bufferobj_map; functions->MapBufferRange = nouveau_bufferobj_map_range; functions->UnmapBuffer = nouveau_bufferobj_unmap; } diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c index 02201cb53d6..44a794da396 100644 --- a/src/mesa/drivers/dri/r200/r200_ioctl.c +++ b/src/mesa/drivers/dri/r200/r200_ioctl.c @@ -185,7 +185,6 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask ) r200ContextPtr rmesa = R200_CONTEXT(ctx); __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLuint flags = 0; - GLuint color_mask = 0; GLuint orig_mask = mask; if ( R200_DEBUG & RADEON_IOCTL ) { @@ -206,13 +205,11 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask ) if ( mask & BUFFER_BIT_FRONT_LEFT ) { flags |= RADEON_FRONT; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_FRONT_LEFT; } if ( mask & BUFFER_BIT_BACK_LEFT ) { flags |= RADEON_BACK; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_BACK_LEFT; } diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c index d42e8f12041..91e77f9f7da 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.c +++ b/src/mesa/drivers/dri/r200/r200_tex.c @@ -527,7 +527,6 @@ void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *fu functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index 7adf9ad73ed..8c9bd6d00b2 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -773,18 +773,12 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format struct radeon_renderbuffer *rb; radeon_texture_image *rImage; radeonContextPtr radeon; - r200ContextPtr rmesa; struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, format; gl_format texFormat; - format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 
3 : 4); - radeon = pDRICtx->driverPrivate; - rmesa = pDRICtx->driverPrivate; rfb = dPriv->driverPrivate; texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c index 63e03b0e0c7..cf44d7f459c 100644 --- a/src/mesa/drivers/dri/r200/r200_vertprog.c +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -126,10 +126,10 @@ static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_ case PROGRAM_NAMED_PARAM: //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); case PROGRAM_CONSTANT: - *fcmd++ = paramList->ParameterValues[pi][0]; - *fcmd++ = paramList->ParameterValues[pi][1]; - *fcmd++ = paramList->ParameterValues[pi][2]; - *fcmd++ = paramList->ParameterValues[pi][3]; + *fcmd++ = paramList->ParameterValues[pi][0].f; + *fcmd++ = paramList->ParameterValues[pi][1].f; + *fcmd++ = paramList->ParameterValues[pi][2].f; + *fcmd++ = paramList->ParameterValues[pi][3].f; break; default: _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index b24274259f4..39dcb21d4f4 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -561,28 +561,29 @@ static int peephole_add_presub_add( struct rc_instruction * inst_add) { unsigned dstmask = inst_add->U.I.DstReg.WriteMask; - struct rc_src_register * src1 = NULL; - unsigned int i; - - if (!is_presub_candidate(c, inst_add)) - return 0; + unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; + unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) return 0; - /* XXX This isn't fully implemented, is it? */ - /* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */ - for (i = 0; i < 2; i++) { - if (inst_add->U.I.SrcReg[i].Abs) - return 0; + /* src0 and src1 can't have absolute values */ + if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) + return 0; - /* XXX This looks weird, but it's basically what was here before this commit (see git blame): */ - if ((inst_add->U.I.SrcReg[i].Negate & dstmask) != dstmask && !src1) { - src1 = &inst_add->U.I.SrcReg[i]; - } - } + /* presub_replace_add() assumes only one is negative */ + if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) + return 0; + + /* if src0 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) + return 0; - if (!src1) + /* if src1 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) + return 0; + + if (!is_presub_candidate(c, inst_add)) return 0; if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { @@ -615,7 +616,7 @@ static void presub_replace_inv( * of the add instruction must have the constatnt 1 swizzle. This function * does not check const registers to see if their value is 1.0, so it should * be called after the constant_folding optimization. - * @return + * @return * 0 if the ADD instruction is still part of the program. * 1 if the ADD instruction is no longer part of the program. 
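/*
 * Sketch of the program-parameter change visible in the r200/r600/r700 hunks
 * of this patch: ParameterValues[] entries are no longer plain GLfloat but a
 * small value union, so float reads now go through the .f member.  The union
 * below is illustrative only (member set assumed); the real definition lives
 * in core Mesa's program parameter code.
 */
union constant_value_sketch {
   GLfloat f;
   GLint i;
   GLuint u;
};

static void
load_vec4_constant(GLfloat *fcmd, const union constant_value_sketch *param)
{
   /* Equivalent of "*fcmd++ = paramList->ParameterValues[pi][n].f" above. */
   fcmd[0] = param[0].f;
   fcmd[1] = param[1].f;
   fcmd[2] = param[2].f;
   fcmd[3] = param[3].f;
}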
*/ diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index 0c4d8537c61..5587c16dd44 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -84,7 +84,8 @@ static void r300FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde GLboolean mapped_named_bo = GL_FALSE; if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size, + GL_MAP_READ_BIT, mesa_ind_buf->obj); mapped_named_bo = GL_TRUE; assert(mesa_ind_buf->obj->Pointer != NULL); } @@ -138,7 +139,7 @@ static void r300FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde r300->ind_buf.count = mesa_ind_buf->count; if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj); } } @@ -163,7 +164,10 @@ static void r300SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde GLboolean mapped_named_bo = GL_FALSE; if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + ctx->Driver.MapBufferRange(ctx, 0, + mesa_ind_buf->obj->Size, + GL_MAP_READ_BIT, + mesa_ind_buf->obj); assert(mesa_ind_buf->obj->Pointer != NULL); mapped_named_bo = GL_TRUE; } @@ -184,7 +188,7 @@ static void r300SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde r300->ind_buf.count = mesa_ind_buf->count; if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj); } } else { r300FixupIndexBuffer(ctx, mesa_ind_buf); @@ -235,7 +239,8 @@ static void r300ConvertAttrib(struct gl_context *ctx, int count, const struct gl if (input->BufferObj->Name) { if (!input->BufferObj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size, + GL_MAP_READ_BIT, input->BufferObj); mapped_named_bo = GL_TRUE; } @@ -286,7 +291,7 @@ static void r300ConvertAttrib(struct gl_context *ctx, int count, const struct gl radeon_bo_unmap(attr->bo); if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + ctx->Driver.UnmapBuffer(ctx, input->BufferObj); } } @@ -302,7 +307,8 @@ static void r300AlignDataToDword(struct gl_context *ctx, const struct gl_client_ radeon_bo_map(attr->bo, 1); if (!input->BufferObj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size, + GL_MAP_READ_BIT, input->BufferObj); mapped_named_bo = GL_TRUE; } @@ -321,7 +327,7 @@ static void r300AlignDataToDword(struct gl_context *ctx, const struct gl_client_ } if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + ctx->Driver.UnmapBuffer(ctx, input->BufferObj); } radeon_bo_unmap(attr->bo); diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 590d9afe14a..93d8fe185ef 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -379,7 +379,6 @@ void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = 
radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index e24ad6f088d..e4388a021ed 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -427,13 +427,8 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; gl_format texFormat; - type = GL_BGRA; - format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; rmesa = pDRICtx->driverPrivate; diff --git a/src/mesa/drivers/dri/r600/evergreen_fragprog.c b/src/mesa/drivers/dri/r600/evergreen_fragprog.c index e527c379b62..cc584ca2b35 100644 --- a/src/mesa/drivers/dri/r600/evergreen_fragprog.c +++ b/src/mesa/drivers/dri/r600/evergreen_fragprog.c @@ -752,10 +752,10 @@ GLboolean evergreenSetupFPconstants(struct gl_context * ctx) unNumParamData = paramList->NumParameters; for(ui=0; ui<unNumParamData; ui++) { - evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } /* alloc multiple of 16 constants */ diff --git a/src/mesa/drivers/dri/r600/evergreen_render.c b/src/mesa/drivers/dri/r600/evergreen_render.c index 4507be29d86..74563caf47c 100644 --- a/src/mesa/drivers/dri/r600/evergreen_render.c +++ b/src/mesa/drivers/dri/r600/evergreen_render.c @@ -403,7 +403,8 @@ static void evergreenConvertAttrib(struct gl_context *ctx, int count, { if (!input->BufferObj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size, + GL_MAP_READ_BIT, input->BufferObj); mapped_named_bo = GL_TRUE; } @@ -456,7 +457,7 @@ static void evergreenConvertAttrib(struct gl_context *ctx, int count, if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + ctx->Driver.UnmapBuffer(ctx, input->BufferObj); } } @@ -470,7 +471,8 @@ static void evergreenFixupIndexBuffer(struct gl_context *ctx, const struct _mesa if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size, + GL_MAP_READ_BIT, mesa_ind_buf->obj); mapped_named_bo = GL_TRUE; assert(mesa_ind_buf->obj->Pointer != NULL); } @@ -531,7 +533,7 @@ static void evergreenFixupIndexBuffer(struct gl_context *ctx, const struct _mesa if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj); } } @@ -606,7 +608,8 @@ static void evergreenSetupIndexBuffer(struct gl_context *ctx, const struct _mesa if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + 
ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size, + GL_MAP_READ_BIT, mesa_ind_buf->obj); assert(mesa_ind_buf->obj->Pointer != NULL); mapped_named_bo = GL_TRUE; } @@ -629,7 +632,7 @@ static void evergreenSetupIndexBuffer(struct gl_context *ctx, const struct _mesa if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj); } } else @@ -655,7 +658,8 @@ static void evergreenAlignDataToDword(struct gl_context *ctx, if (!input->BufferObj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size, + GL_MAP_READ_BIT, input->BufferObj->obj); mapped_named_bo = GL_TRUE; } @@ -675,7 +679,7 @@ static void evergreenAlignDataToDword(struct gl_context *ctx, radeon_bo_unmap(attr->bo); if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + ctx->Driver.UnmapBuffer(ctx, input->BufferObj); } attr->stride = dst_stride; diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.c b/src/mesa/drivers/dri/r600/evergreen_tex.c index 33a5f277683..d240a216817 100644 --- a/src/mesa/drivers/dri/r600/evergreen_tex.c +++ b/src/mesa/drivers/dri/r600/evergreen_tex.c @@ -1288,19 +1288,12 @@ void evergreenSetTexBuffer(__DRIcontext *pDRICtx, GLint target, GLint glx_textur struct radeon_renderbuffer *rb; radeon_texture_image *rImage; radeonContextPtr radeon; - context_t *rmesa; struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; gl_format texFormat; - type = GL_BGRA; - format = GL_UNSIGNED_BYTE; - internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); - radeon = pDRICtx->driverPrivate; - rmesa = pDRICtx->driverPrivate; rfb = dPriv->driverPrivate; texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; @@ -1688,7 +1681,6 @@ void evergreenInitTextureFuncs(radeonContextPtr radeon, struct dd_function_table functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r600/evergreen_vertprog.c b/src/mesa/drivers/dri/r600/evergreen_vertprog.c index 018869b9996..117916ac78f 100644 --- a/src/mesa/drivers/dri/r600/evergreen_vertprog.c +++ b/src/mesa/drivers/dri/r600/evergreen_vertprog.c @@ -684,17 +684,17 @@ GLboolean evergreenSetupVPconstants(struct gl_context * ctx) for(ui=0; ui<unNumParamData; ui++) { if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) { - evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0]; - evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1]; - evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2]; - evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3]; + evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f; + evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f; + evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f; + evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f; } else { - evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - 
evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } } diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c index ce2f7779563..74f048b1062 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -259,13 +259,11 @@ static int r600_cs_process_relocs(struct radeon_cs_int *csi, uint32_t * reloc_chunk, uint32_t * length_dw_reloc_chunk) { - struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm; struct r600_cs_reloc_legacy *relocs; int i, j, r; uint32_t offset_dw = 0; - csm = (struct r600_cs_manager_legacy*)csi->csm; relocs = (struct r600_cs_reloc_legacy *)csi->relocs; restart: for (i = 0; i < csi->crelocs; i++) { diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c index eb7ed30c7a3..3efa1d197fa 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.c +++ b/src/mesa/drivers/dri/r600/r600_tex.c @@ -470,7 +470,6 @@ void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 949db29c189..65fae7195fd 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -1141,13 +1141,8 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; gl_format texFormat; - type = GL_BGRA; - format = GL_UNSIGNED_BYTE; - internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 
3 : 4); - radeon = pDRICtx->driverPrivate; rmesa = pDRICtx->driverPrivate; diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 40494cd6af0..6f9834e68fe 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -778,10 +778,10 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx) unNumParamData = paramList->NumParameters; for(ui=0; ui<unNumParamData; ui++) { - r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } /* Load fp constants to gpu */ diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 0f7a7a46b71..a565c9f2087 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -490,7 +490,8 @@ static void r700ConvertAttrib(struct gl_context *ctx, int count, { if (!input->BufferObj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size, + GL_MAP_READ_BIT, input->BufferObj); mapped_named_bo = GL_TRUE; } @@ -543,7 +544,7 @@ static void r700ConvertAttrib(struct gl_context *ctx, int count, if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + ctx->Driver.UnmapBuffer(ctx, input->BufferObj); } } @@ -564,7 +565,8 @@ static void r700AlignDataToDword(struct gl_context *ctx, if (!input->BufferObj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + ctx->Driver.MapBufferRange(ctx, 0, input->BufferObj->Size, + GL_MAP_READ_BIT, input->BufferObj); mapped_named_bo = GL_TRUE; } @@ -584,7 +586,7 @@ static void r700AlignDataToDword(struct gl_context *ctx, radeon_bo_unmap(attr->bo); if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + ctx->Driver.UnmapBuffer(ctx, input->BufferObj); } attr->stride = dst_stride; @@ -727,7 +729,8 @@ static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size, + GL_MAP_READ_BIT, mesa_ind_buf->obj); mapped_named_bo = GL_TRUE; assert(mesa_ind_buf->obj->Pointer != NULL); } @@ -788,7 +791,7 @@ static void r700FixupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj); } } @@ -813,7 +816,8 @@ static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + ctx->Driver.MapBufferRange(ctx, 0, mesa_ind_buf->obj->Size, + GL_MAP_READ_BIT, mesa_ind_buf->obj); assert(mesa_ind_buf->obj->Pointer != NULL); mapped_named_bo = GL_TRUE; 
} @@ -836,7 +840,7 @@ static void r700SetupIndexBuffer(struct gl_context *ctx, const struct _mesa_inde if (mapped_named_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + ctx->Driver.UnmapBuffer(ctx, mesa_ind_buf->obj); } } else diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 7d4be9180a0..b1e2742b27d 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -720,17 +720,17 @@ GLboolean r700SetupVertexProgram(struct gl_context * ctx) for(ui=0; ui<unNumParamData; ui++) { if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) { - r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0]; - r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1]; - r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2]; - r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3]; + r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0].f; + r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1].f; + r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2].f; + r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3].f; } else { - r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0].f; + r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1].f; + r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2].f; + r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3].f; } } diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h index 607b7470d4b..a74c6c7a575 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h +++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h @@ -78,6 +78,9 @@ static inline uint32_t radeon_gem_name_bo(struct radeon_bo *dummy) static inline void *radeon_bo_manager_gem_ctor(int fd) { + fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n", + __func__, __LINE__); + return NULL; } @@ -87,6 +90,9 @@ static inline void radeon_bo_manager_gem_dtor(void *dummy) static inline void *radeon_cs_manager_gem_ctor(int fd) { + fprintf(stderr, "[%s:%u] Mesa built without Radeon libdrm support.\n", + __func__, __LINE__); + return NULL; } diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c index 0d1af726c07..7b59c0377f8 100644 --- a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c +++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c @@ -130,7 +130,6 @@ radeonBufferData(struct gl_context * ctx, */ static void radeonBufferSubData(struct gl_context * ctx, - GLenum target, GLintptrARB offset, GLsizeiptrARB size, const GLvoid * data, @@ -155,7 +154,6 @@ radeonBufferSubData(struct gl_context * ctx, */ static void radeonGetBufferSubData(struct gl_context * ctx, - GLenum target, GLintptrARB offset, GLsizeiptrARB size, GLvoid * data, @@ -171,17 +169,18 @@ radeonGetBufferSubData(struct gl_context * ctx, } /** - * Called via glMapBufferARB() + * Called via glMapBuffer() and glMapBufferRange() */ static void * -radeonMapBuffer(struct gl_context * ctx, - GLenum target, - GLenum access, - 
struct gl_buffer_object *obj) +radeonMapBufferRange(struct gl_context * ctx, + GLintptr offset, GLsizeiptr length, + GLbitfield access, struct gl_buffer_object *obj) { struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); + const GLboolean write_only = + (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_WRITE_BIT; - if (access == GL_WRITE_ONLY_ARB) { + if (write_only) { ctx->Driver.Flush(ctx); } @@ -190,12 +189,13 @@ radeonMapBuffer(struct gl_context * ctx, return NULL; } - radeon_bo_map(radeon_obj->bo, access == GL_WRITE_ONLY_ARB); + obj->Offset = offset; + obj->Length = length; + obj->AccessFlags = access; - obj->Pointer = radeon_obj->bo->ptr; - obj->Length = obj->Size; - obj->Offset = 0; + radeon_bo_map(radeon_obj->bo, write_only); + obj->Pointer = radeon_obj->bo->ptr + offset; return obj->Pointer; } @@ -205,7 +205,6 @@ radeonMapBuffer(struct gl_context * ctx, */ static GLboolean radeonUnmapBuffer(struct gl_context * ctx, - GLenum target, struct gl_buffer_object *obj) { struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj); @@ -229,6 +228,6 @@ radeonInitBufferObjectFuncs(struct dd_function_table *functions) functions->BufferData = radeonBufferData; functions->BufferSubData = radeonBufferSubData; functions->GetBufferSubData = radeonGetBufferSubData; - functions->MapBuffer = radeonMapBuffer; + functions->MapBufferRange = radeonMapBufferRange; functions->UnmapBuffer = radeonUnmapBuffer; } diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index bfc307ca987..e7a6623cf84 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -436,7 +436,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv, const drm_clip_rect_t *rect) { radeonContextPtr rmesa; - struct radeon_framebuffer *rfb; GLint nbox, i, ret; assert(dPriv); @@ -447,8 +446,6 @@ void radeonCopyBuffer( __DRIdrawable *dPriv, LOCK_HARDWARE(rmesa); - rfb = dPriv->driverPrivate; - if ( RADEON_DEBUG & RADEON_IOCTL ) { fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx ); } @@ -527,8 +524,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv ) { radeonContextPtr radeon; GLint ret; - __DRIscreen *psp; - struct radeon_renderbuffer *rrb; struct radeon_framebuffer *rfb; assert(dPriv); @@ -537,9 +532,6 @@ static GLboolean radeonPageFlip( __DRIdrawable *dPriv ) radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; rfb = dPriv->driverPrivate; - rrb = (void *)rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer; - - psp = dPriv->driScreenPriv; LOCK_HARDWARE(radeon); diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index bf8925f61d0..c08b79484af 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -515,7 +515,6 @@ void radeon_prepare_render(radeonContextPtr radeon) __DRIcontext *driContext = radeon->dri.context; __DRIdrawable *drawable; __DRIscreen *screen; - struct radeon_framebuffer *draw; screen = driContext->driScreenPriv; if (!screen->dri2.loader) @@ -527,7 +526,6 @@ void radeon_prepare_render(radeonContextPtr radeon) radeon_update_renderbuffers(driContext, drawable, GL_FALSE); /* Intel driver does the equivalent of this, no clue if it is needed:*/ - draw = drawable->driverPrivate; radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer); driContext->dri2.draw_stamp = drawable->dri2.stamp; diff --git 
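/*
 * Illustrative truth table (hypothetical helpers) for the access-bit tests
 * used by radeonMapBufferRange() above and intel_bufferobj_map_range()
 * earlier in this patch:
 *
 *    access bits                          write_only   read_only
 *    GL_MAP_WRITE_BIT                     true         false
 *    GL_MAP_READ_BIT                      false        true
 *    GL_MAP_READ_BIT | GL_MAP_WRITE_BIT   false        false
 */
static GLboolean
map_is_write_only(GLbitfield access)
{
   return (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_WRITE_BIT;
}

static GLboolean
map_is_read_only(GLbitfield access)
{
   return (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_READ_BIT;
}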
a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c index c2722a4e195..5595b705b15 100644 --- a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c +++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c @@ -218,11 +218,9 @@ static int cs_end(struct radeon_cs_int *cs, static int cs_process_relocs(struct radeon_cs_int *cs) { - struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm; struct cs_reloc_legacy *relocs; int i, j, r; - csm = (struct cs_manager_legacy*)cs->csm; relocs = (struct cs_reloc_legacy *)cs->relocs; restart: for (i = 0; i < cs->crelocs; i++) diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c index a91d8727792..c23e9c2d2a2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c +++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c @@ -560,7 +560,6 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask ) r100ContextPtr rmesa = R100_CONTEXT(ctx); __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLuint flags = 0; - GLuint color_mask = 0; GLuint orig_mask = mask; if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) { @@ -582,13 +581,11 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask ) if ( mask & BUFFER_BIT_FRONT_LEFT ) { flags |= RADEON_FRONT; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_FRONT_LEFT; } if ( mask & BUFFER_BIT_BACK_LEFT ) { flags |= RADEON_BACK; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; mask &= ~BUFFER_BIT_BACK_LEFT; } diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c index 7b6bd36dcf7..ae8a212f806 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lock.c +++ b/src/mesa/drivers/dri/radeon/radeon_lock.c @@ -114,16 +114,6 @@ void radeon_lock_hardware(radeonContextPtr radeon ) { char ret = 0; - struct radeon_framebuffer *rfb = NULL; - struct radeon_renderbuffer *rrb = NULL; - - if (radeon_get_drawable(radeon)) { - rfb = radeon_get_drawable(radeon)->driverPrivate; - - if (rfb) - rrb = radeon_get_renderbuffer(&rfb->base, - rfb->base._ColorDrawBufferIndexes[0]); - } if (!radeon->radeonScreen->driScreen->dri2.enabled) { if (ATOMIC_INC_AND_FETCH(radeon->dri.hwLockCount) > 1) diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c index 25a8ddf7b6a..a0b5506ae76 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -455,7 +455,6 @@ void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table * functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; if (radeon->radeonScreen->kernel_mm) { - functions->CopyTexImage2D = radeonCopyTexImage2D; functions->CopyTexSubImage2D = radeonCopyTexSubImage2D; } diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c index f14dfa25d40..94ff3c4a727 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c @@ -141,61 +141,6 @@ do_copy_texsubimage(struct gl_context *ctx, } void -radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border) -{ - struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, texUnit, target); - struct gl_texture_image *texImage = - _mesa_select_tex_image(ctx, texObj, target, level); - int 
srcx, srcy, dstx, dsty; - - radeonContextPtr radeon = RADEON_CONTEXT(ctx); - radeon_prepare_render(radeon); - - if (border) - goto fail; - - /* Setup or redefine the texture object, mipmap tree and texture - * image. Don't populate yet. - */ - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, - GL_RGBA, GL_UNSIGNED_BYTE, NULL, - &ctx->DefaultPacking, texObj, texImage); - - srcx = x; - srcy = y; - dstx = 0; - dsty = 0; - if (!_mesa_clip_copytexsubimage(ctx, - &dstx, &dsty, - &srcx, &srcy, - &width, &height)) { - return; - } - - if (!do_copy_texsubimage(ctx, target, level, - radeon_tex_obj(texObj), (radeon_texture_image *)texImage, - 0, 0, x, y, width, height)) { - goto fail; - } - - return; - -fail: - radeon_print(RADEON_FALLBACKS, RADEON_NORMAL, - "Falling back to sw for glCopyTexImage2D (internalFormat %s, border %d)\n", - _mesa_lookup_enum_by_nr(internalFormat), border); - - _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y, - width, height, border); -} - -void radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index 9ba98e303a7..430309392a0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -648,18 +648,12 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_form struct radeon_renderbuffer *rb; radeon_texture_image *rImage; radeonContextPtr radeon; - r100ContextPtr rmesa; struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, format; gl_format texFormat; - format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? GL_RGB : GL_RGBA); - radeon = pDRICtx->driverPrivate; - rmesa = pDRICtx->driverPrivate; rfb = dPriv->driverPrivate; texUnit = _mesa_get_current_tex_unit(radeon->glCtx); @@ -1018,7 +1012,7 @@ static GLboolean radeon_validate_texgen( struct gl_context *ctx, GLuint unit ) static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit) { const struct gl_texture_image *firstImage; - GLint log2Width, log2Height, log2Depth, texelBytes; + GLint log2Width, log2Height, texelBytes; if ( t->bo ) { return GL_TRUE; @@ -1033,7 +1027,6 @@ static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int log2Width = firstImage->WidthLog2; log2Height = firstImage->HeightLog2; - log2Depth = firstImage->DepthLog2; texelBytes = _mesa_get_format_bytes(firstImage->TexFormat); if (!t->image_override) { diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index ce0df32bfe4..ad7e4c146a4 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -787,18 +787,6 @@ static void radeon_teximage( radeon_print(RADEON_TEXTURE, RADEON_NORMAL, "%s %dd: texObj %p, texImage %p, face %d, level %d\n", __func__, dims, texObj, texImage, face, level); - { - struct radeon_bo *bo; - bo = !image->mt ? image->bo : image->mt->bo; - if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) { - radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, - "%s Calling teximage for texture that is " - "queued for GPU processing.\n", - __func__); - radeon_firevertices(rmesa); - } - } - t->validated = GL_FALSE; @@ -820,6 +808,18 @@ static void radeon_teximage( } } + { + struct radeon_bo *bo; + bo = !image->mt ? 
image->bo : image->mt->bo; + if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) { + radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, + "%s Calling teximage for texture that is " + "queued for GPU processing.\n", + __func__); + radeon_firevertices(rmesa); + } + } + /* Upload texture image; note that the spec allows pixels to be NULL */ if (compressed) { pixels = _mesa_validate_pbo_compressed_teximage( diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h index 538a07fbba8..6fc06d967dd 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.h +++ b/src/mesa/drivers/dri/radeon/radeon_texture.h @@ -126,11 +126,6 @@ void radeonGetCompressedTexImage(struct gl_context *ctx, GLenum target, GLint le struct gl_texture_object *texObj, struct gl_texture_image *texImage); -void radeonCopyTexImage2D(struct gl_context *ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border); - void radeonCopyTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, |