summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorKeith Whitwell <[email protected]>2009-10-26 01:11:36 +0000
committerKeith Whitwell <[email protected]>2009-10-26 01:35:05 +0000
commit5a304995e09d8dbfd40a2dfab32eacb7e85798e3 (patch)
tree05d518939574519c466e3b34cfc2b1a1b8db2c3c /src
parent09c231f84a20a306a173b60c82484ce1f9331edf (diff)
i965g: still working on compilation
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/i965/brw_context.h15
-rw-r--r--src/gallium/drivers/i965/brw_gs.c2
-rw-r--r--src/gallium/drivers/i965/brw_swtnl.c144
-rw-r--r--src/gallium/drivers/i965/brw_urb.c5
-rw-r--r--src/gallium/drivers/i965/brw_vs.c31
-rw-r--r--src/gallium/drivers/i965/brw_vs.h14
-rw-r--r--src/gallium/drivers/i965/brw_vs_emit.c733
-rw-r--r--src/gallium/drivers/i965/brw_wm_glsl.c4
-rw-r--r--src/gallium/drivers/i965/brw_wm_surface_state.c7
9 files changed, 485 insertions, 470 deletions
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 10c1cf6f33a..8aaf895d20a 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -154,6 +154,7 @@ struct brw_vertex_shader {
const struct tgsi_token *tokens;
struct tgsi_shader_info info;
+ unsigned id;
struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */
GLboolean use_const_buffer;
};
@@ -165,6 +166,7 @@ struct brw_fragment_shader {
GLboolean isGLSL;
+ unsigned id;
struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */
GLboolean use_const_buffer;
};
@@ -280,10 +282,13 @@ struct brw_vs_prog_data {
GLuint curb_read_length;
GLuint urb_read_length;
GLuint total_grf;
- GLuint nr_outputs_written;
- GLuint nr_params; /**< number of float params/constants */
- GLuint inputs_read;
+ GLuint nr_outputs;
+ GLuint nr_inputs;
+
+ GLuint nr_params; /**< number of TGSI_FILE_CONSTANT's */
+
+ GLboolean copy_edgeflag;
/* Used for calculating urb partitions:
*/
@@ -475,8 +480,8 @@ struct brw_context
/* Active state from the state tracker:
*/
struct {
- const struct brw_vertex_shader *vertex_shader;
- const struct brw_fragment_shader *fragment_shader;
+ struct brw_vertex_shader *vertex_shader;
+ struct brw_fragment_shader *fragment_shader;
const struct brw_blend_state *blend;
const struct brw_rasterizer_state *rast;
const struct brw_depth_stencil_state *zstencil;
diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c
index 3ecaa74e4f9..693d8bfdf8d 100644
--- a/src/gallium/drivers/i965/brw_gs.c
+++ b/src/gallium/drivers/i965/brw_gs.c
@@ -148,7 +148,7 @@ static void populate_key( struct brw_context *brw,
memset(key, 0, sizeof(*key));
/* CACHE_NEW_VS_PROG */
- key->nr_attrs = brw->vs.prog_data->nr_outputs_written;
+ key->nr_attrs = brw->vs.prog_data->nr_outputs;
/* BRW_NEW_PRIMITIVE */
key->primitive = gs_prim[brw->primitive];
diff --git a/src/gallium/drivers/i965/brw_swtnl.c b/src/gallium/drivers/i965/brw_swtnl.c
index d2df8af9f40..464013e7c40 100644
--- a/src/gallium/drivers/i965/brw_swtnl.c
+++ b/src/gallium/drivers/i965/brw_swtnl.c
@@ -1,111 +1,93 @@
-/* XXX: could split the primitive list to fallback only on the
- * non-conformant primitives.
- */
-static GLboolean check_fallbacks( struct brw_context *brw,
- const struct _mesa_prim *prim,
- GLuint nr_prims )
+#include "brw_context.h"
+#include "brw_pipe_rast.h"
+
+
+static GLboolean need_swtnl( struct brw_context *brw )
{
- GLuint i;
+ const struct pipe_rasterizer_state *rast = &brw->curr.rast->templ;
/* If we don't require strict OpenGL conformance, never
* use fallbacks. If we're forcing fallbacks, always
* use fallfacks.
*/
if (brw->flags.no_swtnl)
- return GL_FALSE;
+ return FALSE;
if (brw->flags.force_swtnl)
- return GL_TRUE;
+ return TRUE;
- if (brw->curr.rast->tmpl.smooth_polys) {
- for (i = 0; i < nr_prims; i++)
- if (reduced_prim[prim[i].mode] == GL_TRIANGLES)
- return GL_TRUE;
+ /* Exceeding hw limits on number of VS inputs?
+ */
+ if (brw->curr.num_vertex_elements == 0 ||
+ brw->curr.num_vertex_elements >= BRW_VEP_MAX) {
+ return TRUE;
}
- /* BRW hardware will do AA lines, but they are non-conformant it
- * seems. TBD whether we keep this fallback:
+ /* Position array with zero stride?
+ *
+ * XXX: position isn't always at zero...
+ * XXX: eliminate zero-stride arrays
*/
- if (ctx->Line.SmoothFlag) {
- for (i = 0; i < nr_prims; i++)
- if (reduced_prim[prim[i].mode] == GL_LINES)
- return GL_TRUE;
+ {
+ int ve0_vb = brw->curr.vertex_element[0].vertex_buffer_index;
+
+ if (brw->curr.vertex_buffer[ve0_vb].stride == 0)
+ return TRUE;
}
- /* Stipple -- these fallbacks could be resolved with a little
- * bit of work?
+ /* XXX: short-circuit
*/
- if (ctx->Line.StippleFlag) {
- for (i = 0; i < nr_prims; i++) {
- /* GS doesn't get enough information to know when to reset
- * the stipple counter?!?
- */
- if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP)
- return GL_TRUE;
-
- if (prim[i].mode == GL_POLYGON &&
- (ctx->Polygon.FrontMode == GL_LINE ||
- ctx->Polygon.BackMode == GL_LINE))
- return GL_TRUE;
- }
- }
+ return FALSE;
- if (ctx->Point.SmoothFlag) {
- for (i = 0; i < nr_prims; i++)
- if (prim[i].mode == GL_POINTS)
- return GL_TRUE;
- }
+ if (brw->reduced_primitive == PIPE_PRIM_TRIANGLES) {
+ if (rast->poly_smooth)
+ return TRUE;
- /* BRW hardware doesn't handle GL_CLAMP texturing correctly;
- * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP
- * as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and
- * we want strict conformance, force the fallback.
- * Right now, we only do this for 2D textures.
- */
+ }
+
+ if (brw->reduced_primitive == PIPE_PRIM_LINES ||
+ (brw->reduced_primitive == PIPE_PRIM_TRIANGLES &&
+ (rast->fill_cw == PIPE_POLYGON_MODE_LINE ||
+ rast->fill_ccw == PIPE_POLYGON_MODE_LINE)))
{
- int u;
- for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
- if (texUnit->Enabled) {
- if (texUnit->Enabled & TEXTURE_1D_BIT) {
- if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) {
- return GL_TRUE;
- }
- }
- if (texUnit->Enabled & TEXTURE_2D_BIT) {
- if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP ||
- texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) {
- return GL_TRUE;
- }
- }
- if (texUnit->Enabled & TEXTURE_3D_BIT) {
- if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP ||
- texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP ||
- texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) {
- return GL_TRUE;
- }
- }
- }
- }
+ /* BRW hardware will do AA lines, but they are non-conformant it
+ * seems. TBD whether we keep this fallback:
+ */
+ if (rast->line_smooth)
+ return TRUE;
+
+ /* XXX: was a fallback in mesa (gs doesn't get enough
+ * information to know when to reset stipple counter), but there
+ * must be a way around it.
+ */
+ if (rast->line_stipple_enable &&
+ (brw->reduced_primitive == PIPE_PRIM_TRIANGLES ||
+ brw->primitive == PIPE_PRIM_LINE_LOOP ||
+ brw->primitive == PIPE_PRIM_LINE_STRIP))
+ return TRUE;
}
- /* Exceeding hw limits on number of VS inputs?
- */
- if (brw->nr_ve == 0 ||
- brw->nr_ve >= BRW_VEP_MAX) {
- return TRUE;
+
+ if (brw->reduced_primitive == PIPE_PRIM_POINTS ||
+ (brw->reduced_primitive == PIPE_PRIM_TRIANGLES &&
+ (rast->fill_cw == PIPE_POLYGON_MODE_POINT ||
+ rast->fill_ccw == PIPE_POLYGON_MODE_POINT)))
+ {
+ if (rast->point_smooth)
+ return TRUE;
}
- /* Position array with zero stride?
+ /* BRW hardware doesn't handle CLAMP texturing correctly;
+ * brw_wm_sampler_state:translate_wrap_mode() treats CLAMP
+ * as CLAMP_TO_EDGE instead. If we're using CLAMP, and
+ * we want strict conformance, force the fallback.
+ *
+ * XXX: need a workaround for this.
*/
- if (brw->vs[brw->ve[0]]->stride == 0)
- return TRUE;
-
-
/* Nothing stopping us from the fast path now */
- return GL_FALSE;
+ return FALSE;
}
diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c
index ff2466528df..57fd8f20b20 100644
--- a/src/gallium/drivers/i965/brw_urb.c
+++ b/src/gallium/drivers/i965/brw_urb.c
@@ -35,6 +35,7 @@
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
+#include "brw_debug.h"
#define VS 0
#define GS 1
@@ -111,7 +112,7 @@ static GLboolean check_urb_layout( struct brw_context *brw )
/* Most minimal update, forces re-emit of URB fence packet after GS
* unit turned on/off.
*/
-static void recalculate_urb_fence( struct brw_context *brw )
+static int recalculate_urb_fence( struct brw_context *brw )
{
GLuint csize = brw->curbe.total_size;
GLuint vsize = brw->vs.prog_data->urb_entry_size;
@@ -204,6 +205,8 @@ done:
brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
}
+
+ return 0;
}
diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c
index 010ac115d36..3965ca6c54e 100644
--- a/src/gallium/drivers/i965/brw_vs.c
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -28,17 +28,19 @@
* Authors:
* Keith Whitwell <[email protected]>
*/
-
+
+#include "tgsi/tgsi_dump.h"
#include "brw_context.h"
#include "brw_vs.h"
#include "brw_util.h"
#include "brw_state.h"
+#include "brw_pipe_rast.h"
static void do_vs_prog( struct brw_context *brw,
- struct brw_vertex_program *vp,
+ struct brw_vertex_shader *vp,
struct brw_vs_prog_key *key )
{
GLuint program_size;
@@ -51,16 +53,12 @@ static void do_vs_prog( struct brw_context *brw,
brw_init_compile(brw, &c.func);
c.vp = vp;
- c.prog_data.nr_outputs_written = vp->program.Base.OutputsWritten;
- c.prog_data.inputs_read = vp->program.Base.InputsRead;
-
- if (c.key.copy_edgeflag) {
- c.prog_data.nr_outputs_written |= 1<<VERT_RESULT_EDGE;
- c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG;
- }
+ c.prog_data.nr_outputs = vp->info.num_outputs;
+ c.prog_data.nr_inputs = vp->info.num_inputs;
+ c.prog_data.copy_edgeflag = c.key.copy_edgeflag;
if (0)
- tgsi_dump(&c.vp->tokens, 0);
+ tgsi_dump(c.vp->tokens, 0);
/* Emit GEN4 code.
*/
@@ -80,11 +78,10 @@ static void do_vs_prog( struct brw_context *brw,
}
-static void brw_upload_vs_prog(struct brw_context *brw)
+static int brw_upload_vs_prog(struct brw_context *brw)
{
struct brw_vs_prog_key key;
- struct brw_vertex_program *vp =
- (struct brw_vertex_program *)brw->vertex_program;
+ struct brw_vertex_shader *vp = brw->curr.vertex_shader;
memset(&key, 0, sizeof(key));
@@ -92,9 +89,9 @@ static void brw_upload_vs_prog(struct brw_context *brw)
* the inputs it asks for, whether they are varying or not.
*/
key.program_string_id = vp->id;
- key.nr_userclip = brw->nr_userclip;
- key.copy_edgeflag = (brw->rast->fill_ccw != PIPE_POLYGON_MODE_FILL ||
- brw->rast->fill_cw != PIPE_POLYGON_MODE_FILL);
+ key.nr_userclip = brw->curr.ucp.nr;
+ key.copy_edgeflag = (brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL ||
+ brw->curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL);
/* Make an early check for the key.
*/
@@ -105,6 +102,8 @@ static void brw_upload_vs_prog(struct brw_context *brw)
&brw->vs.prog_data);
if (brw->vs.prog_bo == NULL)
do_vs_prog(brw, vp, &key);
+
+ return 0;
}
diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h
index e33fa2f0aad..58119567dcf 100644
--- a/src/gallium/drivers/i965/brw_vs.h
+++ b/src/gallium/drivers/i965/brw_vs.h
@@ -46,17 +46,22 @@ struct brw_vs_prog_key {
};
+
+#define MAX_IF_DEPTH 32
+#define MAX_LOOP_DEPTH 32
+
struct brw_vs_compile {
struct brw_compile func;
struct brw_vs_prog_key key;
struct brw_vs_prog_data prog_data;
- struct brw_vertex_program *vp;
+ struct brw_vertex_shader *vp;
GLuint nr_inputs;
+ GLuint nr_outputs;
+ GLboolean copy_edgeflag;
GLuint first_output;
- GLuint nr_outputs;
GLuint first_overflow_output; /**< VERT_ATTRIB_x */
GLuint first_tmp;
@@ -80,8 +85,13 @@ struct brw_vs_compile {
GLint index;
struct brw_reg reg;
} current_const[3];
+
+ struct brw_instruction *if_inst[MAX_IF_DEPTH];
+ struct brw_instruction *loop_inst[MAX_LOOP_DEPTH];
+
};
+
void brw_vs_emit( struct brw_vs_compile *c );
#endif
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 04132a167bc..4daa98b29e4 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -28,11 +28,25 @@
* Authors:
* Keith Whitwell <[email protected]>
*/
-
#include "pipe/p_shader_tokens.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "tgsi/tgsi_ureg.h"
+
#include "brw_context.h"
#include "brw_vs.h"
+#include "brw_debug.h"
+
+
+struct ureg_instruction {
+ unsigned opcode:8;
+ unsigned tex_target:3;
+ struct ureg_dst dst;
+ struct ureg_src src[3];
+};
static struct brw_reg get_tmp( struct brw_vs_compile *c )
@@ -72,8 +86,8 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
* works if everything fits in the GRF.
* XXX this heuristic/check may need some fine tuning...
*/
- if (c->vp->program.Base.Parameters->NumParameters +
- c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF)
+ if (c->vp->info.file_max[TGSI_FILE_CONSTANT] +
+ c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 21 > BRW_MAX_GRF)
c->vp->use_const_buffer = GL_TRUE;
else
c->vp->use_const_buffer = GL_FALSE;
@@ -106,25 +120,21 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
}
else {
/* use a section of the GRF for constants */
- GLuint nr_params = c->vp->program.Base.Parameters->NumParameters;
+ GLuint nr_params = c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1;
for (i = 0; i < nr_params; i++) {
- c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+ c->regs[TGSI_FILE_CONSTANT][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
}
reg += (nr_params + 1) / 2;
c->prog_data.curb_read_length = reg - 1;
-
c->prog_data.nr_params = nr_params * 4;
}
/* Allocate input regs:
*/
- c->nr_inputs = 0;
- for (i = 0; i < VERT_ATTRIB_MAX; i++) {
- if (c->prog_data.inputs_read & (1 << i)) {
- c->nr_inputs++;
- c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0);
- reg++;
- }
+ c->nr_inputs = c->vp->info.num_inputs;
+ for (i = 0; i < c->nr_inputs; i++) {
+ c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
}
/* If there are no inputs, we'll still be reading one attribute's worth
@@ -144,45 +154,51 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
else
mrf = 4;
- for (i = 0; i < c->prog_data.nr_outputs_written; i++) {
- c->nr_outputs++;
- assert(i < Elements(c->regs[PROGRAM_OUTPUT]));
- if (i == VERT_RESULT_HPOS) {
- c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+ /* XXX: need to access vertex output semantics here:
+ */
+ c->nr_outputs = c->prog_data.nr_outputs;
+ for (i = 0; i < c->prog_data.nr_outputs; i++) {
+ assert(i < Elements(c->regs[TGSI_FILE_OUTPUT]));
+
+ /* XXX: Hardwire position to zero:
+ */
+ if (i == 0) {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
reg++;
}
- else if (i == VERT_RESULT_PSIZ) {
- c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+ /* XXX: disable psiz:
+ */
+ else if (0) {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
reg++;
mrf++; /* just a placeholder? XXX fix later stages & remove this */
}
+ else if (mrf < 16) {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
+ mrf++;
+ }
else {
- if (mrf < 16) {
- c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf);
- mrf++;
- }
- else {
- /* too many vertex results to fit in MRF, use GRF for overflow */
- if (!c->first_overflow_output)
- c->first_overflow_output = i;
- c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
- reg++;
- }
+ /* too many vertex results to fit in MRF, use GRF for overflow */
+ if (!c->first_overflow_output)
+ c->first_overflow_output = i;
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
+ reg++;
}
}
/* Allocate program temporaries:
*/
- for (i = 0; i < c->vp->program.Base.NumTemporaries; i++) {
- c->regs[PROGRAM_TEMPORARY][i] = brw_vec8_grf(reg, 0);
+
+ for (i = 0; i < c->vp->info.file_max[TGSI_FILE_TEMPORARY]+1; i++) {
+ c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0);
reg++;
}
/* Address reg(s). Don't try to use the internal address reg until
* deref time.
*/
- for (i = 0; i < c->vp->program.Base.NumAddressRegs; i++) {
- c->regs[PROGRAM_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE,
+ for (i = 0; i < c->vp->info.file_max[TGSI_FILE_ADDRESS]+1; i++) {
+ c->regs[TGSI_FILE_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE,
reg,
0,
BRW_REGISTER_TYPE_D,
@@ -243,8 +259,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
c->prog_data.total_grf = reg;
if (BRW_DEBUG & DEBUG_VS) {
- debug_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
- debug_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
+ debug_printf("%s NumAddrRegs %d\n", __FUNCTION__,
+ c->vp->info.file_max[TGSI_FILE_ADDRESS]+1);
+ debug_printf("%s NumTemps %d\n", __FUNCTION__,
+ c->vp->info.file_max[TGSI_FILE_TEMPORARY]+1);
debug_printf("%s reg = %d\n", __FUNCTION__, reg);
}
}
@@ -740,25 +758,25 @@ static void emit_nrm( struct brw_vs_compile *c,
static struct brw_reg
get_constant(struct brw_vs_compile *c,
- const struct prog_instruction *inst,
+ const struct ureg_instruction *inst,
GLuint argIndex)
{
- const struct prog_src_register *src = &inst->SrcReg[argIndex];
+ const struct ureg_src src = inst->src[argIndex];
struct brw_compile *p = &c->func;
struct brw_reg const_reg;
struct brw_reg const2_reg;
- const GLboolean relAddr = src->RelAddr;
+ const GLboolean relAddr = src.Indirect;
assert(argIndex < 3);
- if (c->current_const[argIndex].index != src->Index || relAddr) {
- struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
+ if (c->current_const[argIndex].index != src.Index || relAddr) {
+ struct brw_reg addrReg = c->regs[TGSI_FILE_ADDRESS][0];
- c->current_const[argIndex].index = src->Index;
+ c->current_const[argIndex].index = src.Index;
#if 0
printf(" fetch const[%d] for arg %d into reg %d\n",
- src->Index, argIndex, c->current_const[argIndex].reg.nr);
+ src.Index, argIndex, c->current_const[argIndex].reg.nr);
#endif
/* need to fetch the constant now */
brw_dp_READ_4_vs(p,
@@ -766,7 +784,7 @@ get_constant(struct brw_vs_compile *c,
0, /* oword */
relAddr, /* relative indexing? */
addrReg, /* address register */
- 16 * src->Index, /* byte offset */
+ 16 * src.Index, /* byte offset */
SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
);
@@ -783,7 +801,7 @@ get_constant(struct brw_vs_compile *c,
1, /* oword */
relAddr, /* relative indexing? */
addrReg, /* address register */
- 16 * src->Index, /* byte offset */
+ 16 * src.Index, /* byte offset */
SURF_INDEX_VERT_CONST_BUFFER
);
}
@@ -813,30 +831,24 @@ get_constant(struct brw_vs_compile *c,
/* TODO: relative addressing!
*/
static struct brw_reg get_reg( struct brw_vs_compile *c,
- gl_register_file file,
+ enum tgsi_file_type file,
GLuint index )
{
switch (file) {
- case PROGRAM_TEMPORARY:
- case PROGRAM_INPUT:
- case PROGRAM_OUTPUT:
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_OUTPUT:
+ case TGSI_FILE_CONSTANT:
assert(c->regs[file][index].nr != 0);
return c->regs[file][index];
- case PROGRAM_STATE_VAR:
- case PROGRAM_CONSTANT:
- case PROGRAM_UNIFORM:
- assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
- return c->regs[PROGRAM_STATE_VAR][index];
- case PROGRAM_ADDRESS:
+
+ case TGSI_FILE_ADDRESS:
assert(index == 0);
return c->regs[file][index];
- case PROGRAM_UNDEFINED: /* undef values */
+ case TGSI_FILE_NULL: /* undef values */
return brw_null_reg();
- case PROGRAM_LOCAL_PARAM:
- case PROGRAM_ENV_PARAM:
- case PROGRAM_WRITE_ONLY:
default:
assert(0);
return brw_null_reg();
@@ -853,7 +865,7 @@ static struct brw_reg deref( struct brw_vs_compile *c,
{
struct brw_compile *p = &c->func;
struct brw_reg tmp = vec4(get_tmp(c));
- struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+ struct brw_reg addr_reg = c->regs[TGSI_FILE_ADDRESS][0];
struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
struct brw_reg indirect = brw_vec4_indirect(0,0);
@@ -886,17 +898,17 @@ static struct brw_reg deref( struct brw_vs_compile *c,
*/
static struct brw_reg
get_src_reg( struct brw_vs_compile *c,
- const struct prog_instruction *inst,
+ const struct ureg_instruction *inst,
GLuint argIndex )
{
- const GLuint file = inst->SrcReg[argIndex].File;
- const GLint index = inst->SrcReg[argIndex].Index;
- const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr;
+ const GLuint file = inst->src[argIndex].File;
+ const GLint index = inst->src[argIndex].Index;
+ const GLboolean relAddr = inst->src[argIndex].Indirect;
switch (file) {
- case PROGRAM_TEMPORARY:
- case PROGRAM_INPUT:
- case PROGRAM_OUTPUT:
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_INPUT:
+ case TGSI_FILE_OUTPUT:
if (relAddr) {
return deref(c, c->regs[file][0], index);
}
@@ -905,30 +917,25 @@ get_src_reg( struct brw_vs_compile *c,
return c->regs[file][index];
}
- case PROGRAM_STATE_VAR:
- case PROGRAM_CONSTANT:
- case PROGRAM_UNIFORM:
- case PROGRAM_ENV_PARAM:
+ case TGSI_FILE_CONSTANT:
if (c->vp->use_const_buffer) {
return get_constant(c, inst, argIndex);
}
else if (relAddr) {
- return deref(c, c->regs[PROGRAM_STATE_VAR][0], index);
+ return deref(c, c->regs[TGSI_FILE_CONSTANT][0], index);
}
else {
- assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
- return c->regs[PROGRAM_STATE_VAR][index];
+ assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0);
+ return c->regs[TGSI_FILE_CONSTANT][index];
}
- case PROGRAM_ADDRESS:
+ case TGSI_FILE_ADDRESS:
assert(index == 0);
return c->regs[file][index];
- case PROGRAM_UNDEFINED:
+ case TGSI_FILE_NULL:
/* this is a normal case since we loop over all three src args */
return brw_null_reg();
- case PROGRAM_LOCAL_PARAM:
- case PROGRAM_WRITE_ONLY:
default:
assert(0);
return brw_null_reg();
@@ -959,27 +966,27 @@ static void emit_arl( struct brw_vs_compile *c,
* Return the brw reg for the given instruction's src argument.
*/
static struct brw_reg get_arg( struct brw_vs_compile *c,
- const struct prog_instruction *inst,
+ const struct ureg_instruction *inst,
GLuint argIndex )
{
- const struct prog_src_register *src = &inst->SrcReg[argIndex];
+ const struct ureg_src src = inst->src[argIndex];
struct brw_reg reg;
- if (src->File == PROGRAM_UNDEFINED)
+ if (src.File == TGSI_FILE_NULL)
return brw_null_reg();
reg = get_src_reg(c, inst, argIndex);
/* Convert 3-bit swizzle to 2-bit.
*/
- reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0),
- GET_SWZ(src->Swizzle, 1),
- GET_SWZ(src->Swizzle, 2),
- GET_SWZ(src->Swizzle, 3));
+ reg.dw1.bits.swizzle = BRW_SWIZZLE4(src.SwizzleX,
+ src.SwizzleY,
+ src.SwizzleZ,
+ src.SwizzleW);
/* Note this is ok for non-swizzle instructions:
*/
- reg.negate = src->Negate ? 1 : 0;
+ reg.negate = src.Negate ? 1 : 0;
return reg;
}
@@ -989,21 +996,21 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
* Get brw register for the given program dest register.
*/
static struct brw_reg get_dst( struct brw_vs_compile *c,
- struct prog_dst_register dst )
+ struct ureg_dst dst )
{
struct brw_reg reg;
switch (dst.File) {
- case PROGRAM_TEMPORARY:
- case PROGRAM_OUTPUT:
+ case TGSI_FILE_TEMPORARY:
+ case TGSI_FILE_OUTPUT:
assert(c->regs[dst.File][dst.Index].nr != 0);
reg = c->regs[dst.File][dst.Index];
break;
- case PROGRAM_ADDRESS:
+ case TGSI_FILE_ADDRESS:
assert(dst.Index == 0);
reg = c->regs[dst.File][dst.Index];
break;
- case PROGRAM_UNDEFINED:
+ case TGSI_FILE_NULL:
/* we may hit this for OPCODE_END, OPCODE_KIL, etc */
reg = brw_null_reg();
break;
@@ -1027,15 +1034,16 @@ static void emit_vertex_write( struct brw_vs_compile *c)
{
struct brw_compile *p = &c->func;
struct brw_reg m0 = brw_message_reg(0);
- struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS];
+ struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS];
struct brw_reg ndc;
int eot;
GLuint len_vertext_header = 2;
if (c->key.copy_edgeflag) {
+ assert(0);
brw_MOV(p,
- get_reg(c, PROGRAM_OUTPUT, VERT_RESULT_EDGE),
- get_reg(c, PROGRAM_INPUT, VERT_ATTRIB_EDGEFLAG));
+ get_reg(c, TGSI_FILE_OUTPUT, 0),
+ get_reg(c, TGSI_FILE_INPUT, 0));
}
/* Build ndc coords */
@@ -1060,7 +1068,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
brw_set_access_mode(p, BRW_ALIGN_16);
if (c->prog_data.writes_psiz) {
- struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ];
+ struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_PSIZ];
brw_MUL(p, brw_writemask(header1, BRW_WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
brw_AND(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
}
@@ -1138,7 +1146,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
eot, /* writes complete */
0, /* urb destination offset */
BRW_URB_SWIZZLE_INTERLEAVE);
-!
+
if (c->first_overflow_output > 0) {
/* Not all of the vertex outputs/results fit into the MRF.
* Move the overflowed attributes from the GRF to the MRF and
@@ -1148,9 +1156,9 @@ static void emit_vertex_write( struct brw_vs_compile *c)
* at mrf[4] atm...
*/
GLuint i, mrf = 0;
- for (i = c->first_overflow_output; i < c->prog_data.nr_outputs_written; i++) {
+ for (i = c->first_overflow_output; i < c->prog_data.nr_outputs; i++) {
/* move from GRF to MRF */
- brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]);
+ brw_MOV(p, brw_message_reg(4+mrf), c->regs[TGSI_FILE_OUTPUT][i]);
mrf++;
}
@@ -1195,9 +1203,9 @@ post_vs_emit( struct brw_vs_compile *c,
}
static uint32_t
-get_predicate(const struct prog_instruction *inst)
+get_predicate(const struct ureg_instruction *inst)
{
- if (inst->DstReg.CondMask == COND_TR)
+ if (inst->dst.CondMask == COND_TR)
return BRW_PREDICATE_NONE;
/* All of GLSL only produces predicates for COND_NE and one channel per
@@ -1213,9 +1221,9 @@ get_predicate(const struct prog_instruction *inst)
* predicate on that. We can probably support this, but it won't
* necessarily be easy.
*/
- assert(inst->DstReg.CondMask == COND_NE);
+/* assert(inst->dst.CondMask == COND_NE); */
- switch (inst->DstReg.CondSwizzle) {
+ switch (inst->dst.CondSwizzle) {
case SWIZZLE_XXXX:
return BRW_PREDICATE_ALIGN16_REPLICATE_X;
case SWIZZLE_YYYY:
@@ -1225,26 +1233,281 @@ get_predicate(const struct prog_instruction *inst)
case SWIZZLE_WWWW:
return BRW_PREDICATE_ALIGN16_REPLICATE_W;
default:
- _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n",
- inst->DstReg.CondMask);
+ debug_printf("Unexpected predicate: 0x%08x\n",
+ inst->dst.CondMask);
return BRW_PREDICATE_NORMAL;
}
}
+static void emit_insn(struct brw_vs_compile *c,
+ const struct tgsi_full_instruction *insn)
+{
+ struct brw_reg args[3], dst;
+ GLuint i;
+
+#if 0
+ printf("%d: ", insn);
+ _mesa_print_instruction(inst);
+#endif
+
+ /* Get argument regs.
+ */
+ for (i = 0; i < 3; i++) {
+ const struct ureg_src src = inst->src[i];
+ index = src.Index;
+ file = src.File;
+ args[i] = get_arg(c, inst, i);
+ }
+
+ /* Get dest regs. Note that it is possible for a reg to be both
+ * dst and arg, given the static allocation of registers. So
+ * care needs to be taken emitting multi-operation instructions.
+ */
+ index = inst->dst.Index;
+ file = inst->dst.File;
+ dst = get_dst(c, inst->dst);
+
+ if (inst->SaturateMode != SATURATE_OFF) {
+ debug_printf("Unsupported saturate %d in vertex shader",
+ inst->SaturateMode);
+ }
+
+ switch (inst->Opcode) {
+ case TGSI_OPCODE_ABS:
+ brw_MOV(p, dst, brw_abs(args[0]));
+ break;
+ case TGSI_OPCODE_ADD:
+ brw_ADD(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_COS:
+ emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_DP3:
+ brw_DP3(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_DP4:
+ brw_DP4(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_DPH:
+ brw_DPH(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_NRM3:
+ emit_nrm(c, dst, args[0], 3);
+ break;
+ case TGSI_OPCODE_NRM4:
+ emit_nrm(c, dst, args[0], 4);
+ break;
+ case TGSI_OPCODE_DST:
+ unalias2(c, dst, args[0], args[1], emit_dst_noalias);
+ break;
+ case TGSI_OPCODE_EXP:
+ unalias1(c, dst, args[0], emit_exp_noalias);
+ break;
+ case TGSI_OPCODE_EX2:
+ emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_ARL:
+ emit_arl(c, dst, args[0]);
+ break;
+ case TGSI_OPCODE_FLR:
+ brw_RNDD(p, dst, args[0]);
+ break;
+ case TGSI_OPCODE_FRC:
+ brw_FRC(p, dst, args[0]);
+ break;
+ case TGSI_OPCODE_LOG:
+ unalias1(c, dst, args[0], emit_log_noalias);
+ break;
+ case TGSI_OPCODE_LG2:
+ emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_LIT:
+ unalias1(c, dst, args[0], emit_lit_noalias);
+ break;
+ case TGSI_OPCODE_LRP:
+ unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias);
+ break;
+ case TGSI_OPCODE_MAD:
+ brw_MOV(p, brw_acc_reg(), args[2]);
+ brw_MAC(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_MAX:
+ emit_max(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_MIN:
+ emit_min(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_MOV:
+ brw_MOV(p, dst, args[0]);
+ break;
+ case TGSI_OPCODE_MUL:
+ brw_MUL(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_POW:
+ emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_RCP:
+ emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_RSQ:
+ emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_SEQ:
+ emit_seq(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SIN:
+ emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL);
+ break;
+ case TGSI_OPCODE_SNE:
+ emit_sne(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SGE:
+ emit_sge(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SGT:
+ emit_sgt(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SLT:
+ emit_slt(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SLE:
+ emit_sle(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_SUB:
+ brw_ADD(p, dst, args[0], negate(args[1]));
+ break;
+ case TGSI_OPCODE_TRUNC:
+ /* round toward zero */
+ brw_RNDZ(p, dst, args[0]);
+ break;
+ case TGSI_OPCODE_XPD:
+ emit_xpd(p, dst, args[0], args[1]);
+ break;
+ case TGSI_OPCODE_IF:
+ assert(if_depth < MAX_IF_DEPTH);
+ if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
+ /* Note that brw_IF smashes the predicate_control field. */
+ if_inst[if_depth]->header.predicate_control = get_predicate(inst);
+ if_depth++;
+ break;
+ case TGSI_OPCODE_ELSE:
+ if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
+ break;
+ case TGSI_OPCODE_ENDIF:
+ assert(if_depth > 0);
+ brw_ENDIF(p, if_inst[--if_depth]);
+ break;
+ case TGSI_OPCODE_BGNLOOP:
+ loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
+ break;
+ case TGSI_OPCODE_BRK:
+ brw_set_predicate_control(p, get_predicate(inst));
+ brw_BREAK(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case TGSI_OPCODE_CONT:
+ brw_set_predicate_control(p, get_predicate(inst));
+ brw_CONT(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case TGSI_OPCODE_ENDLOOP:
+ {
+ struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
+ loop_depth--;
+
+ if (BRW_IS_IGDNG(brw))
+ br = 2;
+
+ inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+ /* patch all the BREAK/CONT instructions from last BEGINLOOP */
+ while (inst0 > loop_inst[loop_depth]) {
+ inst0--;
+ if (inst0->header.opcode == TGSI_OPCODE_BRK) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ inst0->bits3.if_else.pop_count = 0;
+ }
+ else if (inst0->header.opcode == TGSI_OPCODE_CONT) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ inst0->bits3.if_else.pop_count = 0;
+ }
+ }
+ }
+ break;
+ case TGSI_OPCODE_BRA:
+ brw_set_predicate_control(p, get_predicate(inst));
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case TGSI_OPCODE_CAL:
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_ADD(p, get_addr_reg(stack_index),
+ get_addr_reg(stack_index), brw_imm_d(4));
+ brw_save_call(p, inst->Comment, p->nr_insn);
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ break;
+ case TGSI_OPCODE_RET:
+ brw_ADD(p, get_addr_reg(stack_index),
+ get_addr_reg(stack_index), brw_imm_d(-4));
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0));
+ brw_set_access_mode(p, BRW_ALIGN_16);
+ break;
+ case TGSI_OPCODE_END:
+ end_offset = p->nr_insn;
+ /* this instruction will get patched later to jump past subroutine
+ * code, etc.
+ */
+ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+ break;
+ case TGSI_OPCODE_PRINT:
+ /* no-op */
+ break;
+ case TGSI_OPCODE_BGNSUB:
+ brw_save_label(p, inst->Comment, p->nr_insn);
+ break;
+ case TGSI_OPCODE_ENDSUB:
+ /* no-op */
+ break;
+ default:
+ debug_printf("Unsupported opcode %i (%s) in vertex shader",
+ inst->Opcode, inst->Opcode < MAX_OPCODE ?
+ _mesa_opcode_string(inst->Opcode) :
+ "unknown");
+ }
+
+ /* Set the predication update on the last instruction of the native
+ * instruction sequence.
+ *
+ * This would be problematic if it was set on a math instruction,
+ * but that shouldn't be the case with the current GLSL compiler.
+ */
+ if (inst->CondUpdate) {
+ struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1];
+
+ assert(hw_insn->header.destreg__conditionalmod == 0);
+ hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ;
+ }
+
+ release_tmps(c);
+}
+
+
/* Emit the vertex program instructions here.
*/
void brw_vs_emit(struct brw_vs_compile *c )
{
-#define MAX_IF_DEPTH 32
-#define MAX_LOOP_DEPTH 32
struct brw_compile *p = &c->func;
struct brw_context *brw = p->brw;
- const GLuint nr_insns = c->vp->program.Base.NumInstructions;
GLuint insn, if_depth = 0, loop_depth = 0;
GLuint end_offset = 0;
struct brw_instruction *end_inst, *last_inst;
- struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
const struct brw_indirect stack_index = brw_indirect(0, 0);
+ struct tgsi_parse_context parse;
+ struct tgsi_full_declaration *decl;
GLuint index;
GLuint file;
@@ -1264,258 +1527,8 @@ void brw_vs_emit(struct brw_vs_compile *c )
for (insn = 0; insn < nr_insns; insn++) {
- const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
- struct brw_reg args[3], dst;
- GLuint i;
+ const struct ureg_instruction *inst = &c->vp->program.Base.Instructions[insn];
-#if 0
- printf("%d: ", insn);
- _mesa_print_instruction(inst);
-#endif
-
- /* Get argument regs.
- */
- for (i = 0; i < 3; i++) {
- const struct prog_src_register *src = &inst->SrcReg[i];
- index = src->Index;
- file = src->File;
- args[i] = get_arg(c, inst, i);
- }
-
- /* Get dest regs. Note that it is possible for a reg to be both
- * dst and arg, given the static allocation of registers. So
- * care needs to be taken emitting multi-operation instructions.
- */
- index = inst->DstReg.Index;
- file = inst->DstReg.File;
- dst = get_dst(c, inst->DstReg);
-
- if (inst->SaturateMode != SATURATE_OFF) {
- _mesa_problem(NULL, "Unsupported saturate %d in vertex shader",
- inst->SaturateMode);
- }
-
- switch (inst->Opcode) {
- case TGSI_OPCODE_ABS:
- brw_MOV(p, dst, brw_abs(args[0]));
- break;
- case TGSI_OPCODE_ADD:
- brw_ADD(p, dst, args[0], args[1]);
- break;
- case TGSI_OPCODE_COS:
- emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL);
- break;
- case TGSI_OPCODE_DP3:
- brw_DP3(p, dst, args[0], args[1]);
- break;
- case TGSI_OPCODE_DP4:
- brw_DP4(p, dst, args[0], args[1]);
- break;
- case TGSI_OPCODE_DPH:
- brw_DPH(p, dst, args[0], args[1]);
- break;
- case TGSI_OPCODE_NRM3:
- emit_nrm(c, dst, args[0], 3);
- break;
- case TGSI_OPCODE_NRM4:
- emit_nrm(c, dst, args[0], 4);
- break;
- case TGSI_OPCODE_DST:
- unalias2(c, dst, args[0], args[1], emit_dst_noalias);
- break;
- case TGSI_OPCODE_EXP:
- unalias1(c, dst, args[0], emit_exp_noalias);
- break;
- case TGSI_OPCODE_EX2:
- emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
- break;
- case TGSI_OPCODE_ARL:
- emit_arl(c, dst, args[0]);
- break;
- case TGSI_OPCODE_FLR:
- brw_RNDD(p, dst, args[0]);
- break;
- case TGSI_OPCODE_FRC:
- brw_FRC(p, dst, args[0]);
- break;
- case TGSI_OPCODE_LOG:
- unalias1(c, dst, args[0], emit_log_noalias);
- break;
- case TGSI_OPCODE_LG2:
- emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
- break;
- case TGSI_OPCODE_LIT:
- unalias1(c, dst, args[0], emit_lit_noalias);
- break;
- case TGSI_OPCODE_LRP:
- unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias);
- break;
- case TGSI_OPCODE_MAD:
- brw_MOV(p, brw_acc_reg(), args[2]);
- brw_MAC(p, dst, args[0], args[1]);
- break;
- case TGSI_OPCODE_MAX:
- emit_max(p, dst, args[0], args[1]);
- break;
- case TGSI_OPCODE_MIN:
- emit_min(p, dst, args[0], args[1]);
- break;
- case TGSI_OPCODE_MOV:
- brw_MOV(p, dst, args[0]);
- break;
- case TGSI_OPCODE_MUL:
- brw_MUL(p, dst, args[0], args[1]);
- break;
- case TGSI_OPCODE_POW:
- emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL);
- break;
- case TGSI_OPCODE_RCP:
- emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
- break;
- case TGSI_OPCODE_RSQ:
- emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
- break;
- case TGSI_OPCODE_SEQ:
- emit_seq(p, dst, args[0], args[1]);
- break;
- case TGSI_OPCODE_SIN:
- emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL);
- break;
- case TGSI_OPCODE_SNE:
- emit_sne(p, dst, args[0], args[1]);
- break;
- case TGSI_OPCODE_SGE:
- emit_sge(p, dst, args[0], args[1]);
- break;
- case TGSI_OPCODE_SGT:
- emit_sgt(p, dst, args[0], args[1]);
- break;
- case TGSI_OPCODE_SLT:
- emit_slt(p, dst, args[0], args[1]);
- break;
- case TGSI_OPCODE_SLE:
- emit_sle(p, dst, args[0], args[1]);
- break;
- case TGSI_OPCODE_SUB:
- brw_ADD(p, dst, args[0], negate(args[1]));
- break;
- case TGSI_OPCODE_TRUNC:
- /* round toward zero */
- brw_RNDZ(p, dst, args[0]);
- break;
- case TGSI_OPCODE_XPD:
- emit_xpd(p, dst, args[0], args[1]);
- break;
- case TGSI_OPCODE_IF:
- assert(if_depth < MAX_IF_DEPTH);
- if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
- /* Note that brw_IF smashes the predicate_control field. */
- if_inst[if_depth]->header.predicate_control = get_predicate(inst);
- if_depth++;
- break;
- case TGSI_OPCODE_ELSE:
- if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
- break;
- case TGSI_OPCODE_ENDIF:
- assert(if_depth > 0);
- brw_ENDIF(p, if_inst[--if_depth]);
- break;
- case TGSI_OPCODE_BGNLOOP:
- loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
- break;
- case TGSI_OPCODE_BRK:
- brw_set_predicate_control(p, get_predicate(inst));
- brw_BREAK(p);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- break;
- case TGSI_OPCODE_CONT:
- brw_set_predicate_control(p, get_predicate(inst));
- brw_CONT(p);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- break;
- case TGSI_OPCODE_ENDLOOP:
- {
- struct brw_instruction *inst0, *inst1;
- GLuint br = 1;
-
- loop_depth--;
-
- if (BRW_IS_IGDNG(brw))
- br = 2;
-
- inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
- /* patch all the BREAK/CONT instructions from last BEGINLOOP */
- while (inst0 > loop_inst[loop_depth]) {
- inst0--;
- if (inst0->header.opcode == BRW_TGSI_OPCODE_BREAK) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
- inst0->bits3.if_else.pop_count = 0;
- }
- else if (inst0->header.opcode == BRW_TGSI_OPCODE_CONTINUE) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
- inst0->bits3.if_else.pop_count = 0;
- }
- }
- }
- break;
- case TGSI_OPCODE_BRA:
- brw_set_predicate_control(p, get_predicate(inst));
- brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- break;
- case TGSI_OPCODE_CAL:
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
- brw_set_access_mode(p, BRW_ALIGN_16);
- brw_ADD(p, get_addr_reg(stack_index),
- get_addr_reg(stack_index), brw_imm_d(4));
- brw_save_call(p, inst->Comment, p->nr_insn);
- brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
- break;
- case TGSI_OPCODE_RET:
- brw_ADD(p, get_addr_reg(stack_index),
- get_addr_reg(stack_index), brw_imm_d(-4));
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0));
- brw_set_access_mode(p, BRW_ALIGN_16);
- break;
- case TGSI_OPCODE_END:
- end_offset = p->nr_insn;
- /* this instruction will get patched later to jump past subroutine
- * code, etc.
- */
- brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
- break;
- case TGSI_OPCODE_PRINT:
- /* no-op */
- break;
- case TGSI_OPCODE_BGNSUB:
- brw_save_label(p, inst->Comment, p->nr_insn);
- break;
- case TGSI_OPCODE_ENDSUB:
- /* no-op */
- break;
- default:
- _mesa_problem(NULL, "Unsupported opcode %i (%s) in vertex shader",
- inst->Opcode, inst->Opcode < MAX_OPCODE ?
- _mesa_opcode_string(inst->Opcode) :
- "unknown");
- }
-
- /* Set the predication update on the last instruction of the native
- * instruction sequence.
- *
- * This would be problematic if it was set on a math instruction,
- * but that shouldn't be the case with the current GLSL compiler.
- */
- if (inst->CondUpdate) {
- struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1];
-
- assert(hw_insn->header.destreg__conditionalmod == 0);
- hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ;
- }
-
- release_tmps(c);
}
end_inst = &p->store[end_offset];
diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c
index 3118e615f91..23f7ba16fd0 100644
--- a/src/gallium/drivers/i965/brw_wm_glsl.c
+++ b/src/gallium/drivers/i965/brw_wm_glsl.c
@@ -167,8 +167,8 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component,
case PROGRAM_PAYLOAD:
break;
default:
- _mesa_problem(NULL, "Unexpected file in get_reg()");
- return brw_null_reg();
+ debug_printf("Unexpected file in get_reg()");
+ return brw_null_reg();
}
assert(index < 256);
diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c
index e1ed6438dca..7157feb6f39 100644
--- a/src/gallium/drivers/i965/brw_wm_surface_state.c
+++ b/src/gallium/drivers/i965/brw_wm_surface_state.c
@@ -516,8 +516,11 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
break;
default:
- _mesa_problem(ctx, "Bad renderbuffer format: %d\n",
- irb->texformat->MesaFormat);
+ debug_printf("Bad renderbuffer format: %d\n",
+ irb->texformat->MesaFormat);
+ assert(0);
+ key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ return;
}
key.tiling = region->tiling;
if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) {