diff options
Diffstat (limited to 'src/mesa/drivers')
100 files changed, 2601 insertions, 2588 deletions
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index ba8be125718..cdb2500f7c2 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -266,13 +266,16 @@ struct gen_mipmap_state GLuint FBO; }; - +#define MAX_META_OPS_DEPTH 2 /** * All per-context meta state. */ struct gl_meta_state { - struct save_state Save; /**< state saved during meta-ops */ + /** Stack of state saved during meta-ops */ + struct save_state Save[MAX_META_OPS_DEPTH]; + /** Save stack depth */ + GLuint SaveStackDepth; struct temp_texture TempTex; @@ -324,8 +327,13 @@ _mesa_meta_free(struct gl_context *ctx) static void _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) { - struct save_state *save = &ctx->Meta->Save; + struct save_state *save; + + /* hope MAX_META_OPS_DEPTH is large enough */ + assert(ctx->Meta->SaveStackDepth < MAX_META_OPS_DEPTH); + save = &ctx->Meta->Save[ctx->Meta->SaveStackDepth++]; + memset(save, 0, sizeof(*save)); save->SavedState = state; if (state & META_ALPHA_TEST) { @@ -575,7 +583,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) static void _mesa_meta_end(struct gl_context *ctx) { - struct save_state *save = &ctx->Meta->Save; + struct save_state *save = &ctx->Meta->Save[--ctx->Meta->SaveStackDepth]; const GLbitfield state = save->SavedState; if (state & META_ALPHA_TEST) { @@ -1398,6 +1406,7 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers) struct vertex verts[4]; /* save all state but scissor, pixel pack/unpack */ GLbitfield metaSave = META_ALL - META_SCISSOR - META_PIXEL_STORE; + const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1; if (buffers & BUFFER_BITS_COLOR) { /* if clearing color buffers, don't save/restore colormask */ @@ -1453,7 +1462,7 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers) _mesa_StencilOpSeparate(GL_FRONT_AND_BACK, GL_REPLACE, GL_REPLACE, GL_REPLACE); _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS, - ctx->Stencil.Clear & 0x7fffffff, + ctx->Stencil.Clear & stencilMax, ctx->Stencil.WriteMask[0]); } else { diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index f943f81dd05..f32f3cf6020 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -176,6 +176,7 @@ i915CreateContext(int api, ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitCondCodes = GL_TRUE; ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoIfs = GL_TRUE; ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoNoise = GL_TRUE; + ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoPow = GL_TRUE; ctx->Const.MaxDrawBuffers = 1; diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index c00ee415b6b..7a9fb7f088b 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -569,10 +569,14 @@ upload_program(struct i915_fragment_program *p) if (inst->DstReg.CondMask == COND_TR) { tmp = i915_get_utemp(p); + /* The KIL instruction discards the fragment if any component of + * the source is < 0. Emit an immediate operand of {-1}.xywz. + */ i915_emit_texld(p, get_live_regs(p, inst), tmp, A0_DEST_CHANNEL_ALL, 0, /* use a dummy dest reg */ - swizzle(tmp, ONE, ONE, ONE, ONE), /* always */ + negate(swizzle(tmp, ONE, ONE, ONE, ONE), + 1, 1, 1, 1), T0_TEXKILL); } else { p->error = 1; diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index e3ca863fe51..7c3ac0c14ef 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -81,7 +81,6 @@ DRIVER_SOURCES = \ brw_wm_emit.c \ brw_wm_fp.c \ brw_wm_iz.c \ - brw_wm_glsl.c \ brw_wm_pass0.c \ brw_wm_pass1.c \ brw_wm_pass2.c \ diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index a8369b07c35..d3a1233aac0 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -232,3 +232,28 @@ const struct brw_tracked_state brw_cc_unit = { .prepare = prepare_cc_unit, .emit = upload_cc_unit, }; + +static void upload_blend_constant_color(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->intel.ctx; + struct brw_blend_constant_color bcc; + + memset(&bcc, 0, sizeof(bcc)); + bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR; + bcc.header.length = sizeof(bcc)/4-2; + bcc.blend_constant_color[0] = ctx->Color.BlendColor[0]; + bcc.blend_constant_color[1] = ctx->Color.BlendColor[1]; + bcc.blend_constant_color[2] = ctx->Color.BlendColor[2]; + bcc.blend_constant_color[3] = ctx->Color.BlendColor[3]; + + BRW_CACHED_BATCH_STRUCT(brw, &bcc); +} + +const struct brw_tracked_state brw_blend_constant_color = { + .dirty = { + .mesa = _NEW_COLOR, + .brw = BRW_NEW_CONTEXT, + .cache = 0 + }, + .emit = upload_blend_constant_color +}; diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index cb0a8b96c9c..28549f2574a 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -122,9 +122,6 @@ GLboolean brwCreateContext( int api, (i == MESA_SHADER_FRAGMENT); ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp = (i == MESA_SHADER_FRAGMENT); - - if (intel->gen == 6) - ctx->ShaderCompilerOptions[i].EmitNoIfs = (i == MESA_SHADER_VERTEX); } ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 335339515a2..7069724466a 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -171,7 +171,6 @@ struct brw_vertex_program { struct brw_fragment_program { struct gl_fragment_program program; GLuint id; /**< serial no. to identify frag progs, never re-used */ - GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ /** for debugging, which texture units are referenced */ GLbitfield tex_units_used; @@ -211,6 +210,7 @@ struct brw_wm_prog_data { GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; GLboolean error; + int dispatch_width; /* Pointer to tracked values (only valid once * _mesa_load_state_parameters has been called at runtime). diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 7b823eb201b..877b22fec19 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -242,21 +242,13 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; - if (vp->use_const_buffer) { - /* Load the subset of push constants that will get used when - * we also have a pull constant buffer. - */ - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - assert(brw->vs.constant_map[i] <= nr); - memcpy(buf + offset + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); - } - } - } else { - for (i = 0; i < nr; i++) { - memcpy(buf + offset + i * 4, + /* Load the subset of push constants that will get used when + * we also have a pull constant buffer. + */ + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + assert(brw->vs.constant_map[i] <= nr); + memcpy(buf + offset + brw->vs.constant_map[i] * 4, vp->program.Base.Parameters->ParameterValues[i], 4 * sizeof(float)); } diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 239586a0366..7f3e4986808 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -462,6 +462,13 @@ #define BRW_COMPRESSION_2NDHALF 1 #define BRW_COMPRESSION_COMPRESSED 2 +#define GEN6_COMPRESSION_1Q 0 +#define GEN6_COMPRESSION_2Q 1 +#define GEN6_COMPRESSION_3Q 2 +#define GEN6_COMPRESSION_4Q 3 +#define GEN6_COMPRESSION_1H 0 +#define GEN6_COMPRESSION_2H 2 + #define BRW_CONDITIONAL_NONE 0 #define BRW_CONDITIONAL_Z 1 #define BRW_CONDITIONAL_NZ 2 @@ -1022,6 +1029,13 @@ # define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9 # define ATTRIBUTE_0_SWIZZLE_SHIFT 6 # define ATTRIBUTE_0_SOURCE_SHIFT 0 + +# define ATTRIBUTE_SWIZZLE_INPUTATTR 0 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3 +# define ATTRIBUTE_SWIZZLE_SHIFT 6 + /* DW16: Point sprite texture coordinate enables */ /* DW17: Constant interpolation enables */ /* DW18: attr 0-7 wrap shortest enables */ diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index 962c04128b8..6b61f7af15d 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -899,7 +899,8 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) err |= dest (file, inst); } else if (gen >= 6 && (inst->header.opcode == BRW_OPCODE_IF || inst->header.opcode == BRW_OPCODE_ELSE || - inst->header.opcode == BRW_OPCODE_ENDIF)) { + inst->header.opcode == BRW_OPCODE_ENDIF || + inst->header.opcode == BRW_OPCODE_WHILE)) { format (file, " %d", inst->bits1.branch_gen6.jump_count); } diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index 2ff39e8e64a..3b5c4c071e3 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -72,7 +72,37 @@ void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ) void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control ) { - p->current->header.compression_control = compression_control; + p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED); + + if (p->brw->intel.gen >= 6) { + /* Since we don't use the 32-wide support in gen6, we translate + * the pre-gen6 compression control here. + */ + switch (compression_control) { + case BRW_COMPRESSION_NONE: + /* This is the "use the first set of bits of dmask/vmask/arf + * according to execsize" option. + */ + p->current->header.compression_control = GEN6_COMPRESSION_1Q; + break; + case BRW_COMPRESSION_2NDHALF: + /* For 8-wide, this is "use the second set of 8 bits." */ + p->current->header.compression_control = GEN6_COMPRESSION_2Q; + break; + case BRW_COMPRESSION_COMPRESSED: + /* For 16-wide instruction compression, use the first set of 16 bits + * since we don't do 32-wide dispatch. + */ + p->current->header.compression_control = GEN6_COMPRESSION_1H; + break; + default: + assert(!"not reached"); + p->current->header.compression_control = GEN6_COMPRESSION_1H; + break; + } + } else { + p->current->header.compression_control = compression_control; + } } void brw_set_mask_control( struct brw_compile *p, GLuint value ) @@ -95,6 +125,7 @@ void brw_push_insn_state( struct brw_compile *p ) { assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); + p->compressed_stack[p->current - p->stack] = p->compressed; p->current++; } @@ -102,6 +133,7 @@ void brw_pop_insn_state( struct brw_compile *p ) { assert(p->current != p->stack); p->current--; + p->compressed = p->compressed_stack[p->current - p->stack]; } @@ -112,6 +144,7 @@ void brw_init_compile( struct brw_context *brw, struct brw_compile *p ) p->brw = brw; p->nr_insn = 0; p->current = p->stack; + p->compressed = false; memset(p->current, 0, sizeof(p->current[0])); /* Some defaults? diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index b4538e6e8a7..4dbdc522100 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -33,6 +33,7 @@ #ifndef BRW_EU_H #define BRW_EU_H +#include <stdbool.h> #include "brw_structs.h" #include "brw_defines.h" #include "program/prog_instruction.h" @@ -106,10 +107,12 @@ struct brw_compile { /* Allow clients to push/pop instruction state: */ struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; + bool compressed_stack[BRW_EU_MAX_INSN_STACK]; struct brw_instruction *current; GLuint flag_value; GLboolean single_program_flow; + bool compressed; struct brw_context *brw; struct brw_glsl_label *first_label; /**< linked list of labels */ @@ -954,6 +957,8 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *patch_insn); struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count); +struct brw_instruction *brw_CONT_gen6(struct brw_compile *p, + struct brw_instruction *do_insn); struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count); /* Forward jumps: */ @@ -1009,6 +1014,7 @@ void brw_math_invert( struct brw_compile *p, void brw_set_src1( struct brw_instruction *insn, struct brw_reg reg ); +void brw_set_uip_jip(struct brw_compile *p); /* brw_optimize.c */ void brw_optimize(struct brw_compile *p); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 9cb941dacfd..9c764fe779d 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -41,19 +41,20 @@ * Internal helper for constructing instructions */ -static void guess_execution_size( struct brw_instruction *insn, - struct brw_reg reg ) +static void guess_execution_size(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg reg) { - if (reg.width == BRW_WIDTH_8 && - insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) + if (reg.width == BRW_WIDTH_8 && p->compressed) insn->header.execution_size = BRW_EXECUTE_16; else insn->header.execution_size = reg.width; /* note - definitions are compatible */ } -static void brw_set_dest( struct brw_instruction *insn, - struct brw_reg dest ) +static void brw_set_dest(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg dest) { if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && dest.file != BRW_MESSAGE_REGISTER_FILE) @@ -100,7 +101,7 @@ static void brw_set_dest( struct brw_instruction *insn, /* NEW: Set the execution size based on dest.width and * insn->compression_control: */ - guess_execution_size(insn, dest); + guess_execution_size(p, insn, dest); } extern int reg_type_size[]; @@ -629,7 +630,7 @@ static struct brw_instruction *brw_alu1( struct brw_compile *p, struct brw_reg src ) { struct brw_instruction *insn = next_insn(p, opcode); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); return insn; } @@ -641,7 +642,7 @@ static struct brw_instruction *brw_alu2(struct brw_compile *p, struct brw_reg src1 ) { struct brw_instruction *insn = next_insn(p, opcode); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, src1); return insn; @@ -680,7 +681,7 @@ void brw_##OP(struct brw_compile *p, \ { \ struct brw_instruction *rnd, *add; \ rnd = next_insn(p, BRW_OPCODE_##OP); \ - brw_set_dest(rnd, dest); \ + brw_set_dest(p, rnd, dest); \ brw_set_src0(rnd, src); \ rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \ \ @@ -779,7 +780,7 @@ struct brw_instruction *brw_MUL(struct brw_compile *p, void brw_NOP(struct brw_compile *p) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src1(insn, brw_imm_ud(0x0)); } @@ -840,11 +841,11 @@ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) /* Override the defaults for this instruction: */ if (intel->gen < 6) { - brw_set_dest(insn, brw_ip_reg()); + brw_set_dest(p, insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); } else { - brw_set_dest(insn, brw_imm_w(0)); + brw_set_dest(p, insn, brw_imm_w(0)); insn->bits1.branch_gen6.jump_count = 0; brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); @@ -870,7 +871,7 @@ brw_IF_gen6(struct brw_compile *p, uint32_t conditional, insn = next_insn(p, BRW_OPCODE_IF); - brw_set_dest(insn, brw_imm_w(0)); + brw_set_dest(p, insn, brw_imm_w(0)); insn->header.execution_size = BRW_EXECUTE_8; insn->bits1.branch_gen6.jump_count = 0; brw_set_src0(insn, src0); @@ -905,11 +906,11 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, } if (intel->gen < 6) { - brw_set_dest(insn, brw_ip_reg()); + brw_set_dest(p, insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); } else { - brw_set_dest(insn, brw_imm_w(0)); + brw_set_dest(p, insn, brw_imm_w(0)); insn->bits1.branch_gen6.jump_count = 0; brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); @@ -965,11 +966,11 @@ void brw_ENDIF(struct brw_compile *p, struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); if (intel->gen < 6) { - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src1(insn, brw_imm_d(0x0)); } else { - brw_set_dest(insn, brw_imm_w(0)); + brw_set_dest(p, insn, brw_imm_w(0)); brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); } @@ -1029,16 +1030,44 @@ void brw_ENDIF(struct brw_compile *p, struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_BREAK); - brw_set_dest(insn, brw_ip_reg()); + if (intel->gen >= 6) { + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(insn, brw_imm_d(0x0)); + } else { + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->bits3.if_else.pad0 = 0; + insn->bits3.if_else.pop_count = pop_count; + } + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + + return insn; +} + +struct brw_instruction *brw_CONT_gen6(struct brw_compile *p, + struct brw_instruction *do_insn) +{ + struct brw_instruction *insn; + int br = 2; + + insn = next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_dest(p, insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); + + insn->bits3.break_cont.uip = br * (do_insn - insn); + insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = BRW_EXECUTE_8; - /* insn->header.mask_control = BRW_MASK_DISABLE; */ - insn->bits3.if_else.pad0 = 0; - insn->bits3.if_else.pop_count = pop_count; return insn; } @@ -1046,7 +1075,7 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) { struct brw_instruction *insn; insn = next_insn(p, BRW_OPCODE_CONTINUE); - brw_set_dest(insn, brw_ip_reg()); + brw_set_dest(p, insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); insn->header.compression_control = BRW_COMPRESSION_NONE; @@ -1058,17 +1087,33 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) } /* DO/WHILE loop: + * + * The DO/WHILE is just an unterminated loop -- break or continue are + * used for control within the loop. We have a few ways they can be + * done. + * + * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, + * jip and no DO instruction. + * + * For non-uniform control flow pre-gen6, there's a DO instruction to + * push the mask, and a WHILE to jump back, and BREAK to get out and + * pop the mask. + * + * For gen6, there's no more mask stack, so no need for DO. WHILE + * just points back to the first instruction of the loop. */ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) { - if (p->single_program_flow) { + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6 || p->single_program_flow) { return &p->store[p->nr_insn]; } else { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); /* Override the defaults for this instruction: */ - brw_set_dest(insn, brw_null_reg()); + brw_set_dest(p, insn, brw_null_reg()); brw_set_src0(insn, brw_null_reg()); brw_set_src1(insn, brw_null_reg()); @@ -1094,34 +1139,42 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, if (intel->gen >= 5) br = 2; - if (p->single_program_flow) - insn = next_insn(p, BRW_OPCODE_ADD); - else + if (intel->gen >= 6) { insn = next_insn(p, BRW_OPCODE_WHILE); - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); + brw_set_dest(p, insn, brw_imm_w(0)); + insn->bits1.branch_gen6.jump_count = br * (do_insn - insn); + brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = do_insn->header.execution_size; + assert(insn->header.execution_size == BRW_EXECUTE_8); + } else { + if (p->single_program_flow) { + insn = next_insn(p, BRW_OPCODE_ADD); - if (p->single_program_flow) { - insn->header.execution_size = BRW_EXECUTE_1; + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d((do_insn - insn) * 16)); + insn->header.execution_size = BRW_EXECUTE_1; + } else { + insn = next_insn(p, BRW_OPCODE_WHILE); - insn->bits3.d = (do_insn - insn) * 16; - } else { - insn->header.execution_size = do_insn->header.execution_size; + assert(do_insn->header.opcode == BRW_OPCODE_DO); - assert(do_insn->header.opcode == BRW_OPCODE_DO); - insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); - insn->bits3.if_else.pop_count = 0; - insn->bits3.if_else.pad0 = 0; - } + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0)); -/* insn->header.mask_control = BRW_MASK_ENABLE; */ + insn->header.execution_size = do_insn->header.execution_size; + insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } + } + insn->header.compression_control = BRW_COMPRESSION_NONE; + p->current->header.predicate_control = BRW_PREDICATE_NONE; - /* insn->header.mask_control = BRW_MASK_DISABLE; */ - p->current->header.predicate_control = BRW_PREDICATE_NONE; return insn; } @@ -1159,7 +1212,7 @@ void brw_CMP(struct brw_compile *p, struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); insn->header.destreg__conditionalmod = conditional; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, src1); @@ -1184,7 +1237,7 @@ void brw_WAIT (struct brw_compile *p) struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT); struct brw_reg src = brw_notification_1_reg(); - brw_set_dest(insn, src); + brw_set_dest(p, insn, src); brw_set_src0(insn, src); brw_set_src1(insn, brw_null_reg()); insn->header.execution_size = 0; /* must */ @@ -1219,6 +1272,10 @@ void brw_math( struct brw_compile *p, assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); + /* Source modifiers are ignored for extended math instructions. */ + assert(!src.negate); + assert(!src.abs); + if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { assert(src.type == BRW_REGISTER_TYPE_F); @@ -1228,8 +1285,9 @@ void brw_math( struct brw_compile *p, * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. */ insn->header.destreg__conditionalmod = function; + insn->header.saturate = saturate; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); brw_set_src1(insn, brw_null_reg()); } else { @@ -1242,7 +1300,7 @@ void brw_math( struct brw_compile *p, insn->header.predicate_control = 0; insn->header.destreg__conditionalmod = msg_reg_nr; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); brw_set_math_message(p->brw, insn, @@ -1284,12 +1342,18 @@ void brw_math2(struct brw_compile *p, assert(src1.type == BRW_REGISTER_TYPE_F); } + /* Source modifiers are ignored for extended math instructions. */ + assert(!src0.negate); + assert(!src0.abs); + assert(!src1.negate); + assert(!src1.abs); + /* Math is the same ISA format as other opcodes, except that CondModifier * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. */ insn->header.destreg__conditionalmod = function; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, src1); } @@ -1318,8 +1382,13 @@ void brw_math_16( struct brw_compile *p, * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. */ insn->header.destreg__conditionalmod = function; + insn->header.saturate = saturate; + + /* Source modifiers are ignored for extended math instructions. */ + assert(!src.negate); + assert(!src.abs); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); brw_set_src1(insn, brw_null_reg()); return; @@ -1334,7 +1403,7 @@ void brw_math_16( struct brw_compile *p, insn = next_insn(p, BRW_OPCODE_SEND); insn->header.destreg__conditionalmod = msg_reg_nr; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); brw_set_math_message(p->brw, insn, @@ -1351,7 +1420,7 @@ void brw_math_16( struct brw_compile *p, insn->header.compression_control = BRW_COMPRESSION_2NDHALF; insn->header.destreg__conditionalmod = msg_reg_nr+1; - brw_set_dest(insn, offset(dest,1)); + brw_set_dest(p, insn, offset(dest,1)); brw_set_src0(insn, src); brw_set_math_message(p->brw, insn, @@ -1446,7 +1515,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p, send_commit_msg = 1; } - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, brw_null_reg()); brw_set_dp_write_message(p->brw, @@ -1516,7 +1585,7 @@ brw_oword_block_read_scratch(struct brw_compile *p, insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditionalmod = mrf.nr; - brw_set_dest(insn, dest); /* UW? */ + brw_set_dest(p, insn, dest); /* UW? */ brw_set_src0(insn, brw_null_reg()); brw_set_dp_read_message(p->brw, @@ -1569,7 +1638,7 @@ void brw_oword_block_read(struct brw_compile *p, /* cast dest to a uword[8] vector */ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); if (intel->gen >= 6) { brw_set_src0(insn, mrf); } else { @@ -1614,7 +1683,7 @@ void brw_dword_scattered_read(struct brw_compile *p, /* cast dest to a uword[8] vector */ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, brw_null_reg()); brw_set_dp_read_message(p->brw, @@ -1639,29 +1708,21 @@ void brw_dp_READ_4_vs(struct brw_compile *p, GLuint location, GLuint bind_table_index) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; GLuint msg_reg_nr = 1; - struct brw_reg b; - /* - printf("vs const read msg, location %u, msg_reg_nr %d\n", - location, msg_reg_nr); - */ + if (intel->gen >= 6) + location /= 16; /* Setup MRF[1] with location/offset into const buffer */ brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - /* XXX I think we're setting all the dwords of MRF[1] to 'location'. - * when the docs say only dword[2] should be set. Hmmm. But it works. - */ - b = brw_message_reg(msg_reg_nr); - b = retype(b, BRW_REGISTER_TYPE_UD); - /*b = get_element_ud(b, 2);*/ - brw_MOV(p, b, brw_imm_ud(location)); - + brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2), + BRW_REGISTER_TYPE_UD), + brw_imm_ud(location)); brw_pop_insn_state(p); insn = next_insn(p, BRW_OPCODE_SEND); @@ -1671,8 +1732,12 @@ void brw_dp_READ_4_vs(struct brw_compile *p, insn->header.destreg__conditionalmod = msg_reg_nr; insn->header.mask_control = BRW_MASK_DISABLE; - brw_set_dest(insn, dest); - brw_set_src0(insn, brw_null_reg()); + brw_set_dest(p, insn, dest); + if (intel->gen >= 6) { + brw_set_src0(insn, brw_message_reg(msg_reg_nr)); + } else { + brw_set_src0(insn, brw_null_reg()); + } brw_set_dp_read_message(p->brw, insn, @@ -1706,7 +1771,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, /* M1.0 is block offset 0, M1.4 is block offset 1, all other * fields ignored. */ - brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), + brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D), addr_reg, brw_imm_d(offset)); brw_pop_insn_state(p); @@ -1717,7 +1782,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, insn->header.destreg__conditionalmod = 0; insn->header.mask_control = BRW_MASK_DISABLE; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, brw_vec8_grf(0, 0)); if (intel->gen == 6) @@ -1782,7 +1847,7 @@ void brw_fb_WRITE(struct brw_compile *p, else msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_dp_write_message(p->brw, insn, @@ -1860,7 +1925,7 @@ void brw_SAMPLE(struct brw_compile *p, struct brw_reg m1 = brw_message_reg(msg_reg_nr); - guess_execution_size(p->current, dest); + guess_execution_size(p, p->current, dest); if (p->current->header.execution_size == BRW_EXECUTE_16) dispatch_16 = GL_TRUE; @@ -1895,12 +1960,15 @@ void brw_SAMPLE(struct brw_compile *p, * and the first message register index comes from src0. */ if (intel->gen >= 6) { - brw_push_insn_state(p); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - /* m1 contains header? */ - brw_MOV(p, brw_message_reg(msg_reg_nr), src0); - brw_pop_insn_state(p); - src0 = brw_message_reg(msg_reg_nr); + if (src0.file != BRW_ARCHITECTURE_REGISTER_FILE || + src0.nr != BRW_ARF_NULL) { + brw_push_insn_state(p); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, retype(brw_message_reg(msg_reg_nr), src0.type), src0); + brw_pop_insn_state(p); + } + src0 = brw_message_reg(msg_reg_nr); } insn = next_insn(p, BRW_OPCODE_SEND); @@ -1909,7 +1977,7 @@ void brw_SAMPLE(struct brw_compile *p, if (intel->gen < 6) insn->header.destreg__conditionalmod = msg_reg_nr; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_sampler_message(p->brw, insn, binding_table_index, @@ -1970,7 +2038,7 @@ void brw_urb_WRITE(struct brw_compile *p, assert(msg_length < BRW_MAX_MRF); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); @@ -1989,6 +2057,80 @@ void brw_urb_WRITE(struct brw_compile *p, swizzle); } +static int +brw_find_next_block_end(struct brw_compile *p, int start) +{ + int ip; + + for (ip = start + 1; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + switch (insn->header.opcode) { + case BRW_OPCODE_ENDIF: + case BRW_OPCODE_ELSE: + case BRW_OPCODE_WHILE: + return ip; + } + } + assert(!"not reached"); + return start + 1; +} + +/* There is no DO instruction on gen6, so to find the end of the loop + * we have to see if the loop is jumping back before our start + * instruction. + */ +static int +brw_find_loop_end(struct brw_compile *p, int start) +{ + int ip; + int br = 2; + + for (ip = start + 1; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + if (insn->header.opcode == BRW_OPCODE_WHILE) { + if (ip + insn->bits1.branch_gen6.jump_count / br < start) + return ip; + } + } + assert(!"not reached"); + return start + 1; +} + +/* After program generation, go back and update the UIP and JIP of + * BREAK and CONT instructions to their correct locations. + */ +void +brw_set_uip_jip(struct brw_compile *p) +{ + struct intel_context *intel = &p->brw->intel; + int ip; + int br = 2; + + if (intel->gen < 6) + return; + + for (ip = 0; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + switch (insn->header.opcode) { + case BRW_OPCODE_BREAK: + insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + 1); + break; + case BRW_OPCODE_CONTINUE: + /* JIP is set at CONTINUE emit time, since that's when we + * know where the start of the loop is. + */ + insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + assert(insn->bits3.break_cont.uip != 0); + assert(insn->bits3.break_cont.jip != 0); + break; + } + } +} + void brw_ff_sync(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, @@ -2013,7 +2155,7 @@ void brw_ff_sync(struct brw_compile *p, } insn = next_insn(p, BRW_OPCODE_SEND); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index edb02fabb23..c3cbe0df618 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -600,8 +600,13 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) * might be able to do better by doing execsize = 1 math and then * expanding that result out, but we would need to be careful with * masking. + * + * The hardware ignores source modifiers (negate and abs) on math + * instructions, so we also move to a temp to set those up. */ - if (intel->gen >= 6 && src.file == UNIFORM) { + if (intel->gen >= 6 && (src.file == UNIFORM || + src.abs || + src.negate)) { fs_reg expanded = fs_reg(this, glsl_type::float_type); emit(fs_inst(BRW_OPCODE_MOV, expanded, src)); src = expanded; @@ -933,6 +938,10 @@ fs_visitor::visit(ir_expression *ir) assert(!"not reached: should be handled by lower_noise"); break; + case ir_quadop_vector: + assert(!"not reached: should be handled by lower_quadop_vector"); + break; + case ir_unop_sqrt: emit_math(FS_OPCODE_SQRT, this->result, op[0]); break; @@ -1423,28 +1432,70 @@ fs_visitor::visit(ir_discard *ir) void fs_visitor::visit(ir_constant *ir) { - fs_reg reg(this, ir->type); - this->result = reg; + /* Set this->result to reg at the bottom of the function because some code + * paths will cause this visitor to be applied to other fields. This will + * cause the value stored in this->result to be modified. + * + * Make reg constant so that it doesn't get accidentally modified along the + * way. Yes, I actually had this problem. :( + */ + const fs_reg reg(this, ir->type); + fs_reg dst_reg = reg; - for (unsigned int i = 0; i < ir->type->vector_elements; i++) { - switch (ir->type->base_type) { - case GLSL_TYPE_FLOAT: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i]))); - break; - case GLSL_TYPE_UINT: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i]))); - break; - case GLSL_TYPE_INT: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i]))); - break; - case GLSL_TYPE_BOOL: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i]))); - break; - default: - assert(!"Non-float/uint/int/bool constant"); + if (ir->type->is_array()) { + const unsigned size = type_size(ir->type->fields.array); + + for (unsigned i = 0; i < ir->type->length; i++) { + ir->array_elements[i]->accept(this); + fs_reg src_reg = this->result; + + dst_reg.type = src_reg.type; + for (unsigned j = 0; j < size; j++) { + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg)); + src_reg.reg_offset++; + dst_reg.reg_offset++; + } + } + } else if (ir->type->is_record()) { + foreach_list(node, &ir->components) { + ir_instruction *const field = (ir_instruction *) node; + const unsigned size = type_size(field->type); + + field->accept(this); + fs_reg src_reg = this->result; + + dst_reg.type = src_reg.type; + for (unsigned j = 0; j < size; j++) { + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg)); + src_reg.reg_offset++; + dst_reg.reg_offset++; + } + } + } else { + const unsigned size = type_size(ir->type); + + for (unsigned i = 0; i < size; i++) { + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i]))); + break; + case GLSL_TYPE_UINT: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i]))); + break; + case GLSL_TYPE_INT: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i]))); + break; + case GLSL_TYPE_BOOL: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i]))); + break; + default: + assert(!"Non-float/uint/int/bool constant"); + } + dst_reg.reg_offset++; } - reg.reg_offset++; } + + this->result = reg; } void @@ -1574,7 +1625,7 @@ fs_visitor::emit_if_gen6(ir_if *ir) switch (expr->operation) { case ir_unop_logic_not: - inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(1))); + inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(0))); inst->conditional_mod = BRW_CONDITIONAL_Z; return; @@ -1951,7 +2002,7 @@ fs_visitor::emit_interpolation_setup_gen6() emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y)); this->current_annotation = "compute 1/pos.w"; - this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0)); + this->wpos_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); this->pixel_w = fs_reg(this, glsl_type::float_type); emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); @@ -1979,17 +2030,17 @@ fs_visitor::emit_fb_writes() nr += 2; } - if (c->key.aa_dest_stencil_reg) { + if (c->aa_dest_stencil_reg) { emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0)))); + fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)))); } /* Reserve space for color. It'll be filled in per MRT below. */ int color_mrf = nr; nr += 4; - if (c->key.source_depth_to_render_target) { - if (c->key.computes_depth) { + if (c->source_depth_to_render_target) { + if (c->computes_depth) { /* Hand over gl_FragDepth. */ assert(this->frag_depth); fs_reg depth = *(variable_storage(this->frag_depth)); @@ -1998,20 +2049,22 @@ fs_visitor::emit_fb_writes() } else { /* Pass through the payload depth. */ emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0)))); + fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); } } - if (c->key.dest_depth_reg) { + if (c->dest_depth_reg) { emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0)))); + fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)))); } fs_reg color = reg_undef; if (this->frag_color) color = *(variable_storage(this->frag_color)); - else if (this->frag_data) + else if (this->frag_data) { color = *(variable_storage(this->frag_data)); + color.type = BRW_REGISTER_TYPE_F; + } for (int target = 0; target < c->key.nr_color_regions; target++) { this->current_annotation = talloc_asprintf(this->mem_ctx, @@ -2452,7 +2505,7 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst) void fs_visitor::assign_curb_setup() { - c->prog_data.first_curbe_grf = c->key.nr_payload_regs; + c->prog_data.first_curbe_grf = c->nr_payload_regs; c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8; /* Map the offsets in the UNIFORM file to fixed HW regs. */ @@ -3227,6 +3280,7 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) break; default: assert(!"not reached"); + brw_reg = brw_null_reg(); break; } break; @@ -3241,6 +3295,10 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) assert(!"not reached"); brw_reg = brw_null_reg(); break; + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; } if (reg->abs) brw_reg = brw_abs(brw_reg); @@ -3373,10 +3431,6 @@ fs_visitor::generate_code() break; case BRW_OPCODE_DO: - /* FINISHME: We need to write the loop instruction support still. */ - if (intel->gen >= 6) - this->fail = true; - loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); if_depth_in_loop[loop_stack_depth] = 0; break; @@ -3386,7 +3440,11 @@ fs_visitor::generate_code() brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_CONTINUE: - brw_CONT(p, if_depth_in_loop[loop_stack_depth]); + /* FINISHME: We need to write the loop instruction support still. */ + if (intel->gen >= 6) + brw_CONT_gen6(p, loop_stack[loop_stack_depth - 1]); + else + brw_CONT(p, if_depth_in_loop[loop_stack_depth]); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; @@ -3400,16 +3458,18 @@ fs_visitor::generate_code() assert(loop_stack_depth > 0); loop_stack_depth--; inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); - /* patch all the BREAK/CONT instructions from last BGNLOOP */ - while (inst0 > loop_stack[loop_stack_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + if (intel->gen < 6) { + /* patch all the BREAK/CONT instructions from last BGNLOOP */ + while (inst0 > loop_stack[loop_stack_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + } } } } @@ -3486,6 +3546,26 @@ fs_visitor::generate_code() last_native_inst = p->nr_insn; } + + brw_set_uip_jip(p); + + /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS + * emit issues, it doesn't get the jump distances into the output, + * which is often something we want to debug. So this is here in + * case you're doing that. + */ + if (0) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { + for (unsigned int i = 0; i < p->nr_insn; i++) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)&p->store[i])[3], + ((uint32_t *)&p->store[i])[2], + ((uint32_t *)&p->store[i])[1], + ((uint32_t *)&p->store[i])[0]); + brw_disasm(stdout, &p->store[i], intel->gen); + } + } + } } GLboolean diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index 3b7b03a05b8..20bfa4c3ea3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -205,6 +205,8 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_round_even: case ir_unop_sin: case ir_unop_cos: + case ir_unop_sin_reduced: + case ir_unop_cos_reduced: case ir_unop_dFdx: case ir_unop_dFdy: for (i = 0; i < vector_elements; i++) { @@ -328,6 +330,9 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_noise: assert(!"noise should have been broken down to function call"); break; + case ir_quadop_vector: + assert(!"should have been lowered"); + break; } ir->remove(); diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index b0c76f4094d..73b41fdbcef 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -166,6 +166,9 @@ static void populate_key( struct brw_context *brw, struct brw_gs_prog_key *key ) { struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + int prim_gs_always; + memset(key, 0, sizeof(*key)); /* CACHE_NEW_VS_PROG */ @@ -185,10 +188,14 @@ static void populate_key( struct brw_context *brw, key->pv_first = GL_TRUE; } - key->need_gs_prog = (key->hint_gs_always || - brw->primitive == GL_QUADS || + if (intel->gen == 6) + prim_gs_always = brw->primitive == GL_LINE_LOOP; + else + prim_gs_always = brw->primitive == GL_QUADS || brw->primitive == GL_QUAD_STRIP || - brw->primitive == GL_LINE_LOOP); + brw->primitive == GL_LINE_LOOP; + + key->need_gs_prog = (key->hint_gs_always || prim_gs_always); } /* Calculate interpolants for triangle and line rasterization. @@ -205,8 +212,10 @@ static void prepare_gs_prog(struct brw_context *brw) brw->gs.prog_active = key.need_gs_prog; } + drm_intel_bo_unreference(brw->gs.prog_bo); + brw->gs.prog_bo = NULL; + if (brw->gs.prog_active) { - drm_intel_bo_unreference(brw->gs.prog_bo); brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG, &key, sizeof(key), NULL, 0, diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 1d350bc0413..a91b0528fac 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -38,40 +38,6 @@ #include "brw_state.h" #include "brw_defines.h" - - - - -/*********************************************************************** - * Blend color - */ - -static void upload_blend_constant_color(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->intel.ctx; - struct brw_blend_constant_color bcc; - - memset(&bcc, 0, sizeof(bcc)); - bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR; - bcc.header.length = sizeof(bcc)/4-2; - bcc.blend_constant_color[0] = ctx->Color.BlendColor[0]; - bcc.blend_constant_color[1] = ctx->Color.BlendColor[1]; - bcc.blend_constant_color[2] = ctx->Color.BlendColor[2]; - bcc.blend_constant_color[3] = ctx->Color.BlendColor[3]; - - BRW_CACHED_BATCH_STRUCT(brw, &bcc); -} - - -const struct brw_tracked_state brw_blend_constant_color = { - .dirty = { - .mesa = _NEW_COLOR, - .brw = BRW_NEW_CONTEXT, - .cache = 0 - }, - .emit = upload_blend_constant_color -}; - /* Constant single cliprect for framebuffer object or DRI2 drawing */ static void upload_drawing_rect(struct brw_context *brw) { @@ -339,6 +305,9 @@ static void upload_polygon_stipple(struct brw_context *brw) struct brw_polygon_stipple bps; GLuint i; + if (!ctx->Polygon.StippleFlag) + return; + memset(&bps, 0, sizeof(bps)); bps.header.opcode = CMD_POLY_STIPPLE_PATTERN; bps.header.length = sizeof(bps)/4-2; @@ -381,6 +350,9 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct brw_polygon_stipple_offset bpso; + if (!ctx->Polygon.StippleFlag) + return; + memset(&bpso, 0, sizeof(bpso)); bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; bpso.header.length = sizeof(bpso)/4-2; @@ -409,7 +381,7 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) const struct brw_tracked_state brw_polygon_stipple_offset = { .dirty = { - .mesa = _NEW_WINDOW_POS, + .mesa = _NEW_WINDOW_POS | _NEW_POLYGONSTIPPLE, .brw = BRW_NEW_CONTEXT, .cache = 0 }, @@ -421,9 +393,10 @@ const struct brw_tracked_state brw_polygon_stipple_offset = { */ static void upload_aa_line_parameters(struct brw_context *brw) { + struct gl_context *ctx = &brw->intel.ctx; struct brw_aa_line_parameters balp; - if (!brw->has_aa_line_parameters) + if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters) return; /* use legacy aa line coverage computation */ @@ -436,7 +409,7 @@ static void upload_aa_line_parameters(struct brw_context *brw) const struct brw_tracked_state brw_aa_line_parameters = { .dirty = { - .mesa = 0, + .mesa = _NEW_LINE, .brw = BRW_NEW_CONTEXT, .cache = 0 }, @@ -454,6 +427,9 @@ static void upload_line_stipple(struct brw_context *brw) GLfloat tmp; GLint tmpi; + if (!ctx->Line.StippleFlag) + return; + memset(&bls, 0, sizeof(bls)); bls.header.opcode = CMD_LINE_STIPPLE_PATTERN; bls.header.length = sizeof(bls)/4 - 2; diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 1367d814696..94efa791091 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -142,7 +142,6 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx, if (newFP == curFP) brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; newFP->id = brw->program_id++; - newFP->isGLSL = brw_wm_is_glsl(fprog); /* Don't reject fragment shaders for their Mesa IR state when we're * using the new FS backend. diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 338f3876b31..eba4411ca70 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -129,7 +129,7 @@ const struct brw_tracked_state *gen6_atoms[] = &brw_vs_constants, /* Before vs_surfaces and constant_buffer */ &brw_wm_constants, /* Before wm_surfaces and constant_buffer */ - &gen6_wm_constants, /* Before wm_surfaces and constant_buffer */ + &gen6_wm_constants, /* Before wm_state */ &brw_vs_surfaces, /* must do before unit */ &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 8ce9af9c4fe..461f27048cc 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -1064,6 +1064,15 @@ struct brw_sampler_default_color { GLfloat color[4]; }; +struct gen5_sampler_default_color { + uint8_t ub[4]; + float f[4]; + uint16_t hf[4]; + uint16_t us[4]; + int16_t s[4]; + uint8_t b[4]; +}; + struct brw_sampler_state { @@ -1169,7 +1178,12 @@ struct brw_surface_state GLuint cube_neg_y:1; GLuint cube_pos_x:1; GLuint cube_neg_x:1; - GLuint pad:4; + GLuint pad:2; + /* Required on gen6 for surfaces accessed through render cache messages. + */ + GLuint render_cache_read_write:1; + /* Ironlake and newer: instead of replicating one of the texels */ + GLuint cube_corner_average:1; GLuint mipmap_layout_mode:1; GLuint vert_line_stride_ofs:1; GLuint vert_line_stride:1; @@ -1539,6 +1553,21 @@ struct brw_instruction GLuint pad0:12; } if_else; + struct + { + /* Signed jump distance to the ip to jump to if all channels + * are disabled after the break or continue. It should point + * to the end of the innermost control flow block, as that's + * where some channel could get re-enabled. + */ + int jip:16; + + /* Signed jump distance to the location to resume execution + * of this channel if it's enabled for the break or continue. + */ + int uip:16; + } break_cont; + struct { GLuint function:4; GLuint int_type:1; diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 4a41c7a5176..6ae75d22c14 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -99,8 +99,8 @@ static void do_vs_prog( struct brw_context *brw, (void) ctx; aux_size = sizeof(c.prog_data); - if (c.vp->use_const_buffer) - aux_size += c.vp->program.Base.Parameters->NumParameters; + /* constant_map */ + aux_size += c.vp->program.Base.Parameters->NumParameters; drm_intel_bo_unreference(brw->vs.prog_bo); brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG, @@ -130,6 +130,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || ctx->Polygon.BackMode != GL_FILL); + key.two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); /* _NEW_POINT */ if (ctx->Point.PointSprite) { @@ -157,7 +158,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) */ const struct brw_tracked_state brw_vs_prog = { .dirty = { - .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT, + .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT | _NEW_LIGHT, .brw = BRW_NEW_VERTEX_PROGRAM, .cache = 0 }, diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 9338a6b7dbf..0b88cc1ec76 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -44,6 +44,7 @@ struct brw_vs_prog_key { GLuint nr_userclip:4; GLuint copy_edgeflag:1; GLuint point_coord_replace:8; + GLuint two_side_color: 1; }; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 7e43324a1f9..09887dae95d 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -140,9 +140,13 @@ clear_current_const(struct brw_vs_compile *c) static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { struct intel_context *intel = &c->func.brw->intel; - GLuint i, reg = 0, mrf; + GLuint i, reg = 0, mrf, j; int attributes_in_vue; int first_reladdr_output; + int max_constant; + int constant = 0; + int vert_result_reoder[VERT_RESULT_MAX]; + int bfc = 0; /* Determine whether to use a real constant buffer or use a block * of GRF registers for constants. The later is faster but only @@ -181,62 +185,81 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) } - /* Vertex program parameters from curbe: + /* Assign some (probably all) of the vertex program constants to + * the push constant buffer/CURBE. + * + * There's an obvious limit to the numer of push constants equal to + * the number of register available, and that number is smaller + * than the minimum maximum number of vertex program parameters, so + * support for pull constants is required if we overflow. + * Additionally, on gen6 the number of push constants is even + * lower. + * + * When there's relative addressing, we don't know what range of + * Mesa IR registers can be accessed. And generally, when relative + * addressing is used we also have too many constants to load them + * all as push constants. So, we'll just support relative + * addressing out of the pull constant buffers, and try to load as + * many statically-accessed constants into the push constant buffer + * as we can. */ - if (c->vp->use_const_buffer) { - int max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries; - int constant = 0; - - /* We've got more constants than we can load with the push - * mechanism. This is often correlated with reladdr loads where - * we should probably be using a pull mechanism anyway to avoid - * excessive reading. However, the pull mechanism is slow in - * general. So, we try to allocate as many non-reladdr-loaded - * constants through the push buffer as we can before giving up. - */ - memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters); - for (i = 0; - i < c->vp->program.Base.NumInstructions && constant < max_constant; - i++) { - struct prog_instruction *inst = &c->vp->program.Base.Instructions[i]; - int arg; - - for (arg = 0; arg < 3 && constant < max_constant; arg++) { - if ((inst->SrcReg[arg].File != PROGRAM_STATE_VAR && - inst->SrcReg[arg].File != PROGRAM_CONSTANT && - inst->SrcReg[arg].File != PROGRAM_UNIFORM && - inst->SrcReg[arg].File != PROGRAM_ENV_PARAM && - inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) || - inst->SrcReg[arg].RelAddr) - continue; - - if (c->constant_map[inst->SrcReg[arg].Index] == -1) { - c->constant_map[inst->SrcReg[arg].Index] = constant++; - } + if (intel->gen >= 6) { + /* We can only load 32 regs of push constants. */ + max_constant = 32 * 2 - c->key.nr_userclip; + } else { + max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries; + } + + /* constant_map maps from ParameterValues[] index to index in the + * push constant buffer, or -1 if it's only in the pull constant + * buffer. + */ + memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters); + for (i = 0; + i < c->vp->program.Base.NumInstructions && constant < max_constant; + i++) { + struct prog_instruction *inst = &c->vp->program.Base.Instructions[i]; + int arg; + + for (arg = 0; arg < 3 && constant < max_constant; arg++) { + if (inst->SrcReg[arg].File != PROGRAM_STATE_VAR && + inst->SrcReg[arg].File != PROGRAM_CONSTANT && + inst->SrcReg[arg].File != PROGRAM_UNIFORM && + inst->SrcReg[arg].File != PROGRAM_ENV_PARAM && + inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) { + continue; } - } - for (i = 0; i < constant; i++) { - c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, - (i%2) * 4), - 0, 4, 1); + if (inst->SrcReg[arg].RelAddr) { + c->vp->use_const_buffer = GL_TRUE; + continue; + } + + if (c->constant_map[inst->SrcReg[arg].Index] == -1) { + c->constant_map[inst->SrcReg[arg].Index] = constant++; + } } - reg += (constant + 1) / 2; - c->prog_data.curb_read_length = reg - 1; - /* XXX 0 causes a bug elsewhere... */ - c->prog_data.nr_params = MAX2(constant * 4, 4); } - else { - /* use a section of the GRF for constants */ - GLuint nr_params = c->vp->program.Base.Parameters->NumParameters; - for (i = 0; i < nr_params; i++) { - c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); - } - reg += (nr_params + 1) / 2; - c->prog_data.curb_read_length = reg - 1; - c->prog_data.nr_params = nr_params * 4; + /* If we ran out of push constant space, then we'll also upload all + * constants through the pull constant buffer so that they can be + * accessed no matter what. For relative addressing (the common + * case) we need them all in place anyway. + */ + if (constant == max_constant) + c->vp->use_const_buffer = GL_TRUE; + + for (i = 0; i < constant; i++) { + c->regs[PROGRAM_STATE_VAR][i] = stride(brw_vec4_grf(reg + i / 2, + (i % 2) * 4), + 0, 4, 1); } + reg += (constant + 1) / 2; + c->prog_data.curb_read_length = reg - 1; + c->prog_data.nr_params = constant * 4; + /* XXX 0 causes a bug elsewhere... */ + if (intel->gen < 6 && c->prog_data.nr_params == 0) + c->prog_data.nr_params = 4; /* Allocate input regs: */ @@ -270,7 +293,36 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) mrf = 4; first_reladdr_output = get_first_reladdr_output(&c->vp->program); - for (i = 0; i < VERT_RESULT_MAX; i++) { + + for (i = 0; i < VERT_RESULT_MAX; i++) + vert_result_reoder[i] = i; + + /* adjust attribute order in VUE for BFC0/BFC1 on Gen6+ */ + if (intel->gen >= 6 && c->key.two_side_color) { + if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) && + (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) { + assert(c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)); + assert(c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0)); + bfc = 2; + } else if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) && + (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0))) + bfc = 1; + + if (bfc) { + for (i = 0; i < bfc; i++) { + vert_result_reoder[VERT_RESULT_COL0 + i * 2 + 0] = VERT_RESULT_COL0 + i; + vert_result_reoder[VERT_RESULT_COL0 + i * 2 + 1] = VERT_RESULT_BFC0 + i; + } + + for (i = VERT_RESULT_COL0 + bfc * 2; i < VERT_RESULT_BFC0 + bfc; i++) { + vert_result_reoder[i] = i - bfc; + } + } + } + + for (j = 0; j < VERT_RESULT_MAX; j++) { + i = vert_result_reoder[j]; + if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) { c->nr_outputs++; assert(i < Elements(c->regs[PROGRAM_OUTPUT])); @@ -281,7 +333,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) else if (i == VERT_RESULT_PSIZ) { c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); reg++; - mrf++; /* just a placeholder? XXX fix later stages & remove this */ } else { /* Two restrictions on our compute-to-MRF here. The @@ -574,9 +625,18 @@ static void emit_max( struct brw_compile *p, struct brw_reg arg0, struct brw_reg arg1 ) { - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1); - brw_SEL(p, dst, arg0, arg1); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_GE); + brw_SEL(p, dst, arg0, arg1); + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } else { + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1); + brw_SEL(p, dst, arg0, arg1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } } static void emit_min( struct brw_compile *p, @@ -584,9 +644,18 @@ static void emit_min( struct brw_compile *p, struct brw_reg arg0, struct brw_reg arg1 ) { - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); - brw_SEL(p, dst, arg0, arg1); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_SEL(p, dst, arg0, arg1); + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } else { + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); + brw_SEL(p, dst, arg0, arg1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } } static void emit_math1_gen4(struct brw_vs_compile *c, @@ -680,7 +749,7 @@ emit_math1(struct brw_vs_compile *c, emit_math1_gen4(c, function, dst, arg0, precision); } -static void emit_math2( struct brw_vs_compile *c, +static void emit_math2_gen4( struct brw_vs_compile *c, GLuint function, struct brw_reg dst, struct brw_reg arg0, @@ -688,14 +757,11 @@ static void emit_math2( struct brw_vs_compile *c, GLuint precision) { struct brw_compile *p = &c->func; - struct intel_context *intel = &p->brw->intel; struct brw_reg tmp = dst; GLboolean need_tmp = GL_FALSE; - if (dst.file != BRW_GENERAL_REGISTER_FILE) - need_tmp = GL_TRUE; - - if (intel->gen < 6 && dst.dw1.bits.writemask != 0xf) + if (dst.file != BRW_GENERAL_REGISTER_FILE || + dst.dw1.bits.writemask != 0xf) need_tmp = GL_TRUE; if (need_tmp) @@ -718,6 +784,53 @@ static void emit_math2( struct brw_vs_compile *c, } } +static void emit_math2_gen6( struct brw_vs_compile *c, + GLuint function, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + GLuint precision) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp_src0, tmp_src1, tmp_dst; + + tmp_src0 = get_tmp(c); + tmp_src1 = get_tmp(c); + tmp_dst = get_tmp(c); + + brw_MOV(p, tmp_src0, arg0); + brw_MOV(p, tmp_src1, arg1); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_math2(p, + tmp_dst, + function, + tmp_src0, + tmp_src1); + brw_set_access_mode(p, BRW_ALIGN_16); + + brw_MOV(p, dst, tmp_dst); + + release_tmp(c, tmp_src0); + release_tmp(c, tmp_src1); + release_tmp(c, tmp_dst); +} + +static void emit_math2( struct brw_vs_compile *c, + GLuint function, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + GLuint precision) +{ + struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6) + emit_math2_gen6(c, function, dst, arg0, arg1, precision); + else + emit_math2_gen4(c, function, dst, arg0, arg1, precision); +} static void emit_exp_noalias( struct brw_vs_compile *c, struct brw_reg dst, @@ -990,8 +1103,6 @@ get_constant(struct brw_vs_compile *c, assert(argIndex < 3); - assert(c->func.brw->intel.gen < 6); /* FINISHME */ - if (c->current_const[argIndex].index != src->Index) { /* Keep track of the last constant loaded in this slot, for reuse. */ c->current_const[argIndex].index = src->Index; @@ -1022,14 +1133,14 @@ get_reladdr_constant(struct brw_vs_compile *c, { const struct prog_src_register *src = &inst->SrcReg[argIndex]; struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; struct brw_reg const_reg = c->current_const[argIndex].reg; - struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; - struct brw_reg byte_addr_reg = retype(get_tmp(c), BRW_REGISTER_TYPE_D); + struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0]; + uint32_t offset; assert(argIndex < 3); - assert(c->func.brw->intel.gen < 6); /* FINISHME */ - /* Can't reuse a reladdr constant load. */ c->current_const[argIndex].index = -1; @@ -1038,15 +1149,21 @@ get_reladdr_constant(struct brw_vs_compile *c, src->Index, argIndex, c->current_const[argIndex].reg.nr); #endif - brw_MUL(p, byte_addr_reg, addrReg, brw_imm_ud(16)); + if (intel->gen >= 6) { + offset = src->Index; + } else { + struct brw_reg byte_addr_reg = retype(get_tmp(c), BRW_REGISTER_TYPE_D); + brw_MUL(p, byte_addr_reg, addr_reg, brw_imm_d(16)); + addr_reg = byte_addr_reg; + offset = 16 * src->Index; + } /* fetch the first vec4 */ brw_dp_READ_4_vs_relative(p, - const_reg, /* writeback dest */ - byte_addr_reg, /* address register */ - 16 * src->Index, /* byte offset */ - SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ - ); + const_reg, + addr_reg, + offset, + SURF_INDEX_VERT_CONST_BUFFER); return const_reg; } @@ -1241,22 +1358,18 @@ get_src_reg( struct brw_vs_compile *c, case PROGRAM_UNIFORM: case PROGRAM_ENV_PARAM: case PROGRAM_LOCAL_PARAM: - if (c->vp->use_const_buffer) { - if (!relAddr && c->constant_map[index] != -1) { - assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0); - return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]]; - } else if (relAddr) + if (!relAddr && c->constant_map[index] != -1) { + /* Take from the push constant buffer if possible. */ + assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0); + return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]]; + } else { + /* Must be in the pull constant buffer then .*/ + assert(c->vp->use_const_buffer); + if (relAddr) return get_reladdr_constant(c, inst, argIndex); else return get_constant(c, inst, argIndex); } - else if (relAddr) { - return deref(c, c->regs[PROGRAM_STATE_VAR][0], index, 16); - } - else { - assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); - return c->regs[PROGRAM_STATE_VAR][index]; - } case PROGRAM_ADDRESS: assert(index == 0); return c->regs[file][index]; @@ -1585,6 +1698,8 @@ static void emit_vertex_write( struct brw_vs_compile *c) break; if (!(c->prog_data.outputs_written & BITFIELD64_BIT(i))) continue; + if (i == VERT_RESULT_PSIZ) + continue; if (i >= VERT_RESULT_TEX0 && c->regs[PROGRAM_OUTPUT][i].file == BRW_GENERAL_REGISTER_FILE) { @@ -1895,7 +2010,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); break; case OPCODE_RSQ: - emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, brw_abs(args[0]), BRW_MATH_PRECISION_FULL); break; case OPCODE_SEQ: @@ -1969,35 +2084,42 @@ void brw_vs_emit(struct brw_vs_compile *c ) break; case OPCODE_CONT: brw_set_predicate_control(p, get_predicate(inst)); - brw_CONT(p, if_depth_in_loop[loop_depth]); + if (intel->gen >= 6) { + brw_CONT_gen6(p, loop_inst[loop_depth - 1]); + } else { + brw_CONT(p, if_depth_in_loop[loop_depth]); + } brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; - case OPCODE_ENDLOOP: - { - clear_current_const(c); - struct brw_instruction *inst0, *inst1; - GLuint br = 1; - - loop_depth--; - - if (intel->gen == 5) - br = 2; - - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); - /* patch all the BREAK/CONT instructions from last BEGINLOOP */ - while (inst0 > loop_inst[loop_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && + + case OPCODE_ENDLOOP: { + clear_current_const(c); + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + loop_depth--; + + if (intel->gen == 5) + br = 2; + + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + + if (intel->gen < 6) { + /* patch all the BREAK/CONT instructions from last BEGINLOOP */ + while (inst0 > loop_inst[loop_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK && inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); - } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); - } - } - } + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + } + } + } + } break; + case OPCODE_BRA: brw_set_predicate_control(p, get_predicate(inst)); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); @@ -2088,6 +2210,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) } brw_resolve_cals(p); + brw_set_uip_jip(p); brw_optimize(p); diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index ccdc18e0b8d..656501b4f79 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -119,6 +119,62 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) brw_wm_emit(c); } +static void +brw_wm_payload_setup(struct brw_context *brw, + struct brw_wm_compile *c) +{ + struct intel_context *intel = &brw->intel; + bool uses_depth = (c->fp->program.Base.InputsRead & + (1 << FRAG_ATTRIB_WPOS)) != 0; + + if (intel->gen >= 6) { + /* R0-1: masks, pixel X/Y coordinates. */ + c->nr_payload_regs = 2; + /* R2: only for 32-pixel dispatch.*/ + /* R3-4: perspective pixel location barycentric */ + c->nr_payload_regs += 2; + /* R5-6: perspective pixel location bary for dispatch width != 8 */ + if (c->dispatch_width == 16) { + c->nr_payload_regs += 2; + } + /* R7-10: perspective centroid barycentric */ + /* R11-14: perspective sample barycentric */ + /* R15-18: linear pixel location barycentric */ + /* R19-22: linear centroid barycentric */ + /* R23-26: linear sample barycentric */ + + /* R27: interpolated depth if uses source depth */ + if (uses_depth) { + c->source_depth_reg = c->nr_payload_regs; + c->nr_payload_regs++; + if (c->dispatch_width == 16) { + /* R28: interpolated depth if not 8-wide. */ + c->nr_payload_regs++; + } + } + /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W. + */ + if (uses_depth) { + c->source_w_reg = c->nr_payload_regs; + c->nr_payload_regs++; + if (c->dispatch_width == 16) { + /* R30: interpolated W if not 8-wide. */ + c->nr_payload_regs++; + } + } + /* R31: MSAA position offsets. */ + /* R32-: bary for 32-pixel. */ + /* R58-59: interp W for 32-pixel. */ + + if (c->fp->program.Base.OutputsWritten & + BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + c->source_depth_to_render_target = GL_TRUE; + c->computes_depth = GL_TRUE; + } + } else { + brw_wm_lookup_iz(intel, c); + } +} /** * All Mesa program -> GPU code generation goes through this function. @@ -167,23 +223,18 @@ static void do_wm_prog( struct brw_context *brw, brw_init_compile(brw, &c->func); - /* temporary sanity check assertion */ - ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program)); + brw_wm_payload_setup(brw, c); if (!brw_wm_fs_emit(brw, c)) { /* * Shader which use GLSL features such as flow control are handled * differently from "simple" shaders. */ - if (fp->isGLSL) { - c->dispatch_width = 8; - brw_wm_glsl_emit(brw, c); - } - else { - c->dispatch_width = 16; - brw_wm_non_glsl_emit(brw, c); - } + c->dispatch_width = 16; + brw_wm_payload_setup(brw, c); + brw_wm_non_glsl_emit(brw, c); } + c->prog_data.dispatch_width = c->dispatch_width; /* Scratch space is used for register spilling */ if (c->last_scratch) { @@ -220,12 +271,10 @@ static void do_wm_prog( struct brw_context *brw, static void brw_wm_populate_key( struct brw_context *brw, struct brw_wm_prog_key *key ) { - struct intel_context *intel = &brw->intel; struct gl_context *ctx = &brw->intel.ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ const struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; - GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; GLuint lookup = 0; GLuint line_aa; GLuint i; @@ -285,57 +334,9 @@ static void brw_wm_populate_key( struct brw_context *brw, } } - if (intel->gen >= 6) { - /* R0-1: masks, pixel X/Y coordinates. */ - key->nr_payload_regs = 2; - /* R2: only for 32-pixel dispatch.*/ - /* R3-4: perspective pixel location barycentric */ - key->nr_payload_regs += 2; - /* R5-6: perspective pixel location bary for dispatch width != 8 */ - if (!fp->isGLSL) { /* dispatch_width != 8 */ - key->nr_payload_regs += 2; - } - /* R7-10: perspective centroid barycentric */ - /* R11-14: perspective sample barycentric */ - /* R15-18: linear pixel location barycentric */ - /* R19-22: linear centroid barycentric */ - /* R23-26: linear sample barycentric */ - - /* R27: interpolated depth if uses source depth */ - if (uses_depth) { - key->source_depth_reg = key->nr_payload_regs; - key->nr_payload_regs++; - if (!fp->isGLSL) { /* dispatch_width != 8 */ - /* R28: interpolated depth if not 8-wide. */ - key->nr_payload_regs++; - } - } - /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W. - */ - if (uses_depth) { - key->source_w_reg = key->nr_payload_regs; - key->nr_payload_regs++; - if (!fp->isGLSL) { /* dispatch_width != 8 */ - /* R30: interpolated W if not 8-wide. */ - key->nr_payload_regs++; - } - } - /* R31: MSAA position offsets. */ - /* R32-: bary for 32-pixel. */ - /* R58-59: interp W for 32-pixel. */ - - if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { - key->source_depth_to_render_target = GL_TRUE; - key->computes_depth = GL_TRUE; - } - - } else { - brw_wm_lookup_iz(intel, - line_aa, - lookup, - uses_depth, - key); - } + key->iz_lookup = lookup; + key->line_aa = line_aa; + key->stats_wm = brw->intel.stats_wm; /* BRW_NEW_WM_INPUT_DIMENSIONS */ key->proj_attrib_mask = brw->wm.input_size_masks[4-1]; @@ -377,6 +378,10 @@ static void brw_wm_populate_key( struct brw_context *brw, swizzles[2] = SWIZZLE_ZERO; } else if (t->DepthMode == GL_LUMINANCE) { swizzles[3] = SWIZZLE_ONE; + } else if (t->DepthMode == GL_RED) { + swizzles[1] = SWIZZLE_ZERO; + swizzles[2] = SWIZZLE_ZERO; + swizzles[3] = SWIZZLE_ZERO; } } diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 2ca685784fc..e7f3cfbb75f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -59,16 +59,9 @@ #define AA_ALWAYS 2 struct brw_wm_prog_key { - GLuint source_depth_reg:3; - GLuint source_w_reg:3; - GLuint aa_dest_stencil_reg:3; - GLuint dest_depth_reg:3; - GLuint nr_payload_regs:4; - GLuint computes_depth:1; /* could be derived from program string */ - GLuint source_depth_to_render_target:1; + GLuint stats_wm:1; GLuint flat_shade:1; GLuint linear_color:1; /**< linear interpolation vs perspective interp */ - GLuint runtime_check_aads_emit:1; GLuint nr_color_regions:5; GLuint render_to_fbo:1; @@ -81,6 +74,8 @@ struct brw_wm_prog_key { GLushort drawable_height; GLbitfield64 vp_outputs_written; + GLuint iz_lookup; + GLuint line_aa; GLuint program_string_id:32; }; @@ -204,6 +199,15 @@ struct brw_wm_compile { PASS2_DONE } state; + GLuint source_depth_reg:3; + GLuint source_w_reg:3; + GLuint aa_dest_stencil_reg:3; + GLuint dest_depth_reg:3; + GLuint nr_payload_regs:4; + GLuint computes_depth:1; /* could be derived from program string */ + GLuint source_depth_to_render_target:1; + GLuint runtime_check_aads_emit:1; + /* Initial pass - translate fp instructions to fp instructions, * simplifying and adding instructions for interpolation and * framebuffer writes. @@ -306,14 +310,9 @@ void brw_wm_print_insn( struct brw_wm_compile *c, void brw_wm_print_program( struct brw_wm_compile *c, const char *stage ); -void brw_wm_lookup_iz( struct intel_context *intel, - GLuint line_aa, - GLuint lookup, - GLboolean ps_uses_depth, - struct brw_wm_prog_key *key ); +void brw_wm_lookup_iz(struct intel_context *intel, + struct brw_wm_compile *c); -GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); -void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); GLboolean brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c); /* brw_wm_emit.c */ @@ -381,7 +380,6 @@ void emit_fb_write(struct brw_wm_compile *c, void emit_frontfacing(struct brw_compile *p, const struct brw_reg *dst, GLuint mask); -void emit_kil_nv(struct brw_wm_compile *c); void emit_linterp(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 96fecc97ee2..a0e86034e1e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -896,10 +896,14 @@ void emit_math1(struct brw_wm_compile *c, BRW_MATH_SATURATE_NONE); struct brw_reg src; - if (intel->gen >= 6 && (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 || - arg0[0].file != BRW_GENERAL_REGISTER_FILE)) { + if (intel->gen >= 6 && ((arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 || + arg0[0].file != BRW_GENERAL_REGISTER_FILE) || + arg0[0].negate || arg0[0].abs)) { /* Gen6 math requires that source and dst horizontal stride be 1, * and that the argument be in the GRF. + * + * The hardware ignores source modifiers (negate and abs) on math + * instructions, so we also move to a temp to set those up. */ src = dst[dst_chan]; brw_MOV(p, src, arg0[0]); @@ -1301,9 +1305,15 @@ static void emit_kil( struct brw_wm_compile *c, struct brw_reg *arg0) { struct brw_compile *p = &c->func; - struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + struct intel_context *intel = &p->brw->intel; + struct brw_reg pixelmask; GLuint i, j; + if (intel->gen >= 6) + pixelmask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); + else + pixelmask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + for (i = 0; i < 4; i++) { /* Check if we've already done the comparison for this reg * -- common when someone does KIL TEMP.wwww. @@ -1319,26 +1329,11 @@ static void emit_kil( struct brw_wm_compile *c, brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0)); brw_set_predicate_control_flag_value(p, 0xff); brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_AND(p, r0uw, brw_flag_reg(), r0uw); + brw_AND(p, pixelmask, brw_flag_reg(), pixelmask); brw_pop_insn_state(p); } } -/* KIL_NV kills the pixels that are currently executing, not based on a test - * of the arguments. - */ -void emit_kil_nv( struct brw_wm_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); - - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */ - brw_AND(p, r0uw, c->emit_mask_reg, r0uw); - brw_pop_insn_state(p); -} - static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, GLuint nr, @@ -1387,8 +1382,8 @@ static void emit_aa( struct brw_wm_compile *c, GLuint reg ) { struct brw_compile *p = &c->func; - GLuint comp = c->key.aa_dest_stencil_reg / 2; - GLuint off = c->key.aa_dest_stencil_reg % 2; + GLuint comp = c->aa_dest_stencil_reg / 2; + GLuint off = c->aa_dest_stencil_reg % 2; struct brw_reg aa = offset(arg1[comp], off); brw_push_insn_state(p); @@ -1416,11 +1411,10 @@ void emit_fb_write(struct brw_wm_compile *c, struct intel_context *intel = &brw->intel; GLuint nr = 2; GLuint channel; - int base_reg; /* For gen6 fb write with no header, starting from color payload directly!. */ /* Reserve a space for AA - may not be needed: */ - if (c->key.aa_dest_stencil_reg) + if (c->aa_dest_stencil_reg) nr += 1; /* I don't really understand how this achieves the color interleave @@ -1428,11 +1422,6 @@ void emit_fb_write(struct brw_wm_compile *c, */ brw_push_insn_state(p); - if (intel->gen >= 6) - base_reg = nr; - else - base_reg = 0; - for (channel = 0; channel < 4; channel++) { if (intel->gen >= 6) { /* gen6 SIMD16 single source DP write looks like: @@ -1493,9 +1482,9 @@ void emit_fb_write(struct brw_wm_compile *c, brw_pop_insn_state(p); - if (c->key.source_depth_to_render_target) + if (c->source_depth_to_render_target) { - if (c->key.computes_depth) + if (c->computes_depth) brw_MOV(p, brw_message_reg(nr), arg2[2]); else brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */ @@ -1503,10 +1492,10 @@ void emit_fb_write(struct brw_wm_compile *c, nr += 2; } - if (c->key.dest_depth_reg) + if (c->dest_depth_reg) { - GLuint comp = c->key.dest_depth_reg / 2; - GLuint off = c->key.dest_depth_reg % 2; + GLuint comp = c->dest_depth_reg / 2; + GLuint off = c->dest_depth_reg % 2; if (off != 0) { brw_push_insn_state(p); @@ -1524,15 +1513,27 @@ void emit_fb_write(struct brw_wm_compile *c, } if (intel->gen >= 6) { - /* Subtract off the message header, since we send headerless. */ - nr -= 2; + /* Load the message header. There's no implied move from src0 + * to the base mrf on gen6. + */ + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, brw_message_reg(0), brw_vec8_grf(0, 0)); + brw_pop_insn_state(p); + + if (target != 0) { + brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + 0, + 2), BRW_REGISTER_TYPE_UD), + brw_imm_ud(target)); + } } - if (!c->key.runtime_check_aads_emit) { - if (c->key.aa_dest_stencil_reg) + if (!c->runtime_check_aads_emit) { + if (c->aa_dest_stencil_reg) emit_aa(c, arg1, 2); - fire_fb_write(c, base_reg, nr, target, eot); + fire_fb_write(c, 0, nr, target, eot); } else { struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); @@ -1897,10 +1898,6 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_kil(c, args[0]); break; - case OPCODE_KIL_NV: - emit_kil_nv(c); - break; - default: printf("Unsupported opcode %i (%s) in fragment shader\n", inst->opcode, inst->opcode < MAX_OPCODE ? diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 2cae6988804..4759b289a0c 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -338,11 +338,13 @@ static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) { - /* This is only called for producing 1/w in pre-gen6 interp. for - * gen6, the interp opcodes don't use this argument. + /* This is called for producing 1/w in pre-gen6 interp. for gen6, + * the interp opcodes don't use this argument. But to keep the + * nr_args = 3 expectations of pinterp happy, just stuff delta_xy + * into the slot. */ if (c->func.brw->intel.gen >= 6) - return src_undef(); + return c->delta_xy; if (src_is_undef(c->pixel_w)) { struct prog_dst_register pixel_w = get_temp(c); @@ -373,11 +375,7 @@ static void emit_interp( struct brw_wm_compile *c, struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); struct prog_src_register deltas; - if (c->func.brw->intel.gen < 6) { - deltas = get_delta_xy(c); - } else { - deltas = src_undef(); - } + deltas = get_delta_xy(c); /* Need to use PINTERP on attributes which have been * multiplied by 1/W in the SF program, and LINTERP on those @@ -1133,6 +1131,11 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) precalc_lit(c, inst); break; + case OPCODE_RSQ: + out = emit_scalar_insn(c, inst); + out->SrcReg[0].Abs = GL_TRUE; + break; + case OPCODE_TEX: precalc_tex(c, inst); break; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c deleted file mode 100644 index 7fe8ab1f334..00000000000 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ /dev/null @@ -1,1035 +0,0 @@ -#include "main/macros.h" -#include "program/prog_parameter.h" -#include "program/prog_print.h" -#include "program/prog_optimize.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" - -static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint component); - -/** - * Determine if the given fragment program uses GLSL features such - * as flow conditionals, loops, subroutines. - * Some GLSL shaders may use these features, others might not. - */ -GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) -{ - int i; - - if (unlikely(INTEL_DEBUG & DEBUG_GLSL_FORCE)) - return GL_TRUE; - - for (i = 0; i < fp->Base.NumInstructions; i++) { - const struct prog_instruction *inst = &fp->Base.Instructions[i]; - switch (inst->Opcode) { - case OPCODE_ARL: - case OPCODE_IF: - case OPCODE_ENDIF: - case OPCODE_CAL: - case OPCODE_BRK: - case OPCODE_RET: - case OPCODE_BGNLOOP: - return GL_TRUE; - default: - break; - } - } - return GL_FALSE; -} - - - -static void -reclaim_temps(struct brw_wm_compile *c); - - -/** Mark GRF register as used. */ -static void -prealloc_grf(struct brw_wm_compile *c, int r) -{ - c->used_grf[r] = GL_TRUE; -} - - -/** Mark given GRF register as not in use. */ -static void -release_grf(struct brw_wm_compile *c, int r) -{ - /*assert(c->used_grf[r]);*/ - c->used_grf[r] = GL_FALSE; - c->first_free_grf = MIN2(c->first_free_grf, r); -} - - -/** Return index of a free GRF, mark it as used. */ -static int -alloc_grf(struct brw_wm_compile *c) -{ - GLuint r; - for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { - if (!c->used_grf[r]) { - c->used_grf[r] = GL_TRUE; - c->first_free_grf = r + 1; /* a guess */ - return r; - } - } - - /* no free temps, try to reclaim some */ - reclaim_temps(c); - c->first_free_grf = 0; - - /* try alloc again */ - for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { - if (!c->used_grf[r]) { - c->used_grf[r] = GL_TRUE; - c->first_free_grf = r + 1; /* a guess */ - return r; - } - } - - for (r = 0; r < BRW_WM_MAX_GRF; r++) { - assert(c->used_grf[r]); - } - - /* really, no free GRF regs found */ - if (!c->out_of_regs) { - /* print warning once per compilation */ - _mesa_warning(NULL, "i965: ran out of registers for fragment program"); - c->out_of_regs = GL_TRUE; - } - - return -1; -} - - -/** Return number of GRF registers used */ -static int -num_grf_used(const struct brw_wm_compile *c) -{ - int r; - for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--) - if (c->used_grf[r]) - return r + 1; - return 0; -} - - - -/** - * Record the mapping of a Mesa register to a hardware register. - */ -static void set_reg(struct brw_wm_compile *c, int file, int index, - int component, struct brw_reg reg) -{ - c->wm_regs[file][index][component].reg = reg; - c->wm_regs[file][index][component].inited = GL_TRUE; -} - -static struct brw_reg alloc_tmp(struct brw_wm_compile *c) -{ - struct brw_reg reg; - - /* if we need to allocate another temp, grow the tmp_regs[] array */ - if (c->tmp_index == c->tmp_max) { - int r = alloc_grf(c); - if (r < 0) { - /*printf("Out of temps in %s\n", __FUNCTION__);*/ - r = 50; /* XXX random register! */ - } - c->tmp_regs[ c->tmp_max++ ] = r; - } - - /* form the GRF register */ - reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); - /*printf("alloc_temp %d\n", reg.nr);*/ - assert(reg.nr < BRW_WM_MAX_GRF); - return reg; - -} - -/** - * Save current temp register info. - * There must be a matching call to release_tmps(). - */ -static int mark_tmps(struct brw_wm_compile *c) -{ - return c->tmp_index; -} - -static void release_tmps(struct brw_wm_compile *c, int mark) -{ - c->tmp_index = mark; -} - -/** - * Convert Mesa src register to brw register. - * - * Since we're running in SOA mode each Mesa register corresponds to four - * hardware registers. We allocate the hardware registers as needed here. - * - * \param file register file, one of PROGRAM_x - * \param index register number - * \param component src component (X=0, Y=1, Z=2, W=3) - * \param nr not used?!? - * \param neg negate value? - * \param abs take absolute value? - */ -static struct brw_reg -get_reg(struct brw_wm_compile *c, int file, int index, int component, - int nr, GLuint neg, GLuint abs) -{ - struct brw_reg reg; - switch (file) { - case PROGRAM_STATE_VAR: - case PROGRAM_CONSTANT: - case PROGRAM_UNIFORM: - file = PROGRAM_STATE_VAR; - break; - case PROGRAM_UNDEFINED: - return brw_null_reg(); - case PROGRAM_TEMPORARY: - case PROGRAM_INPUT: - case PROGRAM_OUTPUT: - case PROGRAM_PAYLOAD: - break; - default: - _mesa_problem(NULL, "Unexpected file in get_reg()"); - return brw_null_reg(); - } - - assert(index < 256); - assert(component < 4); - - /* see if we've already allocated a HW register for this Mesa register */ - if (c->wm_regs[file][index][component].inited) { - /* yes, re-use */ - reg = c->wm_regs[file][index][component].reg; - } - else { - /* no, allocate new register */ - int grf = alloc_grf(c); - /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/ - if (grf < 0) { - /* totally out of temps */ - grf = 51; /* XXX random register! */ - } - - reg = brw_vec8_grf(grf, 0); - /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/ - - set_reg(c, file, index, component, reg); - } - - if (neg & (1 << component)) { - reg = negate(reg); - } - if (abs) - reg = brw_abs(reg); - return reg; -} - - - -/** - * This is called if we run out of GRF registers. Examine the live intervals - * of temp regs in the program and free those which won't be used again. - */ -static void -reclaim_temps(struct brw_wm_compile *c) -{ - GLint intBegin[MAX_PROGRAM_TEMPS]; - GLint intEnd[MAX_PROGRAM_TEMPS]; - int index; - - /*printf("Reclaim temps:\n");*/ - - _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns, - intBegin, intEnd); - - for (index = 0; index < MAX_PROGRAM_TEMPS; index++) { - if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) { - /* program temp[i] can be freed */ - int component; - /*printf(" temp[%d] is dead\n", index);*/ - for (component = 0; component < 4; component++) { - if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) { - int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr; - release_grf(c, r); - /* - printf(" Reclaim temp %d, reg %d at inst %d\n", - index, r, c->cur_inst); - */ - c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE; - } - } - } - } -} - - - - -/** - * Preallocate registers. This sets up the Mesa to hardware register - * mapping for certain registers, such as constants (uniforms/state vars) - * and shader inputs. - */ -static void prealloc_reg(struct brw_wm_compile *c) -{ - struct intel_context *intel = &c->func.brw->intel; - int i, j; - struct brw_reg reg; - int urb_read_length = 0; - GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted; - GLuint reg_index = 0; - - memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); - c->first_free_grf = 0; - - for (i = 0; i < 4; i++) { - if (i < (c->key.nr_payload_regs + 1) / 2) - reg = brw_vec8_grf(i * 2, 0); - else - reg = brw_vec8_grf(0, 0); - set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); - } - set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_W, 0, - brw_vec8_grf(c->key.source_w_reg, 0)); - reg_index += c->key.nr_payload_regs; - - /* constants */ - { - const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters; - const GLuint nr_temps = c->fp->program.Base.NumTemporaries; - - /* use a real constant buffer, or just use a section of the GRF? */ - /* XXX this heuristic may need adjustment... */ - if ((nr_params + nr_temps) * 4 + reg_index > 80) { - for (i = 0; i < nr_params; i++) { - float *pv = c->fp->program.Base.Parameters->ParameterValues[i]; - for (j = 0; j < 4; j++) { - c->prog_data.pull_param[c->prog_data.nr_pull_params] = &pv[j]; - c->prog_data.nr_pull_params++; - } - } - - c->prog_data.nr_params = 0; - } - /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/ - - if (!c->prog_data.nr_pull_params) { - const struct gl_program_parameter_list *plist = - c->fp->program.Base.Parameters; - int index = 0; - - /* number of float constants in CURBE */ - c->prog_data.nr_params = 4 * nr_params; - - /* loop over program constants (float[4]) */ - for (i = 0; i < nr_params; i++) { - /* loop over XYZW channels */ - for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(reg_index + index / 8, index % 8); - /* Save pointer to parameter/constant value. - * Constants will be copied in prepare_constant_buffer() - */ - c->prog_data.param[index] = &plist->ParameterValues[i][j]; - set_reg(c, PROGRAM_STATE_VAR, i, j, reg); - } - } - /* number of constant regs used (each reg is float[8]) */ - c->nr_creg = ALIGN(nr_params, 2) / 2; - reg_index += c->nr_creg; - } - } - - /* fragment shader inputs: One 2-reg pair of interpolation - * coefficients for each vec4 to be set up. - */ - if (intel->gen >= 6) { - for (i = 0; i < FRAG_ATTRIB_MAX; i++) { - if (!(c->fp->program.Base.InputsRead & BITFIELD64_BIT(i))) - continue; - - reg = brw_vec8_grf(reg_index, 0); - for (j = 0; j < 4; j++) { - set_reg(c, PROGRAM_PAYLOAD, i, j, reg); - } - reg_index += 2; - } - urb_read_length = reg_index; - } else { - for (i = 0; i < VERT_RESULT_MAX; i++) { - int fp_input; - - if (i >= VERT_RESULT_VAR0) - fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0; - else if (i <= VERT_RESULT_TEX7) - fp_input = i; - else - fp_input = -1; - - if (fp_input >= 0 && inputs & (1 << fp_input)) { - urb_read_length = reg_index; - reg = brw_vec8_grf(reg_index, 0); - for (j = 0; j < 4; j++) - set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); - } - if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) { - reg_index += 2; - } - } - } - - c->prog_data.first_curbe_grf = c->key.nr_payload_regs; - c->prog_data.urb_read_length = urb_read_length; - c->prog_data.curb_read_length = c->nr_creg; - c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); - reg_index++; - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); - reg_index += 2; - - /* mark GRF regs [0..reg_index-1] as in-use */ - for (i = 0; i < reg_index; i++) - prealloc_grf(c, i); - - /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */ - prealloc_grf(c, 126); - prealloc_grf(c, 127); - - for (i = 0; i < c->nr_fp_insns; i++) { - const struct prog_instruction *inst = &c->prog_instructions[i]; - struct brw_reg dst[4]; - - switch (inst->Opcode) { - case OPCODE_TEX: - case OPCODE_TXB: - /* Allocate the channels of texture results contiguously, - * since they are written out that way by the sampler unit. - */ - for (j = 0; j < 4; j++) { - dst[j] = get_dst_reg(c, inst, j); - if (j != 0) - assert(dst[j].nr == dst[j - 1].nr + 1); - } - break; - default: - break; - } - } - - for (i = 0; i < c->nr_fp_insns; i++) { - const struct prog_instruction *inst = &c->prog_instructions[i]; - - switch (inst->Opcode) { - case WM_DELTAXY: - /* Allocate WM_DELTAXY destination on G45/GM45 to an - * even-numbered GRF if possible so that we can use the PLN - * instruction. - */ - if (inst->DstReg.WriteMask == WRITEMASK_XY && - !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited && - !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited && - (IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) { - int grf; - - for (grf = c->first_free_grf & ~1; - grf < BRW_WM_MAX_GRF; - grf += 2) - { - if (!c->used_grf[grf] && !c->used_grf[grf + 1]) { - c->used_grf[grf] = GL_TRUE; - c->used_grf[grf + 1] = GL_TRUE; - c->first_free_grf = grf + 2; /* a guess */ - - set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0, - brw_vec8_grf(grf, 0)); - set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1, - brw_vec8_grf(grf + 1, 0)); - break; - } - } - } - default: - break; - } - } - - /* An instruction may reference up to three constants. - * They'll be found in these registers. - * XXX alloc these on demand! - */ - if (c->prog_data.nr_pull_params) { - for (i = 0; i < 3; i++) { - c->current_const[i].index = -1; - c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0); - } - } -#if 0 - printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer); - printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index); -#endif -} - - -/** - * Check if any of the instruction's src registers are constants, uniforms, - * or statevars. If so, fetch any constants that we don't already have in - * the three GRF slots. - */ -static void fetch_constants(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint i; - - /* loop over instruction src regs */ - for (i = 0; i < 3; i++) { - const struct prog_src_register *src = &inst->SrcReg[i]; - if (src->File == PROGRAM_STATE_VAR || - src->File == PROGRAM_CONSTANT || - src->File == PROGRAM_UNIFORM) { - c->current_const[i].index = src->Index; - -#if 0 - printf(" fetch const[%d] for arg %d into reg %d\n", - src->Index, i, c->current_const[i].reg.nr); -#endif - - /* need to fetch the constant now */ - brw_oword_block_read(p, - c->current_const[i].reg, - brw_message_reg(1), - 16 * src->Index, - SURF_INDEX_FRAG_CONST_BUFFER); - } - } -} - - -/** - * Convert Mesa dst register to brw register. - */ -static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint component) -{ - const int nr = 1; - return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr, - 0, 0); -} - - -static struct brw_reg -get_src_reg_const(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint srcRegIndex, GLuint component) -{ - /* We should have already fetched the constant from the constant - * buffer in fetch_constants(). Now we just have to return a - * register description that extracts the needed component and - * smears it across all eight vector components. - */ - const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; - struct brw_reg const_reg; - - assert(component < 4); - assert(srcRegIndex < 3); - assert(c->current_const[srcRegIndex].index != -1); - const_reg = c->current_const[srcRegIndex].reg; - - /* extract desired float from the const_reg, and smear */ - const_reg = stride(const_reg, 0, 1, 0); - const_reg.subnr = component * 4; - - if (src->Negate & (1 << component)) - const_reg = negate(const_reg); - if (src->Abs) - const_reg = brw_abs(const_reg); - -#if 0 - printf(" form const[%d].%d for arg %d, reg %d\n", - c->current_const[srcRegIndex].index, - component, - srcRegIndex, - const_reg.nr); -#endif - - return const_reg; -} - - -/** - * Convert Mesa src register to brw register. - */ -static struct brw_reg get_src_reg(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint srcRegIndex, GLuint channel) -{ - const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; - const GLuint nr = 1; - const GLuint component = GET_SWZ(src->Swizzle, channel); - - /* Only one immediate value can be used per native opcode, and it - * has be in the src1 slot, so not all Mesa instructions will get - * to take advantage of immediate constants. - */ - if (brw_wm_arg_can_be_immediate(inst->Opcode, srcRegIndex)) { - const struct gl_program_parameter_list *params; - - params = c->fp->program.Base.Parameters; - - /* Extended swizzle terms */ - if (component == SWIZZLE_ZERO) { - return brw_imm_f(0.0F); - } else if (component == SWIZZLE_ONE) { - if (src->Negate) - return brw_imm_f(-1.0F); - else - return brw_imm_f(1.0F); - } - - if (src->File == PROGRAM_CONSTANT) { - float f = params->ParameterValues[src->Index][component]; - - if (src->Abs) - f = fabs(f); - if (src->Negate) - f = -f; - - return brw_imm_f(f); - } - } - - if (c->prog_data.nr_pull_params && - (src->File == PROGRAM_STATE_VAR || - src->File == PROGRAM_CONSTANT || - src->File == PROGRAM_UNIFORM)) { - return get_src_reg_const(c, inst, srcRegIndex, component); - } - else { - /* other type of source register */ - return get_reg(c, src->File, src->Index, component, nr, - src->Negate, src->Abs); - } -} - -static void emit_arl(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, addr_reg; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_ADDRESS, 0); - src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */ - brw_MOV(p, addr_reg, src0); - brw_set_saturate(p, 0); -} - -static INLINE struct brw_reg high_words( struct brw_reg reg ) -{ - return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ), - 0, 8, 2 ); -} - -static INLINE struct brw_reg low_words( struct brw_reg reg ) -{ - return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 ); -} - -static INLINE struct brw_reg even_bytes( struct brw_reg reg ) -{ - return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 ); -} - -static INLINE struct brw_reg odd_bytes( struct brw_reg reg ) -{ - return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ), - 0, 16, 2 ); -} - -/** - * Resolve subroutine calls after code emit is done. - */ -static void post_wm_emit( struct brw_wm_compile *c ) -{ - brw_resolve_cals(&c->func); -} - -static void -get_argument_regs(struct brw_wm_compile *c, - const struct prog_instruction *inst, - int index, - struct brw_reg *dst, - struct brw_reg *regs, - int mask) -{ - struct brw_compile *p = &c->func; - int i, j; - - for (i = 0; i < 4; i++) { - if (mask & (1 << i)) { - regs[i] = get_src_reg(c, inst, index, i); - - /* Unalias destination registers from our sources. */ - if (regs[i].file == BRW_GENERAL_REGISTER_FILE) { - for (j = 0; j < 4; j++) { - if (memcmp(®s[i], &dst[j], sizeof(regs[0])) == 0) { - struct brw_reg tmp = alloc_tmp(c); - brw_MOV(p, tmp, regs[i]); - regs[i] = tmp; - break; - } - } - } - } - } -} - -static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) -{ - struct intel_context *intel = &brw->intel; -#define MAX_IF_DEPTH 32 -#define MAX_LOOP_DEPTH 32 - struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; - int if_depth_in_loop[MAX_LOOP_DEPTH]; - GLuint i, if_depth = 0, loop_depth = 0; - struct brw_compile *p = &c->func; - struct brw_indirect stack_index = brw_indirect(0, 0); - - c->out_of_regs = GL_FALSE; - - if_depth_in_loop[loop_depth] = 0; - - prealloc_reg(c); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); - - if (intel->gen >= 6) - brw_set_acc_write_control(p, 1); - - for (i = 0; i < c->nr_fp_insns; i++) { - const struct prog_instruction *inst = &c->prog_instructions[i]; - int dst_flags; - struct brw_reg args[3][4], dst[4]; - int j; - int mark = mark_tmps( c ); - - c->cur_inst = i; - -#if 0 - printf("Inst %d: ", i); - _mesa_print_instruction(inst); -#endif - - /* fetch any constants that this instruction needs */ - if (c->prog_data.nr_pull_params) - fetch_constants(c, inst); - - if (inst->Opcode != OPCODE_ARL) { - for (j = 0; j < 4; j++) { - if (inst->DstReg.WriteMask & (1 << j)) - dst[j] = get_dst_reg(c, inst, j); - else - dst[j] = brw_null_reg(); - } - } - for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++) - get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW); - - dst_flags = inst->DstReg.WriteMask; - if (inst->SaturateMode == SATURATE_ZERO_ONE) - dst_flags |= SATURATE; - - if (inst->CondUpdate) - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - else - brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); - - switch (inst->Opcode) { - case WM_PIXELXY: - emit_pixel_xy(c, dst, dst_flags); - break; - case WM_DELTAXY: - emit_delta_xy(p, dst, dst_flags, args[0]); - break; - case WM_PIXELW: - emit_pixel_w(c, dst, dst_flags, args[0], args[1]); - break; - case WM_LINTERP: - emit_linterp(p, dst, dst_flags, args[0], args[1]); - break; - case WM_PINTERP: - emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]); - break; - case WM_CINTERP: - emit_cinterp(p, dst, dst_flags, args[0]); - break; - case WM_WPOSXY: - emit_wpos_xy(c, dst, dst_flags, args[0]); - break; - case WM_FB_WRITE: - emit_fb_write(c, args[0], args[1], args[2], - INST_AUX_GET_TARGET(inst->Aux), - inst->Aux & INST_AUX_EOT); - break; - case WM_FRONTFACING: - emit_frontfacing(p, dst, dst_flags); - break; - case OPCODE_ADD: - emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_ARL: - emit_arl(c, inst); - break; - case OPCODE_FRC: - emit_alu1(p, brw_FRC, dst, dst_flags, args[0]); - break; - case OPCODE_FLR: - emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); - break; - case OPCODE_LRP: - emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); - break; - case OPCODE_TRUNC: - emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]); - break; - case OPCODE_MOV: - case OPCODE_SWZ: - emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); - break; - case OPCODE_DP2: - emit_dp2(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_DP3: - emit_dp3(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_DP4: - emit_dp4(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_XPD: - emit_xpd(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_DPH: - emit_dph(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_RCP: - emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); - break; - case OPCODE_RSQ: - emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); - break; - case OPCODE_SIN: - emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); - break; - case OPCODE_COS: - emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); - break; - case OPCODE_EX2: - emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); - break; - case OPCODE_LG2: - emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); - break; - case OPCODE_CMP: - emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]); - break; - case OPCODE_MIN: - emit_min(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_MAX: - emit_max(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_DDX: - case OPCODE_DDY: - emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX), - args[0]); - break; - case OPCODE_SLT: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_L, args[0], args[1]); - break; - case OPCODE_SLE: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_LE, args[0], args[1]); - break; - case OPCODE_SGT: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_G, args[0], args[1]); - break; - case OPCODE_SGE: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_GE, args[0], args[1]); - break; - case OPCODE_SEQ: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_EQ, args[0], args[1]); - break; - case OPCODE_SNE: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_NEQ, args[0], args[1]); - break; - case OPCODE_SSG: - emit_sign(p, dst, dst_flags, args[0]); - break; - case OPCODE_MUL: - emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_POW: - emit_math2(c, BRW_MATH_FUNCTION_POW, - dst, dst_flags, args[0], args[1]); - break; - case OPCODE_MAD: - emit_mad(p, dst, dst_flags, args[0], args[1], args[2]); - break; - case OPCODE_TEX: - emit_tex(c, dst, dst_flags, args[0], - get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, - 0, 1, 0, 0), - inst->TexSrcTarget, - inst->TexSrcUnit, - (c->key.shadowtex_mask & (1 << inst->TexSrcUnit)) != 0); - break; - case OPCODE_TXB: - emit_txb(c, dst, dst_flags, args[0], - get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, - 0, 1, 0, 0), - inst->TexSrcTarget, - c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]); - break; - case OPCODE_KIL_NV: - emit_kil_nv(c); - break; - case OPCODE_IF: - assert(if_depth < MAX_IF_DEPTH); - if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); - if_depth_in_loop[loop_depth]++; - break; - case OPCODE_ELSE: - assert(if_depth > 0); - if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); - break; - case OPCODE_ENDIF: - assert(if_depth > 0); - brw_ENDIF(p, if_inst[--if_depth]); - if_depth_in_loop[loop_depth]--; - break; - case OPCODE_BGNSUB: - brw_save_label(p, inst->Comment, p->nr_insn); - break; - case OPCODE_ENDSUB: - /* no-op */ - break; - case OPCODE_CAL: - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); - brw_set_access_mode(p, BRW_ALIGN_16); - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(4)); - brw_save_call(&c->func, inst->Comment, p->nr_insn); - brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - brw_pop_insn_state(p); - break; - - case OPCODE_RET: - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(-4)); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0)); - brw_set_access_mode(p, BRW_ALIGN_16); - brw_pop_insn_state(p); - - break; - case OPCODE_BGNLOOP: - /* XXX may need to invalidate the current_constant regs */ - loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); - if_depth_in_loop[loop_depth] = 0; - break; - case OPCODE_BRK: - brw_BREAK(p, if_depth_in_loop[loop_depth]); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case OPCODE_CONT: - brw_CONT(p, if_depth_in_loop[loop_depth]); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case OPCODE_ENDLOOP: - { - struct brw_instruction *inst0, *inst1; - GLuint br = 1; - - if (intel->gen == 5) - br = 2; - - assert(loop_depth > 0); - loop_depth--; - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); - /* patch all the BREAK/CONT instructions from last BGNLOOP */ - while (inst0 > loop_inst[loop_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); - } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); - } - } - } - break; - default: - printf("unsupported opcode %d (%s) in fragment shader\n", - inst->Opcode, inst->Opcode < MAX_OPCODE ? - _mesa_opcode_string(inst->Opcode) : "unknown"); - } - - /* Release temporaries containing any unaliased source regs. */ - release_tmps( c, mark ); - - if (inst->CondUpdate) - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - else - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - post_wm_emit(c); - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - printf("wm-native:\n"); - for (i = 0; i < p->nr_insn; i++) - brw_disasm(stdout, &p->store[i], intel->gen); - printf("\n"); - } -} - -/** - * Do GPU code generation for shaders that use GLSL features such as - * flow control. Other shaders will be compiled with the - */ -void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) -{ - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - printf("brw_wm_glsl_emit:\n"); - } - - /* initial instruction translation/simplification */ - brw_wm_pass_fp(c); - - /* actual code generation */ - brw_wm_emit_glsl(brw, c); - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - brw_wm_print_program(c, "brw_wm_glsl_emit done"); - } - - c->prog_data.total_grf = num_grf_used(c); - c->prog_data.total_scratch = 0; -} diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c index 62e556698ba..471ea1c18d6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_iz.c +++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c @@ -120,14 +120,14 @@ const struct { * \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES * \param lookup bitmask of IZ_* flags */ -void brw_wm_lookup_iz( struct intel_context *intel, - GLuint line_aa, - GLuint lookup, - GLboolean ps_uses_depth, - struct brw_wm_prog_key *key ) +void brw_wm_lookup_iz(struct intel_context *intel, + struct brw_wm_compile *c) { GLuint reg = 2; GLboolean kill_stats_promoted_workaround = GL_FALSE; + int lookup = c->key.iz_lookup; + bool uses_depth = (c->fp->program.Base.InputsRead & + (1 << FRAG_ATTRIB_WPOS)) != 0; assert (lookup < IZ_BIT_MAX); @@ -136,36 +136,36 @@ void brw_wm_lookup_iz( struct intel_context *intel, * statistics are enabled..." paragraph of 11.5.3.2: Early Depth * Test Cases [Pre-DevGT] of the 3D Pipeline - Windower B-Spec. */ - if (intel->stats_wm && + if (c->key.stats_wm && (lookup & IZ_PS_KILL_ALPHATEST_BIT) && wm_iz_table[lookup].mode == P) { kill_stats_promoted_workaround = GL_TRUE; } if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) - key->computes_depth = 1; + c->computes_depth = 1; - if (wm_iz_table[lookup].sd_present || ps_uses_depth || + if (wm_iz_table[lookup].sd_present || uses_depth || kill_stats_promoted_workaround) { - key->source_depth_reg = reg; + c->source_depth_reg = reg; reg += 2; } if (wm_iz_table[lookup].sd_to_rt || kill_stats_promoted_workaround) - key->source_depth_to_render_target = 1; + c->source_depth_to_render_target = 1; - if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) { - key->aa_dest_stencil_reg = reg; - key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present && - line_aa == AA_SOMETIMES); + if (wm_iz_table[lookup].ds_present || c->key.line_aa != AA_NEVER) { + c->aa_dest_stencil_reg = reg; + c->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present && + c->key.line_aa == AA_SOMETIMES); reg++; } if (wm_iz_table[lookup].dd_present) { - key->dest_depth_reg = reg; + c->dest_depth_reg = reg; reg+=2; } - key->nr_payload_regs = reg; + c->nr_payload_regs = reg; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 83152526b3a..f78bdc31866 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -380,7 +380,7 @@ static void pass0_init_payload( struct brw_wm_compile *c ) GLuint i; for (i = 0; i < 4; i++) { - GLuint j = i >= (c->key.nr_payload_regs + 1) / 2 ? 0 : i; + GLuint j = i >= (c->nr_payload_regs + 1) / 2 ? 0 : i; pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, &c->payload.depth[j] ); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c index 3a2874b6ddf..7d6a3fa9f12 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c @@ -128,8 +128,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) if (inst->opcode == WM_FB_WRITE) { track_arg(c, inst, 0, WRITEMASK_XYZW); track_arg(c, inst, 1, WRITEMASK_XYZW); - if (c->key.source_depth_to_render_target && - c->key.computes_depth) + if (c->source_depth_to_render_target && c->computes_depth) track_arg(c, inst, 2, WRITEMASK_Z); else track_arg(c, inst, 2, 0); @@ -281,7 +280,6 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case OPCODE_DST: case WM_FRONTFACING: - case OPCODE_KIL_NV: default: break; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c index 44e39538145..8c2b9e7020b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c @@ -69,6 +69,8 @@ static void prealloc_reg(struct brw_wm_compile *c, */ static void init_registers( struct brw_wm_compile *c ) { + struct brw_context *brw = c->func.brw; + struct intel_context *intel = &brw->intel; GLuint nr_interp_regs = 0; GLuint i = 0; GLuint j; @@ -76,32 +78,41 @@ static void init_registers( struct brw_wm_compile *c ) for (j = 0; j < c->grf_limit; j++) c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN; - for (j = 0; j < (c->key.nr_payload_regs + 1) / 2; j++) + for (j = 0; j < (c->nr_payload_regs + 1) / 2; j++) prealloc_reg(c, &c->payload.depth[j], i++); for (j = 0; j < c->nr_creg; j++) prealloc_reg(c, &c->creg[j], i++); - for (j = 0; j < VERT_RESULT_MAX; j++) { - if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) { - int fp_index; - - if (j >= VERT_RESULT_VAR0) - fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); - else if (j <= VERT_RESULT_TEX7) - fp_index = j; - else - fp_index = -1; - - nr_interp_regs++; - if (fp_index >= 0) - prealloc_reg(c, &c->payload.input_interp[fp_index], i++); + if (intel->gen >= 6) { + for (unsigned int j = 0; j < FRAG_ATTRIB_MAX; j++) { + if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(j)) { + nr_interp_regs++; + prealloc_reg(c, &c->payload.input_interp[j], i++); + } + } + } else { + for (j = 0; j < VERT_RESULT_MAX; j++) { + if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) { + int fp_index; + + if (j >= VERT_RESULT_VAR0) + fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); + else if (j <= VERT_RESULT_TEX7) + fp_index = j; + else + fp_index = -1; + + nr_interp_regs++; + if (fp_index >= 0) + prealloc_reg(c, &c->payload.input_interp[fp_index], i++); + } } + assert(nr_interp_regs >= 1); } - assert(nr_interp_regs >= 1); - c->prog_data.first_curbe_grf = ALIGN(c->key.nr_payload_regs, 2); + c->prog_data.first_curbe_grf = ALIGN(c->nr_payload_regs, 2); c->prog_data.urb_read_length = nr_interp_regs * 2; c->prog_data.curb_read_length = c->nr_creg * 2; diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index fea96d35381..e7c97a1cb05 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -69,12 +69,43 @@ static GLuint translate_wrap_mode( GLenum wrap ) static drm_intel_bo *upload_default_color( struct brw_context *brw, const GLfloat *color ) { - struct brw_sampler_default_color sdc; + struct intel_context *intel = &brw->intel; - COPY_4V(sdc.color, color); - - return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, - &sdc, sizeof(sdc)); + if (intel->gen >= 5) { + struct gen5_sampler_default_color sdc; + + memset(&sdc, 0, sizeof(sdc)); + + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[0], color[0]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[1], color[1]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[2], color[2]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[3], color[3]); + + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[0], color[0]); + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[1], color[1]); + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[2], color[2]); + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[3], color[3]); + + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[0], color[0]); + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[1], color[1]); + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[2], color[2]); + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[3], color[3]); + + /* XXX: Fill in half floats */ + /* XXX: Fill in signed bytes */ + + COPY_4V(sdc.f, color); + + return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, + &sdc, sizeof(sdc)); + } else { + struct brw_sampler_default_color sdc; + + COPY_4V(sdc.color, color); + + return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, + &sdc, sizeof(sdc)); + } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 76de7b7b6f6..e9ef635bca2 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -87,7 +87,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) { struct gl_context *ctx = &brw->intel.ctx; const struct gl_fragment_program *fp = brw->fragment_program; - const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp; struct intel_context *intel = &brw->intel; memset(key, 0, sizeof(*key)); @@ -132,7 +131,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* _NEW_COLOR */ key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; - key->is_glsl = bfp->isGLSL; /* If using the fragment shader backend, the program is always * 8-wide. diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 76fc94df1f6..ad744044c70 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -139,6 +139,8 @@ static GLuint translate_tex_format( gl_format mesa_format, return BRW_SURFACEFORMAT_I16_UNORM; else if (depth_mode == GL_ALPHA) return BRW_SURFACEFORMAT_A16_UNORM; + else if (depth_mode == GL_RED) + return BRW_SURFACEFORMAT_R16_UNORM; else return BRW_SURFACEFORMAT_L16_UNORM; @@ -174,6 +176,8 @@ static GLuint translate_tex_format( gl_format mesa_format, return BRW_SURFACEFORMAT_I24X8_UNORM; else if (depth_mode == GL_ALPHA) return BRW_SURFACEFORMAT_A24X8_UNORM; + else if (depth_mode == GL_RED) + return BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS; else return BRW_SURFACEFORMAT_L24X8_UNORM; @@ -274,6 +278,7 @@ brw_create_constant_surface(struct brw_context *brw, drm_intel_bo **out_bo, uint32_t *out_offset) { + struct intel_context *intel = &brw->intel; const GLint w = width - 1; struct brw_surface_state surf; void *map; @@ -284,6 +289,9 @@ brw_create_constant_surface(struct brw_context *brw, surf.ss0.surface_type = BRW_SURFACE_BUFFER; surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + if (intel->gen >= 6) + surf.ss0.render_cache_read_write = 1; + assert(bo); surf.ss1.base_addr = bo->offset; /* reloc */ diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index 800a2555214..c2631a7b4df 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -35,6 +35,7 @@ struct gen6_blend_state_key { GLboolean color_blend, alpha_enabled; GLboolean dither; + GLboolean color_mask[BRW_MAX_DRAW_BUFFERS][4]; GLenum logic_op; @@ -54,6 +55,9 @@ blend_state_populate_key(struct brw_context *brw, memset(key, 0, sizeof(*key)); /* _NEW_COLOR */ + memcpy(key->color_mask, ctx->Color.ColorMask, sizeof(key->color_mask)); + + /* _NEW_COLOR */ if (ctx->Color._LogicOpEnabled) key->logic_op = ctx->Color.LogicOp; else @@ -87,54 +91,62 @@ static drm_intel_bo * blend_state_create_from_key(struct brw_context *brw, struct gen6_blend_state_key *key) { - struct gen6_blend_state blend; + struct gen6_blend_state blend[BRW_MAX_DRAW_BUFFERS]; drm_intel_bo *bo; + int b; memset(&blend, 0, sizeof(blend)); - if (key->logic_op != GL_COPY) { - blend.blend1.logic_op_enable = 1; - blend.blend1.logic_op_func = intel_translate_logic_op(key->logic_op); - } else if (key->color_blend) { - GLenum eqRGB = key->blend_eq_rgb; - GLenum eqA = key->blend_eq_a; - GLenum srcRGB = key->blend_src_rgb; - GLenum dstRGB = key->blend_dst_rgb; - GLenum srcA = key->blend_src_a; - GLenum dstA = key->blend_dst_a; - - if (eqRGB == GL_MIN || eqRGB == GL_MAX) { - srcRGB = dstRGB = GL_ONE; - } - - if (eqA == GL_MIN || eqA == GL_MAX) { - srcA = dstA = GL_ONE; + for (b = 0; b < BRW_MAX_DRAW_BUFFERS; b++) { + if (key->logic_op != GL_COPY) { + blend[b].blend1.logic_op_enable = 1; + blend[b].blend1.logic_op_func = intel_translate_logic_op(key->logic_op); + } else if (key->color_blend & (1 << b)) { + GLenum eqRGB = key->blend_eq_rgb; + GLenum eqA = key->blend_eq_a; + GLenum srcRGB = key->blend_src_rgb; + GLenum dstRGB = key->blend_dst_rgb; + GLenum srcA = key->blend_src_a; + GLenum dstA = key->blend_dst_a; + + if (eqRGB == GL_MIN || eqRGB == GL_MAX) { + srcRGB = dstRGB = GL_ONE; + } + + if (eqA == GL_MIN || eqA == GL_MAX) { + srcA = dstA = GL_ONE; + } + + blend[b].blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB); + blend[b].blend0.source_blend_factor = brw_translate_blend_factor(srcRGB); + blend[b].blend0.blend_func = brw_translate_blend_equation(eqRGB); + + blend[b].blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + blend[b].blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA); + blend[b].blend0.ia_blend_func = brw_translate_blend_equation(eqA); + + blend[b].blend0.blend_enable = 1; + blend[b].blend0.ia_blend_enable = (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB); } - blend.blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB); - blend.blend0.source_blend_factor = brw_translate_blend_factor(srcRGB); - blend.blend0.blend_func = brw_translate_blend_equation(eqRGB); - - blend.blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA); - blend.blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA); - blend.blend0.ia_blend_func = brw_translate_blend_equation(eqA); + if (key->alpha_enabled) { + blend[b].blend1.alpha_test_enable = 1; + blend[b].blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func); - blend.blend0.blend_enable = 1; - blend.blend0.ia_blend_enable = (srcA != srcRGB || - dstA != dstRGB || - eqA != eqRGB); - } - - if (key->alpha_enabled) { - blend.blend1.alpha_test_enable = 1; - blend.blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func); + } - } + if (key->dither) { + blend[b].blend1.dither_enable = 1; + blend[b].blend1.y_dither_offset = 0; + blend[b].blend1.x_dither_offset = 0; + } - if (key->dither) { - blend.blend1.dither_enable = 1; - blend.blend1.y_dither_offset = 0; - blend.blend1.x_dither_offset = 0; + blend[b].blend1.write_disable_r = !key->color_mask[b][0]; + blend[b].blend1.write_disable_g = !key->color_mask[b][1]; + blend[b].blend1.write_disable_b = !key->color_mask[b][2]; + blend[b].blend1.write_disable_a = !key->color_mask[b][3]; } bo = brw_upload_cache(&brw->cache, BRW_BLEND_STATE, @@ -172,7 +184,7 @@ const struct brw_tracked_state gen6_blend_state = { }; struct gen6_color_calc_state_key { - GLubyte blend_constant_color[4]; + float blend_constant_color[4]; GLclampf alpha_ref; GLubyte stencil_ref[2]; }; diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index c65b41e2b6b..c7c4eb1f27d 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -64,7 +64,9 @@ upload_clip_state(struct brw_context *brw) userclip << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT | depth_clamp | provoking); - OUT_BATCH(GEN6_CLIP_FORCE_ZERO_RTAINDEX); + OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT | + U_FIXED(225.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT | + GEN6_CLIP_FORCE_ZERO_RTAINDEX); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 471067e8f02..45c148baedd 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -33,9 +33,10 @@ #include "intel_batchbuffer.h" static uint32_t -get_attr_override(struct brw_context *brw, int fs_attr) +get_attr_override(struct brw_context *brw, int fs_attr, int two_side_color) { int attr_index = 0, i, vs_attr; + int bfc = 0; if (fs_attr <= FRAG_ATTRIB_TEX7) vs_attr = fs_attr; @@ -57,6 +58,30 @@ get_attr_override(struct brw_context *brw, int fs_attr) attr_index++; } + assert(attr_index < 32); + + if (two_side_color) { + if ((brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) && + (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) { + assert(brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)); + assert(brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0)); + bfc = 2; + } else if ((brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) && + (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0))) + bfc = 1; + } + + if (bfc && (fs_attr <= FRAG_ATTRIB_TEX7 && fs_attr > FRAG_ATTRIB_WPOS)) { + if (fs_attr == FRAG_ATTRIB_COL0) + attr_index |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT); + else if (fs_attr == FRAG_ATTRIB_COL1 && bfc == 2) { + attr_index++; + attr_index |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT); + } else { + attr_index += bfc; + } + } + return attr_index; } @@ -67,13 +92,15 @@ upload_sf_state(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; /* CACHE_NEW_VS_PROG */ uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written); + /* BRW_NEW_FRAGMENT_PROGRAM */ uint32_t num_outputs = brw_count_bits(brw->fragment_program->Base.InputsRead); - uint32_t dw1, dw2, dw3, dw4, dw16; + uint32_t dw1, dw2, dw3, dw4, dw16, dw17; int i; /* _NEW_BUFFER */ GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; int attr = 0; int urb_start; + int two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); /* _NEW_TRANSFORM */ if (ctx->Transform.ClipPlanesEnabled) @@ -91,6 +118,7 @@ upload_sf_state(struct brw_context *brw) dw3 = 0; dw4 = 0; dw16 = 0; + dw17 = 0; /* _NEW_POLYGON */ if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo) @@ -99,6 +127,48 @@ upload_sf_state(struct brw_context *brw) if (ctx->Polygon.OffsetFill) dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID; + if (ctx->Polygon.OffsetLine) + dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME; + + if (ctx->Polygon.OffsetPoint) + dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT; + + switch (ctx->Polygon.FrontMode) { + case GL_FILL: + dw2 |= GEN6_SF_FRONT_SOLID; + break; + + case GL_LINE: + dw2 |= GEN6_SF_FRONT_WIREFRAME; + break; + + case GL_POINT: + dw2 |= GEN6_SF_FRONT_POINT; + break; + + default: + assert(0); + break; + } + + switch (ctx->Polygon.BackMode) { + case GL_FILL: + dw2 |= GEN6_SF_BACK_SOLID; + break; + + case GL_LINE: + dw2 |= GEN6_SF_BACK_WIREFRAME; + break; + + case GL_POINT: + dw2 |= GEN6_SF_BACK_POINT; + break; + + default: + assert(0); + break; + } + /* _NEW_SCISSOR */ if (ctx->Scissor.Enabled) dw3 |= GEN6_SF_SCISSOR_ENABLE; @@ -160,6 +230,12 @@ upload_sf_state(struct brw_context *brw) } } + /* flat shading */ + if (ctx->Light.ShadeModel == GL_FLAT) { + dw17 |= ((brw->fragment_program->Base.InputsRead & (FRAG_BIT_COL0 | FRAG_BIT_COL1)) >> + ((brw->fragment_program->Base.InputsRead & FRAG_BIT_WPOS) ? 0 : 1)); + } + BEGIN_BATCH(20); OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2)); OUT_BATCH(dw1); @@ -174,7 +250,7 @@ upload_sf_state(struct brw_context *brw) for (; attr < 64; attr++) { if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr); + attr_overrides |= get_attr_override(brw, attr, two_side_color); attr++; break; } @@ -182,7 +258,7 @@ upload_sf_state(struct brw_context *brw) for (; attr < 64; attr++) { if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr) << 16; + attr_overrides |= get_attr_override(brw, attr, two_side_color) << 16; attr++; break; } @@ -190,7 +266,7 @@ upload_sf_state(struct brw_context *brw) OUT_BATCH(attr_overrides); } OUT_BATCH(dw16); /* point sprite texcoord bitmask */ - OUT_BATCH(0); /* constant interp bitmask */ + OUT_BATCH(dw17); /* constant interp bitmask */ OUT_BATCH(0); /* wrapshortest enables 0-7 */ OUT_BATCH(0); /* wrapshortest enables 8-15 */ ADVANCE_BATCH(); @@ -205,7 +281,8 @@ const struct brw_tracked_state gen6_sf_state = { _NEW_BUFFERS | _NEW_POINT | _NEW_TRANSFORM), - .brw = BRW_NEW_CONTEXT, + .brw = (BRW_NEW_CONTEXT | + BRW_NEW_FRAGMENT_PROGRAM), .cache = CACHE_NEW_VS_PROG }, .emit = upload_sf_state, diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c index a34123478fb..de97fd3783d 100644 --- a/src/mesa/drivers/dri/i965/gen6_urb.c +++ b/src/mesa/drivers/dri/i965/gen6_urb.c @@ -72,7 +72,7 @@ const struct brw_tracked_state gen6_urb = { .dirty = { .mesa = 0, .brw = BRW_NEW_CONTEXT, - .cache = CACHE_NEW_VS_PROG, + .cache = (CACHE_NEW_VS_PROG | CACHE_NEW_GS_PROG), }, .prepare = prepare_urb, .emit = upload_urb, diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index e94d0c0ddbb..4ef9e2e6072 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -54,7 +54,7 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH(0); ADVANCE_BATCH(); } else { - int params_uploaded = 0; + int params_uploaded = 0, param_regs; float *param; if (brw->vertex_program->IsNVProgram) @@ -88,20 +88,11 @@ upload_vs_state(struct brw_context *brw) params_uploaded++; } - if (vp->use_const_buffer) { - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - memcpy(param + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); - params_uploaded++; - } - } - } else { - for (i = 0; i < nr_params; i++) { - memcpy(param, vp->program.Base.Parameters->ParameterValues[i], + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + memcpy(param + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], 4 * sizeof(float)); - param += 4; params_uploaded++; } } @@ -117,13 +108,16 @@ upload_vs_state(struct brw_context *brw) drm_intel_gem_bo_unmap_gtt(constant_bo); + param_regs = (params_uploaded + 1) / 2; + assert(param_regs <= 32); + BEGIN_BATCH(5); OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | GEN6_CONSTANT_BUFFER_0_ENABLE | (5 - 2)); OUT_RELOC(constant_bo, I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ - ALIGN(params_uploaded, 2) / 2 - 1); + param_regs - 1); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index ea5418bacf1..d80df4e254b 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -66,6 +66,21 @@ prepare_wm_constants(struct brw_context *brw) constants[i] = convert_param(brw->wm.prog_data->param_convert[i], *brw->wm.prog_data->param[i]); } + + if (0) { + printf("WM constants:\n"); + for (i = 0; i < brw->wm.prog_data->nr_params; i++) { + if ((i & 7) == 0) + printf("g%d: ", brw->wm.prog_data->first_curbe_grf + i / 8); + printf("%8f ", constants[i]); + if ((i & 7) == 7) + printf("\n"); + } + if ((i & 7) != 0) + printf("\n"); + printf("\n"); + } + drm_intel_gem_bo_unmap_gtt(brw->wm.push_const_bo); } } @@ -88,6 +103,7 @@ upload_wm_state(struct brw_context *brw) brw_fragment_program_const(brw->fragment_program); uint32_t dw2, dw4, dw5, dw6; + /* CACHE_NEW_WM_PROG */ if (brw->wm.prog_data->nr_params == 0) { /* Disable the push constant buffers. */ BEGIN_BATCH(5); @@ -104,7 +120,8 @@ upload_wm_state(struct brw_context *brw) (5 - 2)); OUT_RELOC(brw->wm.push_const_bo, I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ - ALIGN(brw->wm.prog_data->nr_params, 8) / 8 - 1); + ALIGN(brw->wm.prog_data->nr_params, + brw->wm.prog_data->dispatch_width) / 8 - 1); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); @@ -126,8 +143,8 @@ upload_wm_state(struct brw_context *brw) dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT; - /* BRW_NEW_FRAGMENT_PROGRAM */ - if (fp->isGLSL) + /* CACHE_NEW_WM_PROG */ + if (brw->wm.prog_data->dispatch_width == 8) dw5 |= GEN6_WM_8_DISPATCH_ENABLE; else dw5 |= GEN6_WM_16_DISPATCH_ENABLE; @@ -176,13 +193,14 @@ upload_wm_state(struct brw_context *brw) const struct brw_tracked_state gen6_wm_state = { .dirty = { .mesa = (_NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR | _NEW_BUFFERS | - _NEW_PROGRAM_CONSTANTS), + _NEW_PROGRAM_CONSTANTS | _NEW_POLYGON), .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_NR_WM_SURFACES | BRW_NEW_URB_FENCE | BRW_NEW_BATCH), - .cache = CACHE_NEW_SAMPLER + .cache = (CACHE_NEW_SAMPLER | + CACHE_NEW_WM_PROG) }, .emit = upload_wm_state, }; diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 4b498f8c5b2..21fc9ece886 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -92,7 +92,7 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used) batch->ptr = NULL; - if (!intel->no_hw) { + if (!intel->intelScreen->no_hw) { drm_intel_bo_exec(batch->buf, used, NULL, 0, (x_off & 0xffff) | (y_off << 16)); } diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 152cdcaf37d..9c222c7b485 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -519,7 +519,6 @@ static const struct dri_debug_control debug_control[] = { { "sing", DEBUG_SINGLE_THREAD }, { "thre", DEBUG_SINGLE_THREAD }, { "wm", DEBUG_WM }, - { "glsl_force", DEBUG_GLSL_FORCE }, { "urb", DEBUG_URB }, { "vs", DEBUG_VS }, { "clip", DEBUG_CLIP }, @@ -800,11 +799,6 @@ intelInitContext(struct intel_context *intel, if (INTEL_DEBUG & DEBUG_BUFMGR) dri_bufmgr_set_debug(intel->bufmgr, GL_TRUE); - /* XXX force SIMD8 kernel for Sandybridge before we fixed - SIMD16 interpolation. */ - if (intel->gen == 6) - INTEL_DEBUG |= DEBUG_GLSL_FORCE; - intel->batch = intel_batchbuffer_alloc(intel); intel_fbo_init(intel); @@ -838,11 +832,6 @@ intelInitContext(struct intel_context *intel, intel->always_flush_cache = 1; } - /* Disable all hardware rendering (skip emitting batches and fences/waits - * to the kernel) - */ - intel->no_hw = getenv("INTEL_NO_HW") != NULL; - return GL_TRUE; } diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 9d5139c0000..96493c0f2bb 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -207,7 +207,6 @@ struct intel_context GLboolean hw_stipple; GLboolean depth_buffer_is_float; GLboolean no_rast; - GLboolean no_hw; GLboolean always_flush_batch; GLboolean always_flush_cache; @@ -362,7 +361,6 @@ extern int INTEL_DEBUG; #define DEBUG_WM 0x800000 #define DEBUG_URB 0x1000000 #define DEBUG_VS 0x2000000 -#define DEBUG_GLSL_FORCE 0x4000000 #define DEBUG_CLIP 0x8000000 #define DBG(...) do { \ diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 862a13d2ea5..18e796a1186 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -465,10 +465,12 @@ intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb, irb->Base.DataType = GL_UNSIGNED_BYTE; DBG("Render to XGBA8 texture OK\n"); } +#ifndef I915 else if (texImage->TexFormat == MESA_FORMAT_SARGB8) { irb->Base.DataType = GL_UNSIGNED_BYTE; DBG("Render to SARGB8 texture OK\n"); } +#endif else if (texImage->TexFormat == MESA_FORMAT_RGB565) { irb->Base.DataType = GL_UNSIGNED_BYTE; DBG("Render to RGB5 texture OK\n"); @@ -481,6 +483,7 @@ intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb, irb->Base.DataType = GL_UNSIGNED_BYTE; DBG("Render to ARGB4444 texture OK\n"); } +#ifndef I915 else if (texImage->TexFormat == MESA_FORMAT_A8) { irb->Base.DataType = GL_UNSIGNED_BYTE; DBG("Render to A8 texture OK\n"); @@ -501,6 +504,7 @@ intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb, irb->Base.DataType = GL_UNSIGNED_SHORT; DBG("Render to RG88 texture OK\n"); } +#endif else if (texImage->TexFormat == MESA_FORMAT_Z16) { irb->Base.DataType = GL_UNSIGNED_SHORT; DBG("Render to DEPTH16 texture OK\n"); @@ -710,15 +714,17 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) switch (irb->Base.Format) { case MESA_FORMAT_ARGB8888: case MESA_FORMAT_XRGB8888: - case MESA_FORMAT_SARGB8: case MESA_FORMAT_RGB565: case MESA_FORMAT_ARGB1555: case MESA_FORMAT_ARGB4444: +#ifndef I915 + case MESA_FORMAT_SARGB8: case MESA_FORMAT_A8: case MESA_FORMAT_R8: case MESA_FORMAT_R16: case MESA_FORMAT_RG88: case MESA_FORMAT_RG1616: +#endif break; default: fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 061f0d278d6..3f13589a214 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -452,7 +452,7 @@ intelCreateContext(gl_api api, return brwCreateContext(api, mesaVis, driContextPriv, sharedContextPrivate); #endif - fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID); + fprintf(stderr, "Unrecognized deviceID 0x%x\n", intelScreen->deviceID); return GL_FALSE; } @@ -462,7 +462,8 @@ intel_init_bufmgr(struct intel_screen *intelScreen) __DRIscreen *spriv = intelScreen->driScrnPriv; int num_fences = 0; - intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL; + intelScreen->no_hw = (getenv("INTEL_NO_HW") != NULL || + getenv("INTEL_DEVID_OVERRIDE") != NULL); intelScreen->bufmgr = intel_bufmgr_gem_init(spriv->fd, BATCH_SZ); if (intelScreen->bufmgr == NULL) { @@ -497,6 +498,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) GLenum fb_format[3]; GLenum fb_type[3]; unsigned int api_mask; + char *devid_override; static const GLenum back_buffer_modes[] = { GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML @@ -523,6 +525,16 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) &intelScreen->deviceID)) return GL_FALSE; + /* Allow an override of the device ID for the purpose of making the + * driver produce dumps for debugging of new chipset enablement. + * This implies INTEL_NO_HW, to avoid programming your actual GPU + * incorrectly. + */ + devid_override = getenv("INTEL_DEVID_OVERRIDE"); + if (devid_override) { + intelScreen->deviceID = strtod(devid_override, NULL); + } + api_mask = (1 << __DRI_API_OPENGL); #if FEATURE_ES1 api_mask |= (1 << __DRI_API_GLES); diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c index 9d73a2fb375..f8316ae2f8d 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_format.c +++ b/src/mesa/drivers/dri/intel/intel_tex_format.c @@ -204,11 +204,13 @@ intelChooseTextureFormat(struct gl_context * ctx, GLint internalFormat, * { R, G, 1.0, 1.0 } from a red-green texture would be useful. */ case GL_RED: + case GL_COMPRESSED_RED: case GL_R8: return MESA_FORMAT_R8; case GL_R16: return MESA_FORMAT_R16; case GL_RG: + case GL_COMPRESSED_RG: case GL_RG8: return MESA_FORMAT_RG88; case GL_RG16: diff --git a/src/mesa/drivers/dri/r200/r200_maos_arrays.c b/src/mesa/drivers/dri/r200/r200_maos_arrays.c index 8a047e6419b..b62290231b9 100644 --- a/src/mesa/drivers/dri/r200/r200_maos_arrays.c +++ b/src/mesa/drivers/dri/r200/r200_maos_arrays.c @@ -200,6 +200,7 @@ void r200EmitArrays( struct gl_context *ctx, GLubyte *vimap_rev ) } default: assert(0); + emitsize = 0; } if (!rmesa->radeon.tcl.aos[nr].bo) { rcommon_emit_vector( ctx, diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index 8be32ea91fe..1db8678e890 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -76,6 +76,9 @@ static void use_temporary(struct r300_fragment_program_code *code, unsigned int static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src) { + if (!src.Used) + return 0; + if (src.File == RC_FILE_CONSTANT) { return src.Index | (1 << 5); } else if (src.File == RC_FILE_TEMPORARY) { diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index 2d28b065390..05d3da8a10d 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -94,6 +94,7 @@ static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle) */ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { + const struct swizzle_data* sd; unsigned int relevant; int j; @@ -127,7 +128,8 @@ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) return 0; - if (!lookup_native_swizzle(reg.Swizzle)) + sd = lookup_native_swizzle(reg.Swizzle); + if (!sd || (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0)) return 0; return 1; @@ -201,7 +203,7 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) { const struct swizzle_data* sd = lookup_native_swizzle(swizzle); - if (!sd) { + if (!sd || (src == RC_PAIR_PRESUB_SRC && sd->srcp_stride == 0)) { fprintf(stderr, "Not a native swizzle: %08x\n", swizzle); return 0; } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 2f130198d35..e0d349b98ce 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -24,6 +24,7 @@ #include <stdio.h> +#include "radeon_compiler_util.h" #include "radeon_dataflow.h" #include "radeon_emulate_branches.h" #include "radeon_emulate_loops.h" @@ -54,6 +55,8 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { struct rc_sub_instruction * inst = &rci->U.I; + unsigned i; + const struct rc_opcode_info *info = rc_get_opcode_info(inst->Opcode); if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth) continue; @@ -65,27 +68,12 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) continue; } - switch (inst->Opcode) { - case RC_OPCODE_FRC: - case RC_OPCODE_MOV: - inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); - break; - case RC_OPCODE_ADD: - case RC_OPCODE_MAX: - case RC_OPCODE_MIN: - case RC_OPCODE_MUL: - inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]); - break; - case RC_OPCODE_CMP: - case RC_OPCODE_MAD: - inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]); - inst->SrcReg[2] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[2]); - break; - default: - // Scalar instructions needn't be reswizzled - break; + if (!info->IsComponentwise) { + continue; + } + + for (i = 0; i < info->NumSrcRegs; i++) { + inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]); } } } @@ -93,7 +81,6 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) { int is_r500 = c->Base.is_r500; - int kill_consts = c->Base.remove_unused_constants; int opt = !c->Base.disable_optimizations; /* Lists of instruction transformations. */ @@ -133,11 +120,11 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) {"emulate loops", 1, !is_r500, rc_emulate_loops, NULL}, {"dataflow optimize", 1, opt, rc_optimize, NULL}, {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL}, - {"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table}, + {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, /* This pass makes it easier for the scheduler to group TEX * instructions and reduces the chances of creating too * many texture indirections.*/ - {"register rename", 1, !is_r500, rc_rename_regs, NULL}, + {"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL}, {"pair translate", 1, 1, rc_pair_translate, NULL}, {"pair scheduling", 1, 1, rc_pair_schedule, NULL}, {"register allocation", 1, opt, rc_pair_regalloc, NULL}, @@ -150,9 +137,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) {NULL, 0, 0, NULL, NULL} }; + c->Base.type = RC_FRAGMENT_PROGRAM; c->Base.SwizzleCaps = c->Base.is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps; - rc_run_compiler(&c->Base, fs_list, "Fragment Program"); + rc_run_compiler(&c->Base, fs_list); rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index bf8341f0173..472029f63d0 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -26,6 +26,7 @@ #include "../r300_reg.h" +#include "radeon_compiler_util.h" #include "radeon_dataflow.h" #include "radeon_program_alu.h" #include "radeon_swizzle.h" @@ -790,19 +791,14 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) if (!hwtemps[j]) break; } - if (j >= c->max_temp_regs) { - rc_error(c, "Too many temporaries\n"); - return; + ta[orig].Allocated = 1; + if (last_inst_src_reladdr && + last_inst_src_reladdr->IP > inst->IP) { + ta[orig].HwTemp = orig; } else { - ta[orig].Allocated = 1; - if (last_inst_src_reladdr && - last_inst_src_reladdr->IP > inst->IP) { - ta[orig].HwTemp = orig; - } else { - ta[orig].HwTemp = j; - } - hwtemps[ta[orig].HwTemp] = 1; + ta[orig].HwTemp = j; } + hwtemps[ta[orig].HwTemp] = 1; } inst->U.I.DstReg.Index = ta[orig].HwTemp; @@ -1018,7 +1014,6 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) { int is_r500 = c->Base.is_r500; - int kill_consts = c->Base.remove_unused_constants; int opt = !c->Base.disable_optimizations; /* Lists of instruction transformations. */ @@ -1062,18 +1057,18 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) {"dataflow optimize", 1, opt, rc_optimize, NULL}, /* This pass must be done after optimizations. */ {"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts}, - {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL}, {"register allocation", 1, opt, allocate_temporary_registers, NULL}, - {"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table}, + {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, {"final code validation", 0, 1, rc_validate_final_shader, NULL}, {"machine code generation", 0, 1, translate_vertex_program, NULL}, {"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL}, {NULL, 0, 0, NULL, NULL} }; + c->Base.type = RC_VERTEX_PROGRAM; c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; - rc_run_compiler(&c->Base, vs_list, "Vertex Program"); + rc_run_compiler(&c->Base, vs_list); c->code->InputsRead = c->Base.Program.InputsRead; c->code->OutputsWritten = c->Base.Program.OutputsWritten; diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index 289bb87ae59..ef81be48f77 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -29,6 +29,7 @@ #include <stdio.h> +#include "radeon_compiler_util.h" #include "../r300_reg.h" /** diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 6f101c68eb6..5da82d90f67 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -45,9 +45,6 @@ #include "radeon_program_pair.h" -#define MAX_BRANCH_DEPTH_FULL 32 -#define MAX_BRANCH_DEPTH_PARTIAL 4 - #define PROG_CODE \ struct r500_fragment_program_code *code = &c->code->code.r500 @@ -200,6 +197,9 @@ static void use_temporary(struct r500_fragment_program_code* code, unsigned int static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src) { + if (!src.Used) + return 0; + if (src.File == RC_FILE_CONSTANT) { return src.Index | 0x100; } else if (src.File == RC_FILE_TEMPORARY) { @@ -506,7 +506,7 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst break; } case RC_OPCODE_IF: - if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) { + if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) { rc_error(s->C, "Branch depth exceeds hardware limit"); return; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index cfb6df2cd79..b69e81698ae 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -34,6 +34,8 @@ #define R500_PFS_MAX_INST 512 #define R500_PFS_NUM_TEMP_REGS 128 #define R500_PFS_NUM_CONST_REGS 256 +#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32 +#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4 #define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index 4286baed0c6..65548604bcc 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -29,6 +29,7 @@ #include "radeon_dataflow.h" #include "radeon_program.h" #include "radeon_program_pair.h" +#include "radeon_compiler_util.h" void rc_init(struct radeon_compiler * c) @@ -356,66 +357,92 @@ void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) static void reg_count_callback(void * userdata, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int mask) { - unsigned int * max_reg = userdata; + int *max_reg = userdata; if (file == RC_FILE_TEMPORARY) - index > *max_reg ? *max_reg = index : 0; + (int)index > *max_reg ? *max_reg = index : 0; } -static void print_stats(struct radeon_compiler * c) +void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) { + int max_reg = -1; struct rc_instruction * tmp; - unsigned max_reg, insts, fc, tex, alpha, rgb, presub; - max_reg = insts = fc = tex = alpha = rgb = presub = 0; + memset(s, 0, sizeof(*s)); + for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; tmp = tmp->Next){ const struct rc_opcode_info * info; rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg); if (tmp->Type == RC_INSTRUCTION_NORMAL) { if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE) - presub++; + s->num_presub_ops++; info = rc_get_opcode_info(tmp->U.I.Opcode); } else { if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) - presub++; + s->num_presub_ops++; if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) - presub++; + s->num_presub_ops++; /* Assuming alpha will never be a flow control or * a tex instruction. */ if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP) - alpha++; + s->num_alpha_insts++; if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP) - rgb++; + s->num_rgb_insts++; info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); } if (info->IsFlowControl) - fc++; + s->num_fc_insts++; if (info->HasTexture) - tex++; - insts++; + s->num_tex_insts++; + s->num_insts++; } - if (insts < 4) - return; - fprintf(stderr,"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" - "~%4u Instructions\n" - "~%4u Vector Instructions (RGB)\n" - "~%4u Scalar Instructions (Alpha)\n" - "~%4u Flow Control Instructions\n" - "~%4u Texture Instructions\n" - "~%4u Presub Operations\n" - "~%4u Temporary Registers\n" - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - insts, rgb, alpha, fc, tex, presub, max_reg + 1); + s->num_temp_regs = max_reg + 1; } -/* Executes a list of compiler passes given in the parameter 'list'. */ -void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list, - const char *shader_name) +static void print_stats(struct radeon_compiler * c) { - if (c->Debug & RC_DBG_LOG) { - fprintf(stderr, "%s: before compilation\n", shader_name); - rc_print_program(&c->Program); + struct rc_program_stats s; + + rc_get_stats(c, &s); + + if (s.num_insts < 4) + return; + + switch (c->type) { + case RC_VERTEX_PROGRAM: + fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n" + "~%4u Instructions\n" + "~%4u Flow Control Instructions\n" + "~%4u Temporary Registers\n" + "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", + s.num_insts, s.num_fc_insts, s.num_temp_regs); + break; + + case RC_FRAGMENT_PROGRAM: + fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n" + "~%4u Instructions\n" + "~%4u Vector Instructions (RGB)\n" + "~%4u Scalar Instructions (Alpha)\n" + "~%4u Flow Control Instructions\n" + "~%4u Texture Instructions\n" + "~%4u Presub Operations\n" + "~%4u Temporary Registers\n" + "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", + s.num_insts, s.num_rgb_insts, s.num_alpha_insts, + s.num_fc_insts, s.num_tex_insts, s.num_presub_ops, + s.num_temp_regs); + break; + default: + assert(0); } +} +static const char *shader_name[RC_NUM_PROGRAM_TYPES] = { + "Vertex Program", + "Fragment Program" +}; + +void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list) +{ for (unsigned i = 0; list[i].name; i++) { if (list[i].predicate) { list[i].run(c, list[i].user); @@ -424,11 +451,23 @@ void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *lis return; if ((c->Debug & RC_DBG_LOG) && list[i].dump) { - fprintf(stderr, "%s: after '%s'\n", shader_name, list[i].name); + fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name); rc_print_program(&c->Program); } } } +} + +/* Executes a list of compiler passes given in the parameter 'list'. */ +void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list) +{ + if (c->Debug & RC_DBG_LOG) { + fprintf(stderr, "%s: before compilation\n", shader_name[c->type]); + rc_print_program(&c->Program); + } + + rc_run_compiler_passes(c, list); + if (c->Debug & RC_DBG_STATS) print_stats(c); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index 31fd469a04f..e6633395895 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -35,9 +35,16 @@ struct rc_swizzle_caps; +enum rc_program_type { + RC_VERTEX_PROGRAM, + RC_FRAGMENT_PROGRAM, + RC_NUM_PROGRAM_TYPES +}; + struct radeon_compiler { struct memory_pool Pool; struct rc_program Program; + enum rc_program_type type; unsigned Debug:2; unsigned Error:1; char * ErrorMsg; @@ -140,9 +147,21 @@ struct radeon_compiler_pass { void *user; /* Optional parameter which is passed to the run function. */ }; +struct rc_program_stats { + unsigned num_insts; + unsigned num_fc_insts; + unsigned num_tex_insts; + unsigned num_rgb_insts; + unsigned num_alpha_insts; + unsigned num_presub_ops; + unsigned num_temp_regs; +}; + +void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s); + /* Executes a list of compiler passes given in the parameter 'list'. */ -void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list, - const char *shader_name); +void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list); +void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list); void rc_validate_final_shader(struct radeon_compiler *c, void *user); #endif /* RADEON_COMPILER_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c index 97f4c758492..bf393a9fb16 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c @@ -31,6 +31,8 @@ #include "radeon_compiler_util.h" +#include "radeon_compiler.h" +#include "radeon_dataflow.h" /** */ unsigned int rc_swizzle_to_writemask(unsigned int swz) @@ -46,6 +48,91 @@ unsigned int rc_swizzle_to_writemask(unsigned int swz) return mask; } +rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) +{ + if (idx & 0x4) + return idx; + return GET_SWZ(swz, idx); +} + +unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) +{ + unsigned int ret = 0; + + ret |= get_swz(src, swz_x); + ret |= get_swz(src, swz_y) << 3; + ret |= get_swz(src, swz_z) << 6; + ret |= get_swz(src, swz_w) << 9; + + return ret; +} + +unsigned int combine_swizzles(unsigned int src, unsigned int swz) +{ + unsigned int ret = 0; + + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; + + return ret; +} + +/** + * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W + */ +rc_swizzle rc_mask_to_swizzle(unsigned int mask) +{ + switch (mask) { + case RC_MASK_X: return RC_SWIZZLE_X; + case RC_MASK_Y: return RC_SWIZZLE_Y; + case RC_MASK_Z: return RC_SWIZZLE_Z; + case RC_MASK_W: return RC_SWIZZLE_W; + } + return RC_SWIZZLE_UNUSED; +} + +/* Reorder mask bits according to swizzle. */ +unsigned swizzle_mask(unsigned swizzle, unsigned mask) +{ + unsigned ret = 0; + for (unsigned chan = 0; chan < 4; ++chan) { + unsigned swz = GET_SWZ(swizzle, chan); + if (swz < 4) + ret |= GET_BIT(mask, swz) << chan; + } + return ret; +} + +/** + * Left multiplication of a register with a swizzle + */ +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) +{ + struct rc_src_register tmp = srcreg; + int i; + tmp.Swizzle = 0; + tmp.Negate = 0; + for(i = 0; i < 4; ++i) { + rc_swizzle swz = GET_SWZ(swizzle, i); + if (swz < 4) { + tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; + } else { + tmp.Swizzle |= swz << (i*3); + } + } + return tmp; +} + +void reset_srcreg(struct rc_src_register* reg) +{ + memset(reg, 0, sizeof(struct rc_src_register)); + reg->Swizzle = RC_SWIZZLE_XYZW; +} + unsigned int rc_src_reads_dst_mask( rc_register_file src_file, unsigned int src_idx, @@ -59,3 +146,123 @@ unsigned int rc_src_reads_dst_mask( } return dst_mask & rc_swizzle_to_writemask(src_swz); } + +unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels) +{ + unsigned int chan; + unsigned int swz = RC_SWIZZLE_UNUSED; + unsigned int ret = RC_SOURCE_NONE; + + for(chan = 0; chan < channels; chan++) { + swz = GET_SWZ(swizzle, chan); + if (swz == RC_SWIZZLE_W) { + ret |= RC_SOURCE_ALPHA; + } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y + || swz == RC_SWIZZLE_Z) { + ret |= RC_SOURCE_RGB; + } + } + return ret; +} + +unsigned int rc_source_type_mask(unsigned int mask) +{ + unsigned int ret = RC_SOURCE_NONE; + + if (mask & RC_MASK_XYZ) + ret |= RC_SOURCE_RGB; + + if (mask & RC_MASK_W) + ret |= RC_SOURCE_ALPHA; + + return ret; +} + +struct can_use_presub_data { + struct rc_src_register RemoveSrcs[3]; + unsigned int RGBCount; + unsigned int AlphaCount; +}; + +static void can_use_presub_read_cb( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct can_use_presub_data * d = userdata; + unsigned int src_type = rc_source_type_mask(mask); + unsigned int i; + + if (file == RC_FILE_NONE) + return; + + for(i = 0; i < 3; i++) { + if (d->RemoveSrcs[i].File == file + && d->RemoveSrcs[i].Index == index) { + src_type &= + ~rc_source_type_swz(d->RemoveSrcs[i].Swizzle, 4); + } + } + + if (src_type & RC_SOURCE_RGB) + d->RGBCount++; + + if (src_type & RC_SOURCE_ALPHA) + d->AlphaCount++; +} + +unsigned int rc_inst_can_use_presub( + struct rc_instruction * inst, + rc_presubtract_op presub_op, + unsigned int presub_writemask, + struct rc_src_register replace_reg, + struct rc_src_register presub_src0, + struct rc_src_register presub_src1) +{ + struct can_use_presub_data d; + unsigned int num_presub_srcs; + unsigned int presub_src_type = rc_source_type_mask(presub_writemask); + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + + if (presub_op == RC_PRESUB_NONE) { + return 1; + } + + if (info->HasTexture) { + return 0; + } + + /* We can't use more than one presubtract value in an + * instruction, unless the two prsubtract operations + * are the same and read from the same registers. + * XXX For now we will limit instructions to only one presubtract + * value.*/ + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { + return 0; + } + + memset(&d, 0, sizeof(d)); + d.RemoveSrcs[0] = replace_reg; + d.RemoveSrcs[1] = presub_src0; + d.RemoveSrcs[2] = presub_src1; + + rc_for_all_reads_mask(inst, can_use_presub_read_cb, &d); + + num_presub_srcs = rc_presubtract_src_reg_count(presub_op); + + if ((presub_src_type & RC_SOURCE_RGB) + && d.RGBCount + num_presub_srcs > 3) { + return 0; + } + + if ((presub_src_type & RC_SOURCE_ALPHA) + && d.AlphaCount + num_presub_srcs > 3) { + return 0; + } + + return 1; +} + diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h index 1a14e7cb0ef..461ab9ffb10 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h @@ -3,8 +3,27 @@ #ifndef RADEON_PROGRAM_UTIL_H #define RADEON_PROGRAM_UTIL_H +struct rc_instruction; +struct rc_src_register; + unsigned int rc_swizzle_to_writemask(unsigned int swz); +rc_swizzle get_swz(unsigned int swz, rc_swizzle idx); + +unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, + rc_swizzle swz_z, rc_swizzle swz_w); + +unsigned int combine_swizzles(unsigned int src, unsigned int swz); + +rc_swizzle rc_mask_to_swizzle(unsigned int mask); + +unsigned swizzle_mask(unsigned swizzle, unsigned mask); + +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); + +void reset_srcreg(struct rc_src_register* reg); + unsigned int rc_src_reads_dst_mask( rc_register_file src_file, unsigned int src_idx, @@ -13,4 +32,16 @@ unsigned int rc_src_reads_dst_mask( unsigned int dst_idx, unsigned int dst_mask); +unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels); + +unsigned int rc_source_type_mask(unsigned int mask); + +unsigned int rc_inst_can_use_presub( + struct rc_instruction * inst, + rc_presubtract_op presub_op, + unsigned int presub_writemask, + struct rc_src_register replace_reg, + struct rc_src_register presub_src0, + struct rc_src_register presub_src1); + #endif /* RADEON_PROGRAM_UTIL_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index fd94194dc34..d0a64d936e0 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -139,7 +139,46 @@ static void pair_sub_for_all_args( const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); for(i = 0; i < info->NumSrcRegs; i++) { - cb(userdata, fullinst, &sub->Arg[i]); + unsigned int src_type; + unsigned int channels = 0; + if (&fullinst->U.P.RGB == sub) + channels = 3; + else if (&fullinst->U.P.Alpha == sub) + channels = 1; + + assert(channels > 0); + src_type = rc_source_type_swz(sub->Arg[i].Swizzle, channels); + + if (src_type == RC_SOURCE_NONE) + continue; + + if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) { + unsigned int presub_type; + unsigned int presub_src_count; + struct rc_pair_instruction_source * src_array; + unsigned int j; + if (src_type & RC_SOURCE_RGB) { + presub_type = fullinst-> + U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index; + src_array = fullinst->U.P.RGB.Src; + } else { + presub_type = fullinst-> + U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index; + src_array = fullinst->U.P.Alpha.Src; + } + presub_src_count + = rc_presubtract_src_reg_count(presub_type); + for(j = 0; j < presub_src_count; j++) { + cb(userdata, fullinst, &sub->Arg[i], + &src_array[j]); + } + } else { + struct rc_pair_instruction_source * src = + rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]); + if (src) { + cb(userdata, fullinst, &sub->Arg[i], src); + } + } } } @@ -308,6 +347,7 @@ static void remap_normal_instruction(struct rc_instruction * fullinst, { struct rc_sub_instruction * inst = &fullinst->U.I; const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + unsigned int remapped_presub = 0; if (opcode->HasDstReg) { rc_register_file file = inst->DstReg.File; @@ -327,6 +367,12 @@ static void remap_normal_instruction(struct rc_instruction * fullinst, unsigned int i; unsigned int srcp_srcs = rc_presubtract_src_reg_count( inst->PreSub.Opcode); + /* Make sure we only remap presubtract sources once in + * case more than one source register reads the + * presubtract result. */ + if (remapped_presub) + continue; + for(i = 0; i < srcp_srcs; i++) { file = inst->PreSub.SrcReg[i].File; index = inst->PreSub.SrcReg[i].Index; @@ -334,7 +380,7 @@ static void remap_normal_instruction(struct rc_instruction * fullinst, inst->PreSub.SrcReg[i].File = file; inst->PreSub.SrcReg[i].Index = index; } - + remapped_presub = 1; } else { cb(userdata, fullinst, &file, &index); @@ -430,12 +476,29 @@ static rc_opcode get_flow_control_inst(struct rc_instruction * inst) } +struct branch_write_mask { + unsigned int IfWriteMask:4; + unsigned int ElseWriteMask:4; + unsigned int HasElse:1; +}; + +union get_readers_read_cb { + rc_read_src_fn I; + rc_pair_read_arg_fn P; +}; + struct get_readers_callback_data { struct radeon_compiler * C; struct rc_reader_data * ReaderData; - rc_read_src_fn ReadCB; + rc_read_src_fn ReadNormalCB; + rc_pair_read_arg_fn ReadPairCB; rc_read_write_mask_fn WriteCB; + rc_register_file DstFile; + unsigned int DstIndex; + unsigned int DstMask; unsigned int AliveWriteMask; + /* For convenience, this is indexed starting at 1 */ + struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1]; }; static void add_reader( @@ -443,7 +506,7 @@ static void add_reader( struct rc_reader_data * data, struct rc_instruction * inst, unsigned int mask, - struct rc_src_register * src) + void * arg_or_src) { struct rc_reader * new; memory_pool_array_reserve(pool, struct rc_reader, data->Readers, @@ -451,7 +514,74 @@ static void add_reader( new = &data->Readers[data->ReaderCount++]; new->Inst = inst; new->WriteMask = mask; - new->Src = src; + if (inst->Type == RC_INSTRUCTION_NORMAL) { + new->U.Src = arg_or_src; + } else { + new->U.Arg = arg_or_src; + } +} + +static unsigned int get_readers_read_callback( + struct get_readers_callback_data * cb_data, + unsigned int has_rel_addr, + rc_register_file file, + unsigned int index, + unsigned int swizzle) +{ + unsigned int shared_mask, read_mask; + + if (has_rel_addr) { + cb_data->ReaderData->Abort = 1; + return RC_MASK_NONE; + } + + shared_mask = rc_src_reads_dst_mask(file, index, swizzle, + cb_data->DstFile, cb_data->DstIndex, cb_data->AliveWriteMask); + + if (shared_mask == RC_MASK_NONE) + return shared_mask; + + /* If we make it this far, it means that this source reads from the + * same register written to by d->ReaderData->Writer. */ + + read_mask = rc_swizzle_to_writemask(swizzle); + if (cb_data->ReaderData->AbortOnRead & read_mask) { + cb_data->ReaderData->Abort = 1; + return shared_mask; + } + + /* XXX The behavior in this case should be configurable. */ + if ((read_mask & cb_data->AliveWriteMask) != read_mask) { + cb_data->ReaderData->Abort = 1; + return shared_mask; + } + + return shared_mask; +} + +static void get_readers_pair_read_callback( + void * userdata, + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + unsigned int shared_mask; + struct get_readers_callback_data * d = userdata; + + shared_mask = get_readers_read_callback(d, + 0 /*Pair Instructions don't use RelAddr*/, + src->File, src->Index, arg->Swizzle); + + if (shared_mask == RC_MASK_NONE) + return; + + if (d->ReadPairCB) + d->ReadPairCB(d->ReaderData, inst, arg, src); + + if (d->ReaderData->Abort) + return; + + add_reader(&d->C->Pool, d->ReaderData, inst, shared_mask, arg); } /** @@ -464,37 +594,18 @@ static void get_readers_normal_read_callback( struct rc_src_register * src) { struct get_readers_callback_data * d = userdata; - unsigned int read_mask; unsigned int shared_mask; - if (src->RelAddr) - d->ReaderData->Abort = 1; - - shared_mask = rc_src_reads_dst_mask(src->File, src->Index, - src->Swizzle, - d->ReaderData->Writer->U.I.DstReg.File, - d->ReaderData->Writer->U.I.DstReg.Index, - d->AliveWriteMask); + shared_mask = get_readers_read_callback(d, + src->RelAddr, src->File, src->Index, src->Swizzle); if (shared_mask == RC_MASK_NONE) return; + /* The callback function could potentially clear d->ReaderData->Abort, + * so we need to call it before we return. */ + if (d->ReadNormalCB) + d->ReadNormalCB(d->ReaderData, inst, src); - /* If we make it this far, it means that this source reads from the - * same register written to by d->ReaderData->Writer. */ - - if (d->ReaderData->AbortOnRead) { - d->ReaderData->Abort = 1; - return; - } - - read_mask = rc_swizzle_to_writemask(src->Swizzle); - /* XXX The behavior in this case should be configurable. */ - if ((read_mask & d->AliveWriteMask) != read_mask) { - d->ReaderData->Abort = 1; - return; - } - - d->ReadCB(d->ReaderData, inst, src); if (d->ReaderData->Abort) return; @@ -515,29 +626,132 @@ static void get_readers_write_callback( { struct get_readers_callback_data * d = userdata; - if (index == d->ReaderData->Writer->U.I.DstReg.Index - && file == d->ReaderData->Writer->U.I.DstReg.File) { - unsigned int shared_mask = mask - & d->ReaderData->Writer->U.I.DstReg.WriteMask; - if (d->ReaderData->InElse) { - if (shared_mask & d->AliveWriteMask) { - /* We set AbortOnRead here because the - * destination register of d->ReaderData->Writer - * is written to in both the IF and the - * ELSE block of this IF/ELSE statement. - * This means that readers of this - * destination register that follow this IF/ELSE - * statement use the value of different - * instructions depending on the control flow - * decisions made by the program. */ - d->ReaderData->AbortOnRead = 1; + if (index == d->DstIndex && file == d->DstFile) { + unsigned int shared_mask = mask & d->DstMask; + d->ReaderData->AbortOnRead &= ~shared_mask; + d->AliveWriteMask &= ~shared_mask; + } + + if(d->WriteCB) + d->WriteCB(d->ReaderData, inst, file, index, mask); +} + +static void get_readers_for_single_write( + void * userdata, + struct rc_instruction * writer, + rc_register_file dst_file, + unsigned int dst_index, + unsigned int dst_mask) +{ + struct rc_instruction * tmp; + unsigned int branch_depth = 0; + struct get_readers_callback_data * d = userdata; + + d->ReaderData->Writer = writer; + d->ReaderData->AbortOnRead = 0; + d->ReaderData->InElse = 0; + d->DstFile = dst_file; + d->DstIndex = dst_index; + d->DstMask = dst_mask; + d->AliveWriteMask = dst_mask; + memset(d->BranchMasks, 0, sizeof(d->BranchMasks)); + + if (!dst_mask) + return; + + for(tmp = writer->Next; tmp != &d->C->Program.Instructions; + tmp = tmp->Next){ + rc_opcode opcode = get_flow_control_inst(tmp); + switch(opcode) { + case RC_OPCODE_BGNLOOP: + /* XXX We can do better when we see a BGNLOOP if we + * add a flag called AbortOnWrite to struct + * rc_reader_data and leave it set until the next + * ENDLOOP. */ + case RC_OPCODE_ENDLOOP: + /* XXX We can do better when we see an ENDLOOP by + * searching backwards from writer and looking for + * readers of writer's destination index. If we find a + * reader before we get to the BGNLOOP, we must abort + * unless there is another writer between that reader + * and the BGNLOOP. */ + case RC_OPCODE_BRK: + case RC_OPCODE_CONT: + d->ReaderData->Abort = 1; + return; + case RC_OPCODE_IF: + branch_depth++; + if (branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) { + d->ReaderData->Abort = 1; + return; + } + d->BranchMasks[branch_depth].IfWriteMask = + d->AliveWriteMask; + break; + case RC_OPCODE_ELSE: + if (branch_depth == 0) { + d->ReaderData->InElse = 1; + } else { + unsigned int temp_mask = d->AliveWriteMask; + d->AliveWriteMask = + d->BranchMasks[branch_depth].IfWriteMask; + d->BranchMasks[branch_depth].ElseWriteMask = + temp_mask; + d->BranchMasks[branch_depth].HasElse = 1; } + break; + case RC_OPCODE_ENDIF: + if (branch_depth == 0) { + d->ReaderData->AbortOnRead = d->AliveWriteMask; + d->ReaderData->InElse = 0; + } + else { + struct branch_write_mask * masks = + &d->BranchMasks[branch_depth]; + + if (masks->HasElse) { + d->ReaderData->AbortOnRead |= + masks->IfWriteMask + & ~masks->ElseWriteMask; + d->AliveWriteMask = masks->IfWriteMask + ^ ((masks->IfWriteMask ^ + masks->ElseWriteMask) + & (masks->IfWriteMask + ^ d->AliveWriteMask)); + } else { + d->ReaderData->AbortOnRead |= + masks->IfWriteMask + & ~d->AliveWriteMask; + d->AliveWriteMask = masks->IfWriteMask; + + } + memset(masks, 0, + sizeof(struct branch_write_mask)); + branch_depth--; + } + break; + default: + break; + } + + if (d->ReaderData->InElse) + continue; + + if (tmp->Type == RC_INSTRUCTION_NORMAL) { + rc_for_all_reads_src(tmp, + get_readers_normal_read_callback, d); } else { - d->AliveWriteMask &= ~shared_mask; + rc_pair_for_all_reads_arg(tmp, + get_readers_pair_read_callback, d); } - } + rc_for_all_writes_mask(tmp, get_readers_write_callback, d); - d->WriteCB(d->ReaderData, inst, file, index, mask); + if (d->ReaderData->Abort) + return; + + if (branch_depth == 0 && !d->AliveWriteMask) + return; + } } /** @@ -578,83 +792,26 @@ static void get_readers_write_callback( * @param write_cb This function will be called for every instruction after * writer. */ -void rc_get_readers_normal( +void rc_get_readers( struct radeon_compiler * c, struct rc_instruction * writer, struct rc_reader_data * data, - rc_read_src_fn read_cb, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, rc_read_write_mask_fn write_cb) { - struct rc_instruction * tmp; struct get_readers_callback_data d; - unsigned int branch_depth = 0; - data->Writer = writer; data->Abort = 0; - data->AbortOnRead = 0; - data->InElse = 0; data->ReaderCount = 0; data->ReadersReserved = 0; data->Readers = NULL; d.C = c; - d.AliveWriteMask = writer->U.I.DstReg.WriteMask; d.ReaderData = data; - d.ReadCB = read_cb; + d.ReadNormalCB = read_normal_cb; + d.ReadPairCB = read_pair_cb; d.WriteCB = write_cb; - if (!writer->U.I.DstReg.WriteMask) - return; - - for(tmp = writer->Next; tmp != &c->Program.Instructions; - tmp = tmp->Next){ - rc_opcode opcode = get_flow_control_inst(tmp); - switch(opcode) { - case RC_OPCODE_BGNLOOP: - /* XXX We can do better when we see a BGNLOOP if we - * add a flag called AbortOnWrite to struct - * rc_reader_data and leave it set until the next - * ENDLOOP. */ - case RC_OPCODE_ENDLOOP: - /* XXX We can do better when we see an ENDLOOP by - * searching backwards from writer and looking for - * readers of writer's destination index. If we find a - * reader before we get to the BGNLOOP, we must abort - * unless there is another writer between that reader - * and the BGNLOOP. */ - data->Abort = 1; - return; - case RC_OPCODE_IF: - /* XXX We can do better here, but this will have to - * do until this dataflow analysis is more mature. */ - data->Abort = 1; - branch_depth++; - break; - case RC_OPCODE_ELSE: - if (branch_depth == 0) - data->InElse = 1; - break; - case RC_OPCODE_ENDIF: - if (branch_depth == 0) { - data->AbortOnRead = 1; - data->InElse = 0; - } - else { - branch_depth--; - } - break; - default: - break; - } - - if (!data->InElse) - rc_for_all_reads_src(tmp, get_readers_normal_read_callback, &d); - rc_for_all_writes_mask(tmp, get_readers_write_callback, &d); - - if (data->Abort) - return; - - if (!d.AliveWriteMask) - return; - } + rc_for_all_writes_mask(writer, get_readers_for_single_write, &d); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h index 7de6b98f763..ef971c5b234 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h @@ -36,6 +36,7 @@ struct rc_instruction; struct rc_swizzle_caps; struct rc_src_register; struct rc_pair_instruction_arg; +struct rc_pair_instruction_source; struct rc_compiler; @@ -59,7 +60,8 @@ void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb, void * userdata); typedef void (*rc_pair_read_arg_fn)(void * userdata, - struct rc_instruction * inst, struct rc_pair_instruction_arg * arg); + struct rc_instruction * inst, struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src); void rc_pair_for_all_reads_arg(struct rc_instruction * inst, rc_pair_read_arg_fn cb, void * userdata); @@ -71,7 +73,10 @@ void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, v struct rc_reader { struct rc_instruction * Inst; unsigned int WriteMask; - struct rc_src_register * Src; + union { + struct rc_src_register * Src; + struct rc_pair_instruction_arg * Arg; + } U; }; struct rc_reader_data { @@ -87,14 +92,13 @@ struct rc_reader_data { void * CbData; }; -void rc_get_readers_normal( +void rc_get_readers( struct radeon_compiler * c, - struct rc_instruction * inst, + struct rc_instruction * writer, struct rc_reader_data * data, - /*XXX: These should be their own function types. */ - rc_read_src_fn read_cb, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, rc_read_write_mask_fn write_cb); - /** * Compiler passes based on dataflow analysis. */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index da495a3afaa..25afd272bee 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -67,6 +67,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .IsComponentwise = 1 }, { + .Opcode = RC_OPCODE_CLAMP, + .Name = "CLAMP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { .Opcode = RC_OPCODE_CMP, .Name = "CMP", .NumSrcRegs = 3, @@ -453,6 +460,7 @@ void rc_compute_sources_for_writemask( srcmasks[1] |= RC_MASK_XY; break; case RC_OPCODE_DP3: + case RC_OPCODE_XPD: srcmasks[0] |= RC_MASK_XYZ; srcmasks[1] |= RC_MASK_XYZ; break; @@ -460,6 +468,10 @@ void rc_compute_sources_for_writemask( srcmasks[0] |= RC_MASK_XYZW; srcmasks[1] |= RC_MASK_XYZW; break; + case RC_OPCODE_DPH: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZW; + break; case RC_OPCODE_TXB: case RC_OPCODE_TXP: srcmasks[0] |= RC_MASK_W; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index d3f639c8701..7e666101276 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -50,6 +50,9 @@ typedef enum { /** vec4 instruction: dst.c = ceil(src0.c) */ RC_OPCODE_CEIL, + /** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */ + RC_OPCODE_CLAMP, + /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ RC_OPCODE_CMP, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 15b9c5e7dc3..44f4c0fbdc7 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -54,12 +54,7 @@ static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct combine.Negate = outer.Negate; } else { combine.Abs = inner.Abs; - combine.Negate = 0; - for(unsigned int chan = 0; chan < 4; ++chan) { - unsigned int swz = GET_SWZ(outer.Swizzle, chan); - if (swz < 4) - combine.Negate |= GET_BIT(inner.Negate, swz) << chan; - } + combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); combine.Negate ^= outer.Negate; } combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); @@ -71,12 +66,13 @@ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, { rc_register_file file = src->File; struct rc_reader_data * reader_data = data; - const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); - /* It is possible to do copy propigation in this situation, - * just not right now, see peephole_add_presub_inv() */ - if (reader_data->Writer->U.I.PreSub.Opcode != RC_PRESUB_NONE && - (info->NumSrcRegs > 2 || info->HasTexture)) { + if(!rc_inst_can_use_presub(inst, + reader_data->Writer->U.I.PreSub.Opcode, + rc_swizzle_to_writemask(src->Swizzle), + *src, + reader_data->Writer->U.I.PreSub.SrcReg[0], + reader_data->Writer->U.I.PreSub.SrcReg[1])) { reader_data->Abort = 1; return; } @@ -112,11 +108,11 @@ static void src_clobbered_reads_cb( && src->Index == sc_data->Index && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { - sc_data->ReaderData->AbortOnRead = 1; + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; } if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { - sc_data->ReaderData->AbortOnRead = 1; + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; } } @@ -149,8 +145,9 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i return; /* Get a list of all the readers of this MOV instruction. */ - rc_get_readers_normal(c, inst_mov, &reader_data, - copy_propagate_scan_read, is_src_clobbered_scan_write); + rc_get_readers(c, inst_mov, &reader_data, + copy_propagate_scan_read, NULL, + is_src_clobbered_scan_write); if (reader_data.Abort || reader_data.ReaderCount == 0) return; @@ -158,7 +155,7 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i /* Propagate the MOV instruction. */ for (i = 0; i < reader_data.ReaderCount; i++) { struct rc_instruction * inst = reader_data.Readers[i].Inst; - *reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, inst_mov->U.I.SrcReg[0]); + *reader_data.Readers[i].U.Src = chain_srcregs(*reader_data.Readers[i].U.Src, inst_mov->U.I.SrcReg[0]); if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) inst->U.I.PreSub = inst_mov->U.I.PreSub; @@ -423,24 +420,13 @@ static void presub_scan_read( struct rc_src_register * src) { struct rc_reader_data * reader_data = data; - const struct rc_opcode_info * info = - rc_get_opcode_info(inst->U.I.Opcode); - /* XXX: There are some situations where instructions - * with more than 2 src registers can use the - * presubtract select, but to keep things simple we - * will disable presubtract on these instructions for - * now. */ - if (info->NumSrcRegs > 2 || info->HasTexture) { - reader_data->Abort = 1; - return; - } + rc_presubtract_op * presub_opcode = reader_data->CbData; - /* We can't use more than one presubtract value in an - * instruction, unless the two prsubtract operations - * are the same and read from the same registers. - * XXX For now we will limit instructions to only one presubtract - * value.*/ - if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { + if (!rc_inst_can_use_presub(inst, *presub_opcode, + reader_data->Writer->U.I.DstReg.WriteMask, + *src, + reader_data->Writer->U.I.SrcReg[0], + reader_data->Writer->U.I.SrcReg[1])) { reader_data->Abort = 1; return; } @@ -454,8 +440,10 @@ static int presub_helper( { struct rc_reader_data reader_data; unsigned int i; + rc_presubtract_op cb_op = presub_opcode; - rc_get_readers_normal(c, inst_add, &reader_data, presub_scan_read, + reader_data.CbData = &cb_op; + rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, is_src_clobbered_scan_write); if (reader_data.Abort || reader_data.ReaderCount == 0) @@ -468,7 +456,7 @@ static int presub_helper( rc_get_opcode_info(reader.Inst->U.I.Opcode); for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { - if (&reader.Inst->U.I.SrcReg[src_index] == reader.Src) + if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.Src) presub_replace(inst_add, reader.Inst, src_index); } } @@ -505,7 +493,9 @@ static void presub_replace_add( inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; } -static int is_presub_candidate(struct rc_instruction * inst) +static int is_presub_candidate( + struct radeon_compiler * c, + struct rc_instruction * inst) { const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); unsigned int i; @@ -514,7 +504,12 @@ static int is_presub_candidate(struct rc_instruction * inst) return 0; for(i = 0; i < info->NumSrcRegs; i++) { - if (src_reads_dst_mask(inst->U.I.SrcReg[i], inst->U.I.DstReg)) + struct rc_src_register src = inst->U.I.SrcReg[i]; + if (src_reads_dst_mask(src, inst->U.I.DstReg)) + return 0; + + src.File = RC_FILE_PRESUB; + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) return 0; } return 1; @@ -528,7 +523,7 @@ static int peephole_add_presub_add( struct rc_src_register * src1 = NULL; unsigned int i; - if (!is_presub_candidate(inst_add)) + if (!is_presub_candidate(c, inst_add)) return 0; if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) @@ -592,7 +587,7 @@ static int peephole_add_presub_inv( { unsigned int i, swz, mask; - if (!is_presub_candidate(inst_add)) + if (!is_presub_candidate(c, inst_add)) return 0; mask = inst_add->U.I.DstReg.WriteMask; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index 91524f5ec68..d53181e1f75 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -66,10 +66,13 @@ struct regalloc_state { struct hardware_register * HwTemporary; unsigned int NumHwTemporaries; /** - * If an instruction is inside of a loop, end_loop will be the - * IP of the ENDLOOP instruction, otherwise end_loop will be 0 + * If an instruction is inside of a loop, EndLoop will be the + * IP of the ENDLOOP instruction, and BeginLoop will be the IP + * of the BGNLOOP instruction. Otherwise, EndLoop and BeginLoop + * will be -1. */ - int end_loop; + int EndLoop; + int BeginLoop; }; static void print_live_intervals(struct live_intervals * src) @@ -180,11 +183,13 @@ static void scan_callback(void * data, struct rc_instruction * inst, reg->Used = 1; if (file == RC_FILE_INPUT) reg->Live.Start = -1; + else if (s->BeginLoop >= 0) + reg->Live.Start = s->BeginLoop; else reg->Live.Start = inst->IP; reg->Live.End = inst->IP; - } else if (s->end_loop) - reg->Live.End = s->end_loop; + } else if (s->EndLoop >= 0) + reg->Live.End = s->EndLoop; else if (inst->IP > reg->Live.End) reg->Live.End = inst->IP; } @@ -195,6 +200,8 @@ static void compute_live_intervals(struct radeon_compiler *c, memset(s, 0, sizeof(*s)); s->C = c; s->NumHwTemporaries = c->max_temp_regs; + s->BeginLoop = -1; + s->EndLoop = -1; s->HwTemporary = memory_pool_malloc(&c->Pool, s->NumHwTemporaries * sizeof(struct hardware_register)); @@ -207,10 +214,12 @@ static void compute_live_intervals(struct radeon_compiler *c, inst = inst->Next) { /* For all instructions inside of a loop, the ENDLOOP - * instruction is used as the end of the live interval. */ - if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && !s->end_loop) { + * instruction is used as the end of the live interval and + * the BGNLOOP instruction is used as the beginning. */ + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && s->EndLoop < 0) { int loops = 1; struct rc_instruction * tmp; + s->BeginLoop = inst->IP; for(tmp = inst->Next; tmp != &s->C->Program.Instructions; tmp = tmp->Next) { @@ -219,15 +228,17 @@ static void compute_live_intervals(struct radeon_compiler *c, } else if (tmp->U.I.Opcode == RC_OPCODE_ENDLOOP) { if(!--loops) { - s->end_loop = tmp->IP; + s->EndLoop = tmp->IP; break; } } } } - if (inst->IP == s->end_loop) - s->end_loop = 0; + if (inst->IP == s->EndLoop) { + s->EndLoop = -1; + s->BeginLoop = -1; + } rc_for_all_reads_mask(inst, scan_callback, s); rc_for_all_writes_mask(inst, scan_callback, s); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index 553e9dcf7c1..9beb5d63579 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -30,6 +30,7 @@ #include <stdio.h> #include "radeon_compiler.h" +#include "radeon_compiler_util.h" #include "radeon_dataflow.h" @@ -54,6 +55,11 @@ struct schedule_instruction { * this instruction can be scheduled. */ unsigned int NumDependencies:5; + + /** List of all readers (see rc_get_readers() for the definition of + * "all readers"), even those outside the basic block this instruction + * lives in. */ + struct rc_reader_data GlobalReaders; }; @@ -94,6 +100,16 @@ struct register_state { struct reg_value * Values[4]; }; +struct remap_reg { + struct rc_instruciont * Inst; + unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1); + unsigned int OldSwizzle:3; + unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1); + unsigned int NewSwizzle:3; + unsigned int OnlyTexReads:1; + struct remap_reg * Next; +}; + struct schedule_state { struct radeon_compiler * C; struct schedule_instruction * Current; @@ -126,15 +142,6 @@ static struct reg_value ** get_reg_valuep(struct schedule_state * s, return &s->Temporary[index].Values[chan]; } -static struct reg_value * get_reg_value(struct schedule_state * s, - rc_register_file file, unsigned int index, unsigned int chan) -{ - struct reg_value ** pv = get_reg_valuep(s, file, index, chan); - if (!pv) - return 0; - return *pv; -} - static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst) { inst->NextReady = *list; @@ -295,12 +302,12 @@ static int merge_presub_sources( assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP); switch(type) { - case RC_PAIR_SOURCE_RGB: + case RC_SOURCE_RGB: is_rgb = 1; is_alpha = 0; dst_sub = &dst_full->RGB; break; - case RC_PAIR_SOURCE_ALPHA: + case RC_SOURCE_ALPHA: is_rgb = 0; is_alpha = 1; dst_sub = &dst_full->Alpha; @@ -341,6 +348,8 @@ static int merge_presub_sources( continue; free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, temp.File, temp.Index); + if (free_source < 0) + return 0; one_way = 1; } else { dst_sub->Src[free_source] = temp; @@ -356,11 +365,11 @@ static int merge_presub_sources( for(arg = 0; arg < info->NumSrcRegs; arg++) { /*If this arg does not read from an rgb source, * do nothing. */ - if (!(rc_source_type_that_arg_reads( - dst_full->RGB.Arg[arg].Source, - dst_full->RGB.Arg[arg].Swizzle) & type)) { + if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle, + 3) & type)) { continue; } + if (dst_full->RGB.Arg[arg].Source == srcp_src) dst_full->RGB.Arg[arg].Source = free_source; /* We need to do this just in case register @@ -392,13 +401,13 @@ static int destructive_merge_instructions( /* Merge the rgb presubtract registers. */ if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { - if (!merge_presub_sources(rgb, alpha->RGB, RC_PAIR_SOURCE_RGB)) { + if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) { return 0; } } /* Merge the alpha presubtract registers */ if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { - if(!merge_presub_sources(rgb, alpha->Alpha, RC_PAIR_SOURCE_ALPHA)){ + if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){ return 0; } } @@ -525,6 +534,222 @@ static void presub_nop(struct rc_instruction * emitted) { } } } + +static void rgb_to_alpha_remap ( + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + rc_register_file old_file, + rc_swizzle old_swz, + unsigned int new_index) +{ + int new_src_index; + unsigned int i; + struct rc_pair_instruction_source * old_src = + rc_pair_get_src(&inst->U.P, arg); + if (!old_src) { + return; + } + + for (i = 0; i < 3; i++) { + if (get_swz(arg->Swizzle, i) == old_swz) { + SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W); + } + } + memset(old_src, 0, sizeof(struct rc_pair_instruction_source)); + new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1, + old_file, new_index); + /* This conversion is not possible, we must have made a mistake in + * is_rgb_to_alpha_possible. */ + if (new_src_index < 0) { + assert(0); + return; + } + + arg->Source = new_src_index; +} + +static int can_remap(unsigned int opcode) +{ + switch(opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + return 0; + default: + return 1; + } +} + +static int can_convert_opcode_to_alpha(unsigned int opcode) +{ + switch(opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + case RC_OPCODE_DP2: + case RC_OPCODE_DP3: + case RC_OPCODE_DP4: + case RC_OPCODE_DPH: + return 0; + default: + return 1; + } +} + +static void is_rgb_to_alpha_possible( + void * userdata, + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + unsigned int chan_count = 0; + unsigned int alpha_sources = 0; + unsigned int i; + struct rc_reader_data * reader_data = userdata; + + if (!can_remap(inst->U.P.RGB.Opcode) + || !can_remap(inst->U.P.Alpha.Opcode)) { + reader_data->Abort = 1; + return; + } + + if (!src) + return; + + /* XXX There are some cases where we can still do the conversion if + * a reader reads from a presubtract source, but for now we'll prevent + * it. */ + if (arg->Source == RC_PAIR_PRESUB_SRC) { + reader_data->Abort = 1; + return; + } + + /* Make sure the source only reads from one component. + * XXX We should allow the source to read from the same component twice. + * XXX If the index we will be converting to is the same as the + * current index, then it is OK to read from more than one component. + */ + for (i = 0; i < 3; i++) { + rc_swizzle swz = get_swz(arg->Swizzle, i); + switch(swz) { + case RC_SWIZZLE_X: + case RC_SWIZZLE_Y: + case RC_SWIZZLE_Z: + case RC_SWIZZLE_W: + chan_count++; + break; + default: + break; + } + } + if (chan_count > 1) { + reader_data->Abort = 1; + return; + } + + /* Make sure there are enough alpha sources. + * XXX If we know what register all the readers are going + * to be remapped to, then in some situations we can still do + * the subsitution, even if all 3 alpha sources are being used.*/ + for (i = 0; i < 3; i++) { + if (inst->U.P.Alpha.Src[i].Used) { + alpha_sources++; + } + } + if (alpha_sources > 2) { + reader_data->Abort = 1; + return; + } +} + +static int convert_rgb_to_alpha( + struct schedule_state * s, + struct schedule_instruction * sched_inst) +{ + struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P; + unsigned int old_mask = pair_inst->RGB.WriteMask; + unsigned int old_swz = rc_mask_to_swizzle(old_mask); + const struct rc_opcode_info * info = + rc_get_opcode_info(pair_inst->RGB.Opcode); + int new_index = -1; + unsigned int i; + + if (sched_inst->GlobalReaders.Abort) + return 0; + + if (!pair_inst->RGB.WriteMask) + return 0; + + if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode) + || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) { + return 0; + } + + assert(sched_inst->NumWriteValues == 1); + + if (!sched_inst->WriteValues[0]) { + assert(0); + return 0; + } + + /* We start at the old index, because if we can reuse the same + * register and just change the swizzle then it is more likely we + * will be able to convert all the readers. */ + for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) { + struct reg_value ** new_regvalp = get_reg_valuep( + s, RC_FILE_TEMPORARY, i, 3); + if (!*new_regvalp) { + struct reg_value ** old_regvalp = + get_reg_valuep(s, + RC_FILE_TEMPORARY, + pair_inst->RGB.DestIndex, + rc_mask_to_swizzle(old_mask)); + new_index = i; + *new_regvalp = *old_regvalp; + *old_regvalp = NULL; + new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3); + break; + } + } + if (new_index < 0) { + return 0; + } + + pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode; + pair_inst->Alpha.DestIndex = new_index; + pair_inst->Alpha.WriteMask = 1; + pair_inst->Alpha.Target = pair_inst->RGB.Target; + pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask; + pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask; + pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate; + memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg, + sizeof(pair_inst->Alpha.Arg)); + /* Move the swizzles into the first chan */ + for (i = 0; i < info->NumSrcRegs; i++) { + unsigned int j; + for (j = 0; j < 3; j++) { + unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j); + if (swz != RC_SWIZZLE_UNUSED) { + pair_inst->Alpha.Arg[i].Swizzle = swz; + break; + } + } + } + pair_inst->RGB.Opcode = RC_OPCODE_NOP; + pair_inst->RGB.DestIndex = 0; + pair_inst->RGB.WriteMask = 0; + pair_inst->RGB.Target = 0; + pair_inst->RGB.OutputWriteMask = 0; + pair_inst->RGB.DepthWriteMask = 0; + pair_inst->RGB.Saturate = 0; + memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg)); + + for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) { + struct rc_reader reader = sched_inst->GlobalReaders.Readers[i]; + rgb_to_alpha_remap(reader.Inst, reader.U.Arg, + RC_FILE_TEMPORARY, old_swz, new_index); + } + return 1; +} + /** * Find a good ALU instruction or pair of ALU instruction and emit it. * @@ -536,24 +761,16 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor { struct schedule_instruction * sinst; - if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { - if (s->ReadyFullALU) { - sinst = s->ReadyFullALU; - s->ReadyFullALU = s->ReadyFullALU->NextReady; - } else if (s->ReadyRGB) { - sinst = s->ReadyRGB; - s->ReadyRGB = s->ReadyRGB->NextReady; - } else { - sinst = s->ReadyAlpha; - s->ReadyAlpha = s->ReadyAlpha->NextReady; - } - + if (s->ReadyFullALU) { + sinst = s->ReadyFullALU; + s->ReadyFullALU = s->ReadyFullALU->NextReady; rc_insert_instruction(before->Prev, sinst->Instruction); commit_alu_instruction(s, sinst); } else { struct schedule_instruction **prgb; struct schedule_instruction **palpha; - + struct schedule_instruction *prev; +pair: /* Some pairings might fail because they require too * many source slots; try all possible pairings if necessary */ for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { @@ -572,10 +789,43 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor goto success; } } - - /* No success in pairing; just take the first RGB instruction */ - sinst = s->ReadyRGB; - s->ReadyRGB = s->ReadyRGB->NextReady; + prev = NULL; + /* No success in pairing, now try to convert one of the RGB + * instructions to an Alpha so we can pair it with another RGB. + */ + if (s->ReadyRGB && s->ReadyRGB->NextReady) { + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + if ((*prgb)->NumWriteValues == 1) { + struct schedule_instruction * prgb_next; + if (!convert_rgb_to_alpha(s, *prgb)) + goto cont_loop; + prgb_next = (*prgb)->NextReady; + /* Add instruction to the Alpha ready list. */ + (*prgb)->NextReady = s->ReadyAlpha; + s->ReadyAlpha = *prgb; + /* Remove instruction from the RGB ready list.*/ + if (prev) + prev->NextReady = prgb_next; + else + s->ReadyRGB = prgb_next; + goto pair; + } +cont_loop: + prev = *prgb; + } + } + /* Still no success in pairing, just take the first RGB + * or alpha instruction. */ + if (s->ReadyRGB) { + sinst = s->ReadyRGB; + s->ReadyRGB = s->ReadyRGB->NextReady; + } else if (s->ReadyAlpha) { + sinst = s->ReadyAlpha; + s->ReadyAlpha = s->ReadyAlpha->NextReady; + } else { + /*XXX Something real bad has happened. */ + assert(0); + } rc_insert_instruction(before->Prev, sinst->Instruction); commit_alu_instruction(s, sinst); @@ -591,13 +841,13 @@ static void scan_read(void * data, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int chan) { struct schedule_state * s = data; - struct reg_value * v = get_reg_value(s, file, index, chan); + struct reg_value ** v = get_reg_valuep(s, file, index, chan); struct reg_value_reader * reader; if (!v) return; - if (v->Writer == s->Current) { + if (*v && (*v)->Writer == s->Current) { /* The instruction reads and writes to a register component. * In this case, we only want to increment dependencies by one. */ @@ -608,16 +858,28 @@ static void scan_read(void * data, struct rc_instruction * inst, reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); reader->Reader = s->Current; - reader->Next = v->Readers; - v->Readers = reader; - v->NumReaders++; - - s->Current->NumDependencies++; + if (!*v) { + /* In this situation, the instruction reads from a register + * that hasn't been written to or read from in the current + * block. */ + *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value)); + memset(*v, 0, sizeof(struct reg_value)); + (*v)->Readers = reader; + } else { + reader->Next = (*v)->Readers; + (*v)->Readers = reader; + /* Only update the current instruction's dependencies if the + * register it reads from has been written to in this block. */ + if ((*v)->Writer) { + s->Current->NumDependencies++; + } + } + (*v)->NumReaders++; if (s->Current->NumReadValues >= 12) { rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__); } else { - s->Current->ReadValues[s->Current->NumReadValues++] = v; + s->Current->ReadValues[s->Current->NumReadValues++] = *v; } } @@ -652,6 +914,16 @@ static void scan_write(void * data, struct rc_instruction * inst, } } +static void is_rgb_to_alpha_possible_normal( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct rc_reader_data * reader_data = userdata; + reader_data->Abort = 1; + +} + static void schedule_block(struct r300_fragment_program_compiler * c, struct rc_instruction * begin, struct rc_instruction * end) { @@ -683,6 +955,11 @@ static void schedule_block(struct r300_fragment_program_compiler * c, if (!s.Current->NumDependencies) instruction_ready(&s, s.Current); + + /* Get global readers for possible RGB->Alpha conversion. */ + rc_get_readers(s.C, inst, &s.Current->GlobalReaders, + is_rgb_to_alpha_possible_normal, + is_rgb_to_alpha_possible, NULL); } /* Temporarily unlink all instructions */ @@ -711,8 +988,13 @@ static int is_controlflow(struct rc_instruction * inst) void rc_pair_schedule(struct radeon_compiler *cc, void *user) { + struct schedule_state s; + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; struct rc_instruction * inst = c->Base.Program.Instructions.Next; + + memset(&s, 0, sizeof(s)); + s.C = &c->Base; while(inst != &c->Base.Program.Instructions) { struct rc_instruction * first; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c index c549be52183..fc05366f50e 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -280,9 +280,12 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, pair->RGB.DestIndex = inst->DstReg.Index; pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; } + if (needalpha) { - pair->Alpha.DestIndex = inst->DstReg.Index; pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + if (pair->Alpha.WriteMask) { + pair->Alpha.DestIndex = inst->DstReg.Index; + } } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c index 24b685fbeb4..fe5756ebc45 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -30,6 +30,7 @@ #include <stdio.h> #include "radeon_compiler.h" +#include "radeon_dataflow.h" /** @@ -70,58 +71,98 @@ void rc_local_transform( } } +struct get_used_temporaries_data { + unsigned char * Used; + unsigned int UsedLength; +}; + +static void get_used_temporaries_cb( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct get_used_temporaries_data * d = userdata; + + if (file != RC_FILE_TEMPORARY) + return; + + if (index >= d->UsedLength) + return; + + d->Used[index] |= mask; +} + /** - * Left multiplication of a register with a swizzle + * This function fills in the parameter 'used' with a writemask that + * represent which components of each temporary register are used by the + * program. This is meant to be combined with rc_find_free_temporary_list as a + * more efficient version of rc_find_free_temporary. + * @param used The function does not initialize this parameter. */ -struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) +void rc_get_used_temporaries( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length) +{ + struct rc_instruction * inst; + struct get_used_temporaries_data d; + d.Used = used; + d.UsedLength = used_length; + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + + rc_for_all_reads_mask(inst, get_used_temporaries_cb, &d); + rc_for_all_writes_mask(inst, get_used_temporaries_cb, &d); + } +} + +/* Search a list of used temporaries for a free one + * \sa rc_get_used_temporaries + * @note If this functions finds a free temporary, it will mark it as used + * in the used temporary list (param 'used') + * @param used list of used temporaries + * @param used_length number of items in param 'used' + * @param mask which components must be free in the temporary index that is + * returned. + * @return -1 If there are no more free temporaries, otherwise the index of + * a temporary register where the components specified in param 'mask' are + * not being used. + */ +int rc_find_free_temporary_list( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length, + unsigned int mask) { - struct rc_src_register tmp = srcreg; int i; - tmp.Swizzle = 0; - tmp.Negate = 0; - for(i = 0; i < 4; ++i) { - rc_swizzle swz = GET_SWZ(swizzle, i); - if (swz < 4) { - tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); - tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; - } else { - tmp.Swizzle |= swz << (i*3); + for(i = 0; i < used_length; i++) { + if ((~used[i] & mask) == mask) { + used[i] |= mask; + return i; } } - return tmp; + return -1; } unsigned int rc_find_free_temporary(struct radeon_compiler * c) { - char used[RC_REGISTER_MAX_INDEX]; - unsigned int i; - struct rc_instruction * rcinst; + unsigned char used[RC_REGISTER_MAX_INDEX]; + int free; memset(used, 0, sizeof(used)); - for (rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) { - const struct rc_sub_instruction *inst = &rcinst->U.I; - const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode); - unsigned int k; - - for (k = 0; k < opcode->NumSrcRegs; k++) { - if (inst->SrcReg[k].File == RC_FILE_TEMPORARY) - used[inst->SrcReg[k].Index] = 1; - } - - if (opcode->HasDstReg) { - if (inst->DstReg.File == RC_FILE_TEMPORARY) - used[inst->DstReg.Index] = 1; - } - } + rc_get_used_temporaries(c, used, RC_REGISTER_MAX_INDEX); - for (i = 0; i < RC_REGISTER_MAX_INDEX; i++) { - if (!used[i]) - return i; + free = rc_find_free_temporary_list(c, used, RC_REGISTER_MAX_INDEX, + RC_MASK_XYZW); + if (free < 0) { + rc_error(c, "Ran out of temporary registers\n"); + return 0; } - - rc_error(c, "Ran out of temporary registers\n"); - return 0; + return free; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index f0a77d7b539..df6c94b35f9 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -159,47 +159,6 @@ struct rc_program { struct rc_constant_list Constants; }; -static inline rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) -{ - if (idx & 0x4) - return idx; - return GET_SWZ(swz, idx); -} - -static inline unsigned int combine_swizzles4(unsigned int src, - rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) -{ - unsigned int ret = 0; - - ret |= get_swz(src, swz_x); - ret |= get_swz(src, swz_y) << 3; - ret |= get_swz(src, swz_z) << 6; - ret |= get_swz(src, swz_w) << 9; - - return ret; -} - -static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz) -{ - unsigned int ret = 0; - - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; - - return ret; -} - -struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); - -static inline void reset_srcreg(struct rc_src_register* reg) -{ - memset(reg, 0, sizeof(struct rc_src_register)); - reg->Swizzle = RC_SWIZZLE_XYZW; -} - - /** * A transformation that can be passed to \ref rc_local_transform. * @@ -222,6 +181,17 @@ void rc_local_transform( struct radeon_compiler *c, void *user); +void rc_get_used_temporaries( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length); + +int rc_find_free_temporary_list( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length, + unsigned int mask); + unsigned int rc_find_free_temporary(struct radeon_compiler * c); struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); @@ -233,4 +203,5 @@ unsigned int rc_recompute_ips(struct radeon_compiler * c); void rc_print_program(const struct rc_program *prog); +rc_swizzle rc_mask_to_swizzle(unsigned int mask); #endif diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 39408845d5a..58977a40c7c 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -36,6 +36,7 @@ #include "radeon_program_alu.h" #include "radeon_compiler.h" +#include "radeon_compiler_util.h" static struct rc_instruction *emit1( @@ -84,16 +85,6 @@ static struct rc_instruction *emit3( return fpi; } -static struct rc_dst_register dstreg(int file, int index) -{ - struct rc_dst_register dst; - dst.File = file; - dst.Index = index; - dst.WriteMask = RC_MASK_XYZW; - dst.RelAddr = 0; - return dst; -} - static struct rc_dst_register dstregtmpmask(int index, int mask) { struct rc_dst_register dst = {0}; @@ -186,6 +177,38 @@ static struct rc_src_register swizzle_wwww(struct rc_src_register reg) return swizzle_smear(reg, RC_SWIZZLE_W); } +static int is_dst_safe_to_reuse(struct rc_instruction *inst) +{ + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + assert(info->HasDstReg); + + if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) + return 0; + + for (i = 0; i < info->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index) + return 0; + } + + return 1; +} + +static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + unsigned tmp; + + if (is_dst_safe_to_reuse(inst)) + tmp = inst->U.I.DstReg.Index; + else + tmp = rc_find_free_temporary(c); + + return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask); +} + static void transform_ABS(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -209,10 +232,26 @@ static void transform_CEIL(struct radeon_compiler* c, * ceil(x) = x+frac(-x) */ - int tempreg = rc_find_free_temporary(c); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0])); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0])); emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg)); + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index)); + rc_remove_instruction(inst); +} + +static void transform_CLAMP(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* CLAMP dst, src, min, max + * into: + * MIN tmp, src, max + * MAX dst, tmp, min + */ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst, + inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]); + emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]); rc_remove_instruction(inst); } @@ -258,10 +297,10 @@ static void transform_DST(struct radeon_compiler* c, static void transform_FLR(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0]); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]); emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg))); + inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); rc_remove_instruction(inst); } @@ -351,14 +390,14 @@ static void transform_LIT(struct radeon_compiler* c, static void transform_LRP(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_ADD, 0, - dstreg(RC_FILE_TEMPORARY, tempreg), + dst, inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[2]); + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]); rc_remove_instruction(inst); } @@ -366,9 +405,8 @@ static void transform_LRP(struct radeon_compiler* c, static void transform_POW(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); - struct rc_dst_register tempdst = dstreg(RC_FILE_TEMPORARY, tempreg); - struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempreg); + struct rc_dst_register tempdst = try_to_reuse_dst(c, inst); + struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index); tempdst.WriteMask = RC_MASK_W; tempsrc.Swizzle = RC_SWIZZLE_WWWW; @@ -388,11 +426,11 @@ static void transform_RSQ(struct radeon_compiler* c, static void transform_SEQ(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_zero, builtin_one); + negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -407,11 +445,11 @@ static void transform_SFL(struct radeon_compiler* c, static void transform_SGE(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -419,11 +457,11 @@ static void transform_SGE(struct radeon_compiler* c, static void transform_SGT(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -431,11 +469,11 @@ static void transform_SGT(struct radeon_compiler* c, static void transform_SLE(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -443,11 +481,11 @@ static void transform_SLE(struct radeon_compiler* c, static void transform_SLT(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -455,11 +493,11 @@ static void transform_SLT(struct radeon_compiler* c, static void transform_SNE(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_one, builtin_zero); + negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -473,12 +511,13 @@ static void transform_SSG(struct radeon_compiler* c, * CMP tmp1, x, 1, 0 * ADD result, tmp0, -tmp1; */ - unsigned tmp0, tmp1; + struct rc_dst_register dst0; + unsigned tmp1; /* 0 < x */ - tmp0 = rc_find_free_temporary(c); + dst0 = try_to_reuse_dst(c, inst); emit3(c, inst->Prev, RC_OPCODE_CMP, 0, - dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask), + dst0, negate(inst->U.I.SrcReg[0]), builtin_one, builtin_zero); @@ -495,7 +534,7 @@ static void transform_SSG(struct radeon_compiler* c, /* result = tmp0 - tmp1 */ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tmp0), + srcreg(RC_FILE_TEMPORARY, dst0.Index), negate(srcreg(RC_FILE_TEMPORARY, tmp1))); rc_remove_instruction(inst); @@ -517,15 +556,15 @@ static void transform_SWZ(struct radeon_compiler* c, static void transform_XPD(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg), + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), - negate(srcreg(RC_FILE_TEMPORARY, tempreg))); + negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); rc_remove_instruction(inst); } @@ -553,6 +592,7 @@ int radeonTransformALU( switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_DST: transform_DST(c, inst); return 1; @@ -592,7 +632,7 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c, { /* There is no decent CMP available, so let's rig one up. * CMP is defined as dst = src0 < 0.0 ? src1 : src2 - * The following sequence consumes two temps and two extra slots + * The following sequence consumes zero to two temps and two extra slots * (the second temp and the second slot is consumed by transform_LRP), * but should be equivalent: * @@ -600,18 +640,18 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c, * LRP dst, tmp0, src1, src2 * * Yes, I know, I'm a mad scientist. ~ C. & M. */ - int tempreg0 = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); /* SLT tmp0, src0, 0.0 */ emit2(c, inst->Prev, RC_OPCODE_SLT, 0, - dstreg(RC_FILE_TEMPORARY, tempreg0), + dst, inst->U.I.SrcReg[0], builtin_zero); /* LRP dst, tmp0, src1, src2 */ transform_LRP(c, emit3(c, inst->Prev, RC_OPCODE_LRP, 0, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); + srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); rc_remove_instruction(inst); } @@ -642,7 +682,7 @@ static void transform_r300_vertex_DP3(struct radeon_compiler* c, static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); unsigned constant_swizzle; int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, 0.0000000000000000001, @@ -650,16 +690,16 @@ static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, /* MOV dst, src */ emit1(c, inst->Prev, RC_OPCODE_MOV, 0, - dstreg(RC_FILE_TEMPORARY, tempreg), + dst, inst->U.I.SrcReg[0]); /* MAX dst.z, src, 0.00...001 */ emit2(c, inst->Prev, RC_OPCODE_MAX, 0, - dstregtmpmask(tempreg, RC_MASK_Y), - srcreg(RC_FILE_TEMPORARY, tempreg), + dstregtmpmask(dst.Index, RC_MASK_Y), + srcreg(RC_FILE_TEMPORARY, dst.Index), srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); - inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, tempreg); + inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index); } static void transform_r300_vertex_SEQ(struct radeon_compiler *c, @@ -743,12 +783,13 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c, * SLT tmp1, x, 0; * ADD result, tmp0, -tmp1; */ - unsigned tmp0, tmp1; + struct rc_dst_register dst0 = try_to_reuse_dst(c, inst); + unsigned tmp1; /* 0 < x */ - tmp0 = rc_find_free_temporary(c); + dst0 = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_SLT, 0, - dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask), + dst0, builtin_zero, inst->U.I.SrcReg[0]); @@ -763,7 +804,7 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c, /* result = tmp0 - tmp1 */ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tmp0), + srcreg(RC_FILE_TEMPORARY, dst0.Index), negate(srcreg(RC_FILE_TEMPORARY, tmp1))); rc_remove_instruction(inst); @@ -781,6 +822,7 @@ int r300_transform_vertex_alu( switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1; case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h index 9dcd44c522d..45f79ece5ba 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h @@ -181,4 +181,9 @@ static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){ return 0; } } + +#define RC_SOURCE_NONE 0x0 +#define RC_SOURCE_RGB 0x1 +#define RC_SOURCE_ALPHA 0x2 + #endif /* RADEON_PROGRAM_CONSTANTS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index a21fe8d3df8..5905d26e521 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -27,6 +27,9 @@ #include "radeon_program_pair.h" +#include "radeon_compiler_util.h" + +#include <stdlib.h> /** * Return the source slot where we installed the given register access, @@ -204,24 +207,37 @@ void rc_pair_foreach_source_that_rgb_reads( } } -/*return 0 for rgb, 1 for alpha -1 for error. */ - -unsigned int rc_source_type_that_arg_reads( - unsigned int source, - unsigned int swizzle) +struct rc_pair_instruction_source * rc_pair_get_src( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_arg * arg) { - unsigned int chan; - unsigned int swz = RC_SWIZZLE_UNUSED; - unsigned int ret = RC_PAIR_SOURCE_NONE; - - for(chan = 0; chan < 3; chan++) { - swz = GET_SWZ(swizzle, chan); - if (swz == RC_SWIZZLE_W) { - ret |= RC_PAIR_SOURCE_ALPHA; - } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y - || swz == RC_SWIZZLE_Z) { - ret |= RC_PAIR_SOURCE_RGB; + unsigned int i, type; + unsigned int channels = 0; + + for(i = 0; i < 3; i++) { + if (arg == pair_inst->RGB.Arg + i) { + channels = 3; + break; } } - return ret; + + if (channels == 0) { + for (i = 0; i < 3; i++) { + if (arg == pair_inst->Alpha.Arg + i) { + channels = 1; + break; + } + } + } + + assert(channels > 0); + type = rc_source_type_swz(arg->Swizzle, channels); + + if (type & RC_SOURCE_RGB) { + return &pair_inst->RGB.Src[arg->Source]; + } else if (type & RC_SOURCE_ALPHA) { + return &pair_inst->Alpha.Src[arg->Source]; + } else { + return NULL; + } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index 54d44a2098b..ccf7a0070cd 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -55,10 +55,6 @@ struct radeon_compiler; */ #define RC_PAIR_PRESUB_SRC 3 -#define RC_PAIR_SOURCE_NONE 0x0 -#define RC_PAIR_SOURCE_RGB 0x1 -#define RC_PAIR_SOURCE_ALPHA 0x2 - struct rc_pair_instruction_source { unsigned int Used:1; unsigned int File:3; @@ -115,9 +111,9 @@ void rc_pair_foreach_source_that_rgb_reads( void * data, rc_pair_foreach_src_fn cb); -unsigned int rc_source_type_that_arg_reads( - unsigned int source, - unsigned int swizzle); +struct rc_pair_instruction_source * rc_pair_get_src( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_arg * arg); /*@}*/ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c index 618ab5a099b..ae13f6742f8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c @@ -129,6 +129,7 @@ static char rc_swizzle_char(unsigned int swz) case RC_SWIZZLE_HALF: return 'H'; case RC_SWIZZLE_UNUSED: return '_'; } + fprintf(stderr, "bad swz: %u\n", swz); return '?'; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c index 530afa5e08e..f9d9f34b6ad 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c @@ -28,6 +28,8 @@ #include "radeon_program_tex.h" +#include "radeon_compiler_util.h" + /* Series of transformations to be done on textures. */ static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c index 5f67f536f61..7d76585a593 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c @@ -87,8 +87,9 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user) rc_for_all_reads_src(inst, mark_used, &d); } - /* Pass 2: If there is relative addressing, mark all externals as used. */ - if (has_rel_addr) { + /* Pass 2: If there is relative addressing or dead constant elimination + * is disabled, mark all externals as used. */ + if (has_rel_addr || !c->remove_unused_constants) { for (unsigned i = 0; i < c->Program.Constants.Count; i++) if (constants[i].Type == RC_CONSTANT_EXTERNAL) const_used[i] = 1; @@ -119,7 +120,7 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user) /* is_identity ==> new_count == old_count * !is_identity ==> new_count < old_count */ assert( is_identity || new_count < c->Program.Constants.Count); - assert(!(has_rel_addr && are_externals_remapped)); + assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped)); /* Pass 4: Redirect reads of all constants to their new locations. */ if (!is_identity) { @@ -127,7 +128,6 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user) inst != &c->Program.Instructions; inst = inst->Next) { rc_remap_registers(inst, remap_regs, inv_remap_table); } - } /* Set the new constant count. Note that new_count may be less than diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c index 60e228be5bd..88165f78953 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c @@ -33,100 +33,51 @@ #include "radeon_compiler.h" #include "radeon_dataflow.h" - -struct reg_rename { - int old_index; - int new_index; - int temp_index; -}; - -static void rename_reg(void * data, struct rc_instruction * inst, - rc_register_file * file, unsigned int * index) -{ - struct reg_rename *r = data; - - if(r->old_index == *index && *file == RC_FILE_TEMPORARY) { - *index = r->new_index; - } - else if(r->new_index == *index && *file == RC_FILE_TEMPORARY) { - *index = r->temp_index; - } -} - -static void rename_all( - struct radeon_compiler *c, - struct rc_instruction * start, - unsigned int old, - unsigned int new, - unsigned int temp) -{ - struct rc_instruction * inst; - struct reg_rename r; - r.old_index = old; - r.new_index = new; - r.temp_index = temp; - for(inst = start; inst != &c->Program.Instructions; - inst = inst->Next) { - rc_remap_registers(inst, rename_reg, &r); - } -} +#include "radeon_program.h" /** * This function renames registers in an attempt to get the code close to * SSA form. After this function has completed, most of the register are only - * written to one time, with a few exceptions. For example, this block of code - * will not be modified by this function: - * Mov Temp[0].x Const[0].x - * Mov Temp[0].y Const[0].y - * Basically, destination registers will be renamed if: - * 1. There have been no previous writes to that register - * or - * 2. If the instruction is writting to the exact components (no more, no less) - * of a register that has been written to by previous instructions. + * written to one time, with a few exceptions. * * This function assumes all the instructions are still of type * RC_INSTRUCTION_NORMAL. */ void rc_rename_regs(struct radeon_compiler *c, void *user) { - unsigned int cur_index = 0; - unsigned int icount; + unsigned int i, used_length; + int new_index; struct rc_instruction * inst; - unsigned int * masks; + struct rc_reader_data reader_data; + unsigned char * used; - /* The number of instructions in the program is also the maximum - * number of temp registers that could potentially be used. */ - icount = rc_recompute_ips(c); - masks = memory_pool_malloc(&c->Pool, icount * sizeof(unsigned int)); - memset(masks, 0, icount * sizeof(unsigned int)); + used_length = 2 * rc_recompute_ips(c); + used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length); + memset(used, 0, sizeof(unsigned char) * used_length); + rc_get_used_temporaries(c, used, used_length); for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * info; - unsigned int old_index, temp_index; - struct rc_dst_register * dst; - if(inst->Type != RC_INSTRUCTION_NORMAL) { - rc_error(c, "%s only works with normal instructions.", - __FUNCTION__); - return; - } - dst = &inst->U.I.DstReg; - info = rc_get_opcode_info(inst->U.I.Opcode); - if(!info->HasDstReg || dst->File != RC_FILE_TEMPORARY) { + + if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) continue; + + rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); + + if (reader_data.Abort || reader_data.ReaderCount == 0) + continue; + + new_index = rc_find_free_temporary_list(c, used, used_length, + RC_MASK_XYZW); + if (new_index < 0) { + rc_error(c, "Ran out of temporary registers\n"); + return; } - if(dst->Index >= icount || !masks[dst->Index] || - masks[dst->Index] == dst->WriteMask) { - old_index = dst->Index; - /* We need to set dst->Index here so get free temporary - * will work. */ - dst->Index = cur_index++; - temp_index = rc_find_free_temporary(c); - rename_all(c, inst->Next, old_index, - dst->Index, temp_index); + + reader_data.Writer->U.I.DstReg.Index = new_index; + for(i = 0; i < reader_data.ReaderCount; i++) { + reader_data.Readers[i].U.Src->Index = new_index; } - assert(dst->Index < icount); - masks[dst->Index] |= dst->WriteMask; } } diff --git a/src/mesa/drivers/dri/r600/evergreen_chip.c b/src/mesa/drivers/dri/r600/evergreen_chip.c index 2c9e4e2b844..53dacbfdf39 100644 --- a/src/mesa/drivers/dri/r600/evergreen_chip.c +++ b/src/mesa/drivers/dri/r600/evergreen_chip.c @@ -286,7 +286,11 @@ static void evergreenSetupVTXConstants(struct gl_context * ctx, if (!paos->bo) return; - r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit); + if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_CEDAR) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_PALM)) + r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit); + else + r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit); //uSQ_VTX_CONSTANT_WORD0_0 uSQ_VTX_CONSTANT_WORD0_0 = paos->offset; diff --git a/src/mesa/drivers/dri/r600/evergreen_state.c b/src/mesa/drivers/dri/r600/evergreen_state.c index a77be183a12..076a608573c 100644 --- a/src/mesa/drivers/dri/r600/evergreen_state.c +++ b/src/mesa/drivers/dri/r600/evergreen_state.c @@ -1461,6 +1461,14 @@ static void evergreenInitSQConfig(struct gl_context * ctx) uMaxThreads = 248; uMaxStackEntries = 512; break; + case CHIP_FAMILY_PALM: + uSqNumCfInsts = 1; + bVC_ENABLE = GL_FALSE; + uMaxGPRs = 256; + uPSThreadCount = 96; + uMaxThreads = 192; + uMaxStackEntries = 256; + break; default: uSqNumCfInsts = 2; bVC_ENABLE = GL_TRUE; diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.c b/src/mesa/drivers/dri/r600/evergreen_tex.c index 2f4c92d6767..3b5448a0e4e 100644 --- a/src/mesa/drivers/dri/r600/evergreen_tex.c +++ b/src/mesa/drivers/dri/r600/evergreen_tex.c @@ -31,7 +31,6 @@ #include "main/enums.h" #include "main/image.h" #include "main/teximage.h" -#include "main/mipmap.h" #include "main/simple_list.h" #include "main/texobj.h" diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index b6443bf0c53..aa1891eac32 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -259,7 +259,7 @@ static void r600InitConstValues(struct gl_context *ctx, radeonScreenPtr screen) R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); if( (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_CEDAR) - &&(context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_HEMLOCK) ) + &&(context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_PALM) ) { r700->bShaderUseMemConstant = GL_TRUE; } @@ -285,8 +285,13 @@ static void r600InitConstValues(struct gl_context *ctx, radeonScreenPtr screen) ctx->Const.MaxTextureMaxAnisotropy = 16.0; ctx->Const.MaxTextureLodBias = 16.0; - ctx->Const.MaxTextureLevels = 13; /* hw support 14 */ - ctx->Const.MaxTextureRectSize = 4096; /* hw support 8192 */ + if (screen->chip_family >= CHIP_FAMILY_CEDAR) { + ctx->Const.MaxTextureLevels = 15; + ctx->Const.MaxTextureRectSize = 16384; + } else { + ctx->Const.MaxTextureLevels = 14; + ctx->Const.MaxTextureRectSize = 8192; + } ctx->Const.MinPointSize = 0x0001 / 8.0; ctx->Const.MinPointSizeAA = 0x0001 / 8.0; diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 2bf24096a0d..1fa559cec1a 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -3334,7 +3334,14 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm) return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_CNDGE; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE; + } pAsm->D.dst.op3 = 1; tmp = (-1); @@ -3416,8 +3423,14 @@ GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) checkop1(pAsm); tmp = gethelpr(pAsm); - - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -3457,7 +3470,14 @@ GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) { return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -4742,7 +4762,14 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm) tmp = gethelpr(pAsm); - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -4782,7 +4809,14 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm) { return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -5010,7 +5044,14 @@ GLboolean assemble_SSG(r700_AssemblerBase *pAsm) GLuint tmp = gethelpr(pAsm); /* tmp = (src > 0 ? 1 : src) */ - pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_CNDGT; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + } pAsm->D.dst.op3 = 1; pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp; @@ -5033,7 +5074,14 @@ GLboolean assemble_SSG(r700_AssemblerBase *pAsm) } /* dst = (-tmp > 0 ? -1 : tmp) */ - pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_CNDGT; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + } pAsm->D.dst.op3 = 1; if( GL_FALSE == assemble_dst(pAsm) ) diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index 61106fbc43f..82789cec5ed 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -440,6 +440,11 @@ #define PCI_CHIP_HEMLOCK_689C 0x689C #define PCI_CHIP_HEMLOCK_689D 0x689D +#define PCI_CHIP_PALM_9802 0x9802 +#define PCI_CHIP_PALM_9803 0x9803 +#define PCI_CHIP_PALM_9804 0x9804 +#define PCI_CHIP_PALM_9805 0x9805 + enum { CHIP_FAMILY_R100, CHIP_FAMILY_RV100, @@ -483,6 +488,7 @@ enum { CHIP_FAMILY_JUNIPER, CHIP_FAMILY_CYPRESS, CHIP_FAMILY_HEMLOCK, + CHIP_FAMILY_PALM, CHIP_FAMILY_LAST }; diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index fecdd119059..ca6ab46ca43 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -99,6 +99,7 @@ static const char* get_chip_family_name(int chip_family) case CHIP_FAMILY_JUNIPER: return "JUNIPER"; case CHIP_FAMILY_CYPRESS: return "CYPRESS"; case CHIP_FAMILY_HEMLOCK: return "HEMLOCK"; + case CHIP_FAMILY_PALM: return "PALM"; default: return "unknown"; } } diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h index 088f9701722..a68a9768779 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h @@ -49,7 +49,7 @@ struct _radeon_mipmap_level { }; /* store the max possible in the miptree */ -#define RADEON_MIPTREE_MAX_TEXTURE_LEVELS 13 +#define RADEON_MIPTREE_MAX_TEXTURE_LEVELS 15 /** * A mipmap tree contains texture images in the layout that the hardware diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index b379240579d..94e56c2ade6 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -1155,6 +1155,14 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) screen->chip_flags = RADEON_CHIPSET_TCL; break; + case PCI_CHIP_PALM_9802: + case PCI_CHIP_PALM_9803: + case PCI_CHIP_PALM_9804: + case PCI_CHIP_PALM_9805: + screen->chip_family = CHIP_FAMILY_PALM; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + default: fprintf(stderr, "unknown chip id 0x%x, can't guess.\n", device_id); diff --git a/src/mesa/drivers/dri/sis/server/sis_dri.h b/src/mesa/drivers/dri/sis/server/sis_dri.h index f0171f3c0f8..7d8f507115d 100644 --- a/src/mesa/drivers/dri/sis/server/sis_dri.h +++ b/src/mesa/drivers/dri/sis/server/sis_dri.h @@ -72,13 +72,4 @@ typedef struct { int dummy; } SISDRIContextRec, *SISDRIContextPtr; -#ifdef XFree86Server - -#include "screenint.h" - -Bool SISDRIScreenInit(ScreenPtr pScreen); -void SISDRICloseScreen(ScreenPtr pScreen); -Bool SISDRIFinishScreenInit(ScreenPtr pScreen); - -#endif #endif diff --git a/src/mesa/drivers/dri/tdfx/tdfx_context.h b/src/mesa/drivers/dri/tdfx/tdfx_context.h index fb38419dcdd..7e2f0e00a8e 100644 --- a/src/mesa/drivers/dri/tdfx/tdfx_context.h +++ b/src/mesa/drivers/dri/tdfx/tdfx_context.h @@ -41,11 +41,7 @@ #include <sys/time.h> #include "dri_util.h" -#ifdef XFree86Server -#include "GL/xf86glx.h" -#else #include "main/glheader.h" -#endif #if defined(__linux__) #include <signal.h> #endif diff --git a/src/mesa/drivers/dri/unichrome/server/via_dri.h b/src/mesa/drivers/dri/unichrome/server/via_dri.h index b47397d5728..c6eed03c1c9 100644 --- a/src/mesa/drivers/dri/unichrome/server/via_dri.h +++ b/src/mesa/drivers/dri/unichrome/server/via_dri.h @@ -35,9 +35,7 @@ #define VIA_DRIDDX_VERSION_MINOR 0 #define VIA_DRIDDX_VERSION_PATCH 0 -#ifndef XFree86Server typedef int Bool; -#endif typedef struct { drm_handle_t handle; diff --git a/src/mesa/drivers/windows/gdi/InitCritSections.cpp b/src/mesa/drivers/windows/gdi/InitCritSections.cpp index 7145bffa510..69f03b8e47c 100644 --- a/src/mesa/drivers/windows/gdi/InitCritSections.cpp +++ b/src/mesa/drivers/windows/gdi/InitCritSections.cpp @@ -1,7 +1,8 @@ #include "glapi.h" #include "glThread.h" -#ifdef WIN32_THREADS +#ifdef WIN32 + extern "C" _glthread_Mutex OneTimeLock; extern "C" _glthread_Mutex GenTexturesLock; @@ -29,4 +30,4 @@ public: _CriticalSectionInit _CriticalSectionInit::m_inst; -#endif +#endif /* WIN32 */ diff --git a/src/mesa/drivers/x11/glxheader.h b/src/mesa/drivers/x11/glxheader.h index d88afba20e7..ee002191bc0 100644 --- a/src/mesa/drivers/x11/glxheader.h +++ b/src/mesa/drivers/x11/glxheader.h @@ -32,13 +32,6 @@ #include "main/glheader.h" -#ifdef XFree86Server - -# include "xorg-server.h" -# include "resource.h" -# include "windowstr.h" - -#else # include <X11/Xlib.h> # include <X11/Xlibint.h> @@ -51,7 +44,6 @@ # include <GL/glx.h> # include <sys/time.h> -#endif diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c index 00ceb960c62..b5eabadf486 100644 --- a/src/mesa/drivers/x11/xm_api.c +++ b/src/mesa/drivers/x11/xm_api.c @@ -158,14 +158,12 @@ static short hpcr_rgbTbl[3][256] = { /** * Return the host's byte order as LSBFirst or MSBFirst ala X. */ -#ifndef XFree86Server static int host_byte_order( void ) { int i = 1; char *cptr = (char *) &i; return (*cptr==1) ? LSBFirst : MSBFirst; } -#endif /** @@ -176,7 +174,7 @@ static int host_byte_order( void ) */ static int check_for_xshm( XMesaDisplay *display ) { -#if defined(USE_XSHM) && !defined(XFree86Server) +#if defined(USE_XSHM) int major, minor, ignore; Bool pixmaps; @@ -227,16 +225,6 @@ gamma_adjust( GLfloat gamma, GLint value, GLint max ) static int bits_per_pixel( XMesaVisual xmv ) { -#ifdef XFree86Server - const int depth = xmv->nplanes; - int i; - assert(depth > 0); - for (i = 0; i < screenInfo.numPixmapFormats; i++) { - if (screenInfo.formats[i].depth == depth) - return screenInfo.formats[i].bitsPerPixel; - } - return depth; /* should never get here, but this should be safe */ -#else XMesaDisplay *dpy = xmv->display; XMesaVisualInfo visinfo = xmv->visinfo; XMesaImage *img; @@ -257,7 +245,6 @@ bits_per_pixel( XMesaVisual xmv ) img->data = NULL; XMesaDestroyImage( img ); return bitsPerPixel; -#endif } @@ -271,7 +258,6 @@ bits_per_pixel( XMesaVisual xmv ) * Return: GL_TRUE - window exists * GL_FALSE - window doesn't exist */ -#ifndef XFree86Server static GLboolean WindowExistsFlag; static int window_exists_err_handler( XMesaDisplay* dpy, XErrorEvent* xerr ) @@ -306,7 +292,6 @@ get_drawable_size( XMesaDisplay *dpy, Drawable d, GLuint *width, GLuint *height *height = h; return stat; } -#endif /** @@ -319,10 +304,6 @@ void xmesa_get_window_size(XMesaDisplay *dpy, XMesaBuffer b, GLuint *width, GLuint *height) { -#ifdef XFree86Server - *width = MIN2(b->frontxrb->drawable->width, MAX_WIDTH); - *height = MIN2(b->frontxrb->drawable->height, MAX_HEIGHT); -#else Status stat; _glthread_LOCK_MUTEX(_xmesa_lock); @@ -335,7 +316,6 @@ xmesa_get_window_size(XMesaDisplay *dpy, XMesaBuffer b, _mesa_warning(NULL, "XGetGeometry failed!\n"); *width = *height = 1; } -#endif } @@ -549,16 +529,11 @@ noFaultXAllocColor( int client, XMesaColor *color, int *exact, int *alloced ) { -#ifdef XFree86Server - Pixel *ppixIn; - xrgb *ctable; -#else /* we'll try to cache ctable for better remote display performance */ static Display *prevDisplay = NULL; static XMesaColormap prevCmap = 0; static int prevCmapSize = 0; static XMesaColor *ctable = NULL; -#endif XMesaColor subColor; int i, bestmatch; double mindist; /* 3*2^16^2 exceeds long int precision. */ @@ -566,14 +541,7 @@ noFaultXAllocColor( int client, (void) client; /* First try just using XAllocColor. */ -#ifdef XFree86Server - if (AllocColor(cmap, - &color->red, &color->green, &color->blue, - &color->pixel, - client) == Success) -#else if (XAllocColor(dpy, cmap, color)) -#endif { *exact = 1; *alloced = 1; @@ -584,14 +552,6 @@ noFaultXAllocColor( int client, /* Retrieve color table entries. */ /* XXX alloca candidate. */ -#ifdef XFree86Server - ppixIn = (Pixel *) MALLOC(cmapSize * sizeof(Pixel)); - ctable = (xrgb *) MALLOC(cmapSize * sizeof(xrgb)); - for (i = 0; i < cmapSize; i++) { - ppixIn[i] = i; - } - QueryColors(cmap, cmapSize, ppixIn, ctable); -#else if (prevDisplay != dpy || prevCmap != cmap || prevCmapSize != cmapSize || !ctable) { /* free previously cached color table */ @@ -608,7 +568,6 @@ noFaultXAllocColor( int client, prevCmap = cmap; prevCmapSize = cmapSize; } -#endif /* Find best match. */ bestmatch = -1; @@ -632,14 +591,7 @@ noFaultXAllocColor( int client, * fail if the cell is read/write. Otherwise, we're incrementing * the cell's reference count. */ -#ifdef XFree86Server - if (AllocColor(cmap, - &subColor.red, &subColor.green, &subColor.blue, - &subColor.pixel, - client) == Success) { -#else if (XAllocColor(dpy, cmap, &subColor)) { -#endif *alloced = 1; } else { @@ -651,12 +603,7 @@ noFaultXAllocColor( int client, subColor.flags = DoRed | DoGreen | DoBlue; *alloced = 0; } -#ifdef XFree86Server - free(ppixIn); - free(ctable); -#else /* don't free table, save it for next time */ -#endif *color = subColor; *exact = 0; @@ -873,10 +820,8 @@ setup_8bit_hpcr(XMesaVisual v) v->hpcr_clear_pixmap = XMesaCreatePixmap(v->display, DefaultRootWindow(v->display), 16, 2, 8); -#ifndef XFree86Server v->hpcr_clear_ximage = XGetImage(v->display, v->hpcr_clear_pixmap, 0, 0, 16, 2, AllPlanes, ZPixmap); -#endif } } @@ -1049,9 +994,6 @@ initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b, int client = 0; const int xclass = v->visualType; -#ifdef XFree86Server - client = (window) ? CLIENT_ID(window->id) : 0; -#endif ASSERT(!b || b->xm_visual == v); @@ -1120,40 +1062,23 @@ initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b, } /* X11 graphics contexts */ -#ifdef XFree86Server - b->gc = CreateScratchGC(v->display, window->depth); -#else b->gc = XCreateGC( v->display, window, 0, NULL ); -#endif XMesaSetFunction( v->display, b->gc, GXcopy ); /* cleargc - for glClear() */ -#ifdef XFree86Server - b->cleargc = CreateScratchGC(v->display, window->depth); -#else b->cleargc = XCreateGC( v->display, window, 0, NULL ); -#endif XMesaSetFunction( v->display, b->cleargc, GXcopy ); /* * Don't generate Graphics Expose/NoExpose events in swapbuffers(). * Patch contributed by Michael Pichler May 15, 1995. */ -#ifdef XFree86Server - b->swapgc = CreateScratchGC(v->display, window->depth); - { - CARD32 v[1]; - v[0] = FALSE; - dixChangeGC(NullClient, b->swapgc, GCGraphicsExposures, v, NULL); - } -#else { XGCValues gcvalues; gcvalues.graphics_exposures = False; b->swapgc = XCreateGC(v->display, window, GCGraphicsExposures, &gcvalues); } -#endif XMesaSetFunction( v->display, b->swapgc, GXcopy ); /* * Set fill style and tile pixmap once for all for HPCR stuff @@ -1175,9 +1100,6 @@ initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b, /* Initialize the row buffer XImage for use in write_color_span() */ data = (char*) MALLOC(MAX_WIDTH*4); -#ifdef XFree86Server - b->rowimage = XMesaCreateImage(GET_VISUAL_DEPTH(v), MAX_WIDTH, 1, data); -#else b->rowimage = XCreateImage( v->display, v->visinfo->visual, v->visinfo->depth, @@ -1186,7 +1108,6 @@ initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b, MAX_WIDTH, 1, /*width, height*/ 32, /*bitmap_pad*/ 0 /*bytes_per_line*/ ); -#endif if (!b->rowimage) return GL_FALSE; } @@ -1334,7 +1255,6 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display, XMesaVisual v; GLint red_bits, green_bits, blue_bits, alpha_bits; -#ifndef XFree86Server /* For debugging only */ if (_mesa_getenv("MESA_XSYNC")) { /* This makes debugging X easier. @@ -1343,7 +1263,6 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display, */ XSynchronize( display, 1 ); } -#endif /* Color-index rendering not supported. */ if (!rgb_flag) @@ -1360,14 +1279,12 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display, * the struct but we may need some of the information contained in it * at a later time. */ -#ifndef XFree86Server v->visinfo = (XVisualInfo *) MALLOC(sizeof(*visinfo)); if(!v->visinfo) { free(v); return NULL; } memcpy(v->visinfo, visinfo, sizeof(*visinfo)); -#endif /* check for MESA_GAMMA environment variable */ gamma = _mesa_getenv("MESA_GAMMA"); @@ -1384,30 +1301,13 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display, v->ximage_flag = ximage_flag; -#ifdef XFree86Server - /* We could calculate these values by ourselves. nplanes is either the sum - * of the red, green, and blue bits or the number index bits. - * ColormapEntries is either (1U << index_bits) or - * (1U << max(redBits, greenBits, blueBits)). - */ - assert(visinfo->nplanes > 0); - v->nplanes = visinfo->nplanes; - v->ColormapEntries = visinfo->ColormapEntries; - - v->mesa_visual.redMask = visinfo->redMask; - v->mesa_visual.greenMask = visinfo->greenMask; - v->mesa_visual.blueMask = visinfo->blueMask; - v->visualID = visinfo->vid; - v->screen = 0; /* FIXME: What should be done here? */ -#else v->mesa_visual.redMask = visinfo->red_mask; v->mesa_visual.greenMask = visinfo->green_mask; v->mesa_visual.blueMask = visinfo->blue_mask; v->visualID = visinfo->visualid; v->screen = visinfo->screen; -#endif -#if defined(XFree86Server) || !(defined(__cplusplus) || defined(c_plusplus)) +#if !(defined(__cplusplus) || defined(c_plusplus)) v->visualType = xmesa_convert_from_x_visual_type(visinfo->class); #else v->visualType = xmesa_convert_from_x_visual_type(visinfo->c_class); @@ -1461,9 +1361,7 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display, PUBLIC void XMesaDestroyVisual( XMesaVisual v ) { -#ifndef XFree86Server free(v->visinfo); -#endif free(v); } @@ -1532,12 +1430,6 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) _mesa_enable_extension(mesaCtx, "GL_EXT_timer_query"); #endif -#ifdef XFree86Server - /* If we're running in the X server, do bounds checking to prevent - * segfaults and server crashes! - */ - mesaCtx->Const.CheckArrayBounds = GL_TRUE; -#endif /* finish up xmesa context initializations */ c->swapbytes = CHECK_BYTE_ORDER(v) ? GL_FALSE : GL_TRUE; @@ -1602,9 +1494,7 @@ void XMesaDestroyContext( XMesaContext c ) PUBLIC XMesaBuffer XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w) { -#ifndef XFree86Server XWindowAttributes attr; -#endif XMesaBuffer b; XMesaColormap cmap; int depth; @@ -1613,12 +1503,8 @@ XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w) assert(w); /* Check that window depth matches visual depth */ -#ifdef XFree86Server - depth = ((XMesaDrawable)w)->depth; -#else XGetWindowAttributes( v->display, w, &attr ); depth = attr.depth; -#endif if (GET_VISUAL_DEPTH(v) != depth) { _mesa_warning(NULL, "XMesaCreateWindowBuffer: depth mismatch between visual (%d) and window (%d)!\n", GET_VISUAL_DEPTH(v), depth); @@ -1626,9 +1512,6 @@ XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w) } /* Find colormap */ -#ifdef XFree86Server - cmap = (ColormapPtr)LookupIDByType(wColormap(w), RT_COLORMAP); -#else if (attr.colormap) { cmap = attr.colormap; } @@ -1638,7 +1521,6 @@ XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w) /* OK, let's just allocate a new one and hope for the best */ cmap = XCreateColormap(v->display, w, attr.visual, AllocNone); } -#endif b = create_xmesa_buffer((XMesaDrawable) w, WINDOW, v, cmap); if (!b) @@ -1748,7 +1630,6 @@ XMesaBuffer XMesaCreatePBuffer(XMesaVisual v, XMesaColormap cmap, unsigned int width, unsigned int height) { -#ifndef XFree86Server XMesaWindow root; XMesaDrawable drawable; /* X Pixmap Drawable */ XMesaBuffer b; @@ -1770,9 +1651,6 @@ XMesaCreatePBuffer(XMesaVisual v, XMesaColormap cmap, } return b; -#else - return 0; -#endif } @@ -1931,40 +1809,6 @@ XMesaBuffer XMesaGetCurrentReadBuffer( void ) } -#ifdef XFree86Server -PUBLIC -GLboolean XMesaForceCurrent(XMesaContext c) -{ - if (c) { - _glapi_set_dispatch(c->mesa.CurrentDispatch); - - if (&(c->mesa) != _mesa_get_current_context()) { - _mesa_make_current(&c->mesa, c->mesa.DrawBuffer, c->mesa.ReadBuffer); - } - } - else { - _mesa_make_current(NULL, NULL, NULL); - } - return GL_TRUE; -} - - -PUBLIC -GLboolean XMesaLoseCurrent(XMesaContext c) -{ - (void) c; - _mesa_make_current(NULL, NULL, NULL); - return GL_TRUE; -} - - -PUBLIC -GLboolean XMesaCopyContext( XMesaContext xm_src, XMesaContext xm_dst, GLuint mask ) -{ - _mesa_copy_context(&xm_src->mesa, &xm_dst->mesa, mask); - return GL_TRUE; -} -#endif /* XFree86Server */ #ifndef FX @@ -2004,7 +1848,7 @@ void XMesaSwapBuffers( XMesaBuffer b ) #endif if (b->backxrb->ximage) { /* Copy Ximage (back buf) from client memory to server window */ -#if defined(USE_XSHM) && !defined(XFree86Server) +#if defined(USE_XSHM) if (b->shm) { /*_glthread_LOCK_MUTEX(_xmesa_lock);*/ XShmPutImage( b->xm_visual->display, b->frontxrb->drawable, @@ -2041,9 +1885,7 @@ void XMesaSwapBuffers( XMesaBuffer b ) if (b->swAlpha) _mesa_copy_soft_alpha_renderbuffers(ctx, &b->mesa_buffer); } -#if !defined(XFree86Server) XSync( b->xm_visual->display, False ); -#endif } @@ -2074,7 +1916,7 @@ void XMesaCopySubBuffer( XMesaBuffer b, int x, int y, int width, int height ) #endif if (b->backxrb->ximage) { /* Copy Ximage from host's memory to server's window */ -#if defined(USE_XSHM) && !defined(XFree86Server) +#if defined(USE_XSHM) if (b->shm) { /* XXX assuming width and height aren't too large! */ XShmPutImage( b->xm_visual->display, b->frontxrb->drawable, @@ -2116,7 +1958,6 @@ void XMesaCopySubBuffer( XMesaBuffer b, int x, int y, int width, int height ) * Return: GL_TRUE = context is double buffered * GL_FALSE = context is single buffered */ -#ifndef XFree86Server GLboolean XMesaGetBackBuffer( XMesaBuffer b, XMesaPixmap *pixmap, XMesaImage **ximage ) @@ -2134,7 +1975,6 @@ GLboolean XMesaGetBackBuffer( XMesaBuffer b, return GL_FALSE; } } -#endif /* XFree86Server */ /* @@ -2171,11 +2011,7 @@ GLboolean XMesaGetDepthBuffer( XMesaBuffer b, GLint *width, GLint *height, void XMesaFlush( XMesaContext c ) { if (c && c->xm_visual) { -#ifdef XFree86Server - /* NOT_NEEDED */ -#else XSync( c->xm_visual->display, False ); -#endif } } @@ -2234,15 +2070,11 @@ void XMesaGarbageCollect( void ) for (b=XMesaBufferList; b; b=next) { next = b->Next; if (b->display && b->frontxrb->drawable && b->type == WINDOW) { -#ifdef XFree86Server - /* NOT_NEEDED */ -#else XSync(b->display, False); if (!window_exists( b->display, b->frontxrb->drawable )) { /* found a dead window, free the ancillary info */ XMesaDestroyBuffer( b ); } -#endif } } } diff --git a/src/mesa/drivers/x11/xm_buffer.c b/src/mesa/drivers/x11/xm_buffer.c index 2683bd44d19..10829b4284f 100644 --- a/src/mesa/drivers/x11/xm_buffer.c +++ b/src/mesa/drivers/x11/xm_buffer.c @@ -37,7 +37,7 @@ #include "main/renderbuffer.h" -#if defined(USE_XSHM) && !defined(XFree86Server) +#if defined(USE_XSHM) static volatile int mesaXErrorFlag = 0; /** @@ -170,7 +170,7 @@ alloc_back_buffer(XMesaBuffer b, GLuint width, GLuint height) if (b->db_mode == BACK_XIMAGE) { /* Deallocate the old backxrb->ximage, if any */ if (b->backxrb->ximage) { -#if defined(USE_XSHM) && !defined(XFree86Server) +#if defined(USE_XSHM) if (b->shm) { XShmDetach(b->xm_visual->display, &b->shminfo); XDestroyImage(b->backxrb->ximage); @@ -188,10 +188,6 @@ alloc_back_buffer(XMesaBuffer b, GLuint width, GLuint height) /* Allocate new back buffer */ if (b->shm == 0 || !alloc_back_shm_ximage(b, width, height)) { /* Allocate a regular XImage for the back buffer. */ -#ifdef XFree86Server - b->backxrb->ximage = XMesaCreateImage(b->xm_visual->BitsPerPixel, - width, height, NULL); -#else b->backxrb->ximage = XCreateImage(b->xm_visual->display, b->xm_visual->visinfo->visual, GET_VISUAL_DEPTH(b->xm_visual), @@ -199,7 +195,6 @@ alloc_back_buffer(XMesaBuffer b, GLuint width, GLuint height) NULL, width, height, 8, 0); /* pad, bytes_per_line */ -#endif if (!b->backxrb->ximage) { _mesa_warning(NULL, "alloc_back_buffer: XCreateImage failed.\n"); return; @@ -359,16 +354,8 @@ xmesa_delete_framebuffer(struct gl_framebuffer *fb) if (b->num_alloced > 0) { /* If no other buffer uses this X colormap then free the colors. */ if (!xmesa_find_buffer(b->display, b->cmap, b)) { -#ifdef XFree86Server - int client = 0; - if (b->frontxrb->drawable) - client = CLIENT_ID(b->frontxrb->drawable->id); - (void)FreeColors(b->cmap, client, - b->num_alloced, b->alloced_colors, 0); -#else XFreeColors(b->display, b->cmap, b->alloced_colors, b->num_alloced, 0); -#endif } } @@ -382,7 +369,7 @@ xmesa_delete_framebuffer(struct gl_framebuffer *fb) if (fb->Visual.doubleBufferMode) { /* free back ximage/pixmap/shmregion */ if (b->backxrb->ximage) { -#if defined(USE_XSHM) && !defined(XFree86Server) +#if defined(USE_XSHM) if (b->shm) { XShmDetach( b->display, &b->shminfo ); XDestroyImage( b->backxrb->ximage ); diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c index acece2025cf..b8d9e20c426 100644 --- a/src/mesa/drivers/x11/xm_dd.c +++ b/src/mesa/drivers/x11/xm_dd.c @@ -93,16 +93,12 @@ const int xmesa_kernel1[16] = { static void finish_or_flush( struct gl_context *ctx ) { -#ifdef XFree86Server - /* NOT_NEEDED */ -#else const XMesaContext xmesa = XMESA_CONTEXT(ctx); if (xmesa) { _glthread_LOCK_MUTEX(_xmesa_lock); XSync( xmesa->display, False ); _glthread_UNLOCK_MUTEX(_xmesa_lock); } -#endif } @@ -388,7 +384,6 @@ clear_buffers(struct gl_context *ctx, GLbitfield buffers) } -#ifndef XFree86Server /* XXX these functions haven't been tested in the Xserver environment */ @@ -731,7 +726,6 @@ xmesa_CopyPixels( struct gl_context *ctx, } } -#endif /* XFree86Server */ @@ -745,17 +739,9 @@ get_string( struct gl_context *ctx, GLenum name ) (void) ctx; switch (name) { case GL_RENDERER: -#ifdef XFree86Server - return (const GLubyte *) "Mesa GLX Indirect"; -#else return (const GLubyte *) "Mesa X11"; -#endif case GL_VENDOR: -#ifdef XFree86Server - return (const GLubyte *) "Mesa project: www.mesa3d.org"; -#else return NULL; -#endif default: return NULL; } @@ -948,43 +934,6 @@ xmesa_update_state( struct gl_context *ctx, GLbitfield new_state ) /** - * Called via ctx->Driver.TestProxyTeximage(). Normally, we'd just use - * the _mesa_test_proxy_teximage() fallback function, but we're going to - * special-case the 3D texture case to allow textures up to 512x512x32 - * texels. - */ -static GLboolean -test_proxy_teximage(struct gl_context *ctx, GLenum target, GLint level, - GLint internalFormat, GLenum format, GLenum type, - GLint width, GLint height, GLint depth, GLint border) -{ - if (target == GL_PROXY_TEXTURE_3D) { - /* special case for 3D textures */ - if (width * height * depth > 512 * 512 * 64 || - width < 2 * border || - (!ctx->Extensions.ARB_texture_non_power_of_two && - _mesa_bitcount(width - 2 * border) != 1) || - height < 2 * border || - (!ctx->Extensions.ARB_texture_non_power_of_two && - _mesa_bitcount(height - 2 * border) != 1) || - depth < 2 * border || - (!ctx->Extensions.ARB_texture_non_power_of_two && - _mesa_bitcount(depth - 2 * border) != 1)) { - /* Bad size, or too many texels */ - return GL_FALSE; - } - return GL_TRUE; - } - else { - /* use the fallback routine for 1D, 2D, cube and rect targets */ - return _mesa_test_proxy_teximage(ctx, target, level, internalFormat, - format, type, width, height, depth, - border); - } -} - - -/** * In SW, we don't really compress GL_COMPRESSED_RGB[A] textures! */ static gl_format @@ -1124,7 +1073,6 @@ xmesa_init_driver_functions( XMesaVisual xmvisual, } else { driver->Clear = clear_buffers; -#ifndef XFree86Server driver->CopyPixels = xmesa_CopyPixels; if (xmvisual->undithered_pf == PF_8R8G8B && xmvisual->dithered_pf == PF_8R8G8B && @@ -1134,9 +1082,8 @@ xmesa_init_driver_functions( XMesaVisual xmvisual, else if (xmvisual->undithered_pf == PF_5R6G5B) { driver->DrawPixels = xmesa_DrawPixels_5R6G5B; } -#endif } - driver->TestProxyTexImage = test_proxy_teximage; + #if ENABLE_EXT_texure_compression_s3tc driver->ChooseTextureFormat = choose_tex_format; #else diff --git a/src/mesa/drivers/x11/xm_glide.c b/src/mesa/drivers/x11/xm_glide.c index cbd69b011a1..d8a0e6de6d0 100644 --- a/src/mesa/drivers/x11/xm_glide.c +++ b/src/mesa/drivers/x11/xm_glide.c @@ -140,16 +140,8 @@ static void FXgetImage( XMesaBuffer b ) GLuint x, y; GLuint width, height; -#ifdef XFree86Server - x = b->frontxrb->pixmap->x; - y = b->frontxrb->pixmap->y; - width = b->frontxrb->pixmap->width; - height = b->frontxrb->pixmap->height; - depth = b->frontxrb->pixmap->depth; -#else xmesa_get_window_size(b->display, b, &width, &height); x = y = 0; -#endif if (b->mesa_buffer.Width != width || b->mesa_buffer.Height != height) { b->mesa_buffer.Width = MIN2((int)width, b->FXctx->width); b->mesa_buffer.Height = MIN2((int)height, b->FXctx->height); diff --git a/src/mesa/drivers/x11/xm_image.c b/src/mesa/drivers/x11/xm_image.c index 087b4e4c3a7..12fef7dad34 100644 --- a/src/mesa/drivers/x11/xm_image.c +++ b/src/mesa/drivers/x11/xm_image.c @@ -37,97 +37,3 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "glxheader.h" #include "xmesaP.h" -#ifdef XFree86Server - -#ifdef ROUNDUP -#undef ROUNDUP -#endif - -#define ROUNDUP(nbytes, pad) ((((nbytes) + ((pad)-1)) / (pad)) * ((pad)>>3)) - -XMesaImage *XMesaCreateImage(int bitsPerPixel, int width, int height, char *data) -{ - XMesaImage *image; - - image = (XMesaImage *)xalloc(sizeof(XMesaImage)); - - if (image) { - image->width = width; - image->height = height; - image->data = data; - /* Always pad to 32 bits */ - image->bytes_per_line = ROUNDUP((bitsPerPixel * width), 32); - image->bits_per_pixel = bitsPerPixel; - } - - return image; -} - -void XMesaDestroyImage(XMesaImage *image) -{ - if (image->data) - free(image->data); - xfree(image); -} - -unsigned long XMesaGetPixel(XMesaImage *image, int x, int y) -{ - CARD8 *row = (CARD8 *)(image->data + y*image->bytes_per_line); - CARD8 *i8; - CARD16 *i16; - CARD32 *i32; - switch (image->bits_per_pixel) { - case 8: - i8 = (CARD8 *)row; - return i8[x]; - break; - case 15: - case 16: - i16 = (CARD16 *)row; - return i16[x]; - break; - case 24: /* WARNING: architecture specific code */ - i8 = (CARD8 *)row; - return (((CARD32)i8[x*3]) | - (((CARD32)i8[x*3+1])<<8) | - (((CARD32)i8[x*3+2])<<16)); - break; - case 32: - i32 = (CARD32 *)row; - return i32[x]; - break; - } - return 0; -} - -#ifndef XMESA_USE_PUTPIXEL_MACRO -void XMesaPutPixel(XMesaImage *image, int x, int y, unsigned long pixel) -{ - CARD8 *row = (CARD8 *)(image->data + y*image->bytes_per_line); - CARD8 *i8; - CARD16 *i16; - CARD32 *i32; - switch (image->bits_per_pixel) { - case 8: - i8 = (CARD8 *)row; - i8[x] = (CARD8)pixel; - break; - case 15: - case 16: - i16 = (CARD16 *)row; - i16[x] = (CARD16)pixel; - break; - case 24: /* WARNING: architecture specific code */ - i8 = (CARD8 *)__row; - i8[x*3] = (CARD8)(p); - i8[x*3+1] = (CARD8)(p>>8); - i8[x*3+2] = (CARD8)(p>>16); - case 32: - i32 = (CARD32 *)row; - i32[x] = (CARD32)pixel; - break; - } -} -#endif - -#endif /* XFree86Server */ diff --git a/src/mesa/drivers/x11/xm_line.c b/src/mesa/drivers/x11/xm_line.c index f03f99f918f..04cedcd4ec0 100644 --- a/src/mesa/drivers/x11/xm_line.c +++ b/src/mesa/drivers/x11/xm_line.c @@ -537,7 +537,6 @@ void xmesa_choose_point( struct gl_context *ctx ) -#ifndef XFree86Server /** * Draw fast, XOR line with XDrawLine in front color buffer. * WARNING: this isn't fully OpenGL conformant because different pixels @@ -567,7 +566,6 @@ xor_line(struct gl_context *ctx, const SWvertex *vert0, const SWvertex *vert1) XDrawLine(dpy, xrb->pixmap, gc, x0, y0, x1, y1); XMesaSetFunction(dpy, gc, GXcopy); /* this gc is used elsewhere */ } -#endif /* XFree86Server */ #endif /* CHAN_BITS == 8 */ @@ -660,7 +658,6 @@ get_line_func(struct gl_context *ctx) } } -#ifndef XFree86Server if (ctx->DrawBuffer->_NumColorDrawBuffers == 1 && ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT && swrast->_RasterMask == LOGIC_OP_BIT @@ -669,7 +666,6 @@ get_line_func(struct gl_context *ctx) && !ctx->Line.SmoothFlag) { return xor_line; } -#endif /* XFree86Server */ #endif /* CHAN_BITS == 8 */ return (swrast_line_func) NULL; diff --git a/src/mesa/drivers/x11/xm_span.c b/src/mesa/drivers/x11/xm_span.c index ab66c5e1f12..294b93a57cc 100644 --- a/src/mesa/drivers/x11/xm_span.c +++ b/src/mesa/drivers/x11/xm_span.c @@ -42,7 +42,6 @@ * generate BadMatch errors if the drawable isn't mapped. */ -#ifndef XFree86Server static int caught_xgetimage_error = 0; static int (*old_xerror_handler)( XMesaDisplay *dpy, XErrorEvent *ev ); static unsigned long xgetimage_serial; @@ -87,7 +86,6 @@ static int check_xgetimage_errors( void ) /* return 0=no error, 1=error caught */ return caught_xgetimage_error; } -#endif /* @@ -97,7 +95,6 @@ static unsigned long read_pixel( XMesaDisplay *dpy, XMesaDrawable d, int x, int y ) { unsigned long p; -#ifndef XFree86Server XMesaImage *pixel = NULL; int error; @@ -113,9 +110,6 @@ static unsigned long read_pixel( XMesaDisplay *dpy, if (pixel) { XMesaDestroyImage( pixel ); } -#else - (*dpy->GetImage)(d, x, y, 1, 1, ZPixmap, ~0L, (pointer)&p); -#endif return p; } @@ -3763,7 +3757,6 @@ static void put_values_ci_ximage( PUT_VALUES_ARGS ) /***** Pixel reading *****/ /**********************************************************************/ -#ifndef XFree86Server /** * Do clip testing prior to calling XGetImage. If any of the region lies * outside the screen's bounds, XGetImage will return NULL. @@ -3806,7 +3799,6 @@ clip_for_xgetimage(struct gl_context *ctx, XMesaPixmap pixmap, GLuint *n, GLint } return 0; } -#endif /* @@ -3824,7 +3816,6 @@ get_row_ci(struct gl_context *ctx, struct gl_renderbuffer *rb, y = YFLIP(xrb, y); if (xrb->pixmap) { -#ifndef XFree86Server XMesaImage *span = NULL; int error; int k = clip_for_xgetimage(ctx, xrb->pixmap, &n, &x, &y); @@ -3850,11 +3841,6 @@ get_row_ci(struct gl_context *ctx, struct gl_renderbuffer *rb, if (span) { XMesaDestroyImage( span ); } -#else - (*xmesa->display->GetImage)(xrb->drawable, - x, y, n, 1, ZPixmap, - ~0L, (pointer)index); -#endif } else if (xrb->ximage) { XMesaImage *img = xrb->ximage; @@ -3882,14 +3868,6 @@ get_row_rgba(struct gl_context *ctx, struct gl_renderbuffer *rb, /* Read from Pixmap or Window */ XMesaImage *span = NULL; int error; -#ifdef XFree86Server - span = XMesaCreateImage(xmesa->xm_visual->BitsPerPixel, n, 1, NULL); - span->data = (char *)MALLOC(span->height * span->bytes_per_line); - error = (!span->data); - (*xmesa->display->GetImage)(xrb->drawable, - x, YFLIP(xrb, y), n, 1, ZPixmap, - ~0L, (pointer)span->data); -#else int k; y = YFLIP(xrb, y); k = clip_for_xgetimage(ctx, xrb->pixmap, &n, &x, &y); @@ -3900,7 +3878,6 @@ get_row_rgba(struct gl_context *ctx, struct gl_renderbuffer *rb, span = XGetImage( xmesa->display, xrb->pixmap, x, y, n, 1, AllPlanes, ZPixmap ); error = check_xgetimage_errors(); -#endif if (span && !error) { switch (xmesa->pixelformat) { case PF_Truecolor: diff --git a/src/mesa/drivers/x11/xmesa.h b/src/mesa/drivers/x11/xmesa.h index f63626a9702..98737fab248 100644 --- a/src/mesa/drivers/x11/xmesa.h +++ b/src/mesa/drivers/x11/xmesa.h @@ -72,13 +72,9 @@ and create a window, you must do the following to use the X/Mesa interface: extern "C" { #endif -#ifdef XFree86Server -#include "xmesa_xf86.h" -#else #include <X11/Xlib.h> #include <X11/Xutil.h> #include "xmesa_x.h" -#endif #include "GL/gl.h" #ifdef AMIWIN @@ -180,19 +176,6 @@ extern XMesaContext XMesaCreateContext( XMesaVisual v, extern void XMesaDestroyContext( XMesaContext c ); -#ifdef XFree86Server -/* - * These are the extra routines required for integration with XFree86. - * None of these routines should be user visible. -KEM - */ -extern GLboolean XMesaForceCurrent( XMesaContext c ); - -extern GLboolean XMesaLoseCurrent( XMesaContext c ); - -extern GLboolean XMesaCopyContext( XMesaContext src, - XMesaContext dst, - GLuint mask ); -#endif /* XFree86Server */ /* diff --git a/src/mesa/drivers/x11/xmesaP.h b/src/mesa/drivers/x11/xmesaP.h index 5d34b430cb6..63e3e211bf6 100644 --- a/src/mesa/drivers/x11/xmesaP.h +++ b/src/mesa/drivers/x11/xmesaP.h @@ -33,9 +33,6 @@ #include "fxmesa.h" #include "xm_glide.h" #endif -#ifdef XFree86Server -#include "xm_image.h" -#endif extern _glthread_Mutex _xmesa_lock; @@ -88,13 +85,8 @@ struct xmesa_visual { XMesaDisplay *display; /* The X11 display */ int screen, visualID; int visualType; -#ifdef XFree86Server - GLint ColormapEntries; - GLint nplanes; -#else XMesaVisualInfo visinfo; /* X's visual info (pointer to private copy) */ XVisualInfo *vishandle; /* Only used in fakeglx.c */ -#endif GLint BitsPerPixel; /* True bits per pixel for XImages */ GLboolean ximage_flag; /* Use XImage for back buffer (not pixmap)? */ @@ -233,7 +225,7 @@ struct xmesa_buffer { /* 0 = not available */ /* 1 = XImage support available */ /* 2 = Pixmap support available too */ -#if defined(USE_XSHM) && !defined(XFree86Server) +#if defined(USE_XSHM) XShmSegmentInfo shminfo; #endif @@ -259,11 +251,7 @@ struct xmesa_buffer { /* Used to do XAllocColor/XFreeColors accounting: */ int num_alloced; -#if defined(XFree86Server) - Pixel alloced_colors[256]; -#else unsigned long alloced_colors[256]; -#endif #if defined( FX ) /* For 3Dfx Glide only */ @@ -578,9 +566,7 @@ extern void xmesa_register_swrast_functions( struct gl_context *ctx ); #define ENABLE_EXT_texure_compression_s3tc 0 /* SW texture compression */ -#ifdef XFree86Server -#define ENABLE_EXT_timer_query 0 -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L #define ENABLE_EXT_timer_query 1 /* should have 64-bit GLuint64EXT */ #else #define ENABLE_EXT_timer_query 0 /* may not have 64-bit GLuint64EXT */ |