diff options
Diffstat (limited to 'src/mesa')
173 files changed, 4616 insertions, 4373 deletions
diff --git a/src/mesa/SConscript b/src/mesa/SConscript index 08d731de2d5..cc4ad09fa33 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -16,7 +16,6 @@ if env['platform'] == 'windows': env.Append(CPPDEFINES = [ '_GDI32_', # prevent gl* being declared __declspec(dllimport) in MS headers 'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers - 'WIN32_THREADS', # use Win32 thread API ]) env.Prepend(CPPPATH = ['#src/talloc']) else: diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index ba8be125718..cdb2500f7c2 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -266,13 +266,16 @@ struct gen_mipmap_state GLuint FBO; }; - +#define MAX_META_OPS_DEPTH 2 /** * All per-context meta state. */ struct gl_meta_state { - struct save_state Save; /**< state saved during meta-ops */ + /** Stack of state saved during meta-ops */ + struct save_state Save[MAX_META_OPS_DEPTH]; + /** Save stack depth */ + GLuint SaveStackDepth; struct temp_texture TempTex; @@ -324,8 +327,13 @@ _mesa_meta_free(struct gl_context *ctx) static void _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) { - struct save_state *save = &ctx->Meta->Save; + struct save_state *save; + + /* hope MAX_META_OPS_DEPTH is large enough */ + assert(ctx->Meta->SaveStackDepth < MAX_META_OPS_DEPTH); + save = &ctx->Meta->Save[ctx->Meta->SaveStackDepth++]; + memset(save, 0, sizeof(*save)); save->SavedState = state; if (state & META_ALPHA_TEST) { @@ -575,7 +583,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) static void _mesa_meta_end(struct gl_context *ctx) { - struct save_state *save = &ctx->Meta->Save; + struct save_state *save = &ctx->Meta->Save[--ctx->Meta->SaveStackDepth]; const GLbitfield state = save->SavedState; if (state & META_ALPHA_TEST) { @@ -1398,6 +1406,7 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers) struct vertex verts[4]; /* save all state but scissor, pixel pack/unpack */ GLbitfield metaSave = META_ALL - META_SCISSOR - META_PIXEL_STORE; + const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1; if (buffers & BUFFER_BITS_COLOR) { /* if clearing color buffers, don't save/restore colormask */ @@ -1453,7 +1462,7 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers) _mesa_StencilOpSeparate(GL_FRONT_AND_BACK, GL_REPLACE, GL_REPLACE, GL_REPLACE); _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS, - ctx->Stencil.Clear & 0x7fffffff, + ctx->Stencil.Clear & stencilMax, ctx->Stencil.WriteMask[0]); } else { diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index f943f81dd05..f32f3cf6020 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -176,6 +176,7 @@ i915CreateContext(int api, ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitCondCodes = GL_TRUE; ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoIfs = GL_TRUE; ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoNoise = GL_TRUE; + ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoPow = GL_TRUE; ctx->Const.MaxDrawBuffers = 1; diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index c00ee415b6b..7a9fb7f088b 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -569,10 +569,14 @@ upload_program(struct i915_fragment_program *p) if (inst->DstReg.CondMask == COND_TR) { tmp = i915_get_utemp(p); + /* The KIL instruction discards the fragment if any component of + * the source is < 0. Emit an immediate operand of {-1}.xywz. + */ i915_emit_texld(p, get_live_regs(p, inst), tmp, A0_DEST_CHANNEL_ALL, 0, /* use a dummy dest reg */ - swizzle(tmp, ONE, ONE, ONE, ONE), /* always */ + negate(swizzle(tmp, ONE, ONE, ONE, ONE), + 1, 1, 1, 1), T0_TEXKILL); } else { p->error = 1; diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index e3ca863fe51..7c3ac0c14ef 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -81,7 +81,6 @@ DRIVER_SOURCES = \ brw_wm_emit.c \ brw_wm_fp.c \ brw_wm_iz.c \ - brw_wm_glsl.c \ brw_wm_pass0.c \ brw_wm_pass1.c \ brw_wm_pass2.c \ diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index a8369b07c35..d3a1233aac0 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -232,3 +232,28 @@ const struct brw_tracked_state brw_cc_unit = { .prepare = prepare_cc_unit, .emit = upload_cc_unit, }; + +static void upload_blend_constant_color(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->intel.ctx; + struct brw_blend_constant_color bcc; + + memset(&bcc, 0, sizeof(bcc)); + bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR; + bcc.header.length = sizeof(bcc)/4-2; + bcc.blend_constant_color[0] = ctx->Color.BlendColor[0]; + bcc.blend_constant_color[1] = ctx->Color.BlendColor[1]; + bcc.blend_constant_color[2] = ctx->Color.BlendColor[2]; + bcc.blend_constant_color[3] = ctx->Color.BlendColor[3]; + + BRW_CACHED_BATCH_STRUCT(brw, &bcc); +} + +const struct brw_tracked_state brw_blend_constant_color = { + .dirty = { + .mesa = _NEW_COLOR, + .brw = BRW_NEW_CONTEXT, + .cache = 0 + }, + .emit = upload_blend_constant_color +}; diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index cb0a8b96c9c..28549f2574a 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -122,9 +122,6 @@ GLboolean brwCreateContext( int api, (i == MESA_SHADER_FRAGMENT); ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp = (i == MESA_SHADER_FRAGMENT); - - if (intel->gen == 6) - ctx->ShaderCompilerOptions[i].EmitNoIfs = (i == MESA_SHADER_VERTEX); } ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 335339515a2..7069724466a 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -171,7 +171,6 @@ struct brw_vertex_program { struct brw_fragment_program { struct gl_fragment_program program; GLuint id; /**< serial no. to identify frag progs, never re-used */ - GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ /** for debugging, which texture units are referenced */ GLbitfield tex_units_used; @@ -211,6 +210,7 @@ struct brw_wm_prog_data { GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; GLboolean error; + int dispatch_width; /* Pointer to tracked values (only valid once * _mesa_load_state_parameters has been called at runtime). diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 7b823eb201b..877b22fec19 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -242,21 +242,13 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; - if (vp->use_const_buffer) { - /* Load the subset of push constants that will get used when - * we also have a pull constant buffer. - */ - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - assert(brw->vs.constant_map[i] <= nr); - memcpy(buf + offset + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); - } - } - } else { - for (i = 0; i < nr; i++) { - memcpy(buf + offset + i * 4, + /* Load the subset of push constants that will get used when + * we also have a pull constant buffer. + */ + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + assert(brw->vs.constant_map[i] <= nr); + memcpy(buf + offset + brw->vs.constant_map[i] * 4, vp->program.Base.Parameters->ParameterValues[i], 4 * sizeof(float)); } diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 239586a0366..7f3e4986808 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -462,6 +462,13 @@ #define BRW_COMPRESSION_2NDHALF 1 #define BRW_COMPRESSION_COMPRESSED 2 +#define GEN6_COMPRESSION_1Q 0 +#define GEN6_COMPRESSION_2Q 1 +#define GEN6_COMPRESSION_3Q 2 +#define GEN6_COMPRESSION_4Q 3 +#define GEN6_COMPRESSION_1H 0 +#define GEN6_COMPRESSION_2H 2 + #define BRW_CONDITIONAL_NONE 0 #define BRW_CONDITIONAL_Z 1 #define BRW_CONDITIONAL_NZ 2 @@ -1022,6 +1029,13 @@ # define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9 # define ATTRIBUTE_0_SWIZZLE_SHIFT 6 # define ATTRIBUTE_0_SOURCE_SHIFT 0 + +# define ATTRIBUTE_SWIZZLE_INPUTATTR 0 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3 +# define ATTRIBUTE_SWIZZLE_SHIFT 6 + /* DW16: Point sprite texture coordinate enables */ /* DW17: Constant interpolation enables */ /* DW18: attr 0-7 wrap shortest enables */ diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index 962c04128b8..6b61f7af15d 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -899,7 +899,8 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) err |= dest (file, inst); } else if (gen >= 6 && (inst->header.opcode == BRW_OPCODE_IF || inst->header.opcode == BRW_OPCODE_ELSE || - inst->header.opcode == BRW_OPCODE_ENDIF)) { + inst->header.opcode == BRW_OPCODE_ENDIF || + inst->header.opcode == BRW_OPCODE_WHILE)) { format (file, " %d", inst->bits1.branch_gen6.jump_count); } diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index 2ff39e8e64a..3b5c4c071e3 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -72,7 +72,37 @@ void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ) void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control ) { - p->current->header.compression_control = compression_control; + p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED); + + if (p->brw->intel.gen >= 6) { + /* Since we don't use the 32-wide support in gen6, we translate + * the pre-gen6 compression control here. + */ + switch (compression_control) { + case BRW_COMPRESSION_NONE: + /* This is the "use the first set of bits of dmask/vmask/arf + * according to execsize" option. + */ + p->current->header.compression_control = GEN6_COMPRESSION_1Q; + break; + case BRW_COMPRESSION_2NDHALF: + /* For 8-wide, this is "use the second set of 8 bits." */ + p->current->header.compression_control = GEN6_COMPRESSION_2Q; + break; + case BRW_COMPRESSION_COMPRESSED: + /* For 16-wide instruction compression, use the first set of 16 bits + * since we don't do 32-wide dispatch. + */ + p->current->header.compression_control = GEN6_COMPRESSION_1H; + break; + default: + assert(!"not reached"); + p->current->header.compression_control = GEN6_COMPRESSION_1H; + break; + } + } else { + p->current->header.compression_control = compression_control; + } } void brw_set_mask_control( struct brw_compile *p, GLuint value ) @@ -95,6 +125,7 @@ void brw_push_insn_state( struct brw_compile *p ) { assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); + p->compressed_stack[p->current - p->stack] = p->compressed; p->current++; } @@ -102,6 +133,7 @@ void brw_pop_insn_state( struct brw_compile *p ) { assert(p->current != p->stack); p->current--; + p->compressed = p->compressed_stack[p->current - p->stack]; } @@ -112,6 +144,7 @@ void brw_init_compile( struct brw_context *brw, struct brw_compile *p ) p->brw = brw; p->nr_insn = 0; p->current = p->stack; + p->compressed = false; memset(p->current, 0, sizeof(p->current[0])); /* Some defaults? diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index b4538e6e8a7..4dbdc522100 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -33,6 +33,7 @@ #ifndef BRW_EU_H #define BRW_EU_H +#include <stdbool.h> #include "brw_structs.h" #include "brw_defines.h" #include "program/prog_instruction.h" @@ -106,10 +107,12 @@ struct brw_compile { /* Allow clients to push/pop instruction state: */ struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; + bool compressed_stack[BRW_EU_MAX_INSN_STACK]; struct brw_instruction *current; GLuint flag_value; GLboolean single_program_flow; + bool compressed; struct brw_context *brw; struct brw_glsl_label *first_label; /**< linked list of labels */ @@ -954,6 +957,8 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *patch_insn); struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count); +struct brw_instruction *brw_CONT_gen6(struct brw_compile *p, + struct brw_instruction *do_insn); struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count); /* Forward jumps: */ @@ -1009,6 +1014,7 @@ void brw_math_invert( struct brw_compile *p, void brw_set_src1( struct brw_instruction *insn, struct brw_reg reg ); +void brw_set_uip_jip(struct brw_compile *p); /* brw_optimize.c */ void brw_optimize(struct brw_compile *p); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 9cb941dacfd..9c764fe779d 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -41,19 +41,20 @@ * Internal helper for constructing instructions */ -static void guess_execution_size( struct brw_instruction *insn, - struct brw_reg reg ) +static void guess_execution_size(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg reg) { - if (reg.width == BRW_WIDTH_8 && - insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) + if (reg.width == BRW_WIDTH_8 && p->compressed) insn->header.execution_size = BRW_EXECUTE_16; else insn->header.execution_size = reg.width; /* note - definitions are compatible */ } -static void brw_set_dest( struct brw_instruction *insn, - struct brw_reg dest ) +static void brw_set_dest(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg dest) { if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && dest.file != BRW_MESSAGE_REGISTER_FILE) @@ -100,7 +101,7 @@ static void brw_set_dest( struct brw_instruction *insn, /* NEW: Set the execution size based on dest.width and * insn->compression_control: */ - guess_execution_size(insn, dest); + guess_execution_size(p, insn, dest); } extern int reg_type_size[]; @@ -629,7 +630,7 @@ static struct brw_instruction *brw_alu1( struct brw_compile *p, struct brw_reg src ) { struct brw_instruction *insn = next_insn(p, opcode); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); return insn; } @@ -641,7 +642,7 @@ static struct brw_instruction *brw_alu2(struct brw_compile *p, struct brw_reg src1 ) { struct brw_instruction *insn = next_insn(p, opcode); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, src1); return insn; @@ -680,7 +681,7 @@ void brw_##OP(struct brw_compile *p, \ { \ struct brw_instruction *rnd, *add; \ rnd = next_insn(p, BRW_OPCODE_##OP); \ - brw_set_dest(rnd, dest); \ + brw_set_dest(p, rnd, dest); \ brw_set_src0(rnd, src); \ rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \ \ @@ -779,7 +780,7 @@ struct brw_instruction *brw_MUL(struct brw_compile *p, void brw_NOP(struct brw_compile *p) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src1(insn, brw_imm_ud(0x0)); } @@ -840,11 +841,11 @@ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) /* Override the defaults for this instruction: */ if (intel->gen < 6) { - brw_set_dest(insn, brw_ip_reg()); + brw_set_dest(p, insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); } else { - brw_set_dest(insn, brw_imm_w(0)); + brw_set_dest(p, insn, brw_imm_w(0)); insn->bits1.branch_gen6.jump_count = 0; brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); @@ -870,7 +871,7 @@ brw_IF_gen6(struct brw_compile *p, uint32_t conditional, insn = next_insn(p, BRW_OPCODE_IF); - brw_set_dest(insn, brw_imm_w(0)); + brw_set_dest(p, insn, brw_imm_w(0)); insn->header.execution_size = BRW_EXECUTE_8; insn->bits1.branch_gen6.jump_count = 0; brw_set_src0(insn, src0); @@ -905,11 +906,11 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, } if (intel->gen < 6) { - brw_set_dest(insn, brw_ip_reg()); + brw_set_dest(p, insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); } else { - brw_set_dest(insn, brw_imm_w(0)); + brw_set_dest(p, insn, brw_imm_w(0)); insn->bits1.branch_gen6.jump_count = 0; brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); @@ -965,11 +966,11 @@ void brw_ENDIF(struct brw_compile *p, struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); if (intel->gen < 6) { - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src1(insn, brw_imm_d(0x0)); } else { - brw_set_dest(insn, brw_imm_w(0)); + brw_set_dest(p, insn, brw_imm_w(0)); brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); } @@ -1029,16 +1030,44 @@ void brw_ENDIF(struct brw_compile *p, struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_BREAK); - brw_set_dest(insn, brw_ip_reg()); + if (intel->gen >= 6) { + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(insn, brw_imm_d(0x0)); + } else { + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->bits3.if_else.pad0 = 0; + insn->bits3.if_else.pop_count = pop_count; + } + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + + return insn; +} + +struct brw_instruction *brw_CONT_gen6(struct brw_compile *p, + struct brw_instruction *do_insn) +{ + struct brw_instruction *insn; + int br = 2; + + insn = next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_dest(p, insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); + + insn->bits3.break_cont.uip = br * (do_insn - insn); + insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = BRW_EXECUTE_8; - /* insn->header.mask_control = BRW_MASK_DISABLE; */ - insn->bits3.if_else.pad0 = 0; - insn->bits3.if_else.pop_count = pop_count; return insn; } @@ -1046,7 +1075,7 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) { struct brw_instruction *insn; insn = next_insn(p, BRW_OPCODE_CONTINUE); - brw_set_dest(insn, brw_ip_reg()); + brw_set_dest(p, insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); insn->header.compression_control = BRW_COMPRESSION_NONE; @@ -1058,17 +1087,33 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) } /* DO/WHILE loop: + * + * The DO/WHILE is just an unterminated loop -- break or continue are + * used for control within the loop. We have a few ways they can be + * done. + * + * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, + * jip and no DO instruction. + * + * For non-uniform control flow pre-gen6, there's a DO instruction to + * push the mask, and a WHILE to jump back, and BREAK to get out and + * pop the mask. + * + * For gen6, there's no more mask stack, so no need for DO. WHILE + * just points back to the first instruction of the loop. */ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) { - if (p->single_program_flow) { + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6 || p->single_program_flow) { return &p->store[p->nr_insn]; } else { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); /* Override the defaults for this instruction: */ - brw_set_dest(insn, brw_null_reg()); + brw_set_dest(p, insn, brw_null_reg()); brw_set_src0(insn, brw_null_reg()); brw_set_src1(insn, brw_null_reg()); @@ -1094,34 +1139,42 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, if (intel->gen >= 5) br = 2; - if (p->single_program_flow) - insn = next_insn(p, BRW_OPCODE_ADD); - else + if (intel->gen >= 6) { insn = next_insn(p, BRW_OPCODE_WHILE); - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); + brw_set_dest(p, insn, brw_imm_w(0)); + insn->bits1.branch_gen6.jump_count = br * (do_insn - insn); + brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = do_insn->header.execution_size; + assert(insn->header.execution_size == BRW_EXECUTE_8); + } else { + if (p->single_program_flow) { + insn = next_insn(p, BRW_OPCODE_ADD); - if (p->single_program_flow) { - insn->header.execution_size = BRW_EXECUTE_1; + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d((do_insn - insn) * 16)); + insn->header.execution_size = BRW_EXECUTE_1; + } else { + insn = next_insn(p, BRW_OPCODE_WHILE); - insn->bits3.d = (do_insn - insn) * 16; - } else { - insn->header.execution_size = do_insn->header.execution_size; + assert(do_insn->header.opcode == BRW_OPCODE_DO); - assert(do_insn->header.opcode == BRW_OPCODE_DO); - insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); - insn->bits3.if_else.pop_count = 0; - insn->bits3.if_else.pad0 = 0; - } + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0)); -/* insn->header.mask_control = BRW_MASK_ENABLE; */ + insn->header.execution_size = do_insn->header.execution_size; + insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } + } + insn->header.compression_control = BRW_COMPRESSION_NONE; + p->current->header.predicate_control = BRW_PREDICATE_NONE; - /* insn->header.mask_control = BRW_MASK_DISABLE; */ - p->current->header.predicate_control = BRW_PREDICATE_NONE; return insn; } @@ -1159,7 +1212,7 @@ void brw_CMP(struct brw_compile *p, struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); insn->header.destreg__conditionalmod = conditional; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, src1); @@ -1184,7 +1237,7 @@ void brw_WAIT (struct brw_compile *p) struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT); struct brw_reg src = brw_notification_1_reg(); - brw_set_dest(insn, src); + brw_set_dest(p, insn, src); brw_set_src0(insn, src); brw_set_src1(insn, brw_null_reg()); insn->header.execution_size = 0; /* must */ @@ -1219,6 +1272,10 @@ void brw_math( struct brw_compile *p, assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); + /* Source modifiers are ignored for extended math instructions. */ + assert(!src.negate); + assert(!src.abs); + if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { assert(src.type == BRW_REGISTER_TYPE_F); @@ -1228,8 +1285,9 @@ void brw_math( struct brw_compile *p, * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. */ insn->header.destreg__conditionalmod = function; + insn->header.saturate = saturate; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); brw_set_src1(insn, brw_null_reg()); } else { @@ -1242,7 +1300,7 @@ void brw_math( struct brw_compile *p, insn->header.predicate_control = 0; insn->header.destreg__conditionalmod = msg_reg_nr; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); brw_set_math_message(p->brw, insn, @@ -1284,12 +1342,18 @@ void brw_math2(struct brw_compile *p, assert(src1.type == BRW_REGISTER_TYPE_F); } + /* Source modifiers are ignored for extended math instructions. */ + assert(!src0.negate); + assert(!src0.abs); + assert(!src1.negate); + assert(!src1.abs); + /* Math is the same ISA format as other opcodes, except that CondModifier * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. */ insn->header.destreg__conditionalmod = function; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, src1); } @@ -1318,8 +1382,13 @@ void brw_math_16( struct brw_compile *p, * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. */ insn->header.destreg__conditionalmod = function; + insn->header.saturate = saturate; + + /* Source modifiers are ignored for extended math instructions. */ + assert(!src.negate); + assert(!src.abs); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); brw_set_src1(insn, brw_null_reg()); return; @@ -1334,7 +1403,7 @@ void brw_math_16( struct brw_compile *p, insn = next_insn(p, BRW_OPCODE_SEND); insn->header.destreg__conditionalmod = msg_reg_nr; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); brw_set_math_message(p->brw, insn, @@ -1351,7 +1420,7 @@ void brw_math_16( struct brw_compile *p, insn->header.compression_control = BRW_COMPRESSION_2NDHALF; insn->header.destreg__conditionalmod = msg_reg_nr+1; - brw_set_dest(insn, offset(dest,1)); + brw_set_dest(p, insn, offset(dest,1)); brw_set_src0(insn, src); brw_set_math_message(p->brw, insn, @@ -1446,7 +1515,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p, send_commit_msg = 1; } - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, brw_null_reg()); brw_set_dp_write_message(p->brw, @@ -1516,7 +1585,7 @@ brw_oword_block_read_scratch(struct brw_compile *p, insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditionalmod = mrf.nr; - brw_set_dest(insn, dest); /* UW? */ + brw_set_dest(p, insn, dest); /* UW? */ brw_set_src0(insn, brw_null_reg()); brw_set_dp_read_message(p->brw, @@ -1569,7 +1638,7 @@ void brw_oword_block_read(struct brw_compile *p, /* cast dest to a uword[8] vector */ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); if (intel->gen >= 6) { brw_set_src0(insn, mrf); } else { @@ -1614,7 +1683,7 @@ void brw_dword_scattered_read(struct brw_compile *p, /* cast dest to a uword[8] vector */ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, brw_null_reg()); brw_set_dp_read_message(p->brw, @@ -1639,29 +1708,21 @@ void brw_dp_READ_4_vs(struct brw_compile *p, GLuint location, GLuint bind_table_index) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; GLuint msg_reg_nr = 1; - struct brw_reg b; - /* - printf("vs const read msg, location %u, msg_reg_nr %d\n", - location, msg_reg_nr); - */ + if (intel->gen >= 6) + location /= 16; /* Setup MRF[1] with location/offset into const buffer */ brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - /* XXX I think we're setting all the dwords of MRF[1] to 'location'. - * when the docs say only dword[2] should be set. Hmmm. But it works. - */ - b = brw_message_reg(msg_reg_nr); - b = retype(b, BRW_REGISTER_TYPE_UD); - /*b = get_element_ud(b, 2);*/ - brw_MOV(p, b, brw_imm_ud(location)); - + brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2), + BRW_REGISTER_TYPE_UD), + brw_imm_ud(location)); brw_pop_insn_state(p); insn = next_insn(p, BRW_OPCODE_SEND); @@ -1671,8 +1732,12 @@ void brw_dp_READ_4_vs(struct brw_compile *p, insn->header.destreg__conditionalmod = msg_reg_nr; insn->header.mask_control = BRW_MASK_DISABLE; - brw_set_dest(insn, dest); - brw_set_src0(insn, brw_null_reg()); + brw_set_dest(p, insn, dest); + if (intel->gen >= 6) { + brw_set_src0(insn, brw_message_reg(msg_reg_nr)); + } else { + brw_set_src0(insn, brw_null_reg()); + } brw_set_dp_read_message(p->brw, insn, @@ -1706,7 +1771,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, /* M1.0 is block offset 0, M1.4 is block offset 1, all other * fields ignored. */ - brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), + brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D), addr_reg, brw_imm_d(offset)); brw_pop_insn_state(p); @@ -1717,7 +1782,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, insn->header.destreg__conditionalmod = 0; insn->header.mask_control = BRW_MASK_DISABLE; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, brw_vec8_grf(0, 0)); if (intel->gen == 6) @@ -1782,7 +1847,7 @@ void brw_fb_WRITE(struct brw_compile *p, else msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_dp_write_message(p->brw, insn, @@ -1860,7 +1925,7 @@ void brw_SAMPLE(struct brw_compile *p, struct brw_reg m1 = brw_message_reg(msg_reg_nr); - guess_execution_size(p->current, dest); + guess_execution_size(p, p->current, dest); if (p->current->header.execution_size == BRW_EXECUTE_16) dispatch_16 = GL_TRUE; @@ -1895,12 +1960,15 @@ void brw_SAMPLE(struct brw_compile *p, * and the first message register index comes from src0. */ if (intel->gen >= 6) { - brw_push_insn_state(p); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - /* m1 contains header? */ - brw_MOV(p, brw_message_reg(msg_reg_nr), src0); - brw_pop_insn_state(p); - src0 = brw_message_reg(msg_reg_nr); + if (src0.file != BRW_ARCHITECTURE_REGISTER_FILE || + src0.nr != BRW_ARF_NULL) { + brw_push_insn_state(p); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, retype(brw_message_reg(msg_reg_nr), src0.type), src0); + brw_pop_insn_state(p); + } + src0 = brw_message_reg(msg_reg_nr); } insn = next_insn(p, BRW_OPCODE_SEND); @@ -1909,7 +1977,7 @@ void brw_SAMPLE(struct brw_compile *p, if (intel->gen < 6) insn->header.destreg__conditionalmod = msg_reg_nr; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_sampler_message(p->brw, insn, binding_table_index, @@ -1970,7 +2038,7 @@ void brw_urb_WRITE(struct brw_compile *p, assert(msg_length < BRW_MAX_MRF); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); @@ -1989,6 +2057,80 @@ void brw_urb_WRITE(struct brw_compile *p, swizzle); } +static int +brw_find_next_block_end(struct brw_compile *p, int start) +{ + int ip; + + for (ip = start + 1; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + switch (insn->header.opcode) { + case BRW_OPCODE_ENDIF: + case BRW_OPCODE_ELSE: + case BRW_OPCODE_WHILE: + return ip; + } + } + assert(!"not reached"); + return start + 1; +} + +/* There is no DO instruction on gen6, so to find the end of the loop + * we have to see if the loop is jumping back before our start + * instruction. + */ +static int +brw_find_loop_end(struct brw_compile *p, int start) +{ + int ip; + int br = 2; + + for (ip = start + 1; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + if (insn->header.opcode == BRW_OPCODE_WHILE) { + if (ip + insn->bits1.branch_gen6.jump_count / br < start) + return ip; + } + } + assert(!"not reached"); + return start + 1; +} + +/* After program generation, go back and update the UIP and JIP of + * BREAK and CONT instructions to their correct locations. + */ +void +brw_set_uip_jip(struct brw_compile *p) +{ + struct intel_context *intel = &p->brw->intel; + int ip; + int br = 2; + + if (intel->gen < 6) + return; + + for (ip = 0; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + switch (insn->header.opcode) { + case BRW_OPCODE_BREAK: + insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + 1); + break; + case BRW_OPCODE_CONTINUE: + /* JIP is set at CONTINUE emit time, since that's when we + * know where the start of the loop is. + */ + insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + assert(insn->bits3.break_cont.uip != 0); + assert(insn->bits3.break_cont.jip != 0); + break; + } + } +} + void brw_ff_sync(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, @@ -2013,7 +2155,7 @@ void brw_ff_sync(struct brw_compile *p, } insn = next_insn(p, BRW_OPCODE_SEND); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index edb02fabb23..c3cbe0df618 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -600,8 +600,13 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) * might be able to do better by doing execsize = 1 math and then * expanding that result out, but we would need to be careful with * masking. + * + * The hardware ignores source modifiers (negate and abs) on math + * instructions, so we also move to a temp to set those up. */ - if (intel->gen >= 6 && src.file == UNIFORM) { + if (intel->gen >= 6 && (src.file == UNIFORM || + src.abs || + src.negate)) { fs_reg expanded = fs_reg(this, glsl_type::float_type); emit(fs_inst(BRW_OPCODE_MOV, expanded, src)); src = expanded; @@ -933,6 +938,10 @@ fs_visitor::visit(ir_expression *ir) assert(!"not reached: should be handled by lower_noise"); break; + case ir_quadop_vector: + assert(!"not reached: should be handled by lower_quadop_vector"); + break; + case ir_unop_sqrt: emit_math(FS_OPCODE_SQRT, this->result, op[0]); break; @@ -1423,28 +1432,70 @@ fs_visitor::visit(ir_discard *ir) void fs_visitor::visit(ir_constant *ir) { - fs_reg reg(this, ir->type); - this->result = reg; + /* Set this->result to reg at the bottom of the function because some code + * paths will cause this visitor to be applied to other fields. This will + * cause the value stored in this->result to be modified. + * + * Make reg constant so that it doesn't get accidentally modified along the + * way. Yes, I actually had this problem. :( + */ + const fs_reg reg(this, ir->type); + fs_reg dst_reg = reg; - for (unsigned int i = 0; i < ir->type->vector_elements; i++) { - switch (ir->type->base_type) { - case GLSL_TYPE_FLOAT: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i]))); - break; - case GLSL_TYPE_UINT: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i]))); - break; - case GLSL_TYPE_INT: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i]))); - break; - case GLSL_TYPE_BOOL: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i]))); - break; - default: - assert(!"Non-float/uint/int/bool constant"); + if (ir->type->is_array()) { + const unsigned size = type_size(ir->type->fields.array); + + for (unsigned i = 0; i < ir->type->length; i++) { + ir->array_elements[i]->accept(this); + fs_reg src_reg = this->result; + + dst_reg.type = src_reg.type; + for (unsigned j = 0; j < size; j++) { + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg)); + src_reg.reg_offset++; + dst_reg.reg_offset++; + } + } + } else if (ir->type->is_record()) { + foreach_list(node, &ir->components) { + ir_instruction *const field = (ir_instruction *) node; + const unsigned size = type_size(field->type); + + field->accept(this); + fs_reg src_reg = this->result; + + dst_reg.type = src_reg.type; + for (unsigned j = 0; j < size; j++) { + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg)); + src_reg.reg_offset++; + dst_reg.reg_offset++; + } + } + } else { + const unsigned size = type_size(ir->type); + + for (unsigned i = 0; i < size; i++) { + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i]))); + break; + case GLSL_TYPE_UINT: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i]))); + break; + case GLSL_TYPE_INT: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i]))); + break; + case GLSL_TYPE_BOOL: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i]))); + break; + default: + assert(!"Non-float/uint/int/bool constant"); + } + dst_reg.reg_offset++; } - reg.reg_offset++; } + + this->result = reg; } void @@ -1574,7 +1625,7 @@ fs_visitor::emit_if_gen6(ir_if *ir) switch (expr->operation) { case ir_unop_logic_not: - inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(1))); + inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(0))); inst->conditional_mod = BRW_CONDITIONAL_Z; return; @@ -1951,7 +2002,7 @@ fs_visitor::emit_interpolation_setup_gen6() emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y)); this->current_annotation = "compute 1/pos.w"; - this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0)); + this->wpos_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); this->pixel_w = fs_reg(this, glsl_type::float_type); emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); @@ -1979,17 +2030,17 @@ fs_visitor::emit_fb_writes() nr += 2; } - if (c->key.aa_dest_stencil_reg) { + if (c->aa_dest_stencil_reg) { emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0)))); + fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)))); } /* Reserve space for color. It'll be filled in per MRT below. */ int color_mrf = nr; nr += 4; - if (c->key.source_depth_to_render_target) { - if (c->key.computes_depth) { + if (c->source_depth_to_render_target) { + if (c->computes_depth) { /* Hand over gl_FragDepth. */ assert(this->frag_depth); fs_reg depth = *(variable_storage(this->frag_depth)); @@ -1998,20 +2049,22 @@ fs_visitor::emit_fb_writes() } else { /* Pass through the payload depth. */ emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0)))); + fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); } } - if (c->key.dest_depth_reg) { + if (c->dest_depth_reg) { emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0)))); + fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)))); } fs_reg color = reg_undef; if (this->frag_color) color = *(variable_storage(this->frag_color)); - else if (this->frag_data) + else if (this->frag_data) { color = *(variable_storage(this->frag_data)); + color.type = BRW_REGISTER_TYPE_F; + } for (int target = 0; target < c->key.nr_color_regions; target++) { this->current_annotation = talloc_asprintf(this->mem_ctx, @@ -2452,7 +2505,7 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst) void fs_visitor::assign_curb_setup() { - c->prog_data.first_curbe_grf = c->key.nr_payload_regs; + c->prog_data.first_curbe_grf = c->nr_payload_regs; c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8; /* Map the offsets in the UNIFORM file to fixed HW regs. */ @@ -3227,6 +3280,7 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) break; default: assert(!"not reached"); + brw_reg = brw_null_reg(); break; } break; @@ -3241,6 +3295,10 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) assert(!"not reached"); brw_reg = brw_null_reg(); break; + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; } if (reg->abs) brw_reg = brw_abs(brw_reg); @@ -3373,10 +3431,6 @@ fs_visitor::generate_code() break; case BRW_OPCODE_DO: - /* FINISHME: We need to write the loop instruction support still. */ - if (intel->gen >= 6) - this->fail = true; - loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); if_depth_in_loop[loop_stack_depth] = 0; break; @@ -3386,7 +3440,11 @@ fs_visitor::generate_code() brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_CONTINUE: - brw_CONT(p, if_depth_in_loop[loop_stack_depth]); + /* FINISHME: We need to write the loop instruction support still. */ + if (intel->gen >= 6) + brw_CONT_gen6(p, loop_stack[loop_stack_depth - 1]); + else + brw_CONT(p, if_depth_in_loop[loop_stack_depth]); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; @@ -3400,16 +3458,18 @@ fs_visitor::generate_code() assert(loop_stack_depth > 0); loop_stack_depth--; inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); - /* patch all the BREAK/CONT instructions from last BGNLOOP */ - while (inst0 > loop_stack[loop_stack_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + if (intel->gen < 6) { + /* patch all the BREAK/CONT instructions from last BGNLOOP */ + while (inst0 > loop_stack[loop_stack_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + } } } } @@ -3486,6 +3546,26 @@ fs_visitor::generate_code() last_native_inst = p->nr_insn; } + + brw_set_uip_jip(p); + + /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS + * emit issues, it doesn't get the jump distances into the output, + * which is often something we want to debug. So this is here in + * case you're doing that. + */ + if (0) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { + for (unsigned int i = 0; i < p->nr_insn; i++) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)&p->store[i])[3], + ((uint32_t *)&p->store[i])[2], + ((uint32_t *)&p->store[i])[1], + ((uint32_t *)&p->store[i])[0]); + brw_disasm(stdout, &p->store[i], intel->gen); + } + } + } } GLboolean diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index 3b7b03a05b8..20bfa4c3ea3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -205,6 +205,8 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_round_even: case ir_unop_sin: case ir_unop_cos: + case ir_unop_sin_reduced: + case ir_unop_cos_reduced: case ir_unop_dFdx: case ir_unop_dFdy: for (i = 0; i < vector_elements; i++) { @@ -328,6 +330,9 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_noise: assert(!"noise should have been broken down to function call"); break; + case ir_quadop_vector: + assert(!"should have been lowered"); + break; } ir->remove(); diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index b0c76f4094d..73b41fdbcef 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -166,6 +166,9 @@ static void populate_key( struct brw_context *brw, struct brw_gs_prog_key *key ) { struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + int prim_gs_always; + memset(key, 0, sizeof(*key)); /* CACHE_NEW_VS_PROG */ @@ -185,10 +188,14 @@ static void populate_key( struct brw_context *brw, key->pv_first = GL_TRUE; } - key->need_gs_prog = (key->hint_gs_always || - brw->primitive == GL_QUADS || + if (intel->gen == 6) + prim_gs_always = brw->primitive == GL_LINE_LOOP; + else + prim_gs_always = brw->primitive == GL_QUADS || brw->primitive == GL_QUAD_STRIP || - brw->primitive == GL_LINE_LOOP); + brw->primitive == GL_LINE_LOOP; + + key->need_gs_prog = (key->hint_gs_always || prim_gs_always); } /* Calculate interpolants for triangle and line rasterization. @@ -205,8 +212,10 @@ static void prepare_gs_prog(struct brw_context *brw) brw->gs.prog_active = key.need_gs_prog; } + drm_intel_bo_unreference(brw->gs.prog_bo); + brw->gs.prog_bo = NULL; + if (brw->gs.prog_active) { - drm_intel_bo_unreference(brw->gs.prog_bo); brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG, &key, sizeof(key), NULL, 0, diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 1d350bc0413..a91b0528fac 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -38,40 +38,6 @@ #include "brw_state.h" #include "brw_defines.h" - - - - -/*********************************************************************** - * Blend color - */ - -static void upload_blend_constant_color(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->intel.ctx; - struct brw_blend_constant_color bcc; - - memset(&bcc, 0, sizeof(bcc)); - bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR; - bcc.header.length = sizeof(bcc)/4-2; - bcc.blend_constant_color[0] = ctx->Color.BlendColor[0]; - bcc.blend_constant_color[1] = ctx->Color.BlendColor[1]; - bcc.blend_constant_color[2] = ctx->Color.BlendColor[2]; - bcc.blend_constant_color[3] = ctx->Color.BlendColor[3]; - - BRW_CACHED_BATCH_STRUCT(brw, &bcc); -} - - -const struct brw_tracked_state brw_blend_constant_color = { - .dirty = { - .mesa = _NEW_COLOR, - .brw = BRW_NEW_CONTEXT, - .cache = 0 - }, - .emit = upload_blend_constant_color -}; - /* Constant single cliprect for framebuffer object or DRI2 drawing */ static void upload_drawing_rect(struct brw_context *brw) { @@ -339,6 +305,9 @@ static void upload_polygon_stipple(struct brw_context *brw) struct brw_polygon_stipple bps; GLuint i; + if (!ctx->Polygon.StippleFlag) + return; + memset(&bps, 0, sizeof(bps)); bps.header.opcode = CMD_POLY_STIPPLE_PATTERN; bps.header.length = sizeof(bps)/4-2; @@ -381,6 +350,9 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct brw_polygon_stipple_offset bpso; + if (!ctx->Polygon.StippleFlag) + return; + memset(&bpso, 0, sizeof(bpso)); bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; bpso.header.length = sizeof(bpso)/4-2; @@ -409,7 +381,7 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) const struct brw_tracked_state brw_polygon_stipple_offset = { .dirty = { - .mesa = _NEW_WINDOW_POS, + .mesa = _NEW_WINDOW_POS | _NEW_POLYGONSTIPPLE, .brw = BRW_NEW_CONTEXT, .cache = 0 }, @@ -421,9 +393,10 @@ const struct brw_tracked_state brw_polygon_stipple_offset = { */ static void upload_aa_line_parameters(struct brw_context *brw) { + struct gl_context *ctx = &brw->intel.ctx; struct brw_aa_line_parameters balp; - if (!brw->has_aa_line_parameters) + if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters) return; /* use legacy aa line coverage computation */ @@ -436,7 +409,7 @@ static void upload_aa_line_parameters(struct brw_context *brw) const struct brw_tracked_state brw_aa_line_parameters = { .dirty = { - .mesa = 0, + .mesa = _NEW_LINE, .brw = BRW_NEW_CONTEXT, .cache = 0 }, @@ -454,6 +427,9 @@ static void upload_line_stipple(struct brw_context *brw) GLfloat tmp; GLint tmpi; + if (!ctx->Line.StippleFlag) + return; + memset(&bls, 0, sizeof(bls)); bls.header.opcode = CMD_LINE_STIPPLE_PATTERN; bls.header.length = sizeof(bls)/4 - 2; diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 1367d814696..94efa791091 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -142,7 +142,6 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx, if (newFP == curFP) brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; newFP->id = brw->program_id++; - newFP->isGLSL = brw_wm_is_glsl(fprog); /* Don't reject fragment shaders for their Mesa IR state when we're * using the new FS backend. diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 338f3876b31..eba4411ca70 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -129,7 +129,7 @@ const struct brw_tracked_state *gen6_atoms[] = &brw_vs_constants, /* Before vs_surfaces and constant_buffer */ &brw_wm_constants, /* Before wm_surfaces and constant_buffer */ - &gen6_wm_constants, /* Before wm_surfaces and constant_buffer */ + &gen6_wm_constants, /* Before wm_state */ &brw_vs_surfaces, /* must do before unit */ &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 8ce9af9c4fe..461f27048cc 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -1064,6 +1064,15 @@ struct brw_sampler_default_color { GLfloat color[4]; }; +struct gen5_sampler_default_color { + uint8_t ub[4]; + float f[4]; + uint16_t hf[4]; + uint16_t us[4]; + int16_t s[4]; + uint8_t b[4]; +}; + struct brw_sampler_state { @@ -1169,7 +1178,12 @@ struct brw_surface_state GLuint cube_neg_y:1; GLuint cube_pos_x:1; GLuint cube_neg_x:1; - GLuint pad:4; + GLuint pad:2; + /* Required on gen6 for surfaces accessed through render cache messages. + */ + GLuint render_cache_read_write:1; + /* Ironlake and newer: instead of replicating one of the texels */ + GLuint cube_corner_average:1; GLuint mipmap_layout_mode:1; GLuint vert_line_stride_ofs:1; GLuint vert_line_stride:1; @@ -1539,6 +1553,21 @@ struct brw_instruction GLuint pad0:12; } if_else; + struct + { + /* Signed jump distance to the ip to jump to if all channels + * are disabled after the break or continue. It should point + * to the end of the innermost control flow block, as that's + * where some channel could get re-enabled. + */ + int jip:16; + + /* Signed jump distance to the location to resume execution + * of this channel if it's enabled for the break or continue. + */ + int uip:16; + } break_cont; + struct { GLuint function:4; GLuint int_type:1; diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 4a41c7a5176..6ae75d22c14 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -99,8 +99,8 @@ static void do_vs_prog( struct brw_context *brw, (void) ctx; aux_size = sizeof(c.prog_data); - if (c.vp->use_const_buffer) - aux_size += c.vp->program.Base.Parameters->NumParameters; + /* constant_map */ + aux_size += c.vp->program.Base.Parameters->NumParameters; drm_intel_bo_unreference(brw->vs.prog_bo); brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG, @@ -130,6 +130,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || ctx->Polygon.BackMode != GL_FILL); + key.two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); /* _NEW_POINT */ if (ctx->Point.PointSprite) { @@ -157,7 +158,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) */ const struct brw_tracked_state brw_vs_prog = { .dirty = { - .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT, + .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT | _NEW_LIGHT, .brw = BRW_NEW_VERTEX_PROGRAM, .cache = 0 }, diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 9338a6b7dbf..0b88cc1ec76 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -44,6 +44,7 @@ struct brw_vs_prog_key { GLuint nr_userclip:4; GLuint copy_edgeflag:1; GLuint point_coord_replace:8; + GLuint two_side_color: 1; }; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 7e43324a1f9..09887dae95d 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -140,9 +140,13 @@ clear_current_const(struct brw_vs_compile *c) static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { struct intel_context *intel = &c->func.brw->intel; - GLuint i, reg = 0, mrf; + GLuint i, reg = 0, mrf, j; int attributes_in_vue; int first_reladdr_output; + int max_constant; + int constant = 0; + int vert_result_reoder[VERT_RESULT_MAX]; + int bfc = 0; /* Determine whether to use a real constant buffer or use a block * of GRF registers for constants. The later is faster but only @@ -181,62 +185,81 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) } - /* Vertex program parameters from curbe: + /* Assign some (probably all) of the vertex program constants to + * the push constant buffer/CURBE. + * + * There's an obvious limit to the numer of push constants equal to + * the number of register available, and that number is smaller + * than the minimum maximum number of vertex program parameters, so + * support for pull constants is required if we overflow. + * Additionally, on gen6 the number of push constants is even + * lower. + * + * When there's relative addressing, we don't know what range of + * Mesa IR registers can be accessed. And generally, when relative + * addressing is used we also have too many constants to load them + * all as push constants. So, we'll just support relative + * addressing out of the pull constant buffers, and try to load as + * many statically-accessed constants into the push constant buffer + * as we can. */ - if (c->vp->use_const_buffer) { - int max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries; - int constant = 0; - - /* We've got more constants than we can load with the push - * mechanism. This is often correlated with reladdr loads where - * we should probably be using a pull mechanism anyway to avoid - * excessive reading. However, the pull mechanism is slow in - * general. So, we try to allocate as many non-reladdr-loaded - * constants through the push buffer as we can before giving up. - */ - memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters); - for (i = 0; - i < c->vp->program.Base.NumInstructions && constant < max_constant; - i++) { - struct prog_instruction *inst = &c->vp->program.Base.Instructions[i]; - int arg; - - for (arg = 0; arg < 3 && constant < max_constant; arg++) { - if ((inst->SrcReg[arg].File != PROGRAM_STATE_VAR && - inst->SrcReg[arg].File != PROGRAM_CONSTANT && - inst->SrcReg[arg].File != PROGRAM_UNIFORM && - inst->SrcReg[arg].File != PROGRAM_ENV_PARAM && - inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) || - inst->SrcReg[arg].RelAddr) - continue; - - if (c->constant_map[inst->SrcReg[arg].Index] == -1) { - c->constant_map[inst->SrcReg[arg].Index] = constant++; - } + if (intel->gen >= 6) { + /* We can only load 32 regs of push constants. */ + max_constant = 32 * 2 - c->key.nr_userclip; + } else { + max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries; + } + + /* constant_map maps from ParameterValues[] index to index in the + * push constant buffer, or -1 if it's only in the pull constant + * buffer. + */ + memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters); + for (i = 0; + i < c->vp->program.Base.NumInstructions && constant < max_constant; + i++) { + struct prog_instruction *inst = &c->vp->program.Base.Instructions[i]; + int arg; + + for (arg = 0; arg < 3 && constant < max_constant; arg++) { + if (inst->SrcReg[arg].File != PROGRAM_STATE_VAR && + inst->SrcReg[arg].File != PROGRAM_CONSTANT && + inst->SrcReg[arg].File != PROGRAM_UNIFORM && + inst->SrcReg[arg].File != PROGRAM_ENV_PARAM && + inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) { + continue; } - } - for (i = 0; i < constant; i++) { - c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, - (i%2) * 4), - 0, 4, 1); + if (inst->SrcReg[arg].RelAddr) { + c->vp->use_const_buffer = GL_TRUE; + continue; + } + + if (c->constant_map[inst->SrcReg[arg].Index] == -1) { + c->constant_map[inst->SrcReg[arg].Index] = constant++; + } } - reg += (constant + 1) / 2; - c->prog_data.curb_read_length = reg - 1; - /* XXX 0 causes a bug elsewhere... */ - c->prog_data.nr_params = MAX2(constant * 4, 4); } - else { - /* use a section of the GRF for constants */ - GLuint nr_params = c->vp->program.Base.Parameters->NumParameters; - for (i = 0; i < nr_params; i++) { - c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); - } - reg += (nr_params + 1) / 2; - c->prog_data.curb_read_length = reg - 1; - c->prog_data.nr_params = nr_params * 4; + /* If we ran out of push constant space, then we'll also upload all + * constants through the pull constant buffer so that they can be + * accessed no matter what. For relative addressing (the common + * case) we need them all in place anyway. + */ + if (constant == max_constant) + c->vp->use_const_buffer = GL_TRUE; + + for (i = 0; i < constant; i++) { + c->regs[PROGRAM_STATE_VAR][i] = stride(brw_vec4_grf(reg + i / 2, + (i % 2) * 4), + 0, 4, 1); } + reg += (constant + 1) / 2; + c->prog_data.curb_read_length = reg - 1; + c->prog_data.nr_params = constant * 4; + /* XXX 0 causes a bug elsewhere... */ + if (intel->gen < 6 && c->prog_data.nr_params == 0) + c->prog_data.nr_params = 4; /* Allocate input regs: */ @@ -270,7 +293,36 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) mrf = 4; first_reladdr_output = get_first_reladdr_output(&c->vp->program); - for (i = 0; i < VERT_RESULT_MAX; i++) { + + for (i = 0; i < VERT_RESULT_MAX; i++) + vert_result_reoder[i] = i; + + /* adjust attribute order in VUE for BFC0/BFC1 on Gen6+ */ + if (intel->gen >= 6 && c->key.two_side_color) { + if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) && + (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) { + assert(c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)); + assert(c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0)); + bfc = 2; + } else if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) && + (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0))) + bfc = 1; + + if (bfc) { + for (i = 0; i < bfc; i++) { + vert_result_reoder[VERT_RESULT_COL0 + i * 2 + 0] = VERT_RESULT_COL0 + i; + vert_result_reoder[VERT_RESULT_COL0 + i * 2 + 1] = VERT_RESULT_BFC0 + i; + } + + for (i = VERT_RESULT_COL0 + bfc * 2; i < VERT_RESULT_BFC0 + bfc; i++) { + vert_result_reoder[i] = i - bfc; + } + } + } + + for (j = 0; j < VERT_RESULT_MAX; j++) { + i = vert_result_reoder[j]; + if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) { c->nr_outputs++; assert(i < Elements(c->regs[PROGRAM_OUTPUT])); @@ -281,7 +333,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) else if (i == VERT_RESULT_PSIZ) { c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); reg++; - mrf++; /* just a placeholder? XXX fix later stages & remove this */ } else { /* Two restrictions on our compute-to-MRF here. The @@ -574,9 +625,18 @@ static void emit_max( struct brw_compile *p, struct brw_reg arg0, struct brw_reg arg1 ) { - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1); - brw_SEL(p, dst, arg0, arg1); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_GE); + brw_SEL(p, dst, arg0, arg1); + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } else { + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1); + brw_SEL(p, dst, arg0, arg1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } } static void emit_min( struct brw_compile *p, @@ -584,9 +644,18 @@ static void emit_min( struct brw_compile *p, struct brw_reg arg0, struct brw_reg arg1 ) { - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); - brw_SEL(p, dst, arg0, arg1); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_SEL(p, dst, arg0, arg1); + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } else { + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); + brw_SEL(p, dst, arg0, arg1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } } static void emit_math1_gen4(struct brw_vs_compile *c, @@ -680,7 +749,7 @@ emit_math1(struct brw_vs_compile *c, emit_math1_gen4(c, function, dst, arg0, precision); } -static void emit_math2( struct brw_vs_compile *c, +static void emit_math2_gen4( struct brw_vs_compile *c, GLuint function, struct brw_reg dst, struct brw_reg arg0, @@ -688,14 +757,11 @@ static void emit_math2( struct brw_vs_compile *c, GLuint precision) { struct brw_compile *p = &c->func; - struct intel_context *intel = &p->brw->intel; struct brw_reg tmp = dst; GLboolean need_tmp = GL_FALSE; - if (dst.file != BRW_GENERAL_REGISTER_FILE) - need_tmp = GL_TRUE; - - if (intel->gen < 6 && dst.dw1.bits.writemask != 0xf) + if (dst.file != BRW_GENERAL_REGISTER_FILE || + dst.dw1.bits.writemask != 0xf) need_tmp = GL_TRUE; if (need_tmp) @@ -718,6 +784,53 @@ static void emit_math2( struct brw_vs_compile *c, } } +static void emit_math2_gen6( struct brw_vs_compile *c, + GLuint function, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + GLuint precision) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp_src0, tmp_src1, tmp_dst; + + tmp_src0 = get_tmp(c); + tmp_src1 = get_tmp(c); + tmp_dst = get_tmp(c); + + brw_MOV(p, tmp_src0, arg0); + brw_MOV(p, tmp_src1, arg1); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_math2(p, + tmp_dst, + function, + tmp_src0, + tmp_src1); + brw_set_access_mode(p, BRW_ALIGN_16); + + brw_MOV(p, dst, tmp_dst); + + release_tmp(c, tmp_src0); + release_tmp(c, tmp_src1); + release_tmp(c, tmp_dst); +} + +static void emit_math2( struct brw_vs_compile *c, + GLuint function, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + GLuint precision) +{ + struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6) + emit_math2_gen6(c, function, dst, arg0, arg1, precision); + else + emit_math2_gen4(c, function, dst, arg0, arg1, precision); +} static void emit_exp_noalias( struct brw_vs_compile *c, struct brw_reg dst, @@ -990,8 +1103,6 @@ get_constant(struct brw_vs_compile *c, assert(argIndex < 3); - assert(c->func.brw->intel.gen < 6); /* FINISHME */ - if (c->current_const[argIndex].index != src->Index) { /* Keep track of the last constant loaded in this slot, for reuse. */ c->current_const[argIndex].index = src->Index; @@ -1022,14 +1133,14 @@ get_reladdr_constant(struct brw_vs_compile *c, { const struct prog_src_register *src = &inst->SrcReg[argIndex]; struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; struct brw_reg const_reg = c->current_const[argIndex].reg; - struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; - struct brw_reg byte_addr_reg = retype(get_tmp(c), BRW_REGISTER_TYPE_D); + struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0]; + uint32_t offset; assert(argIndex < 3); - assert(c->func.brw->intel.gen < 6); /* FINISHME */ - /* Can't reuse a reladdr constant load. */ c->current_const[argIndex].index = -1; @@ -1038,15 +1149,21 @@ get_reladdr_constant(struct brw_vs_compile *c, src->Index, argIndex, c->current_const[argIndex].reg.nr); #endif - brw_MUL(p, byte_addr_reg, addrReg, brw_imm_ud(16)); + if (intel->gen >= 6) { + offset = src->Index; + } else { + struct brw_reg byte_addr_reg = retype(get_tmp(c), BRW_REGISTER_TYPE_D); + brw_MUL(p, byte_addr_reg, addr_reg, brw_imm_d(16)); + addr_reg = byte_addr_reg; + offset = 16 * src->Index; + } /* fetch the first vec4 */ brw_dp_READ_4_vs_relative(p, - const_reg, /* writeback dest */ - byte_addr_reg, /* address register */ - 16 * src->Index, /* byte offset */ - SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ - ); + const_reg, + addr_reg, + offset, + SURF_INDEX_VERT_CONST_BUFFER); return const_reg; } @@ -1241,22 +1358,18 @@ get_src_reg( struct brw_vs_compile *c, case PROGRAM_UNIFORM: case PROGRAM_ENV_PARAM: case PROGRAM_LOCAL_PARAM: - if (c->vp->use_const_buffer) { - if (!relAddr && c->constant_map[index] != -1) { - assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0); - return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]]; - } else if (relAddr) + if (!relAddr && c->constant_map[index] != -1) { + /* Take from the push constant buffer if possible. */ + assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0); + return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]]; + } else { + /* Must be in the pull constant buffer then .*/ + assert(c->vp->use_const_buffer); + if (relAddr) return get_reladdr_constant(c, inst, argIndex); else return get_constant(c, inst, argIndex); } - else if (relAddr) { - return deref(c, c->regs[PROGRAM_STATE_VAR][0], index, 16); - } - else { - assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); - return c->regs[PROGRAM_STATE_VAR][index]; - } case PROGRAM_ADDRESS: assert(index == 0); return c->regs[file][index]; @@ -1585,6 +1698,8 @@ static void emit_vertex_write( struct brw_vs_compile *c) break; if (!(c->prog_data.outputs_written & BITFIELD64_BIT(i))) continue; + if (i == VERT_RESULT_PSIZ) + continue; if (i >= VERT_RESULT_TEX0 && c->regs[PROGRAM_OUTPUT][i].file == BRW_GENERAL_REGISTER_FILE) { @@ -1895,7 +2010,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); break; case OPCODE_RSQ: - emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, brw_abs(args[0]), BRW_MATH_PRECISION_FULL); break; case OPCODE_SEQ: @@ -1969,35 +2084,42 @@ void brw_vs_emit(struct brw_vs_compile *c ) break; case OPCODE_CONT: brw_set_predicate_control(p, get_predicate(inst)); - brw_CONT(p, if_depth_in_loop[loop_depth]); + if (intel->gen >= 6) { + brw_CONT_gen6(p, loop_inst[loop_depth - 1]); + } else { + brw_CONT(p, if_depth_in_loop[loop_depth]); + } brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; - case OPCODE_ENDLOOP: - { - clear_current_const(c); - struct brw_instruction *inst0, *inst1; - GLuint br = 1; - - loop_depth--; - - if (intel->gen == 5) - br = 2; - - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); - /* patch all the BREAK/CONT instructions from last BEGINLOOP */ - while (inst0 > loop_inst[loop_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && + + case OPCODE_ENDLOOP: { + clear_current_const(c); + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + loop_depth--; + + if (intel->gen == 5) + br = 2; + + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + + if (intel->gen < 6) { + /* patch all the BREAK/CONT instructions from last BEGINLOOP */ + while (inst0 > loop_inst[loop_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK && inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); - } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); - } - } - } + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + } + } + } + } break; + case OPCODE_BRA: brw_set_predicate_control(p, get_predicate(inst)); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); @@ -2088,6 +2210,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) } brw_resolve_cals(p); + brw_set_uip_jip(p); brw_optimize(p); diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index ccdc18e0b8d..656501b4f79 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -119,6 +119,62 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) brw_wm_emit(c); } +static void +brw_wm_payload_setup(struct brw_context *brw, + struct brw_wm_compile *c) +{ + struct intel_context *intel = &brw->intel; + bool uses_depth = (c->fp->program.Base.InputsRead & + (1 << FRAG_ATTRIB_WPOS)) != 0; + + if (intel->gen >= 6) { + /* R0-1: masks, pixel X/Y coordinates. */ + c->nr_payload_regs = 2; + /* R2: only for 32-pixel dispatch.*/ + /* R3-4: perspective pixel location barycentric */ + c->nr_payload_regs += 2; + /* R5-6: perspective pixel location bary for dispatch width != 8 */ + if (c->dispatch_width == 16) { + c->nr_payload_regs += 2; + } + /* R7-10: perspective centroid barycentric */ + /* R11-14: perspective sample barycentric */ + /* R15-18: linear pixel location barycentric */ + /* R19-22: linear centroid barycentric */ + /* R23-26: linear sample barycentric */ + + /* R27: interpolated depth if uses source depth */ + if (uses_depth) { + c->source_depth_reg = c->nr_payload_regs; + c->nr_payload_regs++; + if (c->dispatch_width == 16) { + /* R28: interpolated depth if not 8-wide. */ + c->nr_payload_regs++; + } + } + /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W. + */ + if (uses_depth) { + c->source_w_reg = c->nr_payload_regs; + c->nr_payload_regs++; + if (c->dispatch_width == 16) { + /* R30: interpolated W if not 8-wide. */ + c->nr_payload_regs++; + } + } + /* R31: MSAA position offsets. */ + /* R32-: bary for 32-pixel. */ + /* R58-59: interp W for 32-pixel. */ + + if (c->fp->program.Base.OutputsWritten & + BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + c->source_depth_to_render_target = GL_TRUE; + c->computes_depth = GL_TRUE; + } + } else { + brw_wm_lookup_iz(intel, c); + } +} /** * All Mesa program -> GPU code generation goes through this function. @@ -167,23 +223,18 @@ static void do_wm_prog( struct brw_context *brw, brw_init_compile(brw, &c->func); - /* temporary sanity check assertion */ - ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program)); + brw_wm_payload_setup(brw, c); if (!brw_wm_fs_emit(brw, c)) { /* * Shader which use GLSL features such as flow control are handled * differently from "simple" shaders. */ - if (fp->isGLSL) { - c->dispatch_width = 8; - brw_wm_glsl_emit(brw, c); - } - else { - c->dispatch_width = 16; - brw_wm_non_glsl_emit(brw, c); - } + c->dispatch_width = 16; + brw_wm_payload_setup(brw, c); + brw_wm_non_glsl_emit(brw, c); } + c->prog_data.dispatch_width = c->dispatch_width; /* Scratch space is used for register spilling */ if (c->last_scratch) { @@ -220,12 +271,10 @@ static void do_wm_prog( struct brw_context *brw, static void brw_wm_populate_key( struct brw_context *brw, struct brw_wm_prog_key *key ) { - struct intel_context *intel = &brw->intel; struct gl_context *ctx = &brw->intel.ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ const struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; - GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; GLuint lookup = 0; GLuint line_aa; GLuint i; @@ -285,57 +334,9 @@ static void brw_wm_populate_key( struct brw_context *brw, } } - if (intel->gen >= 6) { - /* R0-1: masks, pixel X/Y coordinates. */ - key->nr_payload_regs = 2; - /* R2: only for 32-pixel dispatch.*/ - /* R3-4: perspective pixel location barycentric */ - key->nr_payload_regs += 2; - /* R5-6: perspective pixel location bary for dispatch width != 8 */ - if (!fp->isGLSL) { /* dispatch_width != 8 */ - key->nr_payload_regs += 2; - } - /* R7-10: perspective centroid barycentric */ - /* R11-14: perspective sample barycentric */ - /* R15-18: linear pixel location barycentric */ - /* R19-22: linear centroid barycentric */ - /* R23-26: linear sample barycentric */ - - /* R27: interpolated depth if uses source depth */ - if (uses_depth) { - key->source_depth_reg = key->nr_payload_regs; - key->nr_payload_regs++; - if (!fp->isGLSL) { /* dispatch_width != 8 */ - /* R28: interpolated depth if not 8-wide. */ - key->nr_payload_regs++; - } - } - /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W. - */ - if (uses_depth) { - key->source_w_reg = key->nr_payload_regs; - key->nr_payload_regs++; - if (!fp->isGLSL) { /* dispatch_width != 8 */ - /* R30: interpolated W if not 8-wide. */ - key->nr_payload_regs++; - } - } - /* R31: MSAA position offsets. */ - /* R32-: bary for 32-pixel. */ - /* R58-59: interp W for 32-pixel. */ - - if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { - key->source_depth_to_render_target = GL_TRUE; - key->computes_depth = GL_TRUE; - } - - } else { - brw_wm_lookup_iz(intel, - line_aa, - lookup, - uses_depth, - key); - } + key->iz_lookup = lookup; + key->line_aa = line_aa; + key->stats_wm = brw->intel.stats_wm; /* BRW_NEW_WM_INPUT_DIMENSIONS */ key->proj_attrib_mask = brw->wm.input_size_masks[4-1]; @@ -377,6 +378,10 @@ static void brw_wm_populate_key( struct brw_context *brw, swizzles[2] = SWIZZLE_ZERO; } else if (t->DepthMode == GL_LUMINANCE) { swizzles[3] = SWIZZLE_ONE; + } else if (t->DepthMode == GL_RED) { + swizzles[1] = SWIZZLE_ZERO; + swizzles[2] = SWIZZLE_ZERO; + swizzles[3] = SWIZZLE_ZERO; } } diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 2ca685784fc..e7f3cfbb75f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -59,16 +59,9 @@ #define AA_ALWAYS 2 struct brw_wm_prog_key { - GLuint source_depth_reg:3; - GLuint source_w_reg:3; - GLuint aa_dest_stencil_reg:3; - GLuint dest_depth_reg:3; - GLuint nr_payload_regs:4; - GLuint computes_depth:1; /* could be derived from program string */ - GLuint source_depth_to_render_target:1; + GLuint stats_wm:1; GLuint flat_shade:1; GLuint linear_color:1; /**< linear interpolation vs perspective interp */ - GLuint runtime_check_aads_emit:1; GLuint nr_color_regions:5; GLuint render_to_fbo:1; @@ -81,6 +74,8 @@ struct brw_wm_prog_key { GLushort drawable_height; GLbitfield64 vp_outputs_written; + GLuint iz_lookup; + GLuint line_aa; GLuint program_string_id:32; }; @@ -204,6 +199,15 @@ struct brw_wm_compile { PASS2_DONE } state; + GLuint source_depth_reg:3; + GLuint source_w_reg:3; + GLuint aa_dest_stencil_reg:3; + GLuint dest_depth_reg:3; + GLuint nr_payload_regs:4; + GLuint computes_depth:1; /* could be derived from program string */ + GLuint source_depth_to_render_target:1; + GLuint runtime_check_aads_emit:1; + /* Initial pass - translate fp instructions to fp instructions, * simplifying and adding instructions for interpolation and * framebuffer writes. @@ -306,14 +310,9 @@ void brw_wm_print_insn( struct brw_wm_compile *c, void brw_wm_print_program( struct brw_wm_compile *c, const char *stage ); -void brw_wm_lookup_iz( struct intel_context *intel, - GLuint line_aa, - GLuint lookup, - GLboolean ps_uses_depth, - struct brw_wm_prog_key *key ); +void brw_wm_lookup_iz(struct intel_context *intel, + struct brw_wm_compile *c); -GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); -void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); GLboolean brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c); /* brw_wm_emit.c */ @@ -381,7 +380,6 @@ void emit_fb_write(struct brw_wm_compile *c, void emit_frontfacing(struct brw_compile *p, const struct brw_reg *dst, GLuint mask); -void emit_kil_nv(struct brw_wm_compile *c); void emit_linterp(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 96fecc97ee2..a0e86034e1e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -896,10 +896,14 @@ void emit_math1(struct brw_wm_compile *c, BRW_MATH_SATURATE_NONE); struct brw_reg src; - if (intel->gen >= 6 && (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 || - arg0[0].file != BRW_GENERAL_REGISTER_FILE)) { + if (intel->gen >= 6 && ((arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 || + arg0[0].file != BRW_GENERAL_REGISTER_FILE) || + arg0[0].negate || arg0[0].abs)) { /* Gen6 math requires that source and dst horizontal stride be 1, * and that the argument be in the GRF. + * + * The hardware ignores source modifiers (negate and abs) on math + * instructions, so we also move to a temp to set those up. */ src = dst[dst_chan]; brw_MOV(p, src, arg0[0]); @@ -1301,9 +1305,15 @@ static void emit_kil( struct brw_wm_compile *c, struct brw_reg *arg0) { struct brw_compile *p = &c->func; - struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + struct intel_context *intel = &p->brw->intel; + struct brw_reg pixelmask; GLuint i, j; + if (intel->gen >= 6) + pixelmask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); + else + pixelmask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + for (i = 0; i < 4; i++) { /* Check if we've already done the comparison for this reg * -- common when someone does KIL TEMP.wwww. @@ -1319,26 +1329,11 @@ static void emit_kil( struct brw_wm_compile *c, brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0)); brw_set_predicate_control_flag_value(p, 0xff); brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_AND(p, r0uw, brw_flag_reg(), r0uw); + brw_AND(p, pixelmask, brw_flag_reg(), pixelmask); brw_pop_insn_state(p); } } -/* KIL_NV kills the pixels that are currently executing, not based on a test - * of the arguments. - */ -void emit_kil_nv( struct brw_wm_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); - - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */ - brw_AND(p, r0uw, c->emit_mask_reg, r0uw); - brw_pop_insn_state(p); -} - static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, GLuint nr, @@ -1387,8 +1382,8 @@ static void emit_aa( struct brw_wm_compile *c, GLuint reg ) { struct brw_compile *p = &c->func; - GLuint comp = c->key.aa_dest_stencil_reg / 2; - GLuint off = c->key.aa_dest_stencil_reg % 2; + GLuint comp = c->aa_dest_stencil_reg / 2; + GLuint off = c->aa_dest_stencil_reg % 2; struct brw_reg aa = offset(arg1[comp], off); brw_push_insn_state(p); @@ -1416,11 +1411,10 @@ void emit_fb_write(struct brw_wm_compile *c, struct intel_context *intel = &brw->intel; GLuint nr = 2; GLuint channel; - int base_reg; /* For gen6 fb write with no header, starting from color payload directly!. */ /* Reserve a space for AA - may not be needed: */ - if (c->key.aa_dest_stencil_reg) + if (c->aa_dest_stencil_reg) nr += 1; /* I don't really understand how this achieves the color interleave @@ -1428,11 +1422,6 @@ void emit_fb_write(struct brw_wm_compile *c, */ brw_push_insn_state(p); - if (intel->gen >= 6) - base_reg = nr; - else - base_reg = 0; - for (channel = 0; channel < 4; channel++) { if (intel->gen >= 6) { /* gen6 SIMD16 single source DP write looks like: @@ -1493,9 +1482,9 @@ void emit_fb_write(struct brw_wm_compile *c, brw_pop_insn_state(p); - if (c->key.source_depth_to_render_target) + if (c->source_depth_to_render_target) { - if (c->key.computes_depth) + if (c->computes_depth) brw_MOV(p, brw_message_reg(nr), arg2[2]); else brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */ @@ -1503,10 +1492,10 @@ void emit_fb_write(struct brw_wm_compile *c, nr += 2; } - if (c->key.dest_depth_reg) + if (c->dest_depth_reg) { - GLuint comp = c->key.dest_depth_reg / 2; - GLuint off = c->key.dest_depth_reg % 2; + GLuint comp = c->dest_depth_reg / 2; + GLuint off = c->dest_depth_reg % 2; if (off != 0) { brw_push_insn_state(p); @@ -1524,15 +1513,27 @@ void emit_fb_write(struct brw_wm_compile *c, } if (intel->gen >= 6) { - /* Subtract off the message header, since we send headerless. */ - nr -= 2; + /* Load the message header. There's no implied move from src0 + * to the base mrf on gen6. + */ + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, brw_message_reg(0), brw_vec8_grf(0, 0)); + brw_pop_insn_state(p); + + if (target != 0) { + brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + 0, + 2), BRW_REGISTER_TYPE_UD), + brw_imm_ud(target)); + } } - if (!c->key.runtime_check_aads_emit) { - if (c->key.aa_dest_stencil_reg) + if (!c->runtime_check_aads_emit) { + if (c->aa_dest_stencil_reg) emit_aa(c, arg1, 2); - fire_fb_write(c, base_reg, nr, target, eot); + fire_fb_write(c, 0, nr, target, eot); } else { struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); @@ -1897,10 +1898,6 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_kil(c, args[0]); break; - case OPCODE_KIL_NV: - emit_kil_nv(c); - break; - default: printf("Unsupported opcode %i (%s) in fragment shader\n", inst->opcode, inst->opcode < MAX_OPCODE ? diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 2cae6988804..4759b289a0c 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -338,11 +338,13 @@ static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) { - /* This is only called for producing 1/w in pre-gen6 interp. for - * gen6, the interp opcodes don't use this argument. + /* This is called for producing 1/w in pre-gen6 interp. for gen6, + * the interp opcodes don't use this argument. But to keep the + * nr_args = 3 expectations of pinterp happy, just stuff delta_xy + * into the slot. */ if (c->func.brw->intel.gen >= 6) - return src_undef(); + return c->delta_xy; if (src_is_undef(c->pixel_w)) { struct prog_dst_register pixel_w = get_temp(c); @@ -373,11 +375,7 @@ static void emit_interp( struct brw_wm_compile *c, struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); struct prog_src_register deltas; - if (c->func.brw->intel.gen < 6) { - deltas = get_delta_xy(c); - } else { - deltas = src_undef(); - } + deltas = get_delta_xy(c); /* Need to use PINTERP on attributes which have been * multiplied by 1/W in the SF program, and LINTERP on those @@ -1133,6 +1131,11 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) precalc_lit(c, inst); break; + case OPCODE_RSQ: + out = emit_scalar_insn(c, inst); + out->SrcReg[0].Abs = GL_TRUE; + break; + case OPCODE_TEX: precalc_tex(c, inst); break; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c deleted file mode 100644 index 7fe8ab1f334..00000000000 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ /dev/null @@ -1,1035 +0,0 @@ -#include "main/macros.h" -#include "program/prog_parameter.h" -#include "program/prog_print.h" -#include "program/prog_optimize.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" - -static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint component); - -/** - * Determine if the given fragment program uses GLSL features such - * as flow conditionals, loops, subroutines. - * Some GLSL shaders may use these features, others might not. - */ -GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) -{ - int i; - - if (unlikely(INTEL_DEBUG & DEBUG_GLSL_FORCE)) - return GL_TRUE; - - for (i = 0; i < fp->Base.NumInstructions; i++) { - const struct prog_instruction *inst = &fp->Base.Instructions[i]; - switch (inst->Opcode) { - case OPCODE_ARL: - case OPCODE_IF: - case OPCODE_ENDIF: - case OPCODE_CAL: - case OPCODE_BRK: - case OPCODE_RET: - case OPCODE_BGNLOOP: - return GL_TRUE; - default: - break; - } - } - return GL_FALSE; -} - - - -static void -reclaim_temps(struct brw_wm_compile *c); - - -/** Mark GRF register as used. */ -static void -prealloc_grf(struct brw_wm_compile *c, int r) -{ - c->used_grf[r] = GL_TRUE; -} - - -/** Mark given GRF register as not in use. */ -static void -release_grf(struct brw_wm_compile *c, int r) -{ - /*assert(c->used_grf[r]);*/ - c->used_grf[r] = GL_FALSE; - c->first_free_grf = MIN2(c->first_free_grf, r); -} - - -/** Return index of a free GRF, mark it as used. */ -static int -alloc_grf(struct brw_wm_compile *c) -{ - GLuint r; - for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { - if (!c->used_grf[r]) { - c->used_grf[r] = GL_TRUE; - c->first_free_grf = r + 1; /* a guess */ - return r; - } - } - - /* no free temps, try to reclaim some */ - reclaim_temps(c); - c->first_free_grf = 0; - - /* try alloc again */ - for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { - if (!c->used_grf[r]) { - c->used_grf[r] = GL_TRUE; - c->first_free_grf = r + 1; /* a guess */ - return r; - } - } - - for (r = 0; r < BRW_WM_MAX_GRF; r++) { - assert(c->used_grf[r]); - } - - /* really, no free GRF regs found */ - if (!c->out_of_regs) { - /* print warning once per compilation */ - _mesa_warning(NULL, "i965: ran out of registers for fragment program"); - c->out_of_regs = GL_TRUE; - } - - return -1; -} - - -/** Return number of GRF registers used */ -static int -num_grf_used(const struct brw_wm_compile *c) -{ - int r; - for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--) - if (c->used_grf[r]) - return r + 1; - return 0; -} - - - -/** - * Record the mapping of a Mesa register to a hardware register. - */ -static void set_reg(struct brw_wm_compile *c, int file, int index, - int component, struct brw_reg reg) -{ - c->wm_regs[file][index][component].reg = reg; - c->wm_regs[file][index][component].inited = GL_TRUE; -} - -static struct brw_reg alloc_tmp(struct brw_wm_compile *c) -{ - struct brw_reg reg; - - /* if we need to allocate another temp, grow the tmp_regs[] array */ - if (c->tmp_index == c->tmp_max) { - int r = alloc_grf(c); - if (r < 0) { - /*printf("Out of temps in %s\n", __FUNCTION__);*/ - r = 50; /* XXX random register! */ - } - c->tmp_regs[ c->tmp_max++ ] = r; - } - - /* form the GRF register */ - reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); - /*printf("alloc_temp %d\n", reg.nr);*/ - assert(reg.nr < BRW_WM_MAX_GRF); - return reg; - -} - -/** - * Save current temp register info. - * There must be a matching call to release_tmps(). - */ -static int mark_tmps(struct brw_wm_compile *c) -{ - return c->tmp_index; -} - -static void release_tmps(struct brw_wm_compile *c, int mark) -{ - c->tmp_index = mark; -} - -/** - * Convert Mesa src register to brw register. - * - * Since we're running in SOA mode each Mesa register corresponds to four - * hardware registers. We allocate the hardware registers as needed here. - * - * \param file register file, one of PROGRAM_x - * \param index register number - * \param component src component (X=0, Y=1, Z=2, W=3) - * \param nr not used?!? - * \param neg negate value? - * \param abs take absolute value? - */ -static struct brw_reg -get_reg(struct brw_wm_compile *c, int file, int index, int component, - int nr, GLuint neg, GLuint abs) -{ - struct brw_reg reg; - switch (file) { - case PROGRAM_STATE_VAR: - case PROGRAM_CONSTANT: - case PROGRAM_UNIFORM: - file = PROGRAM_STATE_VAR; - break; - case PROGRAM_UNDEFINED: - return brw_null_reg(); - case PROGRAM_TEMPORARY: - case PROGRAM_INPUT: - case PROGRAM_OUTPUT: - case PROGRAM_PAYLOAD: - break; - default: - _mesa_problem(NULL, "Unexpected file in get_reg()"); - return brw_null_reg(); - } - - assert(index < 256); - assert(component < 4); - - /* see if we've already allocated a HW register for this Mesa register */ - if (c->wm_regs[file][index][component].inited) { - /* yes, re-use */ - reg = c->wm_regs[file][index][component].reg; - } - else { - /* no, allocate new register */ - int grf = alloc_grf(c); - /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/ - if (grf < 0) { - /* totally out of temps */ - grf = 51; /* XXX random register! */ - } - - reg = brw_vec8_grf(grf, 0); - /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/ - - set_reg(c, file, index, component, reg); - } - - if (neg & (1 << component)) { - reg = negate(reg); - } - if (abs) - reg = brw_abs(reg); - return reg; -} - - - -/** - * This is called if we run out of GRF registers. Examine the live intervals - * of temp regs in the program and free those which won't be used again. - */ -static void -reclaim_temps(struct brw_wm_compile *c) -{ - GLint intBegin[MAX_PROGRAM_TEMPS]; - GLint intEnd[MAX_PROGRAM_TEMPS]; - int index; - - /*printf("Reclaim temps:\n");*/ - - _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns, - intBegin, intEnd); - - for (index = 0; index < MAX_PROGRAM_TEMPS; index++) { - if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) { - /* program temp[i] can be freed */ - int component; - /*printf(" temp[%d] is dead\n", index);*/ - for (component = 0; component < 4; component++) { - if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) { - int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr; - release_grf(c, r); - /* - printf(" Reclaim temp %d, reg %d at inst %d\n", - index, r, c->cur_inst); - */ - c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE; - } - } - } - } -} - - - - -/** - * Preallocate registers. This sets up the Mesa to hardware register - * mapping for certain registers, such as constants (uniforms/state vars) - * and shader inputs. - */ -static void prealloc_reg(struct brw_wm_compile *c) -{ - struct intel_context *intel = &c->func.brw->intel; - int i, j; - struct brw_reg reg; - int urb_read_length = 0; - GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted; - GLuint reg_index = 0; - - memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); - c->first_free_grf = 0; - - for (i = 0; i < 4; i++) { - if (i < (c->key.nr_payload_regs + 1) / 2) - reg = brw_vec8_grf(i * 2, 0); - else - reg = brw_vec8_grf(0, 0); - set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); - } - set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_W, 0, - brw_vec8_grf(c->key.source_w_reg, 0)); - reg_index += c->key.nr_payload_regs; - - /* constants */ - { - const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters; - const GLuint nr_temps = c->fp->program.Base.NumTemporaries; - - /* use a real constant buffer, or just use a section of the GRF? */ - /* XXX this heuristic may need adjustment... */ - if ((nr_params + nr_temps) * 4 + reg_index > 80) { - for (i = 0; i < nr_params; i++) { - float *pv = c->fp->program.Base.Parameters->ParameterValues[i]; - for (j = 0; j < 4; j++) { - c->prog_data.pull_param[c->prog_data.nr_pull_params] = &pv[j]; - c->prog_data.nr_pull_params++; - } - } - - c->prog_data.nr_params = 0; - } - /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/ - - if (!c->prog_data.nr_pull_params) { - const struct gl_program_parameter_list *plist = - c->fp->program.Base.Parameters; - int index = 0; - - /* number of float constants in CURBE */ - c->prog_data.nr_params = 4 * nr_params; - - /* loop over program constants (float[4]) */ - for (i = 0; i < nr_params; i++) { - /* loop over XYZW channels */ - for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(reg_index + index / 8, index % 8); - /* Save pointer to parameter/constant value. - * Constants will be copied in prepare_constant_buffer() - */ - c->prog_data.param[index] = &plist->ParameterValues[i][j]; - set_reg(c, PROGRAM_STATE_VAR, i, j, reg); - } - } - /* number of constant regs used (each reg is float[8]) */ - c->nr_creg = ALIGN(nr_params, 2) / 2; - reg_index += c->nr_creg; - } - } - - /* fragment shader inputs: One 2-reg pair of interpolation - * coefficients for each vec4 to be set up. - */ - if (intel->gen >= 6) { - for (i = 0; i < FRAG_ATTRIB_MAX; i++) { - if (!(c->fp->program.Base.InputsRead & BITFIELD64_BIT(i))) - continue; - - reg = brw_vec8_grf(reg_index, 0); - for (j = 0; j < 4; j++) { - set_reg(c, PROGRAM_PAYLOAD, i, j, reg); - } - reg_index += 2; - } - urb_read_length = reg_index; - } else { - for (i = 0; i < VERT_RESULT_MAX; i++) { - int fp_input; - - if (i >= VERT_RESULT_VAR0) - fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0; - else if (i <= VERT_RESULT_TEX7) - fp_input = i; - else - fp_input = -1; - - if (fp_input >= 0 && inputs & (1 << fp_input)) { - urb_read_length = reg_index; - reg = brw_vec8_grf(reg_index, 0); - for (j = 0; j < 4; j++) - set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); - } - if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) { - reg_index += 2; - } - } - } - - c->prog_data.first_curbe_grf = c->key.nr_payload_regs; - c->prog_data.urb_read_length = urb_read_length; - c->prog_data.curb_read_length = c->nr_creg; - c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); - reg_index++; - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); - reg_index += 2; - - /* mark GRF regs [0..reg_index-1] as in-use */ - for (i = 0; i < reg_index; i++) - prealloc_grf(c, i); - - /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */ - prealloc_grf(c, 126); - prealloc_grf(c, 127); - - for (i = 0; i < c->nr_fp_insns; i++) { - const struct prog_instruction *inst = &c->prog_instructions[i]; - struct brw_reg dst[4]; - - switch (inst->Opcode) { - case OPCODE_TEX: - case OPCODE_TXB: - /* Allocate the channels of texture results contiguously, - * since they are written out that way by the sampler unit. - */ - for (j = 0; j < 4; j++) { - dst[j] = get_dst_reg(c, inst, j); - if (j != 0) - assert(dst[j].nr == dst[j - 1].nr + 1); - } - break; - default: - break; - } - } - - for (i = 0; i < c->nr_fp_insns; i++) { - const struct prog_instruction *inst = &c->prog_instructions[i]; - - switch (inst->Opcode) { - case WM_DELTAXY: - /* Allocate WM_DELTAXY destination on G45/GM45 to an - * even-numbered GRF if possible so that we can use the PLN - * instruction. - */ - if (inst->DstReg.WriteMask == WRITEMASK_XY && - !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited && - !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited && - (IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) { - int grf; - - for (grf = c->first_free_grf & ~1; - grf < BRW_WM_MAX_GRF; - grf += 2) - { - if (!c->used_grf[grf] && !c->used_grf[grf + 1]) { - c->used_grf[grf] = GL_TRUE; - c->used_grf[grf + 1] = GL_TRUE; - c->first_free_grf = grf + 2; /* a guess */ - - set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0, - brw_vec8_grf(grf, 0)); - set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1, - brw_vec8_grf(grf + 1, 0)); - break; - } - } - } - default: - break; - } - } - - /* An instruction may reference up to three constants. - * They'll be found in these registers. - * XXX alloc these on demand! - */ - if (c->prog_data.nr_pull_params) { - for (i = 0; i < 3; i++) { - c->current_const[i].index = -1; - c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0); - } - } -#if 0 - printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer); - printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index); -#endif -} - - -/** - * Check if any of the instruction's src registers are constants, uniforms, - * or statevars. If so, fetch any constants that we don't already have in - * the three GRF slots. - */ -static void fetch_constants(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint i; - - /* loop over instruction src regs */ - for (i = 0; i < 3; i++) { - const struct prog_src_register *src = &inst->SrcReg[i]; - if (src->File == PROGRAM_STATE_VAR || - src->File == PROGRAM_CONSTANT || - src->File == PROGRAM_UNIFORM) { - c->current_const[i].index = src->Index; - -#if 0 - printf(" fetch const[%d] for arg %d into reg %d\n", - src->Index, i, c->current_const[i].reg.nr); -#endif - - /* need to fetch the constant now */ - brw_oword_block_read(p, - c->current_const[i].reg, - brw_message_reg(1), - 16 * src->Index, - SURF_INDEX_FRAG_CONST_BUFFER); - } - } -} - - -/** - * Convert Mesa dst register to brw register. - */ -static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint component) -{ - const int nr = 1; - return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr, - 0, 0); -} - - -static struct brw_reg -get_src_reg_const(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint srcRegIndex, GLuint component) -{ - /* We should have already fetched the constant from the constant - * buffer in fetch_constants(). Now we just have to return a - * register description that extracts the needed component and - * smears it across all eight vector components. - */ - const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; - struct brw_reg const_reg; - - assert(component < 4); - assert(srcRegIndex < 3); - assert(c->current_const[srcRegIndex].index != -1); - const_reg = c->current_const[srcRegIndex].reg; - - /* extract desired float from the const_reg, and smear */ - const_reg = stride(const_reg, 0, 1, 0); - const_reg.subnr = component * 4; - - if (src->Negate & (1 << component)) - const_reg = negate(const_reg); - if (src->Abs) - const_reg = brw_abs(const_reg); - -#if 0 - printf(" form const[%d].%d for arg %d, reg %d\n", - c->current_const[srcRegIndex].index, - component, - srcRegIndex, - const_reg.nr); -#endif - - return const_reg; -} - - -/** - * Convert Mesa src register to brw register. - */ -static struct brw_reg get_src_reg(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint srcRegIndex, GLuint channel) -{ - const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; - const GLuint nr = 1; - const GLuint component = GET_SWZ(src->Swizzle, channel); - - /* Only one immediate value can be used per native opcode, and it - * has be in the src1 slot, so not all Mesa instructions will get - * to take advantage of immediate constants. - */ - if (brw_wm_arg_can_be_immediate(inst->Opcode, srcRegIndex)) { - const struct gl_program_parameter_list *params; - - params = c->fp->program.Base.Parameters; - - /* Extended swizzle terms */ - if (component == SWIZZLE_ZERO) { - return brw_imm_f(0.0F); - } else if (component == SWIZZLE_ONE) { - if (src->Negate) - return brw_imm_f(-1.0F); - else - return brw_imm_f(1.0F); - } - - if (src->File == PROGRAM_CONSTANT) { - float f = params->ParameterValues[src->Index][component]; - - if (src->Abs) - f = fabs(f); - if (src->Negate) - f = -f; - - return brw_imm_f(f); - } - } - - if (c->prog_data.nr_pull_params && - (src->File == PROGRAM_STATE_VAR || - src->File == PROGRAM_CONSTANT || - src->File == PROGRAM_UNIFORM)) { - return get_src_reg_const(c, inst, srcRegIndex, component); - } - else { - /* other type of source register */ - return get_reg(c, src->File, src->Index, component, nr, - src->Negate, src->Abs); - } -} - -static void emit_arl(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, addr_reg; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_ADDRESS, 0); - src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */ - brw_MOV(p, addr_reg, src0); - brw_set_saturate(p, 0); -} - -static INLINE struct brw_reg high_words( struct brw_reg reg ) -{ - return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ), - 0, 8, 2 ); -} - -static INLINE struct brw_reg low_words( struct brw_reg reg ) -{ - return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 ); -} - -static INLINE struct brw_reg even_bytes( struct brw_reg reg ) -{ - return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 ); -} - -static INLINE struct brw_reg odd_bytes( struct brw_reg reg ) -{ - return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ), - 0, 16, 2 ); -} - -/** - * Resolve subroutine calls after code emit is done. - */ -static void post_wm_emit( struct brw_wm_compile *c ) -{ - brw_resolve_cals(&c->func); -} - -static void -get_argument_regs(struct brw_wm_compile *c, - const struct prog_instruction *inst, - int index, - struct brw_reg *dst, - struct brw_reg *regs, - int mask) -{ - struct brw_compile *p = &c->func; - int i, j; - - for (i = 0; i < 4; i++) { - if (mask & (1 << i)) { - regs[i] = get_src_reg(c, inst, index, i); - - /* Unalias destination registers from our sources. */ - if (regs[i].file == BRW_GENERAL_REGISTER_FILE) { - for (j = 0; j < 4; j++) { - if (memcmp(®s[i], &dst[j], sizeof(regs[0])) == 0) { - struct brw_reg tmp = alloc_tmp(c); - brw_MOV(p, tmp, regs[i]); - regs[i] = tmp; - break; - } - } - } - } - } -} - -static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) -{ - struct intel_context *intel = &brw->intel; -#define MAX_IF_DEPTH 32 -#define MAX_LOOP_DEPTH 32 - struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; - int if_depth_in_loop[MAX_LOOP_DEPTH]; - GLuint i, if_depth = 0, loop_depth = 0; - struct brw_compile *p = &c->func; - struct brw_indirect stack_index = brw_indirect(0, 0); - - c->out_of_regs = GL_FALSE; - - if_depth_in_loop[loop_depth] = 0; - - prealloc_reg(c); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); - - if (intel->gen >= 6) - brw_set_acc_write_control(p, 1); - - for (i = 0; i < c->nr_fp_insns; i++) { - const struct prog_instruction *inst = &c->prog_instructions[i]; - int dst_flags; - struct brw_reg args[3][4], dst[4]; - int j; - int mark = mark_tmps( c ); - - c->cur_inst = i; - -#if 0 - printf("Inst %d: ", i); - _mesa_print_instruction(inst); -#endif - - /* fetch any constants that this instruction needs */ - if (c->prog_data.nr_pull_params) - fetch_constants(c, inst); - - if (inst->Opcode != OPCODE_ARL) { - for (j = 0; j < 4; j++) { - if (inst->DstReg.WriteMask & (1 << j)) - dst[j] = get_dst_reg(c, inst, j); - else - dst[j] = brw_null_reg(); - } - } - for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++) - get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW); - - dst_flags = inst->DstReg.WriteMask; - if (inst->SaturateMode == SATURATE_ZERO_ONE) - dst_flags |= SATURATE; - - if (inst->CondUpdate) - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - else - brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); - - switch (inst->Opcode) { - case WM_PIXELXY: - emit_pixel_xy(c, dst, dst_flags); - break; - case WM_DELTAXY: - emit_delta_xy(p, dst, dst_flags, args[0]); - break; - case WM_PIXELW: - emit_pixel_w(c, dst, dst_flags, args[0], args[1]); - break; - case WM_LINTERP: - emit_linterp(p, dst, dst_flags, args[0], args[1]); - break; - case WM_PINTERP: - emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]); - break; - case WM_CINTERP: - emit_cinterp(p, dst, dst_flags, args[0]); - break; - case WM_WPOSXY: - emit_wpos_xy(c, dst, dst_flags, args[0]); - break; - case WM_FB_WRITE: - emit_fb_write(c, args[0], args[1], args[2], - INST_AUX_GET_TARGET(inst->Aux), - inst->Aux & INST_AUX_EOT); - break; - case WM_FRONTFACING: - emit_frontfacing(p, dst, dst_flags); - break; - case OPCODE_ADD: - emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_ARL: - emit_arl(c, inst); - break; - case OPCODE_FRC: - emit_alu1(p, brw_FRC, dst, dst_flags, args[0]); - break; - case OPCODE_FLR: - emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); - break; - case OPCODE_LRP: - emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); - break; - case OPCODE_TRUNC: - emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]); - break; - case OPCODE_MOV: - case OPCODE_SWZ: - emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); - break; - case OPCODE_DP2: - emit_dp2(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_DP3: - emit_dp3(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_DP4: - emit_dp4(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_XPD: - emit_xpd(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_DPH: - emit_dph(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_RCP: - emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); - break; - case OPCODE_RSQ: - emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); - break; - case OPCODE_SIN: - emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); - break; - case OPCODE_COS: - emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); - break; - case OPCODE_EX2: - emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); - break; - case OPCODE_LG2: - emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); - break; - case OPCODE_CMP: - emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]); - break; - case OPCODE_MIN: - emit_min(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_MAX: - emit_max(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_DDX: - case OPCODE_DDY: - emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX), - args[0]); - break; - case OPCODE_SLT: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_L, args[0], args[1]); - break; - case OPCODE_SLE: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_LE, args[0], args[1]); - break; - case OPCODE_SGT: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_G, args[0], args[1]); - break; - case OPCODE_SGE: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_GE, args[0], args[1]); - break; - case OPCODE_SEQ: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_EQ, args[0], args[1]); - break; - case OPCODE_SNE: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_NEQ, args[0], args[1]); - break; - case OPCODE_SSG: - emit_sign(p, dst, dst_flags, args[0]); - break; - case OPCODE_MUL: - emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_POW: - emit_math2(c, BRW_MATH_FUNCTION_POW, - dst, dst_flags, args[0], args[1]); - break; - case OPCODE_MAD: - emit_mad(p, dst, dst_flags, args[0], args[1], args[2]); - break; - case OPCODE_TEX: - emit_tex(c, dst, dst_flags, args[0], - get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, - 0, 1, 0, 0), - inst->TexSrcTarget, - inst->TexSrcUnit, - (c->key.shadowtex_mask & (1 << inst->TexSrcUnit)) != 0); - break; - case OPCODE_TXB: - emit_txb(c, dst, dst_flags, args[0], - get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, - 0, 1, 0, 0), - inst->TexSrcTarget, - c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]); - break; - case OPCODE_KIL_NV: - emit_kil_nv(c); - break; - case OPCODE_IF: - assert(if_depth < MAX_IF_DEPTH); - if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); - if_depth_in_loop[loop_depth]++; - break; - case OPCODE_ELSE: - assert(if_depth > 0); - if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); - break; - case OPCODE_ENDIF: - assert(if_depth > 0); - brw_ENDIF(p, if_inst[--if_depth]); - if_depth_in_loop[loop_depth]--; - break; - case OPCODE_BGNSUB: - brw_save_label(p, inst->Comment, p->nr_insn); - break; - case OPCODE_ENDSUB: - /* no-op */ - break; - case OPCODE_CAL: - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); - brw_set_access_mode(p, BRW_ALIGN_16); - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(4)); - brw_save_call(&c->func, inst->Comment, p->nr_insn); - brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - brw_pop_insn_state(p); - break; - - case OPCODE_RET: - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(-4)); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0)); - brw_set_access_mode(p, BRW_ALIGN_16); - brw_pop_insn_state(p); - - break; - case OPCODE_BGNLOOP: - /* XXX may need to invalidate the current_constant regs */ - loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); - if_depth_in_loop[loop_depth] = 0; - break; - case OPCODE_BRK: - brw_BREAK(p, if_depth_in_loop[loop_depth]); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case OPCODE_CONT: - brw_CONT(p, if_depth_in_loop[loop_depth]); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case OPCODE_ENDLOOP: - { - struct brw_instruction *inst0, *inst1; - GLuint br = 1; - - if (intel->gen == 5) - br = 2; - - assert(loop_depth > 0); - loop_depth--; - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); - /* patch all the BREAK/CONT instructions from last BGNLOOP */ - while (inst0 > loop_inst[loop_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); - } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); - } - } - } - break; - default: - printf("unsupported opcode %d (%s) in fragment shader\n", - inst->Opcode, inst->Opcode < MAX_OPCODE ? - _mesa_opcode_string(inst->Opcode) : "unknown"); - } - - /* Release temporaries containing any unaliased source regs. */ - release_tmps( c, mark ); - - if (inst->CondUpdate) - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - else - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - post_wm_emit(c); - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - printf("wm-native:\n"); - for (i = 0; i < p->nr_insn; i++) - brw_disasm(stdout, &p->store[i], intel->gen); - printf("\n"); - } -} - -/** - * Do GPU code generation for shaders that use GLSL features such as - * flow control. Other shaders will be compiled with the - */ -void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) -{ - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - printf("brw_wm_glsl_emit:\n"); - } - - /* initial instruction translation/simplification */ - brw_wm_pass_fp(c); - - /* actual code generation */ - brw_wm_emit_glsl(brw, c); - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - brw_wm_print_program(c, "brw_wm_glsl_emit done"); - } - - c->prog_data.total_grf = num_grf_used(c); - c->prog_data.total_scratch = 0; -} diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c index 62e556698ba..471ea1c18d6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_iz.c +++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c @@ -120,14 +120,14 @@ const struct { * \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES * \param lookup bitmask of IZ_* flags */ -void brw_wm_lookup_iz( struct intel_context *intel, - GLuint line_aa, - GLuint lookup, - GLboolean ps_uses_depth, - struct brw_wm_prog_key *key ) +void brw_wm_lookup_iz(struct intel_context *intel, + struct brw_wm_compile *c) { GLuint reg = 2; GLboolean kill_stats_promoted_workaround = GL_FALSE; + int lookup = c->key.iz_lookup; + bool uses_depth = (c->fp->program.Base.InputsRead & + (1 << FRAG_ATTRIB_WPOS)) != 0; assert (lookup < IZ_BIT_MAX); @@ -136,36 +136,36 @@ void brw_wm_lookup_iz( struct intel_context *intel, * statistics are enabled..." paragraph of 11.5.3.2: Early Depth * Test Cases [Pre-DevGT] of the 3D Pipeline - Windower B-Spec. */ - if (intel->stats_wm && + if (c->key.stats_wm && (lookup & IZ_PS_KILL_ALPHATEST_BIT) && wm_iz_table[lookup].mode == P) { kill_stats_promoted_workaround = GL_TRUE; } if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) - key->computes_depth = 1; + c->computes_depth = 1; - if (wm_iz_table[lookup].sd_present || ps_uses_depth || + if (wm_iz_table[lookup].sd_present || uses_depth || kill_stats_promoted_workaround) { - key->source_depth_reg = reg; + c->source_depth_reg = reg; reg += 2; } if (wm_iz_table[lookup].sd_to_rt || kill_stats_promoted_workaround) - key->source_depth_to_render_target = 1; + c->source_depth_to_render_target = 1; - if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) { - key->aa_dest_stencil_reg = reg; - key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present && - line_aa == AA_SOMETIMES); + if (wm_iz_table[lookup].ds_present || c->key.line_aa != AA_NEVER) { + c->aa_dest_stencil_reg = reg; + c->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present && + c->key.line_aa == AA_SOMETIMES); reg++; } if (wm_iz_table[lookup].dd_present) { - key->dest_depth_reg = reg; + c->dest_depth_reg = reg; reg+=2; } - key->nr_payload_regs = reg; + c->nr_payload_regs = reg; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 83152526b3a..f78bdc31866 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -380,7 +380,7 @@ static void pass0_init_payload( struct brw_wm_compile *c ) GLuint i; for (i = 0; i < 4; i++) { - GLuint j = i >= (c->key.nr_payload_regs + 1) / 2 ? 0 : i; + GLuint j = i >= (c->nr_payload_regs + 1) / 2 ? 0 : i; pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, &c->payload.depth[j] ); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c index 3a2874b6ddf..7d6a3fa9f12 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c @@ -128,8 +128,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) if (inst->opcode == WM_FB_WRITE) { track_arg(c, inst, 0, WRITEMASK_XYZW); track_arg(c, inst, 1, WRITEMASK_XYZW); - if (c->key.source_depth_to_render_target && - c->key.computes_depth) + if (c->source_depth_to_render_target && c->computes_depth) track_arg(c, inst, 2, WRITEMASK_Z); else track_arg(c, inst, 2, 0); @@ -281,7 +280,6 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case OPCODE_DST: case WM_FRONTFACING: - case OPCODE_KIL_NV: default: break; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c index 44e39538145..8c2b9e7020b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c @@ -69,6 +69,8 @@ static void prealloc_reg(struct brw_wm_compile *c, */ static void init_registers( struct brw_wm_compile *c ) { + struct brw_context *brw = c->func.brw; + struct intel_context *intel = &brw->intel; GLuint nr_interp_regs = 0; GLuint i = 0; GLuint j; @@ -76,32 +78,41 @@ static void init_registers( struct brw_wm_compile *c ) for (j = 0; j < c->grf_limit; j++) c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN; - for (j = 0; j < (c->key.nr_payload_regs + 1) / 2; j++) + for (j = 0; j < (c->nr_payload_regs + 1) / 2; j++) prealloc_reg(c, &c->payload.depth[j], i++); for (j = 0; j < c->nr_creg; j++) prealloc_reg(c, &c->creg[j], i++); - for (j = 0; j < VERT_RESULT_MAX; j++) { - if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) { - int fp_index; - - if (j >= VERT_RESULT_VAR0) - fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); - else if (j <= VERT_RESULT_TEX7) - fp_index = j; - else - fp_index = -1; - - nr_interp_regs++; - if (fp_index >= 0) - prealloc_reg(c, &c->payload.input_interp[fp_index], i++); + if (intel->gen >= 6) { + for (unsigned int j = 0; j < FRAG_ATTRIB_MAX; j++) { + if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(j)) { + nr_interp_regs++; + prealloc_reg(c, &c->payload.input_interp[j], i++); + } + } + } else { + for (j = 0; j < VERT_RESULT_MAX; j++) { + if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) { + int fp_index; + + if (j >= VERT_RESULT_VAR0) + fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); + else if (j <= VERT_RESULT_TEX7) + fp_index = j; + else + fp_index = -1; + + nr_interp_regs++; + if (fp_index >= 0) + prealloc_reg(c, &c->payload.input_interp[fp_index], i++); + } } + assert(nr_interp_regs >= 1); } - assert(nr_interp_regs >= 1); - c->prog_data.first_curbe_grf = ALIGN(c->key.nr_payload_regs, 2); + c->prog_data.first_curbe_grf = ALIGN(c->nr_payload_regs, 2); c->prog_data.urb_read_length = nr_interp_regs * 2; c->prog_data.curb_read_length = c->nr_creg * 2; diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index fea96d35381..e7c97a1cb05 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -69,12 +69,43 @@ static GLuint translate_wrap_mode( GLenum wrap ) static drm_intel_bo *upload_default_color( struct brw_context *brw, const GLfloat *color ) { - struct brw_sampler_default_color sdc; + struct intel_context *intel = &brw->intel; - COPY_4V(sdc.color, color); - - return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, - &sdc, sizeof(sdc)); + if (intel->gen >= 5) { + struct gen5_sampler_default_color sdc; + + memset(&sdc, 0, sizeof(sdc)); + + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[0], color[0]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[1], color[1]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[2], color[2]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[3], color[3]); + + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[0], color[0]); + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[1], color[1]); + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[2], color[2]); + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[3], color[3]); + + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[0], color[0]); + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[1], color[1]); + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[2], color[2]); + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[3], color[3]); + + /* XXX: Fill in half floats */ + /* XXX: Fill in signed bytes */ + + COPY_4V(sdc.f, color); + + return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, + &sdc, sizeof(sdc)); + } else { + struct brw_sampler_default_color sdc; + + COPY_4V(sdc.color, color); + + return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, + &sdc, sizeof(sdc)); + } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 76de7b7b6f6..e9ef635bca2 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -87,7 +87,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) { struct gl_context *ctx = &brw->intel.ctx; const struct gl_fragment_program *fp = brw->fragment_program; - const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp; struct intel_context *intel = &brw->intel; memset(key, 0, sizeof(*key)); @@ -132,7 +131,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* _NEW_COLOR */ key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; - key->is_glsl = bfp->isGLSL; /* If using the fragment shader backend, the program is always * 8-wide. diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 76fc94df1f6..ad744044c70 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -139,6 +139,8 @@ static GLuint translate_tex_format( gl_format mesa_format, return BRW_SURFACEFORMAT_I16_UNORM; else if (depth_mode == GL_ALPHA) return BRW_SURFACEFORMAT_A16_UNORM; + else if (depth_mode == GL_RED) + return BRW_SURFACEFORMAT_R16_UNORM; else return BRW_SURFACEFORMAT_L16_UNORM; @@ -174,6 +176,8 @@ static GLuint translate_tex_format( gl_format mesa_format, return BRW_SURFACEFORMAT_I24X8_UNORM; else if (depth_mode == GL_ALPHA) return BRW_SURFACEFORMAT_A24X8_UNORM; + else if (depth_mode == GL_RED) + return BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS; else return BRW_SURFACEFORMAT_L24X8_UNORM; @@ -274,6 +278,7 @@ brw_create_constant_surface(struct brw_context *brw, drm_intel_bo **out_bo, uint32_t *out_offset) { + struct intel_context *intel = &brw->intel; const GLint w = width - 1; struct brw_surface_state surf; void *map; @@ -284,6 +289,9 @@ brw_create_constant_surface(struct brw_context *brw, surf.ss0.surface_type = BRW_SURFACE_BUFFER; surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + if (intel->gen >= 6) + surf.ss0.render_cache_read_write = 1; + assert(bo); surf.ss1.base_addr = bo->offset; /* reloc */ diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index 800a2555214..c2631a7b4df 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -35,6 +35,7 @@ struct gen6_blend_state_key { GLboolean color_blend, alpha_enabled; GLboolean dither; + GLboolean color_mask[BRW_MAX_DRAW_BUFFERS][4]; GLenum logic_op; @@ -54,6 +55,9 @@ blend_state_populate_key(struct brw_context *brw, memset(key, 0, sizeof(*key)); /* _NEW_COLOR */ + memcpy(key->color_mask, ctx->Color.ColorMask, sizeof(key->color_mask)); + + /* _NEW_COLOR */ if (ctx->Color._LogicOpEnabled) key->logic_op = ctx->Color.LogicOp; else @@ -87,54 +91,62 @@ static drm_intel_bo * blend_state_create_from_key(struct brw_context *brw, struct gen6_blend_state_key *key) { - struct gen6_blend_state blend; + struct gen6_blend_state blend[BRW_MAX_DRAW_BUFFERS]; drm_intel_bo *bo; + int b; memset(&blend, 0, sizeof(blend)); - if (key->logic_op != GL_COPY) { - blend.blend1.logic_op_enable = 1; - blend.blend1.logic_op_func = intel_translate_logic_op(key->logic_op); - } else if (key->color_blend) { - GLenum eqRGB = key->blend_eq_rgb; - GLenum eqA = key->blend_eq_a; - GLenum srcRGB = key->blend_src_rgb; - GLenum dstRGB = key->blend_dst_rgb; - GLenum srcA = key->blend_src_a; - GLenum dstA = key->blend_dst_a; - - if (eqRGB == GL_MIN || eqRGB == GL_MAX) { - srcRGB = dstRGB = GL_ONE; - } - - if (eqA == GL_MIN || eqA == GL_MAX) { - srcA = dstA = GL_ONE; + for (b = 0; b < BRW_MAX_DRAW_BUFFERS; b++) { + if (key->logic_op != GL_COPY) { + blend[b].blend1.logic_op_enable = 1; + blend[b].blend1.logic_op_func = intel_translate_logic_op(key->logic_op); + } else if (key->color_blend & (1 << b)) { + GLenum eqRGB = key->blend_eq_rgb; + GLenum eqA = key->blend_eq_a; + GLenum srcRGB = key->blend_src_rgb; + GLenum dstRGB = key->blend_dst_rgb; + GLenum srcA = key->blend_src_a; + GLenum dstA = key->blend_dst_a; + + if (eqRGB == GL_MIN || eqRGB == GL_MAX) { + srcRGB = dstRGB = GL_ONE; + } + + if (eqA == GL_MIN || eqA == GL_MAX) { + srcA = dstA = GL_ONE; + } + + blend[b].blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB); + blend[b].blend0.source_blend_factor = brw_translate_blend_factor(srcRGB); + blend[b].blend0.blend_func = brw_translate_blend_equation(eqRGB); + + blend[b].blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + blend[b].blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA); + blend[b].blend0.ia_blend_func = brw_translate_blend_equation(eqA); + + blend[b].blend0.blend_enable = 1; + blend[b].blend0.ia_blend_enable = (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB); } - blend.blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB); - blend.blend0.source_blend_factor = brw_translate_blend_factor(srcRGB); - blend.blend0.blend_func = brw_translate_blend_equation(eqRGB); - - blend.blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA); - blend.blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA); - blend.blend0.ia_blend_func = brw_translate_blend_equation(eqA); + if (key->alpha_enabled) { + blend[b].blend1.alpha_test_enable = 1; + blend[b].blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func); - blend.blend0.blend_enable = 1; - blend.blend0.ia_blend_enable = (srcA != srcRGB || - dstA != dstRGB || - eqA != eqRGB); - } - - if (key->alpha_enabled) { - blend.blend1.alpha_test_enable = 1; - blend.blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func); + } - } + if (key->dither) { + blend[b].blend1.dither_enable = 1; + blend[b].blend1.y_dither_offset = 0; + blend[b].blend1.x_dither_offset = 0; + } - if (key->dither) { - blend.blend1.dither_enable = 1; - blend.blend1.y_dither_offset = 0; - blend.blend1.x_dither_offset = 0; + blend[b].blend1.write_disable_r = !key->color_mask[b][0]; + blend[b].blend1.write_disable_g = !key->color_mask[b][1]; + blend[b].blend1.write_disable_b = !key->color_mask[b][2]; + blend[b].blend1.write_disable_a = !key->color_mask[b][3]; } bo = brw_upload_cache(&brw->cache, BRW_BLEND_STATE, @@ -172,7 +184,7 @@ const struct brw_tracked_state gen6_blend_state = { }; struct gen6_color_calc_state_key { - GLubyte blend_constant_color[4]; + float blend_constant_color[4]; GLclampf alpha_ref; GLubyte stencil_ref[2]; }; diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index c65b41e2b6b..c7c4eb1f27d 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -64,7 +64,9 @@ upload_clip_state(struct brw_context *brw) userclip << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT | depth_clamp | provoking); - OUT_BATCH(GEN6_CLIP_FORCE_ZERO_RTAINDEX); + OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT | + U_FIXED(225.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT | + GEN6_CLIP_FORCE_ZERO_RTAINDEX); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 471067e8f02..45c148baedd 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -33,9 +33,10 @@ #include "intel_batchbuffer.h" static uint32_t -get_attr_override(struct brw_context *brw, int fs_attr) +get_attr_override(struct brw_context *brw, int fs_attr, int two_side_color) { int attr_index = 0, i, vs_attr; + int bfc = 0; if (fs_attr <= FRAG_ATTRIB_TEX7) vs_attr = fs_attr; @@ -57,6 +58,30 @@ get_attr_override(struct brw_context *brw, int fs_attr) attr_index++; } + assert(attr_index < 32); + + if (two_side_color) { + if ((brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) && + (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) { + assert(brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)); + assert(brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0)); + bfc = 2; + } else if ((brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) && + (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0))) + bfc = 1; + } + + if (bfc && (fs_attr <= FRAG_ATTRIB_TEX7 && fs_attr > FRAG_ATTRIB_WPOS)) { + if (fs_attr == FRAG_ATTRIB_COL0) + attr_index |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT); + else if (fs_attr == FRAG_ATTRIB_COL1 && bfc == 2) { + attr_index++; + attr_index |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT); + } else { + attr_index += bfc; + } + } + return attr_index; } @@ -67,13 +92,15 @@ upload_sf_state(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; /* CACHE_NEW_VS_PROG */ uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written); + /* BRW_NEW_FRAGMENT_PROGRAM */ uint32_t num_outputs = brw_count_bits(brw->fragment_program->Base.InputsRead); - uint32_t dw1, dw2, dw3, dw4, dw16; + uint32_t dw1, dw2, dw3, dw4, dw16, dw17; int i; /* _NEW_BUFFER */ GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; int attr = 0; int urb_start; + int two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); /* _NEW_TRANSFORM */ if (ctx->Transform.ClipPlanesEnabled) @@ -91,6 +118,7 @@ upload_sf_state(struct brw_context *brw) dw3 = 0; dw4 = 0; dw16 = 0; + dw17 = 0; /* _NEW_POLYGON */ if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo) @@ -99,6 +127,48 @@ upload_sf_state(struct brw_context *brw) if (ctx->Polygon.OffsetFill) dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID; + if (ctx->Polygon.OffsetLine) + dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME; + + if (ctx->Polygon.OffsetPoint) + dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT; + + switch (ctx->Polygon.FrontMode) { + case GL_FILL: + dw2 |= GEN6_SF_FRONT_SOLID; + break; + + case GL_LINE: + dw2 |= GEN6_SF_FRONT_WIREFRAME; + break; + + case GL_POINT: + dw2 |= GEN6_SF_FRONT_POINT; + break; + + default: + assert(0); + break; + } + + switch (ctx->Polygon.BackMode) { + case GL_FILL: + dw2 |= GEN6_SF_BACK_SOLID; + break; + + case GL_LINE: + dw2 |= GEN6_SF_BACK_WIREFRAME; + break; + + case GL_POINT: + dw2 |= GEN6_SF_BACK_POINT; + break; + + default: + assert(0); + break; + } + /* _NEW_SCISSOR */ if (ctx->Scissor.Enabled) dw3 |= GEN6_SF_SCISSOR_ENABLE; @@ -160,6 +230,12 @@ upload_sf_state(struct brw_context *brw) } } + /* flat shading */ + if (ctx->Light.ShadeModel == GL_FLAT) { + dw17 |= ((brw->fragment_program->Base.InputsRead & (FRAG_BIT_COL0 | FRAG_BIT_COL1)) >> + ((brw->fragment_program->Base.InputsRead & FRAG_BIT_WPOS) ? 0 : 1)); + } + BEGIN_BATCH(20); OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2)); OUT_BATCH(dw1); @@ -174,7 +250,7 @@ upload_sf_state(struct brw_context *brw) for (; attr < 64; attr++) { if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr); + attr_overrides |= get_attr_override(brw, attr, two_side_color); attr++; break; } @@ -182,7 +258,7 @@ upload_sf_state(struct brw_context *brw) for (; attr < 64; attr++) { if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr) << 16; + attr_overrides |= get_attr_override(brw, attr, two_side_color) << 16; attr++; break; } @@ -190,7 +266,7 @@ upload_sf_state(struct brw_context *brw) OUT_BATCH(attr_overrides); } OUT_BATCH(dw16); /* point sprite texcoord bitmask */ - OUT_BATCH(0); /* constant interp bitmask */ + OUT_BATCH(dw17); /* constant interp bitmask */ OUT_BATCH(0); /* wrapshortest enables 0-7 */ OUT_BATCH(0); /* wrapshortest enables 8-15 */ ADVANCE_BATCH(); @@ -205,7 +281,8 @@ const struct brw_tracked_state gen6_sf_state = { _NEW_BUFFERS | _NEW_POINT | _NEW_TRANSFORM), - .brw = BRW_NEW_CONTEXT, + .brw = (BRW_NEW_CONTEXT | + BRW_NEW_FRAGMENT_PROGRAM), .cache = CACHE_NEW_VS_PROG }, .emit = upload_sf_state, diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c index a34123478fb..de97fd3783d 100644 --- a/src/mesa/drivers/dri/i965/gen6_urb.c +++ b/src/mesa/drivers/dri/i965/gen6_urb.c @@ -72,7 +72,7 @@ const struct brw_tracked_state gen6_urb = { .dirty = { .mesa = 0, .brw = BRW_NEW_CONTEXT, - .cache = CACHE_NEW_VS_PROG, + .cache = (CACHE_NEW_VS_PROG | CACHE_NEW_GS_PROG), }, .prepare = prepare_urb, .emit = upload_urb, diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index e94d0c0ddbb..4ef9e2e6072 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -54,7 +54,7 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH(0); ADVANCE_BATCH(); } else { - int params_uploaded = 0; + int params_uploaded = 0, param_regs; float *param; if (brw->vertex_program->IsNVProgram) @@ -88,20 +88,11 @@ upload_vs_state(struct brw_context *brw) params_uploaded++; } - if (vp->use_const_buffer) { - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - memcpy(param + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); - params_uploaded++; - } - } - } else { - for (i = 0; i < nr_params; i++) { - memcpy(param, vp->program.Base.Parameters->ParameterValues[i], + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + memcpy(param + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], 4 * sizeof(float)); - param += 4; params_uploaded++; } } @@ -117,13 +108,16 @@ upload_vs_state(struct brw_context *brw) drm_intel_gem_bo_unmap_gtt(constant_bo); + param_regs = (params_uploaded + 1) / 2; + assert(param_regs <= 32); + BEGIN_BATCH(5); OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | GEN6_CONSTANT_BUFFER_0_ENABLE | (5 - 2)); OUT_RELOC(constant_bo, I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ - ALIGN(params_uploaded, 2) / 2 - 1); + param_regs - 1); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index ea5418bacf1..d80df4e254b 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -66,6 +66,21 @@ prepare_wm_constants(struct brw_context *brw) constants[i] = convert_param(brw->wm.prog_data->param_convert[i], *brw->wm.prog_data->param[i]); } + + if (0) { + printf("WM constants:\n"); + for (i = 0; i < brw->wm.prog_data->nr_params; i++) { + if ((i & 7) == 0) + printf("g%d: ", brw->wm.prog_data->first_curbe_grf + i / 8); + printf("%8f ", constants[i]); + if ((i & 7) == 7) + printf("\n"); + } + if ((i & 7) != 0) + printf("\n"); + printf("\n"); + } + drm_intel_gem_bo_unmap_gtt(brw->wm.push_const_bo); } } @@ -88,6 +103,7 @@ upload_wm_state(struct brw_context *brw) brw_fragment_program_const(brw->fragment_program); uint32_t dw2, dw4, dw5, dw6; + /* CACHE_NEW_WM_PROG */ if (brw->wm.prog_data->nr_params == 0) { /* Disable the push constant buffers. */ BEGIN_BATCH(5); @@ -104,7 +120,8 @@ upload_wm_state(struct brw_context *brw) (5 - 2)); OUT_RELOC(brw->wm.push_const_bo, I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ - ALIGN(brw->wm.prog_data->nr_params, 8) / 8 - 1); + ALIGN(brw->wm.prog_data->nr_params, + brw->wm.prog_data->dispatch_width) / 8 - 1); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); @@ -126,8 +143,8 @@ upload_wm_state(struct brw_context *brw) dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT; - /* BRW_NEW_FRAGMENT_PROGRAM */ - if (fp->isGLSL) + /* CACHE_NEW_WM_PROG */ + if (brw->wm.prog_data->dispatch_width == 8) dw5 |= GEN6_WM_8_DISPATCH_ENABLE; else dw5 |= GEN6_WM_16_DISPATCH_ENABLE; @@ -176,13 +193,14 @@ upload_wm_state(struct brw_context *brw) const struct brw_tracked_state gen6_wm_state = { .dirty = { .mesa = (_NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR | _NEW_BUFFERS | - _NEW_PROGRAM_CONSTANTS), + _NEW_PROGRAM_CONSTANTS | _NEW_POLYGON), .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_NR_WM_SURFACES | BRW_NEW_URB_FENCE | BRW_NEW_BATCH), - .cache = CACHE_NEW_SAMPLER + .cache = (CACHE_NEW_SAMPLER | + CACHE_NEW_WM_PROG) }, .emit = upload_wm_state, }; diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 4b498f8c5b2..21fc9ece886 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -92,7 +92,7 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used) batch->ptr = NULL; - if (!intel->no_hw) { + if (!intel->intelScreen->no_hw) { drm_intel_bo_exec(batch->buf, used, NULL, 0, (x_off & 0xffff) | (y_off << 16)); } diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 152cdcaf37d..9c222c7b485 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -519,7 +519,6 @@ static const struct dri_debug_control debug_control[] = { { "sing", DEBUG_SINGLE_THREAD }, { "thre", DEBUG_SINGLE_THREAD }, { "wm", DEBUG_WM }, - { "glsl_force", DEBUG_GLSL_FORCE }, { "urb", DEBUG_URB }, { "vs", DEBUG_VS }, { "clip", DEBUG_CLIP }, @@ -800,11 +799,6 @@ intelInitContext(struct intel_context *intel, if (INTEL_DEBUG & DEBUG_BUFMGR) dri_bufmgr_set_debug(intel->bufmgr, GL_TRUE); - /* XXX force SIMD8 kernel for Sandybridge before we fixed - SIMD16 interpolation. */ - if (intel->gen == 6) - INTEL_DEBUG |= DEBUG_GLSL_FORCE; - intel->batch = intel_batchbuffer_alloc(intel); intel_fbo_init(intel); @@ -838,11 +832,6 @@ intelInitContext(struct intel_context *intel, intel->always_flush_cache = 1; } - /* Disable all hardware rendering (skip emitting batches and fences/waits - * to the kernel) - */ - intel->no_hw = getenv("INTEL_NO_HW") != NULL; - return GL_TRUE; } diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 9d5139c0000..96493c0f2bb 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -207,7 +207,6 @@ struct intel_context GLboolean hw_stipple; GLboolean depth_buffer_is_float; GLboolean no_rast; - GLboolean no_hw; GLboolean always_flush_batch; GLboolean always_flush_cache; @@ -362,7 +361,6 @@ extern int INTEL_DEBUG; #define DEBUG_WM 0x800000 #define DEBUG_URB 0x1000000 #define DEBUG_VS 0x2000000 -#define DEBUG_GLSL_FORCE 0x4000000 #define DEBUG_CLIP 0x8000000 #define DBG(...) do { \ diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 862a13d2ea5..18e796a1186 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -465,10 +465,12 @@ intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb, irb->Base.DataType = GL_UNSIGNED_BYTE; DBG("Render to XGBA8 texture OK\n"); } +#ifndef I915 else if (texImage->TexFormat == MESA_FORMAT_SARGB8) { irb->Base.DataType = GL_UNSIGNED_BYTE; DBG("Render to SARGB8 texture OK\n"); } +#endif else if (texImage->TexFormat == MESA_FORMAT_RGB565) { irb->Base.DataType = GL_UNSIGNED_BYTE; DBG("Render to RGB5 texture OK\n"); @@ -481,6 +483,7 @@ intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb, irb->Base.DataType = GL_UNSIGNED_BYTE; DBG("Render to ARGB4444 texture OK\n"); } +#ifndef I915 else if (texImage->TexFormat == MESA_FORMAT_A8) { irb->Base.DataType = GL_UNSIGNED_BYTE; DBG("Render to A8 texture OK\n"); @@ -501,6 +504,7 @@ intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb, irb->Base.DataType = GL_UNSIGNED_SHORT; DBG("Render to RG88 texture OK\n"); } +#endif else if (texImage->TexFormat == MESA_FORMAT_Z16) { irb->Base.DataType = GL_UNSIGNED_SHORT; DBG("Render to DEPTH16 texture OK\n"); @@ -710,15 +714,17 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) switch (irb->Base.Format) { case MESA_FORMAT_ARGB8888: case MESA_FORMAT_XRGB8888: - case MESA_FORMAT_SARGB8: case MESA_FORMAT_RGB565: case MESA_FORMAT_ARGB1555: case MESA_FORMAT_ARGB4444: +#ifndef I915 + case MESA_FORMAT_SARGB8: case MESA_FORMAT_A8: case MESA_FORMAT_R8: case MESA_FORMAT_R16: case MESA_FORMAT_RG88: case MESA_FORMAT_RG1616: +#endif break; default: fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 061f0d278d6..3f13589a214 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -452,7 +452,7 @@ intelCreateContext(gl_api api, return brwCreateContext(api, mesaVis, driContextPriv, sharedContextPrivate); #endif - fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID); + fprintf(stderr, "Unrecognized deviceID 0x%x\n", intelScreen->deviceID); return GL_FALSE; } @@ -462,7 +462,8 @@ intel_init_bufmgr(struct intel_screen *intelScreen) __DRIscreen *spriv = intelScreen->driScrnPriv; int num_fences = 0; - intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL; + intelScreen->no_hw = (getenv("INTEL_NO_HW") != NULL || + getenv("INTEL_DEVID_OVERRIDE") != NULL); intelScreen->bufmgr = intel_bufmgr_gem_init(spriv->fd, BATCH_SZ); if (intelScreen->bufmgr == NULL) { @@ -497,6 +498,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) GLenum fb_format[3]; GLenum fb_type[3]; unsigned int api_mask; + char *devid_override; static const GLenum back_buffer_modes[] = { GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML @@ -523,6 +525,16 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) &intelScreen->deviceID)) return GL_FALSE; + /* Allow an override of the device ID for the purpose of making the + * driver produce dumps for debugging of new chipset enablement. + * This implies INTEL_NO_HW, to avoid programming your actual GPU + * incorrectly. + */ + devid_override = getenv("INTEL_DEVID_OVERRIDE"); + if (devid_override) { + intelScreen->deviceID = strtod(devid_override, NULL); + } + api_mask = (1 << __DRI_API_OPENGL); #if FEATURE_ES1 api_mask |= (1 << __DRI_API_GLES); diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c index 9d73a2fb375..f8316ae2f8d 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_format.c +++ b/src/mesa/drivers/dri/intel/intel_tex_format.c @@ -204,11 +204,13 @@ intelChooseTextureFormat(struct gl_context * ctx, GLint internalFormat, * { R, G, 1.0, 1.0 } from a red-green texture would be useful. */ case GL_RED: + case GL_COMPRESSED_RED: case GL_R8: return MESA_FORMAT_R8; case GL_R16: return MESA_FORMAT_R16; case GL_RG: + case GL_COMPRESSED_RG: case GL_RG8: return MESA_FORMAT_RG88; case GL_RG16: diff --git a/src/mesa/drivers/dri/r200/r200_maos_arrays.c b/src/mesa/drivers/dri/r200/r200_maos_arrays.c index 8a047e6419b..b62290231b9 100644 --- a/src/mesa/drivers/dri/r200/r200_maos_arrays.c +++ b/src/mesa/drivers/dri/r200/r200_maos_arrays.c @@ -200,6 +200,7 @@ void r200EmitArrays( struct gl_context *ctx, GLubyte *vimap_rev ) } default: assert(0); + emitsize = 0; } if (!rmesa->radeon.tcl.aos[nr].bo) { rcommon_emit_vector( ctx, diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index 8be32ea91fe..1db8678e890 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -76,6 +76,9 @@ static void use_temporary(struct r300_fragment_program_code *code, unsigned int static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src) { + if (!src.Used) + return 0; + if (src.File == RC_FILE_CONSTANT) { return src.Index | (1 << 5); } else if (src.File == RC_FILE_TEMPORARY) { diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index 2d28b065390..05d3da8a10d 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -94,6 +94,7 @@ static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle) */ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { + const struct swizzle_data* sd; unsigned int relevant; int j; @@ -127,7 +128,8 @@ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) return 0; - if (!lookup_native_swizzle(reg.Swizzle)) + sd = lookup_native_swizzle(reg.Swizzle); + if (!sd || (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0)) return 0; return 1; @@ -201,7 +203,7 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) { const struct swizzle_data* sd = lookup_native_swizzle(swizzle); - if (!sd) { + if (!sd || (src == RC_PAIR_PRESUB_SRC && sd->srcp_stride == 0)) { fprintf(stderr, "Not a native swizzle: %08x\n", swizzle); return 0; } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 2f130198d35..e0d349b98ce 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -24,6 +24,7 @@ #include <stdio.h> +#include "radeon_compiler_util.h" #include "radeon_dataflow.h" #include "radeon_emulate_branches.h" #include "radeon_emulate_loops.h" @@ -54,6 +55,8 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { struct rc_sub_instruction * inst = &rci->U.I; + unsigned i; + const struct rc_opcode_info *info = rc_get_opcode_info(inst->Opcode); if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth) continue; @@ -65,27 +68,12 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) continue; } - switch (inst->Opcode) { - case RC_OPCODE_FRC: - case RC_OPCODE_MOV: - inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); - break; - case RC_OPCODE_ADD: - case RC_OPCODE_MAX: - case RC_OPCODE_MIN: - case RC_OPCODE_MUL: - inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]); - break; - case RC_OPCODE_CMP: - case RC_OPCODE_MAD: - inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]); - inst->SrcReg[2] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[2]); - break; - default: - // Scalar instructions needn't be reswizzled - break; + if (!info->IsComponentwise) { + continue; + } + + for (i = 0; i < info->NumSrcRegs; i++) { + inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]); } } } @@ -93,7 +81,6 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) { int is_r500 = c->Base.is_r500; - int kill_consts = c->Base.remove_unused_constants; int opt = !c->Base.disable_optimizations; /* Lists of instruction transformations. */ @@ -133,11 +120,11 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) {"emulate loops", 1, !is_r500, rc_emulate_loops, NULL}, {"dataflow optimize", 1, opt, rc_optimize, NULL}, {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL}, - {"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table}, + {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, /* This pass makes it easier for the scheduler to group TEX * instructions and reduces the chances of creating too * many texture indirections.*/ - {"register rename", 1, !is_r500, rc_rename_regs, NULL}, + {"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL}, {"pair translate", 1, 1, rc_pair_translate, NULL}, {"pair scheduling", 1, 1, rc_pair_schedule, NULL}, {"register allocation", 1, opt, rc_pair_regalloc, NULL}, @@ -150,9 +137,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) {NULL, 0, 0, NULL, NULL} }; + c->Base.type = RC_FRAGMENT_PROGRAM; c->Base.SwizzleCaps = c->Base.is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps; - rc_run_compiler(&c->Base, fs_list, "Fragment Program"); + rc_run_compiler(&c->Base, fs_list); rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index bf8341f0173..472029f63d0 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -26,6 +26,7 @@ #include "../r300_reg.h" +#include "radeon_compiler_util.h" #include "radeon_dataflow.h" #include "radeon_program_alu.h" #include "radeon_swizzle.h" @@ -790,19 +791,14 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) if (!hwtemps[j]) break; } - if (j >= c->max_temp_regs) { - rc_error(c, "Too many temporaries\n"); - return; + ta[orig].Allocated = 1; + if (last_inst_src_reladdr && + last_inst_src_reladdr->IP > inst->IP) { + ta[orig].HwTemp = orig; } else { - ta[orig].Allocated = 1; - if (last_inst_src_reladdr && - last_inst_src_reladdr->IP > inst->IP) { - ta[orig].HwTemp = orig; - } else { - ta[orig].HwTemp = j; - } - hwtemps[ta[orig].HwTemp] = 1; + ta[orig].HwTemp = j; } + hwtemps[ta[orig].HwTemp] = 1; } inst->U.I.DstReg.Index = ta[orig].HwTemp; @@ -1018,7 +1014,6 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) { int is_r500 = c->Base.is_r500; - int kill_consts = c->Base.remove_unused_constants; int opt = !c->Base.disable_optimizations; /* Lists of instruction transformations. */ @@ -1062,18 +1057,18 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) {"dataflow optimize", 1, opt, rc_optimize, NULL}, /* This pass must be done after optimizations. */ {"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts}, - {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL}, {"register allocation", 1, opt, allocate_temporary_registers, NULL}, - {"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table}, + {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table}, {"final code validation", 0, 1, rc_validate_final_shader, NULL}, {"machine code generation", 0, 1, translate_vertex_program, NULL}, {"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL}, {NULL, 0, 0, NULL, NULL} }; + c->Base.type = RC_VERTEX_PROGRAM; c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; - rc_run_compiler(&c->Base, vs_list, "Vertex Program"); + rc_run_compiler(&c->Base, vs_list); c->code->InputsRead = c->Base.Program.InputsRead; c->code->OutputsWritten = c->Base.Program.OutputsWritten; diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index 289bb87ae59..ef81be48f77 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -29,6 +29,7 @@ #include <stdio.h> +#include "radeon_compiler_util.h" #include "../r300_reg.h" /** diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 6f101c68eb6..5da82d90f67 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -45,9 +45,6 @@ #include "radeon_program_pair.h" -#define MAX_BRANCH_DEPTH_FULL 32 -#define MAX_BRANCH_DEPTH_PARTIAL 4 - #define PROG_CODE \ struct r500_fragment_program_code *code = &c->code->code.r500 @@ -200,6 +197,9 @@ static void use_temporary(struct r500_fragment_program_code* code, unsigned int static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src) { + if (!src.Used) + return 0; + if (src.File == RC_FILE_CONSTANT) { return src.Index | 0x100; } else if (src.File == RC_FILE_TEMPORARY) { @@ -506,7 +506,7 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst break; } case RC_OPCODE_IF: - if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) { + if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) { rc_error(s->C, "Branch depth exceeds hardware limit"); return; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index cfb6df2cd79..b69e81698ae 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -34,6 +34,8 @@ #define R500_PFS_MAX_INST 512 #define R500_PFS_NUM_TEMP_REGS 128 #define R500_PFS_NUM_CONST_REGS 256 +#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32 +#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4 #define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index 4286baed0c6..65548604bcc 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -29,6 +29,7 @@ #include "radeon_dataflow.h" #include "radeon_program.h" #include "radeon_program_pair.h" +#include "radeon_compiler_util.h" void rc_init(struct radeon_compiler * c) @@ -356,66 +357,92 @@ void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) static void reg_count_callback(void * userdata, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int mask) { - unsigned int * max_reg = userdata; + int *max_reg = userdata; if (file == RC_FILE_TEMPORARY) - index > *max_reg ? *max_reg = index : 0; + (int)index > *max_reg ? *max_reg = index : 0; } -static void print_stats(struct radeon_compiler * c) +void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) { + int max_reg = -1; struct rc_instruction * tmp; - unsigned max_reg, insts, fc, tex, alpha, rgb, presub; - max_reg = insts = fc = tex = alpha = rgb = presub = 0; + memset(s, 0, sizeof(*s)); + for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; tmp = tmp->Next){ const struct rc_opcode_info * info; rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg); if (tmp->Type == RC_INSTRUCTION_NORMAL) { if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE) - presub++; + s->num_presub_ops++; info = rc_get_opcode_info(tmp->U.I.Opcode); } else { if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) - presub++; + s->num_presub_ops++; if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) - presub++; + s->num_presub_ops++; /* Assuming alpha will never be a flow control or * a tex instruction. */ if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP) - alpha++; + s->num_alpha_insts++; if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP) - rgb++; + s->num_rgb_insts++; info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); } if (info->IsFlowControl) - fc++; + s->num_fc_insts++; if (info->HasTexture) - tex++; - insts++; + s->num_tex_insts++; + s->num_insts++; } - if (insts < 4) - return; - fprintf(stderr,"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" - "~%4u Instructions\n" - "~%4u Vector Instructions (RGB)\n" - "~%4u Scalar Instructions (Alpha)\n" - "~%4u Flow Control Instructions\n" - "~%4u Texture Instructions\n" - "~%4u Presub Operations\n" - "~%4u Temporary Registers\n" - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - insts, rgb, alpha, fc, tex, presub, max_reg + 1); + s->num_temp_regs = max_reg + 1; } -/* Executes a list of compiler passes given in the parameter 'list'. */ -void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list, - const char *shader_name) +static void print_stats(struct radeon_compiler * c) { - if (c->Debug & RC_DBG_LOG) { - fprintf(stderr, "%s: before compilation\n", shader_name); - rc_print_program(&c->Program); + struct rc_program_stats s; + + rc_get_stats(c, &s); + + if (s.num_insts < 4) + return; + + switch (c->type) { + case RC_VERTEX_PROGRAM: + fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n" + "~%4u Instructions\n" + "~%4u Flow Control Instructions\n" + "~%4u Temporary Registers\n" + "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", + s.num_insts, s.num_fc_insts, s.num_temp_regs); + break; + + case RC_FRAGMENT_PROGRAM: + fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n" + "~%4u Instructions\n" + "~%4u Vector Instructions (RGB)\n" + "~%4u Scalar Instructions (Alpha)\n" + "~%4u Flow Control Instructions\n" + "~%4u Texture Instructions\n" + "~%4u Presub Operations\n" + "~%4u Temporary Registers\n" + "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", + s.num_insts, s.num_rgb_insts, s.num_alpha_insts, + s.num_fc_insts, s.num_tex_insts, s.num_presub_ops, + s.num_temp_regs); + break; + default: + assert(0); } +} +static const char *shader_name[RC_NUM_PROGRAM_TYPES] = { + "Vertex Program", + "Fragment Program" +}; + +void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list) +{ for (unsigned i = 0; list[i].name; i++) { if (list[i].predicate) { list[i].run(c, list[i].user); @@ -424,11 +451,23 @@ void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *lis return; if ((c->Debug & RC_DBG_LOG) && list[i].dump) { - fprintf(stderr, "%s: after '%s'\n", shader_name, list[i].name); + fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name); rc_print_program(&c->Program); } } } +} + +/* Executes a list of compiler passes given in the parameter 'list'. */ +void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list) +{ + if (c->Debug & RC_DBG_LOG) { + fprintf(stderr, "%s: before compilation\n", shader_name[c->type]); + rc_print_program(&c->Program); + } + + rc_run_compiler_passes(c, list); + if (c->Debug & RC_DBG_STATS) print_stats(c); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index 31fd469a04f..e6633395895 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -35,9 +35,16 @@ struct rc_swizzle_caps; +enum rc_program_type { + RC_VERTEX_PROGRAM, + RC_FRAGMENT_PROGRAM, + RC_NUM_PROGRAM_TYPES +}; + struct radeon_compiler { struct memory_pool Pool; struct rc_program Program; + enum rc_program_type type; unsigned Debug:2; unsigned Error:1; char * ErrorMsg; @@ -140,9 +147,21 @@ struct radeon_compiler_pass { void *user; /* Optional parameter which is passed to the run function. */ }; +struct rc_program_stats { + unsigned num_insts; + unsigned num_fc_insts; + unsigned num_tex_insts; + unsigned num_rgb_insts; + unsigned num_alpha_insts; + unsigned num_presub_ops; + unsigned num_temp_regs; +}; + +void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s); + /* Executes a list of compiler passes given in the parameter 'list'. */ -void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list, - const char *shader_name); +void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list); +void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list); void rc_validate_final_shader(struct radeon_compiler *c, void *user); #endif /* RADEON_COMPILER_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c index 97f4c758492..bf393a9fb16 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c @@ -31,6 +31,8 @@ #include "radeon_compiler_util.h" +#include "radeon_compiler.h" +#include "radeon_dataflow.h" /** */ unsigned int rc_swizzle_to_writemask(unsigned int swz) @@ -46,6 +48,91 @@ unsigned int rc_swizzle_to_writemask(unsigned int swz) return mask; } +rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) +{ + if (idx & 0x4) + return idx; + return GET_SWZ(swz, idx); +} + +unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) +{ + unsigned int ret = 0; + + ret |= get_swz(src, swz_x); + ret |= get_swz(src, swz_y) << 3; + ret |= get_swz(src, swz_z) << 6; + ret |= get_swz(src, swz_w) << 9; + + return ret; +} + +unsigned int combine_swizzles(unsigned int src, unsigned int swz) +{ + unsigned int ret = 0; + + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; + + return ret; +} + +/** + * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W + */ +rc_swizzle rc_mask_to_swizzle(unsigned int mask) +{ + switch (mask) { + case RC_MASK_X: return RC_SWIZZLE_X; + case RC_MASK_Y: return RC_SWIZZLE_Y; + case RC_MASK_Z: return RC_SWIZZLE_Z; + case RC_MASK_W: return RC_SWIZZLE_W; + } + return RC_SWIZZLE_UNUSED; +} + +/* Reorder mask bits according to swizzle. */ +unsigned swizzle_mask(unsigned swizzle, unsigned mask) +{ + unsigned ret = 0; + for (unsigned chan = 0; chan < 4; ++chan) { + unsigned swz = GET_SWZ(swizzle, chan); + if (swz < 4) + ret |= GET_BIT(mask, swz) << chan; + } + return ret; +} + +/** + * Left multiplication of a register with a swizzle + */ +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) +{ + struct rc_src_register tmp = srcreg; + int i; + tmp.Swizzle = 0; + tmp.Negate = 0; + for(i = 0; i < 4; ++i) { + rc_swizzle swz = GET_SWZ(swizzle, i); + if (swz < 4) { + tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; + } else { + tmp.Swizzle |= swz << (i*3); + } + } + return tmp; +} + +void reset_srcreg(struct rc_src_register* reg) +{ + memset(reg, 0, sizeof(struct rc_src_register)); + reg->Swizzle = RC_SWIZZLE_XYZW; +} + unsigned int rc_src_reads_dst_mask( rc_register_file src_file, unsigned int src_idx, @@ -59,3 +146,123 @@ unsigned int rc_src_reads_dst_mask( } return dst_mask & rc_swizzle_to_writemask(src_swz); } + +unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels) +{ + unsigned int chan; + unsigned int swz = RC_SWIZZLE_UNUSED; + unsigned int ret = RC_SOURCE_NONE; + + for(chan = 0; chan < channels; chan++) { + swz = GET_SWZ(swizzle, chan); + if (swz == RC_SWIZZLE_W) { + ret |= RC_SOURCE_ALPHA; + } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y + || swz == RC_SWIZZLE_Z) { + ret |= RC_SOURCE_RGB; + } + } + return ret; +} + +unsigned int rc_source_type_mask(unsigned int mask) +{ + unsigned int ret = RC_SOURCE_NONE; + + if (mask & RC_MASK_XYZ) + ret |= RC_SOURCE_RGB; + + if (mask & RC_MASK_W) + ret |= RC_SOURCE_ALPHA; + + return ret; +} + +struct can_use_presub_data { + struct rc_src_register RemoveSrcs[3]; + unsigned int RGBCount; + unsigned int AlphaCount; +}; + +static void can_use_presub_read_cb( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct can_use_presub_data * d = userdata; + unsigned int src_type = rc_source_type_mask(mask); + unsigned int i; + + if (file == RC_FILE_NONE) + return; + + for(i = 0; i < 3; i++) { + if (d->RemoveSrcs[i].File == file + && d->RemoveSrcs[i].Index == index) { + src_type &= + ~rc_source_type_swz(d->RemoveSrcs[i].Swizzle, 4); + } + } + + if (src_type & RC_SOURCE_RGB) + d->RGBCount++; + + if (src_type & RC_SOURCE_ALPHA) + d->AlphaCount++; +} + +unsigned int rc_inst_can_use_presub( + struct rc_instruction * inst, + rc_presubtract_op presub_op, + unsigned int presub_writemask, + struct rc_src_register replace_reg, + struct rc_src_register presub_src0, + struct rc_src_register presub_src1) +{ + struct can_use_presub_data d; + unsigned int num_presub_srcs; + unsigned int presub_src_type = rc_source_type_mask(presub_writemask); + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + + if (presub_op == RC_PRESUB_NONE) { + return 1; + } + + if (info->HasTexture) { + return 0; + } + + /* We can't use more than one presubtract value in an + * instruction, unless the two prsubtract operations + * are the same and read from the same registers. + * XXX For now we will limit instructions to only one presubtract + * value.*/ + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { + return 0; + } + + memset(&d, 0, sizeof(d)); + d.RemoveSrcs[0] = replace_reg; + d.RemoveSrcs[1] = presub_src0; + d.RemoveSrcs[2] = presub_src1; + + rc_for_all_reads_mask(inst, can_use_presub_read_cb, &d); + + num_presub_srcs = rc_presubtract_src_reg_count(presub_op); + + if ((presub_src_type & RC_SOURCE_RGB) + && d.RGBCount + num_presub_srcs > 3) { + return 0; + } + + if ((presub_src_type & RC_SOURCE_ALPHA) + && d.AlphaCount + num_presub_srcs > 3) { + return 0; + } + + return 1; +} + diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h index 1a14e7cb0ef..461ab9ffb10 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h @@ -3,8 +3,27 @@ #ifndef RADEON_PROGRAM_UTIL_H #define RADEON_PROGRAM_UTIL_H +struct rc_instruction; +struct rc_src_register; + unsigned int rc_swizzle_to_writemask(unsigned int swz); +rc_swizzle get_swz(unsigned int swz, rc_swizzle idx); + +unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, + rc_swizzle swz_z, rc_swizzle swz_w); + +unsigned int combine_swizzles(unsigned int src, unsigned int swz); + +rc_swizzle rc_mask_to_swizzle(unsigned int mask); + +unsigned swizzle_mask(unsigned swizzle, unsigned mask); + +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); + +void reset_srcreg(struct rc_src_register* reg); + unsigned int rc_src_reads_dst_mask( rc_register_file src_file, unsigned int src_idx, @@ -13,4 +32,16 @@ unsigned int rc_src_reads_dst_mask( unsigned int dst_idx, unsigned int dst_mask); +unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels); + +unsigned int rc_source_type_mask(unsigned int mask); + +unsigned int rc_inst_can_use_presub( + struct rc_instruction * inst, + rc_presubtract_op presub_op, + unsigned int presub_writemask, + struct rc_src_register replace_reg, + struct rc_src_register presub_src0, + struct rc_src_register presub_src1); + #endif /* RADEON_PROGRAM_UTIL_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index fd94194dc34..d0a64d936e0 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -139,7 +139,46 @@ static void pair_sub_for_all_args( const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); for(i = 0; i < info->NumSrcRegs; i++) { - cb(userdata, fullinst, &sub->Arg[i]); + unsigned int src_type; + unsigned int channels = 0; + if (&fullinst->U.P.RGB == sub) + channels = 3; + else if (&fullinst->U.P.Alpha == sub) + channels = 1; + + assert(channels > 0); + src_type = rc_source_type_swz(sub->Arg[i].Swizzle, channels); + + if (src_type == RC_SOURCE_NONE) + continue; + + if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) { + unsigned int presub_type; + unsigned int presub_src_count; + struct rc_pair_instruction_source * src_array; + unsigned int j; + if (src_type & RC_SOURCE_RGB) { + presub_type = fullinst-> + U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index; + src_array = fullinst->U.P.RGB.Src; + } else { + presub_type = fullinst-> + U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index; + src_array = fullinst->U.P.Alpha.Src; + } + presub_src_count + = rc_presubtract_src_reg_count(presub_type); + for(j = 0; j < presub_src_count; j++) { + cb(userdata, fullinst, &sub->Arg[i], + &src_array[j]); + } + } else { + struct rc_pair_instruction_source * src = + rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]); + if (src) { + cb(userdata, fullinst, &sub->Arg[i], src); + } + } } } @@ -308,6 +347,7 @@ static void remap_normal_instruction(struct rc_instruction * fullinst, { struct rc_sub_instruction * inst = &fullinst->U.I; const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + unsigned int remapped_presub = 0; if (opcode->HasDstReg) { rc_register_file file = inst->DstReg.File; @@ -327,6 +367,12 @@ static void remap_normal_instruction(struct rc_instruction * fullinst, unsigned int i; unsigned int srcp_srcs = rc_presubtract_src_reg_count( inst->PreSub.Opcode); + /* Make sure we only remap presubtract sources once in + * case more than one source register reads the + * presubtract result. */ + if (remapped_presub) + continue; + for(i = 0; i < srcp_srcs; i++) { file = inst->PreSub.SrcReg[i].File; index = inst->PreSub.SrcReg[i].Index; @@ -334,7 +380,7 @@ static void remap_normal_instruction(struct rc_instruction * fullinst, inst->PreSub.SrcReg[i].File = file; inst->PreSub.SrcReg[i].Index = index; } - + remapped_presub = 1; } else { cb(userdata, fullinst, &file, &index); @@ -430,12 +476,29 @@ static rc_opcode get_flow_control_inst(struct rc_instruction * inst) } +struct branch_write_mask { + unsigned int IfWriteMask:4; + unsigned int ElseWriteMask:4; + unsigned int HasElse:1; +}; + +union get_readers_read_cb { + rc_read_src_fn I; + rc_pair_read_arg_fn P; +}; + struct get_readers_callback_data { struct radeon_compiler * C; struct rc_reader_data * ReaderData; - rc_read_src_fn ReadCB; + rc_read_src_fn ReadNormalCB; + rc_pair_read_arg_fn ReadPairCB; rc_read_write_mask_fn WriteCB; + rc_register_file DstFile; + unsigned int DstIndex; + unsigned int DstMask; unsigned int AliveWriteMask; + /* For convenience, this is indexed starting at 1 */ + struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1]; }; static void add_reader( @@ -443,7 +506,7 @@ static void add_reader( struct rc_reader_data * data, struct rc_instruction * inst, unsigned int mask, - struct rc_src_register * src) + void * arg_or_src) { struct rc_reader * new; memory_pool_array_reserve(pool, struct rc_reader, data->Readers, @@ -451,7 +514,74 @@ static void add_reader( new = &data->Readers[data->ReaderCount++]; new->Inst = inst; new->WriteMask = mask; - new->Src = src; + if (inst->Type == RC_INSTRUCTION_NORMAL) { + new->U.Src = arg_or_src; + } else { + new->U.Arg = arg_or_src; + } +} + +static unsigned int get_readers_read_callback( + struct get_readers_callback_data * cb_data, + unsigned int has_rel_addr, + rc_register_file file, + unsigned int index, + unsigned int swizzle) +{ + unsigned int shared_mask, read_mask; + + if (has_rel_addr) { + cb_data->ReaderData->Abort = 1; + return RC_MASK_NONE; + } + + shared_mask = rc_src_reads_dst_mask(file, index, swizzle, + cb_data->DstFile, cb_data->DstIndex, cb_data->AliveWriteMask); + + if (shared_mask == RC_MASK_NONE) + return shared_mask; + + /* If we make it this far, it means that this source reads from the + * same register written to by d->ReaderData->Writer. */ + + read_mask = rc_swizzle_to_writemask(swizzle); + if (cb_data->ReaderData->AbortOnRead & read_mask) { + cb_data->ReaderData->Abort = 1; + return shared_mask; + } + + /* XXX The behavior in this case should be configurable. */ + if ((read_mask & cb_data->AliveWriteMask) != read_mask) { + cb_data->ReaderData->Abort = 1; + return shared_mask; + } + + return shared_mask; +} + +static void get_readers_pair_read_callback( + void * userdata, + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + unsigned int shared_mask; + struct get_readers_callback_data * d = userdata; + + shared_mask = get_readers_read_callback(d, + 0 /*Pair Instructions don't use RelAddr*/, + src->File, src->Index, arg->Swizzle); + + if (shared_mask == RC_MASK_NONE) + return; + + if (d->ReadPairCB) + d->ReadPairCB(d->ReaderData, inst, arg, src); + + if (d->ReaderData->Abort) + return; + + add_reader(&d->C->Pool, d->ReaderData, inst, shared_mask, arg); } /** @@ -464,37 +594,18 @@ static void get_readers_normal_read_callback( struct rc_src_register * src) { struct get_readers_callback_data * d = userdata; - unsigned int read_mask; unsigned int shared_mask; - if (src->RelAddr) - d->ReaderData->Abort = 1; - - shared_mask = rc_src_reads_dst_mask(src->File, src->Index, - src->Swizzle, - d->ReaderData->Writer->U.I.DstReg.File, - d->ReaderData->Writer->U.I.DstReg.Index, - d->AliveWriteMask); + shared_mask = get_readers_read_callback(d, + src->RelAddr, src->File, src->Index, src->Swizzle); if (shared_mask == RC_MASK_NONE) return; + /* The callback function could potentially clear d->ReaderData->Abort, + * so we need to call it before we return. */ + if (d->ReadNormalCB) + d->ReadNormalCB(d->ReaderData, inst, src); - /* If we make it this far, it means that this source reads from the - * same register written to by d->ReaderData->Writer. */ - - if (d->ReaderData->AbortOnRead) { - d->ReaderData->Abort = 1; - return; - } - - read_mask = rc_swizzle_to_writemask(src->Swizzle); - /* XXX The behavior in this case should be configurable. */ - if ((read_mask & d->AliveWriteMask) != read_mask) { - d->ReaderData->Abort = 1; - return; - } - - d->ReadCB(d->ReaderData, inst, src); if (d->ReaderData->Abort) return; @@ -515,29 +626,132 @@ static void get_readers_write_callback( { struct get_readers_callback_data * d = userdata; - if (index == d->ReaderData->Writer->U.I.DstReg.Index - && file == d->ReaderData->Writer->U.I.DstReg.File) { - unsigned int shared_mask = mask - & d->ReaderData->Writer->U.I.DstReg.WriteMask; - if (d->ReaderData->InElse) { - if (shared_mask & d->AliveWriteMask) { - /* We set AbortOnRead here because the - * destination register of d->ReaderData->Writer - * is written to in both the IF and the - * ELSE block of this IF/ELSE statement. - * This means that readers of this - * destination register that follow this IF/ELSE - * statement use the value of different - * instructions depending on the control flow - * decisions made by the program. */ - d->ReaderData->AbortOnRead = 1; + if (index == d->DstIndex && file == d->DstFile) { + unsigned int shared_mask = mask & d->DstMask; + d->ReaderData->AbortOnRead &= ~shared_mask; + d->AliveWriteMask &= ~shared_mask; + } + + if(d->WriteCB) + d->WriteCB(d->ReaderData, inst, file, index, mask); +} + +static void get_readers_for_single_write( + void * userdata, + struct rc_instruction * writer, + rc_register_file dst_file, + unsigned int dst_index, + unsigned int dst_mask) +{ + struct rc_instruction * tmp; + unsigned int branch_depth = 0; + struct get_readers_callback_data * d = userdata; + + d->ReaderData->Writer = writer; + d->ReaderData->AbortOnRead = 0; + d->ReaderData->InElse = 0; + d->DstFile = dst_file; + d->DstIndex = dst_index; + d->DstMask = dst_mask; + d->AliveWriteMask = dst_mask; + memset(d->BranchMasks, 0, sizeof(d->BranchMasks)); + + if (!dst_mask) + return; + + for(tmp = writer->Next; tmp != &d->C->Program.Instructions; + tmp = tmp->Next){ + rc_opcode opcode = get_flow_control_inst(tmp); + switch(opcode) { + case RC_OPCODE_BGNLOOP: + /* XXX We can do better when we see a BGNLOOP if we + * add a flag called AbortOnWrite to struct + * rc_reader_data and leave it set until the next + * ENDLOOP. */ + case RC_OPCODE_ENDLOOP: + /* XXX We can do better when we see an ENDLOOP by + * searching backwards from writer and looking for + * readers of writer's destination index. If we find a + * reader before we get to the BGNLOOP, we must abort + * unless there is another writer between that reader + * and the BGNLOOP. */ + case RC_OPCODE_BRK: + case RC_OPCODE_CONT: + d->ReaderData->Abort = 1; + return; + case RC_OPCODE_IF: + branch_depth++; + if (branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) { + d->ReaderData->Abort = 1; + return; + } + d->BranchMasks[branch_depth].IfWriteMask = + d->AliveWriteMask; + break; + case RC_OPCODE_ELSE: + if (branch_depth == 0) { + d->ReaderData->InElse = 1; + } else { + unsigned int temp_mask = d->AliveWriteMask; + d->AliveWriteMask = + d->BranchMasks[branch_depth].IfWriteMask; + d->BranchMasks[branch_depth].ElseWriteMask = + temp_mask; + d->BranchMasks[branch_depth].HasElse = 1; } + break; + case RC_OPCODE_ENDIF: + if (branch_depth == 0) { + d->ReaderData->AbortOnRead = d->AliveWriteMask; + d->ReaderData->InElse = 0; + } + else { + struct branch_write_mask * masks = + &d->BranchMasks[branch_depth]; + + if (masks->HasElse) { + d->ReaderData->AbortOnRead |= + masks->IfWriteMask + & ~masks->ElseWriteMask; + d->AliveWriteMask = masks->IfWriteMask + ^ ((masks->IfWriteMask ^ + masks->ElseWriteMask) + & (masks->IfWriteMask + ^ d->AliveWriteMask)); + } else { + d->ReaderData->AbortOnRead |= + masks->IfWriteMask + & ~d->AliveWriteMask; + d->AliveWriteMask = masks->IfWriteMask; + + } + memset(masks, 0, + sizeof(struct branch_write_mask)); + branch_depth--; + } + break; + default: + break; + } + + if (d->ReaderData->InElse) + continue; + + if (tmp->Type == RC_INSTRUCTION_NORMAL) { + rc_for_all_reads_src(tmp, + get_readers_normal_read_callback, d); } else { - d->AliveWriteMask &= ~shared_mask; + rc_pair_for_all_reads_arg(tmp, + get_readers_pair_read_callback, d); } - } + rc_for_all_writes_mask(tmp, get_readers_write_callback, d); - d->WriteCB(d->ReaderData, inst, file, index, mask); + if (d->ReaderData->Abort) + return; + + if (branch_depth == 0 && !d->AliveWriteMask) + return; + } } /** @@ -578,83 +792,26 @@ static void get_readers_write_callback( * @param write_cb This function will be called for every instruction after * writer. */ -void rc_get_readers_normal( +void rc_get_readers( struct radeon_compiler * c, struct rc_instruction * writer, struct rc_reader_data * data, - rc_read_src_fn read_cb, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, rc_read_write_mask_fn write_cb) { - struct rc_instruction * tmp; struct get_readers_callback_data d; - unsigned int branch_depth = 0; - data->Writer = writer; data->Abort = 0; - data->AbortOnRead = 0; - data->InElse = 0; data->ReaderCount = 0; data->ReadersReserved = 0; data->Readers = NULL; d.C = c; - d.AliveWriteMask = writer->U.I.DstReg.WriteMask; d.ReaderData = data; - d.ReadCB = read_cb; + d.ReadNormalCB = read_normal_cb; + d.ReadPairCB = read_pair_cb; d.WriteCB = write_cb; - if (!writer->U.I.DstReg.WriteMask) - return; - - for(tmp = writer->Next; tmp != &c->Program.Instructions; - tmp = tmp->Next){ - rc_opcode opcode = get_flow_control_inst(tmp); - switch(opcode) { - case RC_OPCODE_BGNLOOP: - /* XXX We can do better when we see a BGNLOOP if we - * add a flag called AbortOnWrite to struct - * rc_reader_data and leave it set until the next - * ENDLOOP. */ - case RC_OPCODE_ENDLOOP: - /* XXX We can do better when we see an ENDLOOP by - * searching backwards from writer and looking for - * readers of writer's destination index. If we find a - * reader before we get to the BGNLOOP, we must abort - * unless there is another writer between that reader - * and the BGNLOOP. */ - data->Abort = 1; - return; - case RC_OPCODE_IF: - /* XXX We can do better here, but this will have to - * do until this dataflow analysis is more mature. */ - data->Abort = 1; - branch_depth++; - break; - case RC_OPCODE_ELSE: - if (branch_depth == 0) - data->InElse = 1; - break; - case RC_OPCODE_ENDIF: - if (branch_depth == 0) { - data->AbortOnRead = 1; - data->InElse = 0; - } - else { - branch_depth--; - } - break; - default: - break; - } - - if (!data->InElse) - rc_for_all_reads_src(tmp, get_readers_normal_read_callback, &d); - rc_for_all_writes_mask(tmp, get_readers_write_callback, &d); - - if (data->Abort) - return; - - if (!d.AliveWriteMask) - return; - } + rc_for_all_writes_mask(writer, get_readers_for_single_write, &d); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h index 7de6b98f763..ef971c5b234 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h @@ -36,6 +36,7 @@ struct rc_instruction; struct rc_swizzle_caps; struct rc_src_register; struct rc_pair_instruction_arg; +struct rc_pair_instruction_source; struct rc_compiler; @@ -59,7 +60,8 @@ void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb, void * userdata); typedef void (*rc_pair_read_arg_fn)(void * userdata, - struct rc_instruction * inst, struct rc_pair_instruction_arg * arg); + struct rc_instruction * inst, struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src); void rc_pair_for_all_reads_arg(struct rc_instruction * inst, rc_pair_read_arg_fn cb, void * userdata); @@ -71,7 +73,10 @@ void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, v struct rc_reader { struct rc_instruction * Inst; unsigned int WriteMask; - struct rc_src_register * Src; + union { + struct rc_src_register * Src; + struct rc_pair_instruction_arg * Arg; + } U; }; struct rc_reader_data { @@ -87,14 +92,13 @@ struct rc_reader_data { void * CbData; }; -void rc_get_readers_normal( +void rc_get_readers( struct radeon_compiler * c, - struct rc_instruction * inst, + struct rc_instruction * writer, struct rc_reader_data * data, - /*XXX: These should be their own function types. */ - rc_read_src_fn read_cb, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, rc_read_write_mask_fn write_cb); - /** * Compiler passes based on dataflow analysis. */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index da495a3afaa..25afd272bee 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -67,6 +67,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .IsComponentwise = 1 }, { + .Opcode = RC_OPCODE_CLAMP, + .Name = "CLAMP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { .Opcode = RC_OPCODE_CMP, .Name = "CMP", .NumSrcRegs = 3, @@ -453,6 +460,7 @@ void rc_compute_sources_for_writemask( srcmasks[1] |= RC_MASK_XY; break; case RC_OPCODE_DP3: + case RC_OPCODE_XPD: srcmasks[0] |= RC_MASK_XYZ; srcmasks[1] |= RC_MASK_XYZ; break; @@ -460,6 +468,10 @@ void rc_compute_sources_for_writemask( srcmasks[0] |= RC_MASK_XYZW; srcmasks[1] |= RC_MASK_XYZW; break; + case RC_OPCODE_DPH: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZW; + break; case RC_OPCODE_TXB: case RC_OPCODE_TXP: srcmasks[0] |= RC_MASK_W; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index d3f639c8701..7e666101276 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -50,6 +50,9 @@ typedef enum { /** vec4 instruction: dst.c = ceil(src0.c) */ RC_OPCODE_CEIL, + /** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */ + RC_OPCODE_CLAMP, + /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ RC_OPCODE_CMP, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 15b9c5e7dc3..44f4c0fbdc7 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -54,12 +54,7 @@ static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct combine.Negate = outer.Negate; } else { combine.Abs = inner.Abs; - combine.Negate = 0; - for(unsigned int chan = 0; chan < 4; ++chan) { - unsigned int swz = GET_SWZ(outer.Swizzle, chan); - if (swz < 4) - combine.Negate |= GET_BIT(inner.Negate, swz) << chan; - } + combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); combine.Negate ^= outer.Negate; } combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); @@ -71,12 +66,13 @@ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, { rc_register_file file = src->File; struct rc_reader_data * reader_data = data; - const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); - /* It is possible to do copy propigation in this situation, - * just not right now, see peephole_add_presub_inv() */ - if (reader_data->Writer->U.I.PreSub.Opcode != RC_PRESUB_NONE && - (info->NumSrcRegs > 2 || info->HasTexture)) { + if(!rc_inst_can_use_presub(inst, + reader_data->Writer->U.I.PreSub.Opcode, + rc_swizzle_to_writemask(src->Swizzle), + *src, + reader_data->Writer->U.I.PreSub.SrcReg[0], + reader_data->Writer->U.I.PreSub.SrcReg[1])) { reader_data->Abort = 1; return; } @@ -112,11 +108,11 @@ static void src_clobbered_reads_cb( && src->Index == sc_data->Index && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { - sc_data->ReaderData->AbortOnRead = 1; + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; } if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { - sc_data->ReaderData->AbortOnRead = 1; + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; } } @@ -149,8 +145,9 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i return; /* Get a list of all the readers of this MOV instruction. */ - rc_get_readers_normal(c, inst_mov, &reader_data, - copy_propagate_scan_read, is_src_clobbered_scan_write); + rc_get_readers(c, inst_mov, &reader_data, + copy_propagate_scan_read, NULL, + is_src_clobbered_scan_write); if (reader_data.Abort || reader_data.ReaderCount == 0) return; @@ -158,7 +155,7 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i /* Propagate the MOV instruction. */ for (i = 0; i < reader_data.ReaderCount; i++) { struct rc_instruction * inst = reader_data.Readers[i].Inst; - *reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, inst_mov->U.I.SrcReg[0]); + *reader_data.Readers[i].U.Src = chain_srcregs(*reader_data.Readers[i].U.Src, inst_mov->U.I.SrcReg[0]); if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) inst->U.I.PreSub = inst_mov->U.I.PreSub; @@ -423,24 +420,13 @@ static void presub_scan_read( struct rc_src_register * src) { struct rc_reader_data * reader_data = data; - const struct rc_opcode_info * info = - rc_get_opcode_info(inst->U.I.Opcode); - /* XXX: There are some situations where instructions - * with more than 2 src registers can use the - * presubtract select, but to keep things simple we - * will disable presubtract on these instructions for - * now. */ - if (info->NumSrcRegs > 2 || info->HasTexture) { - reader_data->Abort = 1; - return; - } + rc_presubtract_op * presub_opcode = reader_data->CbData; - /* We can't use more than one presubtract value in an - * instruction, unless the two prsubtract operations - * are the same and read from the same registers. - * XXX For now we will limit instructions to only one presubtract - * value.*/ - if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { + if (!rc_inst_can_use_presub(inst, *presub_opcode, + reader_data->Writer->U.I.DstReg.WriteMask, + *src, + reader_data->Writer->U.I.SrcReg[0], + reader_data->Writer->U.I.SrcReg[1])) { reader_data->Abort = 1; return; } @@ -454,8 +440,10 @@ static int presub_helper( { struct rc_reader_data reader_data; unsigned int i; + rc_presubtract_op cb_op = presub_opcode; - rc_get_readers_normal(c, inst_add, &reader_data, presub_scan_read, + reader_data.CbData = &cb_op; + rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, is_src_clobbered_scan_write); if (reader_data.Abort || reader_data.ReaderCount == 0) @@ -468,7 +456,7 @@ static int presub_helper( rc_get_opcode_info(reader.Inst->U.I.Opcode); for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { - if (&reader.Inst->U.I.SrcReg[src_index] == reader.Src) + if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.Src) presub_replace(inst_add, reader.Inst, src_index); } } @@ -505,7 +493,9 @@ static void presub_replace_add( inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; } -static int is_presub_candidate(struct rc_instruction * inst) +static int is_presub_candidate( + struct radeon_compiler * c, + struct rc_instruction * inst) { const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); unsigned int i; @@ -514,7 +504,12 @@ static int is_presub_candidate(struct rc_instruction * inst) return 0; for(i = 0; i < info->NumSrcRegs; i++) { - if (src_reads_dst_mask(inst->U.I.SrcReg[i], inst->U.I.DstReg)) + struct rc_src_register src = inst->U.I.SrcReg[i]; + if (src_reads_dst_mask(src, inst->U.I.DstReg)) + return 0; + + src.File = RC_FILE_PRESUB; + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) return 0; } return 1; @@ -528,7 +523,7 @@ static int peephole_add_presub_add( struct rc_src_register * src1 = NULL; unsigned int i; - if (!is_presub_candidate(inst_add)) + if (!is_presub_candidate(c, inst_add)) return 0; if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) @@ -592,7 +587,7 @@ static int peephole_add_presub_inv( { unsigned int i, swz, mask; - if (!is_presub_candidate(inst_add)) + if (!is_presub_candidate(c, inst_add)) return 0; mask = inst_add->U.I.DstReg.WriteMask; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index 91524f5ec68..d53181e1f75 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -66,10 +66,13 @@ struct regalloc_state { struct hardware_register * HwTemporary; unsigned int NumHwTemporaries; /** - * If an instruction is inside of a loop, end_loop will be the - * IP of the ENDLOOP instruction, otherwise end_loop will be 0 + * If an instruction is inside of a loop, EndLoop will be the + * IP of the ENDLOOP instruction, and BeginLoop will be the IP + * of the BGNLOOP instruction. Otherwise, EndLoop and BeginLoop + * will be -1. */ - int end_loop; + int EndLoop; + int BeginLoop; }; static void print_live_intervals(struct live_intervals * src) @@ -180,11 +183,13 @@ static void scan_callback(void * data, struct rc_instruction * inst, reg->Used = 1; if (file == RC_FILE_INPUT) reg->Live.Start = -1; + else if (s->BeginLoop >= 0) + reg->Live.Start = s->BeginLoop; else reg->Live.Start = inst->IP; reg->Live.End = inst->IP; - } else if (s->end_loop) - reg->Live.End = s->end_loop; + } else if (s->EndLoop >= 0) + reg->Live.End = s->EndLoop; else if (inst->IP > reg->Live.End) reg->Live.End = inst->IP; } @@ -195,6 +200,8 @@ static void compute_live_intervals(struct radeon_compiler *c, memset(s, 0, sizeof(*s)); s->C = c; s->NumHwTemporaries = c->max_temp_regs; + s->BeginLoop = -1; + s->EndLoop = -1; s->HwTemporary = memory_pool_malloc(&c->Pool, s->NumHwTemporaries * sizeof(struct hardware_register)); @@ -207,10 +214,12 @@ static void compute_live_intervals(struct radeon_compiler *c, inst = inst->Next) { /* For all instructions inside of a loop, the ENDLOOP - * instruction is used as the end of the live interval. */ - if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && !s->end_loop) { + * instruction is used as the end of the live interval and + * the BGNLOOP instruction is used as the beginning. */ + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && s->EndLoop < 0) { int loops = 1; struct rc_instruction * tmp; + s->BeginLoop = inst->IP; for(tmp = inst->Next; tmp != &s->C->Program.Instructions; tmp = tmp->Next) { @@ -219,15 +228,17 @@ static void compute_live_intervals(struct radeon_compiler *c, } else if (tmp->U.I.Opcode == RC_OPCODE_ENDLOOP) { if(!--loops) { - s->end_loop = tmp->IP; + s->EndLoop = tmp->IP; break; } } } } - if (inst->IP == s->end_loop) - s->end_loop = 0; + if (inst->IP == s->EndLoop) { + s->EndLoop = -1; + s->BeginLoop = -1; + } rc_for_all_reads_mask(inst, scan_callback, s); rc_for_all_writes_mask(inst, scan_callback, s); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index 553e9dcf7c1..9beb5d63579 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -30,6 +30,7 @@ #include <stdio.h> #include "radeon_compiler.h" +#include "radeon_compiler_util.h" #include "radeon_dataflow.h" @@ -54,6 +55,11 @@ struct schedule_instruction { * this instruction can be scheduled. */ unsigned int NumDependencies:5; + + /** List of all readers (see rc_get_readers() for the definition of + * "all readers"), even those outside the basic block this instruction + * lives in. */ + struct rc_reader_data GlobalReaders; }; @@ -94,6 +100,16 @@ struct register_state { struct reg_value * Values[4]; }; +struct remap_reg { + struct rc_instruciont * Inst; + unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1); + unsigned int OldSwizzle:3; + unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1); + unsigned int NewSwizzle:3; + unsigned int OnlyTexReads:1; + struct remap_reg * Next; +}; + struct schedule_state { struct radeon_compiler * C; struct schedule_instruction * Current; @@ -126,15 +142,6 @@ static struct reg_value ** get_reg_valuep(struct schedule_state * s, return &s->Temporary[index].Values[chan]; } -static struct reg_value * get_reg_value(struct schedule_state * s, - rc_register_file file, unsigned int index, unsigned int chan) -{ - struct reg_value ** pv = get_reg_valuep(s, file, index, chan); - if (!pv) - return 0; - return *pv; -} - static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst) { inst->NextReady = *list; @@ -295,12 +302,12 @@ static int merge_presub_sources( assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP); switch(type) { - case RC_PAIR_SOURCE_RGB: + case RC_SOURCE_RGB: is_rgb = 1; is_alpha = 0; dst_sub = &dst_full->RGB; break; - case RC_PAIR_SOURCE_ALPHA: + case RC_SOURCE_ALPHA: is_rgb = 0; is_alpha = 1; dst_sub = &dst_full->Alpha; @@ -341,6 +348,8 @@ static int merge_presub_sources( continue; free_source = rc_pair_alloc_source(dst_full, is_rgb, is_alpha, temp.File, temp.Index); + if (free_source < 0) + return 0; one_way = 1; } else { dst_sub->Src[free_source] = temp; @@ -356,11 +365,11 @@ static int merge_presub_sources( for(arg = 0; arg < info->NumSrcRegs; arg++) { /*If this arg does not read from an rgb source, * do nothing. */ - if (!(rc_source_type_that_arg_reads( - dst_full->RGB.Arg[arg].Source, - dst_full->RGB.Arg[arg].Swizzle) & type)) { + if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle, + 3) & type)) { continue; } + if (dst_full->RGB.Arg[arg].Source == srcp_src) dst_full->RGB.Arg[arg].Source = free_source; /* We need to do this just in case register @@ -392,13 +401,13 @@ static int destructive_merge_instructions( /* Merge the rgb presubtract registers. */ if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) { - if (!merge_presub_sources(rgb, alpha->RGB, RC_PAIR_SOURCE_RGB)) { + if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) { return 0; } } /* Merge the alpha presubtract registers */ if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) { - if(!merge_presub_sources(rgb, alpha->Alpha, RC_PAIR_SOURCE_ALPHA)){ + if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){ return 0; } } @@ -525,6 +534,222 @@ static void presub_nop(struct rc_instruction * emitted) { } } } + +static void rgb_to_alpha_remap ( + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + rc_register_file old_file, + rc_swizzle old_swz, + unsigned int new_index) +{ + int new_src_index; + unsigned int i; + struct rc_pair_instruction_source * old_src = + rc_pair_get_src(&inst->U.P, arg); + if (!old_src) { + return; + } + + for (i = 0; i < 3; i++) { + if (get_swz(arg->Swizzle, i) == old_swz) { + SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W); + } + } + memset(old_src, 0, sizeof(struct rc_pair_instruction_source)); + new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1, + old_file, new_index); + /* This conversion is not possible, we must have made a mistake in + * is_rgb_to_alpha_possible. */ + if (new_src_index < 0) { + assert(0); + return; + } + + arg->Source = new_src_index; +} + +static int can_remap(unsigned int opcode) +{ + switch(opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + return 0; + default: + return 1; + } +} + +static int can_convert_opcode_to_alpha(unsigned int opcode) +{ + switch(opcode) { + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + case RC_OPCODE_DP2: + case RC_OPCODE_DP3: + case RC_OPCODE_DP4: + case RC_OPCODE_DPH: + return 0; + default: + return 1; + } +} + +static void is_rgb_to_alpha_possible( + void * userdata, + struct rc_instruction * inst, + struct rc_pair_instruction_arg * arg, + struct rc_pair_instruction_source * src) +{ + unsigned int chan_count = 0; + unsigned int alpha_sources = 0; + unsigned int i; + struct rc_reader_data * reader_data = userdata; + + if (!can_remap(inst->U.P.RGB.Opcode) + || !can_remap(inst->U.P.Alpha.Opcode)) { + reader_data->Abort = 1; + return; + } + + if (!src) + return; + + /* XXX There are some cases where we can still do the conversion if + * a reader reads from a presubtract source, but for now we'll prevent + * it. */ + if (arg->Source == RC_PAIR_PRESUB_SRC) { + reader_data->Abort = 1; + return; + } + + /* Make sure the source only reads from one component. + * XXX We should allow the source to read from the same component twice. + * XXX If the index we will be converting to is the same as the + * current index, then it is OK to read from more than one component. + */ + for (i = 0; i < 3; i++) { + rc_swizzle swz = get_swz(arg->Swizzle, i); + switch(swz) { + case RC_SWIZZLE_X: + case RC_SWIZZLE_Y: + case RC_SWIZZLE_Z: + case RC_SWIZZLE_W: + chan_count++; + break; + default: + break; + } + } + if (chan_count > 1) { + reader_data->Abort = 1; + return; + } + + /* Make sure there are enough alpha sources. + * XXX If we know what register all the readers are going + * to be remapped to, then in some situations we can still do + * the subsitution, even if all 3 alpha sources are being used.*/ + for (i = 0; i < 3; i++) { + if (inst->U.P.Alpha.Src[i].Used) { + alpha_sources++; + } + } + if (alpha_sources > 2) { + reader_data->Abort = 1; + return; + } +} + +static int convert_rgb_to_alpha( + struct schedule_state * s, + struct schedule_instruction * sched_inst) +{ + struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P; + unsigned int old_mask = pair_inst->RGB.WriteMask; + unsigned int old_swz = rc_mask_to_swizzle(old_mask); + const struct rc_opcode_info * info = + rc_get_opcode_info(pair_inst->RGB.Opcode); + int new_index = -1; + unsigned int i; + + if (sched_inst->GlobalReaders.Abort) + return 0; + + if (!pair_inst->RGB.WriteMask) + return 0; + + if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode) + || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) { + return 0; + } + + assert(sched_inst->NumWriteValues == 1); + + if (!sched_inst->WriteValues[0]) { + assert(0); + return 0; + } + + /* We start at the old index, because if we can reuse the same + * register and just change the swizzle then it is more likely we + * will be able to convert all the readers. */ + for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) { + struct reg_value ** new_regvalp = get_reg_valuep( + s, RC_FILE_TEMPORARY, i, 3); + if (!*new_regvalp) { + struct reg_value ** old_regvalp = + get_reg_valuep(s, + RC_FILE_TEMPORARY, + pair_inst->RGB.DestIndex, + rc_mask_to_swizzle(old_mask)); + new_index = i; + *new_regvalp = *old_regvalp; + *old_regvalp = NULL; + new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3); + break; + } + } + if (new_index < 0) { + return 0; + } + + pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode; + pair_inst->Alpha.DestIndex = new_index; + pair_inst->Alpha.WriteMask = 1; + pair_inst->Alpha.Target = pair_inst->RGB.Target; + pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask; + pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask; + pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate; + memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg, + sizeof(pair_inst->Alpha.Arg)); + /* Move the swizzles into the first chan */ + for (i = 0; i < info->NumSrcRegs; i++) { + unsigned int j; + for (j = 0; j < 3; j++) { + unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j); + if (swz != RC_SWIZZLE_UNUSED) { + pair_inst->Alpha.Arg[i].Swizzle = swz; + break; + } + } + } + pair_inst->RGB.Opcode = RC_OPCODE_NOP; + pair_inst->RGB.DestIndex = 0; + pair_inst->RGB.WriteMask = 0; + pair_inst->RGB.Target = 0; + pair_inst->RGB.OutputWriteMask = 0; + pair_inst->RGB.DepthWriteMask = 0; + pair_inst->RGB.Saturate = 0; + memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg)); + + for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) { + struct rc_reader reader = sched_inst->GlobalReaders.Readers[i]; + rgb_to_alpha_remap(reader.Inst, reader.U.Arg, + RC_FILE_TEMPORARY, old_swz, new_index); + } + return 1; +} + /** * Find a good ALU instruction or pair of ALU instruction and emit it. * @@ -536,24 +761,16 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor { struct schedule_instruction * sinst; - if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { - if (s->ReadyFullALU) { - sinst = s->ReadyFullALU; - s->ReadyFullALU = s->ReadyFullALU->NextReady; - } else if (s->ReadyRGB) { - sinst = s->ReadyRGB; - s->ReadyRGB = s->ReadyRGB->NextReady; - } else { - sinst = s->ReadyAlpha; - s->ReadyAlpha = s->ReadyAlpha->NextReady; - } - + if (s->ReadyFullALU) { + sinst = s->ReadyFullALU; + s->ReadyFullALU = s->ReadyFullALU->NextReady; rc_insert_instruction(before->Prev, sinst->Instruction); commit_alu_instruction(s, sinst); } else { struct schedule_instruction **prgb; struct schedule_instruction **palpha; - + struct schedule_instruction *prev; +pair: /* Some pairings might fail because they require too * many source slots; try all possible pairings if necessary */ for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { @@ -572,10 +789,43 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor goto success; } } - - /* No success in pairing; just take the first RGB instruction */ - sinst = s->ReadyRGB; - s->ReadyRGB = s->ReadyRGB->NextReady; + prev = NULL; + /* No success in pairing, now try to convert one of the RGB + * instructions to an Alpha so we can pair it with another RGB. + */ + if (s->ReadyRGB && s->ReadyRGB->NextReady) { + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + if ((*prgb)->NumWriteValues == 1) { + struct schedule_instruction * prgb_next; + if (!convert_rgb_to_alpha(s, *prgb)) + goto cont_loop; + prgb_next = (*prgb)->NextReady; + /* Add instruction to the Alpha ready list. */ + (*prgb)->NextReady = s->ReadyAlpha; + s->ReadyAlpha = *prgb; + /* Remove instruction from the RGB ready list.*/ + if (prev) + prev->NextReady = prgb_next; + else + s->ReadyRGB = prgb_next; + goto pair; + } +cont_loop: + prev = *prgb; + } + } + /* Still no success in pairing, just take the first RGB + * or alpha instruction. */ + if (s->ReadyRGB) { + sinst = s->ReadyRGB; + s->ReadyRGB = s->ReadyRGB->NextReady; + } else if (s->ReadyAlpha) { + sinst = s->ReadyAlpha; + s->ReadyAlpha = s->ReadyAlpha->NextReady; + } else { + /*XXX Something real bad has happened. */ + assert(0); + } rc_insert_instruction(before->Prev, sinst->Instruction); commit_alu_instruction(s, sinst); @@ -591,13 +841,13 @@ static void scan_read(void * data, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int chan) { struct schedule_state * s = data; - struct reg_value * v = get_reg_value(s, file, index, chan); + struct reg_value ** v = get_reg_valuep(s, file, index, chan); struct reg_value_reader * reader; if (!v) return; - if (v->Writer == s->Current) { + if (*v && (*v)->Writer == s->Current) { /* The instruction reads and writes to a register component. * In this case, we only want to increment dependencies by one. */ @@ -608,16 +858,28 @@ static void scan_read(void * data, struct rc_instruction * inst, reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); reader->Reader = s->Current; - reader->Next = v->Readers; - v->Readers = reader; - v->NumReaders++; - - s->Current->NumDependencies++; + if (!*v) { + /* In this situation, the instruction reads from a register + * that hasn't been written to or read from in the current + * block. */ + *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value)); + memset(*v, 0, sizeof(struct reg_value)); + (*v)->Readers = reader; + } else { + reader->Next = (*v)->Readers; + (*v)->Readers = reader; + /* Only update the current instruction's dependencies if the + * register it reads from has been written to in this block. */ + if ((*v)->Writer) { + s->Current->NumDependencies++; + } + } + (*v)->NumReaders++; if (s->Current->NumReadValues >= 12) { rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__); } else { - s->Current->ReadValues[s->Current->NumReadValues++] = v; + s->Current->ReadValues[s->Current->NumReadValues++] = *v; } } @@ -652,6 +914,16 @@ static void scan_write(void * data, struct rc_instruction * inst, } } +static void is_rgb_to_alpha_possible_normal( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct rc_reader_data * reader_data = userdata; + reader_data->Abort = 1; + +} + static void schedule_block(struct r300_fragment_program_compiler * c, struct rc_instruction * begin, struct rc_instruction * end) { @@ -683,6 +955,11 @@ static void schedule_block(struct r300_fragment_program_compiler * c, if (!s.Current->NumDependencies) instruction_ready(&s, s.Current); + + /* Get global readers for possible RGB->Alpha conversion. */ + rc_get_readers(s.C, inst, &s.Current->GlobalReaders, + is_rgb_to_alpha_possible_normal, + is_rgb_to_alpha_possible, NULL); } /* Temporarily unlink all instructions */ @@ -711,8 +988,13 @@ static int is_controlflow(struct rc_instruction * inst) void rc_pair_schedule(struct radeon_compiler *cc, void *user) { + struct schedule_state s; + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; struct rc_instruction * inst = c->Base.Program.Instructions.Next; + + memset(&s, 0, sizeof(s)); + s.C = &c->Base; while(inst != &c->Base.Program.Instructions) { struct rc_instruction * first; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c index c549be52183..fc05366f50e 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -280,9 +280,12 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, pair->RGB.DestIndex = inst->DstReg.Index; pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; } + if (needalpha) { - pair->Alpha.DestIndex = inst->DstReg.Index; pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + if (pair->Alpha.WriteMask) { + pair->Alpha.DestIndex = inst->DstReg.Index; + } } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c index 24b685fbeb4..fe5756ebc45 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -30,6 +30,7 @@ #include <stdio.h> #include "radeon_compiler.h" +#include "radeon_dataflow.h" /** @@ -70,58 +71,98 @@ void rc_local_transform( } } +struct get_used_temporaries_data { + unsigned char * Used; + unsigned int UsedLength; +}; + +static void get_used_temporaries_cb( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct get_used_temporaries_data * d = userdata; + + if (file != RC_FILE_TEMPORARY) + return; + + if (index >= d->UsedLength) + return; + + d->Used[index] |= mask; +} + /** - * Left multiplication of a register with a swizzle + * This function fills in the parameter 'used' with a writemask that + * represent which components of each temporary register are used by the + * program. This is meant to be combined with rc_find_free_temporary_list as a + * more efficient version of rc_find_free_temporary. + * @param used The function does not initialize this parameter. */ -struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) +void rc_get_used_temporaries( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length) +{ + struct rc_instruction * inst; + struct get_used_temporaries_data d; + d.Used = used; + d.UsedLength = used_length; + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + + rc_for_all_reads_mask(inst, get_used_temporaries_cb, &d); + rc_for_all_writes_mask(inst, get_used_temporaries_cb, &d); + } +} + +/* Search a list of used temporaries for a free one + * \sa rc_get_used_temporaries + * @note If this functions finds a free temporary, it will mark it as used + * in the used temporary list (param 'used') + * @param used list of used temporaries + * @param used_length number of items in param 'used' + * @param mask which components must be free in the temporary index that is + * returned. + * @return -1 If there are no more free temporaries, otherwise the index of + * a temporary register where the components specified in param 'mask' are + * not being used. + */ +int rc_find_free_temporary_list( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length, + unsigned int mask) { - struct rc_src_register tmp = srcreg; int i; - tmp.Swizzle = 0; - tmp.Negate = 0; - for(i = 0; i < 4; ++i) { - rc_swizzle swz = GET_SWZ(swizzle, i); - if (swz < 4) { - tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); - tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; - } else { - tmp.Swizzle |= swz << (i*3); + for(i = 0; i < used_length; i++) { + if ((~used[i] & mask) == mask) { + used[i] |= mask; + return i; } } - return tmp; + return -1; } unsigned int rc_find_free_temporary(struct radeon_compiler * c) { - char used[RC_REGISTER_MAX_INDEX]; - unsigned int i; - struct rc_instruction * rcinst; + unsigned char used[RC_REGISTER_MAX_INDEX]; + int free; memset(used, 0, sizeof(used)); - for (rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) { - const struct rc_sub_instruction *inst = &rcinst->U.I; - const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode); - unsigned int k; - - for (k = 0; k < opcode->NumSrcRegs; k++) { - if (inst->SrcReg[k].File == RC_FILE_TEMPORARY) - used[inst->SrcReg[k].Index] = 1; - } - - if (opcode->HasDstReg) { - if (inst->DstReg.File == RC_FILE_TEMPORARY) - used[inst->DstReg.Index] = 1; - } - } + rc_get_used_temporaries(c, used, RC_REGISTER_MAX_INDEX); - for (i = 0; i < RC_REGISTER_MAX_INDEX; i++) { - if (!used[i]) - return i; + free = rc_find_free_temporary_list(c, used, RC_REGISTER_MAX_INDEX, + RC_MASK_XYZW); + if (free < 0) { + rc_error(c, "Ran out of temporary registers\n"); + return 0; } - - rc_error(c, "Ran out of temporary registers\n"); - return 0; + return free; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index f0a77d7b539..df6c94b35f9 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -159,47 +159,6 @@ struct rc_program { struct rc_constant_list Constants; }; -static inline rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) -{ - if (idx & 0x4) - return idx; - return GET_SWZ(swz, idx); -} - -static inline unsigned int combine_swizzles4(unsigned int src, - rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) -{ - unsigned int ret = 0; - - ret |= get_swz(src, swz_x); - ret |= get_swz(src, swz_y) << 3; - ret |= get_swz(src, swz_z) << 6; - ret |= get_swz(src, swz_w) << 9; - - return ret; -} - -static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz) -{ - unsigned int ret = 0; - - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; - ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; - - return ret; -} - -struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); - -static inline void reset_srcreg(struct rc_src_register* reg) -{ - memset(reg, 0, sizeof(struct rc_src_register)); - reg->Swizzle = RC_SWIZZLE_XYZW; -} - - /** * A transformation that can be passed to \ref rc_local_transform. * @@ -222,6 +181,17 @@ void rc_local_transform( struct radeon_compiler *c, void *user); +void rc_get_used_temporaries( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length); + +int rc_find_free_temporary_list( + struct radeon_compiler * c, + unsigned char * used, + unsigned int used_length, + unsigned int mask); + unsigned int rc_find_free_temporary(struct radeon_compiler * c); struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); @@ -233,4 +203,5 @@ unsigned int rc_recompute_ips(struct radeon_compiler * c); void rc_print_program(const struct rc_program *prog); +rc_swizzle rc_mask_to_swizzle(unsigned int mask); #endif diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 39408845d5a..58977a40c7c 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -36,6 +36,7 @@ #include "radeon_program_alu.h" #include "radeon_compiler.h" +#include "radeon_compiler_util.h" static struct rc_instruction *emit1( @@ -84,16 +85,6 @@ static struct rc_instruction *emit3( return fpi; } -static struct rc_dst_register dstreg(int file, int index) -{ - struct rc_dst_register dst; - dst.File = file; - dst.Index = index; - dst.WriteMask = RC_MASK_XYZW; - dst.RelAddr = 0; - return dst; -} - static struct rc_dst_register dstregtmpmask(int index, int mask) { struct rc_dst_register dst = {0}; @@ -186,6 +177,38 @@ static struct rc_src_register swizzle_wwww(struct rc_src_register reg) return swizzle_smear(reg, RC_SWIZZLE_W); } +static int is_dst_safe_to_reuse(struct rc_instruction *inst) +{ + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + assert(info->HasDstReg); + + if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) + return 0; + + for (i = 0; i < info->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index) + return 0; + } + + return 1; +} + +static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + unsigned tmp; + + if (is_dst_safe_to_reuse(inst)) + tmp = inst->U.I.DstReg.Index; + else + tmp = rc_find_free_temporary(c); + + return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask); +} + static void transform_ABS(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -209,10 +232,26 @@ static void transform_CEIL(struct radeon_compiler* c, * ceil(x) = x+frac(-x) */ - int tempreg = rc_find_free_temporary(c); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0])); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0])); emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg)); + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index)); + rc_remove_instruction(inst); +} + +static void transform_CLAMP(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* CLAMP dst, src, min, max + * into: + * MIN tmp, src, max + * MAX dst, tmp, min + */ + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst, + inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]); + emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]); rc_remove_instruction(inst); } @@ -258,10 +297,10 @@ static void transform_DST(struct radeon_compiler* c, static void transform_FLR(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); - emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0]); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]); emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg))); + inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); rc_remove_instruction(inst); } @@ -351,14 +390,14 @@ static void transform_LIT(struct radeon_compiler* c, static void transform_LRP(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_ADD, 0, - dstreg(RC_FILE_TEMPORARY, tempreg), + dst, inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, - inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[2]); + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]); rc_remove_instruction(inst); } @@ -366,9 +405,8 @@ static void transform_LRP(struct radeon_compiler* c, static void transform_POW(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); - struct rc_dst_register tempdst = dstreg(RC_FILE_TEMPORARY, tempreg); - struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempreg); + struct rc_dst_register tempdst = try_to_reuse_dst(c, inst); + struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index); tempdst.WriteMask = RC_MASK_W; tempsrc.Swizzle = RC_SWIZZLE_WWWW; @@ -388,11 +426,11 @@ static void transform_RSQ(struct radeon_compiler* c, static void transform_SEQ(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_zero, builtin_one); + negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -407,11 +445,11 @@ static void transform_SFL(struct radeon_compiler* c, static void transform_SGE(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -419,11 +457,11 @@ static void transform_SGE(struct radeon_compiler* c, static void transform_SGT(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -431,11 +469,11 @@ static void transform_SGT(struct radeon_compiler* c, static void transform_SLE(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -443,11 +481,11 @@ static void transform_SLE(struct radeon_compiler* c, static void transform_SLT(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); + srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -455,11 +493,11 @@ static void transform_SLT(struct radeon_compiler* c, static void transform_SNE(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, - negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_one, builtin_zero); + negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -473,12 +511,13 @@ static void transform_SSG(struct radeon_compiler* c, * CMP tmp1, x, 1, 0 * ADD result, tmp0, -tmp1; */ - unsigned tmp0, tmp1; + struct rc_dst_register dst0; + unsigned tmp1; /* 0 < x */ - tmp0 = rc_find_free_temporary(c); + dst0 = try_to_reuse_dst(c, inst); emit3(c, inst->Prev, RC_OPCODE_CMP, 0, - dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask), + dst0, negate(inst->U.I.SrcReg[0]), builtin_one, builtin_zero); @@ -495,7 +534,7 @@ static void transform_SSG(struct radeon_compiler* c, /* result = tmp0 - tmp1 */ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tmp0), + srcreg(RC_FILE_TEMPORARY, dst0.Index), negate(srcreg(RC_FILE_TEMPORARY, tmp1))); rc_remove_instruction(inst); @@ -517,15 +556,15 @@ static void transform_SWZ(struct radeon_compiler* c, static void transform_XPD(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); - emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg), + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), - negate(srcreg(RC_FILE_TEMPORARY, tempreg))); + negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); rc_remove_instruction(inst); } @@ -553,6 +592,7 @@ int radeonTransformALU( switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_DST: transform_DST(c, inst); return 1; @@ -592,7 +632,7 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c, { /* There is no decent CMP available, so let's rig one up. * CMP is defined as dst = src0 < 0.0 ? src1 : src2 - * The following sequence consumes two temps and two extra slots + * The following sequence consumes zero to two temps and two extra slots * (the second temp and the second slot is consumed by transform_LRP), * but should be equivalent: * @@ -600,18 +640,18 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c, * LRP dst, tmp0, src1, src2 * * Yes, I know, I'm a mad scientist. ~ C. & M. */ - int tempreg0 = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); /* SLT tmp0, src0, 0.0 */ emit2(c, inst->Prev, RC_OPCODE_SLT, 0, - dstreg(RC_FILE_TEMPORARY, tempreg0), + dst, inst->U.I.SrcReg[0], builtin_zero); /* LRP dst, tmp0, src1, src2 */ transform_LRP(c, emit3(c, inst->Prev, RC_OPCODE_LRP, 0, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); + srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); rc_remove_instruction(inst); } @@ -642,7 +682,7 @@ static void transform_r300_vertex_DP3(struct radeon_compiler* c, static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, struct rc_instruction* inst) { - int tempreg = rc_find_free_temporary(c); + struct rc_dst_register dst = try_to_reuse_dst(c, inst); unsigned constant_swizzle; int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, 0.0000000000000000001, @@ -650,16 +690,16 @@ static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, /* MOV dst, src */ emit1(c, inst->Prev, RC_OPCODE_MOV, 0, - dstreg(RC_FILE_TEMPORARY, tempreg), + dst, inst->U.I.SrcReg[0]); /* MAX dst.z, src, 0.00...001 */ emit2(c, inst->Prev, RC_OPCODE_MAX, 0, - dstregtmpmask(tempreg, RC_MASK_Y), - srcreg(RC_FILE_TEMPORARY, tempreg), + dstregtmpmask(dst.Index, RC_MASK_Y), + srcreg(RC_FILE_TEMPORARY, dst.Index), srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); - inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, tempreg); + inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index); } static void transform_r300_vertex_SEQ(struct radeon_compiler *c, @@ -743,12 +783,13 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c, * SLT tmp1, x, 0; * ADD result, tmp0, -tmp1; */ - unsigned tmp0, tmp1; + struct rc_dst_register dst0 = try_to_reuse_dst(c, inst); + unsigned tmp1; /* 0 < x */ - tmp0 = rc_find_free_temporary(c); + dst0 = try_to_reuse_dst(c, inst); emit2(c, inst->Prev, RC_OPCODE_SLT, 0, - dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask), + dst0, builtin_zero, inst->U.I.SrcReg[0]); @@ -763,7 +804,7 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c, /* result = tmp0 - tmp1 */ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg, - srcreg(RC_FILE_TEMPORARY, tmp0), + srcreg(RC_FILE_TEMPORARY, dst0.Index), negate(srcreg(RC_FILE_TEMPORARY, tmp1))); rc_remove_instruction(inst); @@ -781,6 +822,7 @@ int r300_transform_vertex_alu( switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1; case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h index 9dcd44c522d..45f79ece5ba 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h @@ -181,4 +181,9 @@ static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){ return 0; } } + +#define RC_SOURCE_NONE 0x0 +#define RC_SOURCE_RGB 0x1 +#define RC_SOURCE_ALPHA 0x2 + #endif /* RADEON_PROGRAM_CONSTANTS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index a21fe8d3df8..5905d26e521 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -27,6 +27,9 @@ #include "radeon_program_pair.h" +#include "radeon_compiler_util.h" + +#include <stdlib.h> /** * Return the source slot where we installed the given register access, @@ -204,24 +207,37 @@ void rc_pair_foreach_source_that_rgb_reads( } } -/*return 0 for rgb, 1 for alpha -1 for error. */ - -unsigned int rc_source_type_that_arg_reads( - unsigned int source, - unsigned int swizzle) +struct rc_pair_instruction_source * rc_pair_get_src( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_arg * arg) { - unsigned int chan; - unsigned int swz = RC_SWIZZLE_UNUSED; - unsigned int ret = RC_PAIR_SOURCE_NONE; - - for(chan = 0; chan < 3; chan++) { - swz = GET_SWZ(swizzle, chan); - if (swz == RC_SWIZZLE_W) { - ret |= RC_PAIR_SOURCE_ALPHA; - } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y - || swz == RC_SWIZZLE_Z) { - ret |= RC_PAIR_SOURCE_RGB; + unsigned int i, type; + unsigned int channels = 0; + + for(i = 0; i < 3; i++) { + if (arg == pair_inst->RGB.Arg + i) { + channels = 3; + break; } } - return ret; + + if (channels == 0) { + for (i = 0; i < 3; i++) { + if (arg == pair_inst->Alpha.Arg + i) { + channels = 1; + break; + } + } + } + + assert(channels > 0); + type = rc_source_type_swz(arg->Swizzle, channels); + + if (type & RC_SOURCE_RGB) { + return &pair_inst->RGB.Src[arg->Source]; + } else if (type & RC_SOURCE_ALPHA) { + return &pair_inst->Alpha.Src[arg->Source]; + } else { + return NULL; + } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index 54d44a2098b..ccf7a0070cd 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -55,10 +55,6 @@ struct radeon_compiler; */ #define RC_PAIR_PRESUB_SRC 3 -#define RC_PAIR_SOURCE_NONE 0x0 -#define RC_PAIR_SOURCE_RGB 0x1 -#define RC_PAIR_SOURCE_ALPHA 0x2 - struct rc_pair_instruction_source { unsigned int Used:1; unsigned int File:3; @@ -115,9 +111,9 @@ void rc_pair_foreach_source_that_rgb_reads( void * data, rc_pair_foreach_src_fn cb); -unsigned int rc_source_type_that_arg_reads( - unsigned int source, - unsigned int swizzle); +struct rc_pair_instruction_source * rc_pair_get_src( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_arg * arg); /*@}*/ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c index 618ab5a099b..ae13f6742f8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c @@ -129,6 +129,7 @@ static char rc_swizzle_char(unsigned int swz) case RC_SWIZZLE_HALF: return 'H'; case RC_SWIZZLE_UNUSED: return '_'; } + fprintf(stderr, "bad swz: %u\n", swz); return '?'; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c index 530afa5e08e..f9d9f34b6ad 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c @@ -28,6 +28,8 @@ #include "radeon_program_tex.h" +#include "radeon_compiler_util.h" + /* Series of transformations to be done on textures. */ static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c index 5f67f536f61..7d76585a593 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c @@ -87,8 +87,9 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user) rc_for_all_reads_src(inst, mark_used, &d); } - /* Pass 2: If there is relative addressing, mark all externals as used. */ - if (has_rel_addr) { + /* Pass 2: If there is relative addressing or dead constant elimination + * is disabled, mark all externals as used. */ + if (has_rel_addr || !c->remove_unused_constants) { for (unsigned i = 0; i < c->Program.Constants.Count; i++) if (constants[i].Type == RC_CONSTANT_EXTERNAL) const_used[i] = 1; @@ -119,7 +120,7 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user) /* is_identity ==> new_count == old_count * !is_identity ==> new_count < old_count */ assert( is_identity || new_count < c->Program.Constants.Count); - assert(!(has_rel_addr && are_externals_remapped)); + assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped)); /* Pass 4: Redirect reads of all constants to their new locations. */ if (!is_identity) { @@ -127,7 +128,6 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user) inst != &c->Program.Instructions; inst = inst->Next) { rc_remap_registers(inst, remap_regs, inv_remap_table); } - } /* Set the new constant count. Note that new_count may be less than diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c index 60e228be5bd..88165f78953 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c @@ -33,100 +33,51 @@ #include "radeon_compiler.h" #include "radeon_dataflow.h" - -struct reg_rename { - int old_index; - int new_index; - int temp_index; -}; - -static void rename_reg(void * data, struct rc_instruction * inst, - rc_register_file * file, unsigned int * index) -{ - struct reg_rename *r = data; - - if(r->old_index == *index && *file == RC_FILE_TEMPORARY) { - *index = r->new_index; - } - else if(r->new_index == *index && *file == RC_FILE_TEMPORARY) { - *index = r->temp_index; - } -} - -static void rename_all( - struct radeon_compiler *c, - struct rc_instruction * start, - unsigned int old, - unsigned int new, - unsigned int temp) -{ - struct rc_instruction * inst; - struct reg_rename r; - r.old_index = old; - r.new_index = new; - r.temp_index = temp; - for(inst = start; inst != &c->Program.Instructions; - inst = inst->Next) { - rc_remap_registers(inst, rename_reg, &r); - } -} +#include "radeon_program.h" /** * This function renames registers in an attempt to get the code close to * SSA form. After this function has completed, most of the register are only - * written to one time, with a few exceptions. For example, this block of code - * will not be modified by this function: - * Mov Temp[0].x Const[0].x - * Mov Temp[0].y Const[0].y - * Basically, destination registers will be renamed if: - * 1. There have been no previous writes to that register - * or - * 2. If the instruction is writting to the exact components (no more, no less) - * of a register that has been written to by previous instructions. + * written to one time, with a few exceptions. * * This function assumes all the instructions are still of type * RC_INSTRUCTION_NORMAL. */ void rc_rename_regs(struct radeon_compiler *c, void *user) { - unsigned int cur_index = 0; - unsigned int icount; + unsigned int i, used_length; + int new_index; struct rc_instruction * inst; - unsigned int * masks; + struct rc_reader_data reader_data; + unsigned char * used; - /* The number of instructions in the program is also the maximum - * number of temp registers that could potentially be used. */ - icount = rc_recompute_ips(c); - masks = memory_pool_malloc(&c->Pool, icount * sizeof(unsigned int)); - memset(masks, 0, icount * sizeof(unsigned int)); + used_length = 2 * rc_recompute_ips(c); + used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length); + memset(used, 0, sizeof(unsigned char) * used_length); + rc_get_used_temporaries(c, used, used_length); for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info * info; - unsigned int old_index, temp_index; - struct rc_dst_register * dst; - if(inst->Type != RC_INSTRUCTION_NORMAL) { - rc_error(c, "%s only works with normal instructions.", - __FUNCTION__); - return; - } - dst = &inst->U.I.DstReg; - info = rc_get_opcode_info(inst->U.I.Opcode); - if(!info->HasDstReg || dst->File != RC_FILE_TEMPORARY) { + + if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) continue; + + rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); + + if (reader_data.Abort || reader_data.ReaderCount == 0) + continue; + + new_index = rc_find_free_temporary_list(c, used, used_length, + RC_MASK_XYZW); + if (new_index < 0) { + rc_error(c, "Ran out of temporary registers\n"); + return; } - if(dst->Index >= icount || !masks[dst->Index] || - masks[dst->Index] == dst->WriteMask) { - old_index = dst->Index; - /* We need to set dst->Index here so get free temporary - * will work. */ - dst->Index = cur_index++; - temp_index = rc_find_free_temporary(c); - rename_all(c, inst->Next, old_index, - dst->Index, temp_index); + + reader_data.Writer->U.I.DstReg.Index = new_index; + for(i = 0; i < reader_data.ReaderCount; i++) { + reader_data.Readers[i].U.Src->Index = new_index; } - assert(dst->Index < icount); - masks[dst->Index] |= dst->WriteMask; } } diff --git a/src/mesa/drivers/dri/r600/evergreen_chip.c b/src/mesa/drivers/dri/r600/evergreen_chip.c index 2c9e4e2b844..53dacbfdf39 100644 --- a/src/mesa/drivers/dri/r600/evergreen_chip.c +++ b/src/mesa/drivers/dri/r600/evergreen_chip.c @@ -286,7 +286,11 @@ static void evergreenSetupVTXConstants(struct gl_context * ctx, if (!paos->bo) return; - r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit); + if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_CEDAR) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_PALM)) + r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit); + else + r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit); //uSQ_VTX_CONSTANT_WORD0_0 uSQ_VTX_CONSTANT_WORD0_0 = paos->offset; diff --git a/src/mesa/drivers/dri/r600/evergreen_state.c b/src/mesa/drivers/dri/r600/evergreen_state.c index a77be183a12..076a608573c 100644 --- a/src/mesa/drivers/dri/r600/evergreen_state.c +++ b/src/mesa/drivers/dri/r600/evergreen_state.c @@ -1461,6 +1461,14 @@ static void evergreenInitSQConfig(struct gl_context * ctx) uMaxThreads = 248; uMaxStackEntries = 512; break; + case CHIP_FAMILY_PALM: + uSqNumCfInsts = 1; + bVC_ENABLE = GL_FALSE; + uMaxGPRs = 256; + uPSThreadCount = 96; + uMaxThreads = 192; + uMaxStackEntries = 256; + break; default: uSqNumCfInsts = 2; bVC_ENABLE = GL_TRUE; diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.c b/src/mesa/drivers/dri/r600/evergreen_tex.c index 2f4c92d6767..3b5448a0e4e 100644 --- a/src/mesa/drivers/dri/r600/evergreen_tex.c +++ b/src/mesa/drivers/dri/r600/evergreen_tex.c @@ -31,7 +31,6 @@ #include "main/enums.h" #include "main/image.h" #include "main/teximage.h" -#include "main/mipmap.h" #include "main/simple_list.h" #include "main/texobj.h" diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index b6443bf0c53..aa1891eac32 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -259,7 +259,7 @@ static void r600InitConstValues(struct gl_context *ctx, radeonScreenPtr screen) R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); if( (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_CEDAR) - &&(context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_HEMLOCK) ) + &&(context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_PALM) ) { r700->bShaderUseMemConstant = GL_TRUE; } @@ -285,8 +285,13 @@ static void r600InitConstValues(struct gl_context *ctx, radeonScreenPtr screen) ctx->Const.MaxTextureMaxAnisotropy = 16.0; ctx->Const.MaxTextureLodBias = 16.0; - ctx->Const.MaxTextureLevels = 13; /* hw support 14 */ - ctx->Const.MaxTextureRectSize = 4096; /* hw support 8192 */ + if (screen->chip_family >= CHIP_FAMILY_CEDAR) { + ctx->Const.MaxTextureLevels = 15; + ctx->Const.MaxTextureRectSize = 16384; + } else { + ctx->Const.MaxTextureLevels = 14; + ctx->Const.MaxTextureRectSize = 8192; + } ctx->Const.MinPointSize = 0x0001 / 8.0; ctx->Const.MinPointSizeAA = 0x0001 / 8.0; diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 2bf24096a0d..1fa559cec1a 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -3334,7 +3334,14 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm) return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_CNDGE; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE; + } pAsm->D.dst.op3 = 1; tmp = (-1); @@ -3416,8 +3423,14 @@ GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) checkop1(pAsm); tmp = gethelpr(pAsm); - - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -3457,7 +3470,14 @@ GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) { return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -4742,7 +4762,14 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm) tmp = gethelpr(pAsm); - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -4782,7 +4809,14 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm) { return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_MULADD; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + } pAsm->D.dst.op3 = 1; setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); @@ -5010,7 +5044,14 @@ GLboolean assemble_SSG(r700_AssemblerBase *pAsm) GLuint tmp = gethelpr(pAsm); /* tmp = (src > 0 ? 1 : src) */ - pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_CNDGT; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + } pAsm->D.dst.op3 = 1; pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp; @@ -5033,7 +5074,14 @@ GLboolean assemble_SSG(r700_AssemblerBase *pAsm) } /* dst = (-tmp > 0 ? -1 : tmp) */ - pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + if(8 == pAsm->unAsic) + { + pAsm->D.dst.opcode = EG_OP3_INST_CNDGT; + } + else + { + pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT; + } pAsm->D.dst.op3 = 1; if( GL_FALSE == assemble_dst(pAsm) ) diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index 61106fbc43f..82789cec5ed 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -440,6 +440,11 @@ #define PCI_CHIP_HEMLOCK_689C 0x689C #define PCI_CHIP_HEMLOCK_689D 0x689D +#define PCI_CHIP_PALM_9802 0x9802 +#define PCI_CHIP_PALM_9803 0x9803 +#define PCI_CHIP_PALM_9804 0x9804 +#define PCI_CHIP_PALM_9805 0x9805 + enum { CHIP_FAMILY_R100, CHIP_FAMILY_RV100, @@ -483,6 +488,7 @@ enum { CHIP_FAMILY_JUNIPER, CHIP_FAMILY_CYPRESS, CHIP_FAMILY_HEMLOCK, + CHIP_FAMILY_PALM, CHIP_FAMILY_LAST }; diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index fecdd119059..ca6ab46ca43 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -99,6 +99,7 @@ static const char* get_chip_family_name(int chip_family) case CHIP_FAMILY_JUNIPER: return "JUNIPER"; case CHIP_FAMILY_CYPRESS: return "CYPRESS"; case CHIP_FAMILY_HEMLOCK: return "HEMLOCK"; + case CHIP_FAMILY_PALM: return "PALM"; default: return "unknown"; } } diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h index 088f9701722..a68a9768779 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h @@ -49,7 +49,7 @@ struct _radeon_mipmap_level { }; /* store the max possible in the miptree */ -#define RADEON_MIPTREE_MAX_TEXTURE_LEVELS 13 +#define RADEON_MIPTREE_MAX_TEXTURE_LEVELS 15 /** * A mipmap tree contains texture images in the layout that the hardware diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index b379240579d..94e56c2ade6 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -1155,6 +1155,14 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) screen->chip_flags = RADEON_CHIPSET_TCL; break; + case PCI_CHIP_PALM_9802: + case PCI_CHIP_PALM_9803: + case PCI_CHIP_PALM_9804: + case PCI_CHIP_PALM_9805: + screen->chip_family = CHIP_FAMILY_PALM; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + default: fprintf(stderr, "unknown chip id 0x%x, can't guess.\n", device_id); diff --git a/src/mesa/drivers/dri/sis/server/sis_dri.h b/src/mesa/drivers/dri/sis/server/sis_dri.h index f0171f3c0f8..7d8f507115d 100644 --- a/src/mesa/drivers/dri/sis/server/sis_dri.h +++ b/src/mesa/drivers/dri/sis/server/sis_dri.h @@ -72,13 +72,4 @@ typedef struct { int dummy; } SISDRIContextRec, *SISDRIContextPtr; -#ifdef XFree86Server - -#include "screenint.h" - -Bool SISDRIScreenInit(ScreenPtr pScreen); -void SISDRICloseScreen(ScreenPtr pScreen); -Bool SISDRIFinishScreenInit(ScreenPtr pScreen); - -#endif #endif diff --git a/src/mesa/drivers/dri/tdfx/tdfx_context.h b/src/mesa/drivers/dri/tdfx/tdfx_context.h index fb38419dcdd..7e2f0e00a8e 100644 --- a/src/mesa/drivers/dri/tdfx/tdfx_context.h +++ b/src/mesa/drivers/dri/tdfx/tdfx_context.h @@ -41,11 +41,7 @@ #include <sys/time.h> #include "dri_util.h" -#ifdef XFree86Server -#include "GL/xf86glx.h" -#else #include "main/glheader.h" -#endif #if defined(__linux__) #include <signal.h> #endif diff --git a/src/mesa/drivers/dri/unichrome/server/via_dri.h b/src/mesa/drivers/dri/unichrome/server/via_dri.h index b47397d5728..c6eed03c1c9 100644 --- a/src/mesa/drivers/dri/unichrome/server/via_dri.h +++ b/src/mesa/drivers/dri/unichrome/server/via_dri.h @@ -35,9 +35,7 @@ #define VIA_DRIDDX_VERSION_MINOR 0 #define VIA_DRIDDX_VERSION_PATCH 0 -#ifndef XFree86Server typedef int Bool; -#endif typedef struct { drm_handle_t handle; diff --git a/src/mesa/drivers/windows/gdi/InitCritSections.cpp b/src/mesa/drivers/windows/gdi/InitCritSections.cpp index 7145bffa510..69f03b8e47c 100644 --- a/src/mesa/drivers/windows/gdi/InitCritSections.cpp +++ b/src/mesa/drivers/windows/gdi/InitCritSections.cpp @@ -1,7 +1,8 @@ #include "glapi.h" #include "glThread.h" -#ifdef WIN32_THREADS +#ifdef WIN32 + extern "C" _glthread_Mutex OneTimeLock; extern "C" _glthread_Mutex GenTexturesLock; @@ -29,4 +30,4 @@ public: _CriticalSectionInit _CriticalSectionInit::m_inst; -#endif +#endif /* WIN32 */ diff --git a/src/mesa/drivers/x11/glxheader.h b/src/mesa/drivers/x11/glxheader.h index d88afba20e7..ee002191bc0 100644 --- a/src/mesa/drivers/x11/glxheader.h +++ b/src/mesa/drivers/x11/glxheader.h @@ -32,13 +32,6 @@ #include "main/glheader.h" -#ifdef XFree86Server - -# include "xorg-server.h" -# include "resource.h" -# include "windowstr.h" - -#else # include <X11/Xlib.h> # include <X11/Xlibint.h> @@ -51,7 +44,6 @@ # include <GL/glx.h> # include <sys/time.h> -#endif diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c index 00ceb960c62..b5eabadf486 100644 --- a/src/mesa/drivers/x11/xm_api.c +++ b/src/mesa/drivers/x11/xm_api.c @@ -158,14 +158,12 @@ static short hpcr_rgbTbl[3][256] = { /** * Return the host's byte order as LSBFirst or MSBFirst ala X. */ -#ifndef XFree86Server static int host_byte_order( void ) { int i = 1; char *cptr = (char *) &i; return (*cptr==1) ? LSBFirst : MSBFirst; } -#endif /** @@ -176,7 +174,7 @@ static int host_byte_order( void ) */ static int check_for_xshm( XMesaDisplay *display ) { -#if defined(USE_XSHM) && !defined(XFree86Server) +#if defined(USE_XSHM) int major, minor, ignore; Bool pixmaps; @@ -227,16 +225,6 @@ gamma_adjust( GLfloat gamma, GLint value, GLint max ) static int bits_per_pixel( XMesaVisual xmv ) { -#ifdef XFree86Server - const int depth = xmv->nplanes; - int i; - assert(depth > 0); - for (i = 0; i < screenInfo.numPixmapFormats; i++) { - if (screenInfo.formats[i].depth == depth) - return screenInfo.formats[i].bitsPerPixel; - } - return depth; /* should never get here, but this should be safe */ -#else XMesaDisplay *dpy = xmv->display; XMesaVisualInfo visinfo = xmv->visinfo; XMesaImage *img; @@ -257,7 +245,6 @@ bits_per_pixel( XMesaVisual xmv ) img->data = NULL; XMesaDestroyImage( img ); return bitsPerPixel; -#endif } @@ -271,7 +258,6 @@ bits_per_pixel( XMesaVisual xmv ) * Return: GL_TRUE - window exists * GL_FALSE - window doesn't exist */ -#ifndef XFree86Server static GLboolean WindowExistsFlag; static int window_exists_err_handler( XMesaDisplay* dpy, XErrorEvent* xerr ) @@ -306,7 +292,6 @@ get_drawable_size( XMesaDisplay *dpy, Drawable d, GLuint *width, GLuint *height *height = h; return stat; } -#endif /** @@ -319,10 +304,6 @@ void xmesa_get_window_size(XMesaDisplay *dpy, XMesaBuffer b, GLuint *width, GLuint *height) { -#ifdef XFree86Server - *width = MIN2(b->frontxrb->drawable->width, MAX_WIDTH); - *height = MIN2(b->frontxrb->drawable->height, MAX_HEIGHT); -#else Status stat; _glthread_LOCK_MUTEX(_xmesa_lock); @@ -335,7 +316,6 @@ xmesa_get_window_size(XMesaDisplay *dpy, XMesaBuffer b, _mesa_warning(NULL, "XGetGeometry failed!\n"); *width = *height = 1; } -#endif } @@ -549,16 +529,11 @@ noFaultXAllocColor( int client, XMesaColor *color, int *exact, int *alloced ) { -#ifdef XFree86Server - Pixel *ppixIn; - xrgb *ctable; -#else /* we'll try to cache ctable for better remote display performance */ static Display *prevDisplay = NULL; static XMesaColormap prevCmap = 0; static int prevCmapSize = 0; static XMesaColor *ctable = NULL; -#endif XMesaColor subColor; int i, bestmatch; double mindist; /* 3*2^16^2 exceeds long int precision. */ @@ -566,14 +541,7 @@ noFaultXAllocColor( int client, (void) client; /* First try just using XAllocColor. */ -#ifdef XFree86Server - if (AllocColor(cmap, - &color->red, &color->green, &color->blue, - &color->pixel, - client) == Success) -#else if (XAllocColor(dpy, cmap, color)) -#endif { *exact = 1; *alloced = 1; @@ -584,14 +552,6 @@ noFaultXAllocColor( int client, /* Retrieve color table entries. */ /* XXX alloca candidate. */ -#ifdef XFree86Server - ppixIn = (Pixel *) MALLOC(cmapSize * sizeof(Pixel)); - ctable = (xrgb *) MALLOC(cmapSize * sizeof(xrgb)); - for (i = 0; i < cmapSize; i++) { - ppixIn[i] = i; - } - QueryColors(cmap, cmapSize, ppixIn, ctable); -#else if (prevDisplay != dpy || prevCmap != cmap || prevCmapSize != cmapSize || !ctable) { /* free previously cached color table */ @@ -608,7 +568,6 @@ noFaultXAllocColor( int client, prevCmap = cmap; prevCmapSize = cmapSize; } -#endif /* Find best match. */ bestmatch = -1; @@ -632,14 +591,7 @@ noFaultXAllocColor( int client, * fail if the cell is read/write. Otherwise, we're incrementing * the cell's reference count. */ -#ifdef XFree86Server - if (AllocColor(cmap, - &subColor.red, &subColor.green, &subColor.blue, - &subColor.pixel, - client) == Success) { -#else if (XAllocColor(dpy, cmap, &subColor)) { -#endif *alloced = 1; } else { @@ -651,12 +603,7 @@ noFaultXAllocColor( int client, subColor.flags = DoRed | DoGreen | DoBlue; *alloced = 0; } -#ifdef XFree86Server - free(ppixIn); - free(ctable); -#else /* don't free table, save it for next time */ -#endif *color = subColor; *exact = 0; @@ -873,10 +820,8 @@ setup_8bit_hpcr(XMesaVisual v) v->hpcr_clear_pixmap = XMesaCreatePixmap(v->display, DefaultRootWindow(v->display), 16, 2, 8); -#ifndef XFree86Server v->hpcr_clear_ximage = XGetImage(v->display, v->hpcr_clear_pixmap, 0, 0, 16, 2, AllPlanes, ZPixmap); -#endif } } @@ -1049,9 +994,6 @@ initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b, int client = 0; const int xclass = v->visualType; -#ifdef XFree86Server - client = (window) ? CLIENT_ID(window->id) : 0; -#endif ASSERT(!b || b->xm_visual == v); @@ -1120,40 +1062,23 @@ initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b, } /* X11 graphics contexts */ -#ifdef XFree86Server - b->gc = CreateScratchGC(v->display, window->depth); -#else b->gc = XCreateGC( v->display, window, 0, NULL ); -#endif XMesaSetFunction( v->display, b->gc, GXcopy ); /* cleargc - for glClear() */ -#ifdef XFree86Server - b->cleargc = CreateScratchGC(v->display, window->depth); -#else b->cleargc = XCreateGC( v->display, window, 0, NULL ); -#endif XMesaSetFunction( v->display, b->cleargc, GXcopy ); /* * Don't generate Graphics Expose/NoExpose events in swapbuffers(). * Patch contributed by Michael Pichler May 15, 1995. */ -#ifdef XFree86Server - b->swapgc = CreateScratchGC(v->display, window->depth); - { - CARD32 v[1]; - v[0] = FALSE; - dixChangeGC(NullClient, b->swapgc, GCGraphicsExposures, v, NULL); - } -#else { XGCValues gcvalues; gcvalues.graphics_exposures = False; b->swapgc = XCreateGC(v->display, window, GCGraphicsExposures, &gcvalues); } -#endif XMesaSetFunction( v->display, b->swapgc, GXcopy ); /* * Set fill style and tile pixmap once for all for HPCR stuff @@ -1175,9 +1100,6 @@ initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b, /* Initialize the row buffer XImage for use in write_color_span() */ data = (char*) MALLOC(MAX_WIDTH*4); -#ifdef XFree86Server - b->rowimage = XMesaCreateImage(GET_VISUAL_DEPTH(v), MAX_WIDTH, 1, data); -#else b->rowimage = XCreateImage( v->display, v->visinfo->visual, v->visinfo->depth, @@ -1186,7 +1108,6 @@ initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b, MAX_WIDTH, 1, /*width, height*/ 32, /*bitmap_pad*/ 0 /*bytes_per_line*/ ); -#endif if (!b->rowimage) return GL_FALSE; } @@ -1334,7 +1255,6 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display, XMesaVisual v; GLint red_bits, green_bits, blue_bits, alpha_bits; -#ifndef XFree86Server /* For debugging only */ if (_mesa_getenv("MESA_XSYNC")) { /* This makes debugging X easier. @@ -1343,7 +1263,6 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display, */ XSynchronize( display, 1 ); } -#endif /* Color-index rendering not supported. */ if (!rgb_flag) @@ -1360,14 +1279,12 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display, * the struct but we may need some of the information contained in it * at a later time. */ -#ifndef XFree86Server v->visinfo = (XVisualInfo *) MALLOC(sizeof(*visinfo)); if(!v->visinfo) { free(v); return NULL; } memcpy(v->visinfo, visinfo, sizeof(*visinfo)); -#endif /* check for MESA_GAMMA environment variable */ gamma = _mesa_getenv("MESA_GAMMA"); @@ -1384,30 +1301,13 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display, v->ximage_flag = ximage_flag; -#ifdef XFree86Server - /* We could calculate these values by ourselves. nplanes is either the sum - * of the red, green, and blue bits or the number index bits. - * ColormapEntries is either (1U << index_bits) or - * (1U << max(redBits, greenBits, blueBits)). - */ - assert(visinfo->nplanes > 0); - v->nplanes = visinfo->nplanes; - v->ColormapEntries = visinfo->ColormapEntries; - - v->mesa_visual.redMask = visinfo->redMask; - v->mesa_visual.greenMask = visinfo->greenMask; - v->mesa_visual.blueMask = visinfo->blueMask; - v->visualID = visinfo->vid; - v->screen = 0; /* FIXME: What should be done here? */ -#else v->mesa_visual.redMask = visinfo->red_mask; v->mesa_visual.greenMask = visinfo->green_mask; v->mesa_visual.blueMask = visinfo->blue_mask; v->visualID = visinfo->visualid; v->screen = visinfo->screen; -#endif -#if defined(XFree86Server) || !(defined(__cplusplus) || defined(c_plusplus)) +#if !(defined(__cplusplus) || defined(c_plusplus)) v->visualType = xmesa_convert_from_x_visual_type(visinfo->class); #else v->visualType = xmesa_convert_from_x_visual_type(visinfo->c_class); @@ -1461,9 +1361,7 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display, PUBLIC void XMesaDestroyVisual( XMesaVisual v ) { -#ifndef XFree86Server free(v->visinfo); -#endif free(v); } @@ -1532,12 +1430,6 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) _mesa_enable_extension(mesaCtx, "GL_EXT_timer_query"); #endif -#ifdef XFree86Server - /* If we're running in the X server, do bounds checking to prevent - * segfaults and server crashes! - */ - mesaCtx->Const.CheckArrayBounds = GL_TRUE; -#endif /* finish up xmesa context initializations */ c->swapbytes = CHECK_BYTE_ORDER(v) ? GL_FALSE : GL_TRUE; @@ -1602,9 +1494,7 @@ void XMesaDestroyContext( XMesaContext c ) PUBLIC XMesaBuffer XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w) { -#ifndef XFree86Server XWindowAttributes attr; -#endif XMesaBuffer b; XMesaColormap cmap; int depth; @@ -1613,12 +1503,8 @@ XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w) assert(w); /* Check that window depth matches visual depth */ -#ifdef XFree86Server - depth = ((XMesaDrawable)w)->depth; -#else XGetWindowAttributes( v->display, w, &attr ); depth = attr.depth; -#endif if (GET_VISUAL_DEPTH(v) != depth) { _mesa_warning(NULL, "XMesaCreateWindowBuffer: depth mismatch between visual (%d) and window (%d)!\n", GET_VISUAL_DEPTH(v), depth); @@ -1626,9 +1512,6 @@ XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w) } /* Find colormap */ -#ifdef XFree86Server - cmap = (ColormapPtr)LookupIDByType(wColormap(w), RT_COLORMAP); -#else if (attr.colormap) { cmap = attr.colormap; } @@ -1638,7 +1521,6 @@ XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w) /* OK, let's just allocate a new one and hope for the best */ cmap = XCreateColormap(v->display, w, attr.visual, AllocNone); } -#endif b = create_xmesa_buffer((XMesaDrawable) w, WINDOW, v, cmap); if (!b) @@ -1748,7 +1630,6 @@ XMesaBuffer XMesaCreatePBuffer(XMesaVisual v, XMesaColormap cmap, unsigned int width, unsigned int height) { -#ifndef XFree86Server XMesaWindow root; XMesaDrawable drawable; /* X Pixmap Drawable */ XMesaBuffer b; @@ -1770,9 +1651,6 @@ XMesaCreatePBuffer(XMesaVisual v, XMesaColormap cmap, } return b; -#else - return 0; -#endif } @@ -1931,40 +1809,6 @@ XMesaBuffer XMesaGetCurrentReadBuffer( void ) } -#ifdef XFree86Server -PUBLIC -GLboolean XMesaForceCurrent(XMesaContext c) -{ - if (c) { - _glapi_set_dispatch(c->mesa.CurrentDispatch); - - if (&(c->mesa) != _mesa_get_current_context()) { - _mesa_make_current(&c->mesa, c->mesa.DrawBuffer, c->mesa.ReadBuffer); - } - } - else { - _mesa_make_current(NULL, NULL, NULL); - } - return GL_TRUE; -} - - -PUBLIC -GLboolean XMesaLoseCurrent(XMesaContext c) -{ - (void) c; - _mesa_make_current(NULL, NULL, NULL); - return GL_TRUE; -} - - -PUBLIC -GLboolean XMesaCopyContext( XMesaContext xm_src, XMesaContext xm_dst, GLuint mask ) -{ - _mesa_copy_context(&xm_src->mesa, &xm_dst->mesa, mask); - return GL_TRUE; -} -#endif /* XFree86Server */ #ifndef FX @@ -2004,7 +1848,7 @@ void XMesaSwapBuffers( XMesaBuffer b ) #endif if (b->backxrb->ximage) { /* Copy Ximage (back buf) from client memory to server window */ -#if defined(USE_XSHM) && !defined(XFree86Server) +#if defined(USE_XSHM) if (b->shm) { /*_glthread_LOCK_MUTEX(_xmesa_lock);*/ XShmPutImage( b->xm_visual->display, b->frontxrb->drawable, @@ -2041,9 +1885,7 @@ void XMesaSwapBuffers( XMesaBuffer b ) if (b->swAlpha) _mesa_copy_soft_alpha_renderbuffers(ctx, &b->mesa_buffer); } -#if !defined(XFree86Server) XSync( b->xm_visual->display, False ); -#endif } @@ -2074,7 +1916,7 @@ void XMesaCopySubBuffer( XMesaBuffer b, int x, int y, int width, int height ) #endif if (b->backxrb->ximage) { /* Copy Ximage from host's memory to server's window */ -#if defined(USE_XSHM) && !defined(XFree86Server) +#if defined(USE_XSHM) if (b->shm) { /* XXX assuming width and height aren't too large! */ XShmPutImage( b->xm_visual->display, b->frontxrb->drawable, @@ -2116,7 +1958,6 @@ void XMesaCopySubBuffer( XMesaBuffer b, int x, int y, int width, int height ) * Return: GL_TRUE = context is double buffered * GL_FALSE = context is single buffered */ -#ifndef XFree86Server GLboolean XMesaGetBackBuffer( XMesaBuffer b, XMesaPixmap *pixmap, XMesaImage **ximage ) @@ -2134,7 +1975,6 @@ GLboolean XMesaGetBackBuffer( XMesaBuffer b, return GL_FALSE; } } -#endif /* XFree86Server */ /* @@ -2171,11 +2011,7 @@ GLboolean XMesaGetDepthBuffer( XMesaBuffer b, GLint *width, GLint *height, void XMesaFlush( XMesaContext c ) { if (c && c->xm_visual) { -#ifdef XFree86Server - /* NOT_NEEDED */ -#else XSync( c->xm_visual->display, False ); -#endif } } @@ -2234,15 +2070,11 @@ void XMesaGarbageCollect( void ) for (b=XMesaBufferList; b; b=next) { next = b->Next; if (b->display && b->frontxrb->drawable && b->type == WINDOW) { -#ifdef XFree86Server - /* NOT_NEEDED */ -#else XSync(b->display, False); if (!window_exists( b->display, b->frontxrb->drawable )) { /* found a dead window, free the ancillary info */ XMesaDestroyBuffer( b ); } -#endif } } } diff --git a/src/mesa/drivers/x11/xm_buffer.c b/src/mesa/drivers/x11/xm_buffer.c index 2683bd44d19..10829b4284f 100644 --- a/src/mesa/drivers/x11/xm_buffer.c +++ b/src/mesa/drivers/x11/xm_buffer.c @@ -37,7 +37,7 @@ #include "main/renderbuffer.h" -#if defined(USE_XSHM) && !defined(XFree86Server) +#if defined(USE_XSHM) static volatile int mesaXErrorFlag = 0; /** @@ -170,7 +170,7 @@ alloc_back_buffer(XMesaBuffer b, GLuint width, GLuint height) if (b->db_mode == BACK_XIMAGE) { /* Deallocate the old backxrb->ximage, if any */ if (b->backxrb->ximage) { -#if defined(USE_XSHM) && !defined(XFree86Server) +#if defined(USE_XSHM) if (b->shm) { XShmDetach(b->xm_visual->display, &b->shminfo); XDestroyImage(b->backxrb->ximage); @@ -188,10 +188,6 @@ alloc_back_buffer(XMesaBuffer b, GLuint width, GLuint height) /* Allocate new back buffer */ if (b->shm == 0 || !alloc_back_shm_ximage(b, width, height)) { /* Allocate a regular XImage for the back buffer. */ -#ifdef XFree86Server - b->backxrb->ximage = XMesaCreateImage(b->xm_visual->BitsPerPixel, - width, height, NULL); -#else b->backxrb->ximage = XCreateImage(b->xm_visual->display, b->xm_visual->visinfo->visual, GET_VISUAL_DEPTH(b->xm_visual), @@ -199,7 +195,6 @@ alloc_back_buffer(XMesaBuffer b, GLuint width, GLuint height) NULL, width, height, 8, 0); /* pad, bytes_per_line */ -#endif if (!b->backxrb->ximage) { _mesa_warning(NULL, "alloc_back_buffer: XCreateImage failed.\n"); return; @@ -359,16 +354,8 @@ xmesa_delete_framebuffer(struct gl_framebuffer *fb) if (b->num_alloced > 0) { /* If no other buffer uses this X colormap then free the colors. */ if (!xmesa_find_buffer(b->display, b->cmap, b)) { -#ifdef XFree86Server - int client = 0; - if (b->frontxrb->drawable) - client = CLIENT_ID(b->frontxrb->drawable->id); - (void)FreeColors(b->cmap, client, - b->num_alloced, b->alloced_colors, 0); -#else XFreeColors(b->display, b->cmap, b->alloced_colors, b->num_alloced, 0); -#endif } } @@ -382,7 +369,7 @@ xmesa_delete_framebuffer(struct gl_framebuffer *fb) if (fb->Visual.doubleBufferMode) { /* free back ximage/pixmap/shmregion */ if (b->backxrb->ximage) { -#if defined(USE_XSHM) && !defined(XFree86Server) +#if defined(USE_XSHM) if (b->shm) { XShmDetach( b->display, &b->shminfo ); XDestroyImage( b->backxrb->ximage ); diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c index acece2025cf..b8d9e20c426 100644 --- a/src/mesa/drivers/x11/xm_dd.c +++ b/src/mesa/drivers/x11/xm_dd.c @@ -93,16 +93,12 @@ const int xmesa_kernel1[16] = { static void finish_or_flush( struct gl_context *ctx ) { -#ifdef XFree86Server - /* NOT_NEEDED */ -#else const XMesaContext xmesa = XMESA_CONTEXT(ctx); if (xmesa) { _glthread_LOCK_MUTEX(_xmesa_lock); XSync( xmesa->display, False ); _glthread_UNLOCK_MUTEX(_xmesa_lock); } -#endif } @@ -388,7 +384,6 @@ clear_buffers(struct gl_context *ctx, GLbitfield buffers) } -#ifndef XFree86Server /* XXX these functions haven't been tested in the Xserver environment */ @@ -731,7 +726,6 @@ xmesa_CopyPixels( struct gl_context *ctx, } } -#endif /* XFree86Server */ @@ -745,17 +739,9 @@ get_string( struct gl_context *ctx, GLenum name ) (void) ctx; switch (name) { case GL_RENDERER: -#ifdef XFree86Server - return (const GLubyte *) "Mesa GLX Indirect"; -#else return (const GLubyte *) "Mesa X11"; -#endif case GL_VENDOR: -#ifdef XFree86Server - return (const GLubyte *) "Mesa project: www.mesa3d.org"; -#else return NULL; -#endif default: return NULL; } @@ -948,43 +934,6 @@ xmesa_update_state( struct gl_context *ctx, GLbitfield new_state ) /** - * Called via ctx->Driver.TestProxyTeximage(). Normally, we'd just use - * the _mesa_test_proxy_teximage() fallback function, but we're going to - * special-case the 3D texture case to allow textures up to 512x512x32 - * texels. - */ -static GLboolean -test_proxy_teximage(struct gl_context *ctx, GLenum target, GLint level, - GLint internalFormat, GLenum format, GLenum type, - GLint width, GLint height, GLint depth, GLint border) -{ - if (target == GL_PROXY_TEXTURE_3D) { - /* special case for 3D textures */ - if (width * height * depth > 512 * 512 * 64 || - width < 2 * border || - (!ctx->Extensions.ARB_texture_non_power_of_two && - _mesa_bitcount(width - 2 * border) != 1) || - height < 2 * border || - (!ctx->Extensions.ARB_texture_non_power_of_two && - _mesa_bitcount(height - 2 * border) != 1) || - depth < 2 * border || - (!ctx->Extensions.ARB_texture_non_power_of_two && - _mesa_bitcount(depth - 2 * border) != 1)) { - /* Bad size, or too many texels */ - return GL_FALSE; - } - return GL_TRUE; - } - else { - /* use the fallback routine for 1D, 2D, cube and rect targets */ - return _mesa_test_proxy_teximage(ctx, target, level, internalFormat, - format, type, width, height, depth, - border); - } -} - - -/** * In SW, we don't really compress GL_COMPRESSED_RGB[A] textures! */ static gl_format @@ -1124,7 +1073,6 @@ xmesa_init_driver_functions( XMesaVisual xmvisual, } else { driver->Clear = clear_buffers; -#ifndef XFree86Server driver->CopyPixels = xmesa_CopyPixels; if (xmvisual->undithered_pf == PF_8R8G8B && xmvisual->dithered_pf == PF_8R8G8B && @@ -1134,9 +1082,8 @@ xmesa_init_driver_functions( XMesaVisual xmvisual, else if (xmvisual->undithered_pf == PF_5R6G5B) { driver->DrawPixels = xmesa_DrawPixels_5R6G5B; } -#endif } - driver->TestProxyTexImage = test_proxy_teximage; + #if ENABLE_EXT_texure_compression_s3tc driver->ChooseTextureFormat = choose_tex_format; #else diff --git a/src/mesa/drivers/x11/xm_glide.c b/src/mesa/drivers/x11/xm_glide.c index cbd69b011a1..d8a0e6de6d0 100644 --- a/src/mesa/drivers/x11/xm_glide.c +++ b/src/mesa/drivers/x11/xm_glide.c @@ -140,16 +140,8 @@ static void FXgetImage( XMesaBuffer b ) GLuint x, y; GLuint width, height; -#ifdef XFree86Server - x = b->frontxrb->pixmap->x; - y = b->frontxrb->pixmap->y; - width = b->frontxrb->pixmap->width; - height = b->frontxrb->pixmap->height; - depth = b->frontxrb->pixmap->depth; -#else xmesa_get_window_size(b->display, b, &width, &height); x = y = 0; -#endif if (b->mesa_buffer.Width != width || b->mesa_buffer.Height != height) { b->mesa_buffer.Width = MIN2((int)width, b->FXctx->width); b->mesa_buffer.Height = MIN2((int)height, b->FXctx->height); diff --git a/src/mesa/drivers/x11/xm_image.c b/src/mesa/drivers/x11/xm_image.c index 087b4e4c3a7..12fef7dad34 100644 --- a/src/mesa/drivers/x11/xm_image.c +++ b/src/mesa/drivers/x11/xm_image.c @@ -37,97 +37,3 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "glxheader.h" #include "xmesaP.h" -#ifdef XFree86Server - -#ifdef ROUNDUP -#undef ROUNDUP -#endif - -#define ROUNDUP(nbytes, pad) ((((nbytes) + ((pad)-1)) / (pad)) * ((pad)>>3)) - -XMesaImage *XMesaCreateImage(int bitsPerPixel, int width, int height, char *data) -{ - XMesaImage *image; - - image = (XMesaImage *)xalloc(sizeof(XMesaImage)); - - if (image) { - image->width = width; - image->height = height; - image->data = data; - /* Always pad to 32 bits */ - image->bytes_per_line = ROUNDUP((bitsPerPixel * width), 32); - image->bits_per_pixel = bitsPerPixel; - } - - return image; -} - -void XMesaDestroyImage(XMesaImage *image) -{ - if (image->data) - free(image->data); - xfree(image); -} - -unsigned long XMesaGetPixel(XMesaImage *image, int x, int y) -{ - CARD8 *row = (CARD8 *)(image->data + y*image->bytes_per_line); - CARD8 *i8; - CARD16 *i16; - CARD32 *i32; - switch (image->bits_per_pixel) { - case 8: - i8 = (CARD8 *)row; - return i8[x]; - break; - case 15: - case 16: - i16 = (CARD16 *)row; - return i16[x]; - break; - case 24: /* WARNING: architecture specific code */ - i8 = (CARD8 *)row; - return (((CARD32)i8[x*3]) | - (((CARD32)i8[x*3+1])<<8) | - (((CARD32)i8[x*3+2])<<16)); - break; - case 32: - i32 = (CARD32 *)row; - return i32[x]; - break; - } - return 0; -} - -#ifndef XMESA_USE_PUTPIXEL_MACRO -void XMesaPutPixel(XMesaImage *image, int x, int y, unsigned long pixel) -{ - CARD8 *row = (CARD8 *)(image->data + y*image->bytes_per_line); - CARD8 *i8; - CARD16 *i16; - CARD32 *i32; - switch (image->bits_per_pixel) { - case 8: - i8 = (CARD8 *)row; - i8[x] = (CARD8)pixel; - break; - case 15: - case 16: - i16 = (CARD16 *)row; - i16[x] = (CARD16)pixel; - break; - case 24: /* WARNING: architecture specific code */ - i8 = (CARD8 *)__row; - i8[x*3] = (CARD8)(p); - i8[x*3+1] = (CARD8)(p>>8); - i8[x*3+2] = (CARD8)(p>>16); - case 32: - i32 = (CARD32 *)row; - i32[x] = (CARD32)pixel; - break; - } -} -#endif - -#endif /* XFree86Server */ diff --git a/src/mesa/drivers/x11/xm_line.c b/src/mesa/drivers/x11/xm_line.c index f03f99f918f..04cedcd4ec0 100644 --- a/src/mesa/drivers/x11/xm_line.c +++ b/src/mesa/drivers/x11/xm_line.c @@ -537,7 +537,6 @@ void xmesa_choose_point( struct gl_context *ctx ) -#ifndef XFree86Server /** * Draw fast, XOR line with XDrawLine in front color buffer. * WARNING: this isn't fully OpenGL conformant because different pixels @@ -567,7 +566,6 @@ xor_line(struct gl_context *ctx, const SWvertex *vert0, const SWvertex *vert1) XDrawLine(dpy, xrb->pixmap, gc, x0, y0, x1, y1); XMesaSetFunction(dpy, gc, GXcopy); /* this gc is used elsewhere */ } -#endif /* XFree86Server */ #endif /* CHAN_BITS == 8 */ @@ -660,7 +658,6 @@ get_line_func(struct gl_context *ctx) } } -#ifndef XFree86Server if (ctx->DrawBuffer->_NumColorDrawBuffers == 1 && ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT && swrast->_RasterMask == LOGIC_OP_BIT @@ -669,7 +666,6 @@ get_line_func(struct gl_context *ctx) && !ctx->Line.SmoothFlag) { return xor_line; } -#endif /* XFree86Server */ #endif /* CHAN_BITS == 8 */ return (swrast_line_func) NULL; diff --git a/src/mesa/drivers/x11/xm_span.c b/src/mesa/drivers/x11/xm_span.c index ab66c5e1f12..294b93a57cc 100644 --- a/src/mesa/drivers/x11/xm_span.c +++ b/src/mesa/drivers/x11/xm_span.c @@ -42,7 +42,6 @@ * generate BadMatch errors if the drawable isn't mapped. */ -#ifndef XFree86Server static int caught_xgetimage_error = 0; static int (*old_xerror_handler)( XMesaDisplay *dpy, XErrorEvent *ev ); static unsigned long xgetimage_serial; @@ -87,7 +86,6 @@ static int check_xgetimage_errors( void ) /* return 0=no error, 1=error caught */ return caught_xgetimage_error; } -#endif /* @@ -97,7 +95,6 @@ static unsigned long read_pixel( XMesaDisplay *dpy, XMesaDrawable d, int x, int y ) { unsigned long p; -#ifndef XFree86Server XMesaImage *pixel = NULL; int error; @@ -113,9 +110,6 @@ static unsigned long read_pixel( XMesaDisplay *dpy, if (pixel) { XMesaDestroyImage( pixel ); } -#else - (*dpy->GetImage)(d, x, y, 1, 1, ZPixmap, ~0L, (pointer)&p); -#endif return p; } @@ -3763,7 +3757,6 @@ static void put_values_ci_ximage( PUT_VALUES_ARGS ) /***** Pixel reading *****/ /**********************************************************************/ -#ifndef XFree86Server /** * Do clip testing prior to calling XGetImage. If any of the region lies * outside the screen's bounds, XGetImage will return NULL. @@ -3806,7 +3799,6 @@ clip_for_xgetimage(struct gl_context *ctx, XMesaPixmap pixmap, GLuint *n, GLint } return 0; } -#endif /* @@ -3824,7 +3816,6 @@ get_row_ci(struct gl_context *ctx, struct gl_renderbuffer *rb, y = YFLIP(xrb, y); if (xrb->pixmap) { -#ifndef XFree86Server XMesaImage *span = NULL; int error; int k = clip_for_xgetimage(ctx, xrb->pixmap, &n, &x, &y); @@ -3850,11 +3841,6 @@ get_row_ci(struct gl_context *ctx, struct gl_renderbuffer *rb, if (span) { XMesaDestroyImage( span ); } -#else - (*xmesa->display->GetImage)(xrb->drawable, - x, y, n, 1, ZPixmap, - ~0L, (pointer)index); -#endif } else if (xrb->ximage) { XMesaImage *img = xrb->ximage; @@ -3882,14 +3868,6 @@ get_row_rgba(struct gl_context *ctx, struct gl_renderbuffer *rb, /* Read from Pixmap or Window */ XMesaImage *span = NULL; int error; -#ifdef XFree86Server - span = XMesaCreateImage(xmesa->xm_visual->BitsPerPixel, n, 1, NULL); - span->data = (char *)MALLOC(span->height * span->bytes_per_line); - error = (!span->data); - (*xmesa->display->GetImage)(xrb->drawable, - x, YFLIP(xrb, y), n, 1, ZPixmap, - ~0L, (pointer)span->data); -#else int k; y = YFLIP(xrb, y); k = clip_for_xgetimage(ctx, xrb->pixmap, &n, &x, &y); @@ -3900,7 +3878,6 @@ get_row_rgba(struct gl_context *ctx, struct gl_renderbuffer *rb, span = XGetImage( xmesa->display, xrb->pixmap, x, y, n, 1, AllPlanes, ZPixmap ); error = check_xgetimage_errors(); -#endif if (span && !error) { switch (xmesa->pixelformat) { case PF_Truecolor: diff --git a/src/mesa/drivers/x11/xmesa.h b/src/mesa/drivers/x11/xmesa.h index f63626a9702..98737fab248 100644 --- a/src/mesa/drivers/x11/xmesa.h +++ b/src/mesa/drivers/x11/xmesa.h @@ -72,13 +72,9 @@ and create a window, you must do the following to use the X/Mesa interface: extern "C" { #endif -#ifdef XFree86Server -#include "xmesa_xf86.h" -#else #include <X11/Xlib.h> #include <X11/Xutil.h> #include "xmesa_x.h" -#endif #include "GL/gl.h" #ifdef AMIWIN @@ -180,19 +176,6 @@ extern XMesaContext XMesaCreateContext( XMesaVisual v, extern void XMesaDestroyContext( XMesaContext c ); -#ifdef XFree86Server -/* - * These are the extra routines required for integration with XFree86. - * None of these routines should be user visible. -KEM - */ -extern GLboolean XMesaForceCurrent( XMesaContext c ); - -extern GLboolean XMesaLoseCurrent( XMesaContext c ); - -extern GLboolean XMesaCopyContext( XMesaContext src, - XMesaContext dst, - GLuint mask ); -#endif /* XFree86Server */ /* diff --git a/src/mesa/drivers/x11/xmesaP.h b/src/mesa/drivers/x11/xmesaP.h index 5d34b430cb6..63e3e211bf6 100644 --- a/src/mesa/drivers/x11/xmesaP.h +++ b/src/mesa/drivers/x11/xmesaP.h @@ -33,9 +33,6 @@ #include "fxmesa.h" #include "xm_glide.h" #endif -#ifdef XFree86Server -#include "xm_image.h" -#endif extern _glthread_Mutex _xmesa_lock; @@ -88,13 +85,8 @@ struct xmesa_visual { XMesaDisplay *display; /* The X11 display */ int screen, visualID; int visualType; -#ifdef XFree86Server - GLint ColormapEntries; - GLint nplanes; -#else XMesaVisualInfo visinfo; /* X's visual info (pointer to private copy) */ XVisualInfo *vishandle; /* Only used in fakeglx.c */ -#endif GLint BitsPerPixel; /* True bits per pixel for XImages */ GLboolean ximage_flag; /* Use XImage for back buffer (not pixmap)? */ @@ -233,7 +225,7 @@ struct xmesa_buffer { /* 0 = not available */ /* 1 = XImage support available */ /* 2 = Pixmap support available too */ -#if defined(USE_XSHM) && !defined(XFree86Server) +#if defined(USE_XSHM) XShmSegmentInfo shminfo; #endif @@ -259,11 +251,7 @@ struct xmesa_buffer { /* Used to do XAllocColor/XFreeColors accounting: */ int num_alloced; -#if defined(XFree86Server) - Pixel alloced_colors[256]; -#else unsigned long alloced_colors[256]; -#endif #if defined( FX ) /* For 3Dfx Glide only */ @@ -578,9 +566,7 @@ extern void xmesa_register_swrast_functions( struct gl_context *ctx ); #define ENABLE_EXT_texure_compression_s3tc 0 /* SW texture compression */ -#ifdef XFree86Server -#define ENABLE_EXT_timer_query 0 -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L #define ENABLE_EXT_timer_query 1 /* should have 64-bit GLuint64EXT */ #else #define ENABLE_EXT_timer_query 0 /* may not have 64-bit GLuint64EXT */ diff --git a/src/mesa/main/APIspec.xml b/src/mesa/main/APIspec.xml index 4dc0b0d4851..16d0c9413d0 100644 --- a/src/mesa/main/APIspec.xml +++ b/src/mesa/main/APIspec.xml @@ -3536,7 +3536,7 @@ <api name="mesa" implementation="true"> <category name="MESA"/> - <function name="Color4f" default_prefix="_vbo_" template="Color" gltype="GLfloat" vector_size="4" expand_vector="true"/> + <function name="Color4f" default_prefix="_es_" template="Color" gltype="GLfloat" vector_size="4" expand_vector="true"/> <function name="ClipPlane" template="ClipPlane" gltype="GLdouble"/> <function name="CullFace" template="CullFace"/> @@ -3554,8 +3554,8 @@ <function name="LineWidth" template="LineWidth" gltype="GLfloat"/> - <function name="Materialf" default_prefix="_vbo_" template="Material" gltype="GLfloat" expand_vector="true"/> - <function name="Materialfv" default_prefix="_vbo_" template="Material" gltype="GLfloat"/> + <function name="Materialf" default_prefix="_es_" template="Material" gltype="GLfloat" expand_vector="true"/> + <function name="Materialfv" default_prefix="_es_" template="Material" gltype="GLfloat"/> <function name="PointSize" template="PointSize" gltype="GLfloat"/> <function name="PointSizePointer" template="PointSizePointer"/> @@ -3650,7 +3650,7 @@ <function name="EnableClientState" template="EnableClientState"/> <function name="GetPointerv" template="GetPointer"/> - <function name="Normal3f" default_prefix="_vbo_" template="Normal" gltype="GLfloat" expand_vector="true"/> + <function name="Normal3f" default_prefix="_es_" template="Normal" gltype="GLfloat" expand_vector="true"/> <function name="NormalPointer" template="NormalPointer"/> <function name="TexCoordPointer" template="TexCoordPointer"/> <function name="VertexPointer" template="VertexPointer"/> @@ -3679,7 +3679,7 @@ <function name="ActiveTextureARB" template="ActiveTexture"/> <function name="ClientActiveTextureARB" template="ClientActiveTexture"/> - <function name="MultiTexCoord4f" default_prefix="_vbo_" template="MultiTexCoord" gltype="GLfloat" vector_size="4" expand_vector="true"/> + <function name="MultiTexCoord4f" default_prefix="_es_" template="MultiTexCoord" gltype="GLfloat" vector_size="4" expand_vector="true"/> <function name="SampleCoverageARB" template="SampleCoverage" gltype="GLclampf"/> @@ -3691,14 +3691,14 @@ <function name="PointParameterf" template="PointParameter" gltype="GLfloat" expand_vector="true"/> <function name="PointParameterfv" template="PointParameter" gltype="GLfloat"/> - <function name="VertexAttrib1f" default_prefix="_vbo_" template="VertexAttrib" gltype="GLfloat" vector_size="1" expand_vector="true"/> - <function name="VertexAttrib2f" default_prefix="_vbo_" template="VertexAttrib" gltype="GLfloat" vector_size="2" expand_vector="true"/> - <function name="VertexAttrib3f" default_prefix="_vbo_" template="VertexAttrib" gltype="GLfloat" vector_size="3" expand_vector="true"/> - <function name="VertexAttrib4f" default_prefix="_vbo_" template="VertexAttrib" gltype="GLfloat" vector_size="4" expand_vector="true"/> - <function name="VertexAttrib1fv" default_prefix="_vbo_" template="VertexAttrib" gltype="GLfloat" vector_size="1"/> - <function name="VertexAttrib2fv" default_prefix="_vbo_" template="VertexAttrib" gltype="GLfloat" vector_size="2"/> - <function name="VertexAttrib3fv" default_prefix="_vbo_" template="VertexAttrib" gltype="GLfloat" vector_size="3"/> - <function name="VertexAttrib4fv" default_prefix="_vbo_" template="VertexAttrib" gltype="GLfloat" vector_size="4"/> + <function name="VertexAttrib1f" default_prefix="_es_" template="VertexAttrib" gltype="GLfloat" vector_size="1" expand_vector="true"/> + <function name="VertexAttrib2f" default_prefix="_es_" template="VertexAttrib" gltype="GLfloat" vector_size="2" expand_vector="true"/> + <function name="VertexAttrib3f" default_prefix="_es_" template="VertexAttrib" gltype="GLfloat" vector_size="3" expand_vector="true"/> + <function name="VertexAttrib4f" default_prefix="_es_" template="VertexAttrib" gltype="GLfloat" vector_size="4" expand_vector="true"/> + <function name="VertexAttrib1fv" default_prefix="_es_" template="VertexAttrib" gltype="GLfloat" vector_size="1"/> + <function name="VertexAttrib2fv" default_prefix="_es_" template="VertexAttrib" gltype="GLfloat" vector_size="2"/> + <function name="VertexAttrib3fv" default_prefix="_es_" template="VertexAttrib" gltype="GLfloat" vector_size="3"/> + <function name="VertexAttrib4fv" default_prefix="_es_" template="VertexAttrib" gltype="GLfloat" vector_size="4"/> <function name="VertexAttribPointerARB" template="VertexAttribPointer"/> <function name="EnableVertexAttribArrayARB" template="EnableVertexAttribArray"/> @@ -3850,7 +3850,7 @@ <category name="OES_matrix_palette"/> - <function name="Color4f" template="Color" gltype="GLfloat" vector_size="4" expand_vector="true"/> + <function name="Color4f" external="true" template="Color" gltype="GLfloat" vector_size="4" expand_vector="true"/> <function name="Color4ub" template="Color" gltype="GLubyte" vector_size="4" expand_vector="true"/> <function name="Color4x" template="Color" gltype="GLfixed" vector_size="4" expand_vector="true"/> @@ -3880,8 +3880,8 @@ <function name="LineWidth" template="LineWidth" gltype="GLfloat"/> <function name="LineWidthx" template="LineWidth" gltype="GLfixed"/> - <function name="Materialf" template="Material" gltype="GLfloat" expand_vector="true"/> - <function name="Materialfv" template="Material" gltype="GLfloat"/> + <function name="Materialf" external="true" template="Material" gltype="GLfloat" expand_vector="true"/> + <function name="Materialfv" external="true" template="Material" gltype="GLfloat"/> <function name="Materialx" template="Material" gltype="GLfixed" expand_vector="true"/> <function name="Materialxv" template="Material" gltype="GLfixed"/> @@ -4012,7 +4012,7 @@ <function name="GetPointerv" template="GetPointer"/> - <function name="Normal3f" template="Normal" gltype="GLfloat" expand_vector="true"/> + <function name="Normal3f" external="true" template="Normal" gltype="GLfloat" expand_vector="true"/> <function name="Normal3x" template="Normal" gltype="GLfixed" expand_vector="true"/> <function name="NormalPointer" template="NormalPointer"/> <function name="TexCoordPointer" template="TexCoordPointer"/> @@ -4039,7 +4039,7 @@ <function name="ActiveTexture" template="ActiveTexture"/> <function name="ClientActiveTexture" template="ClientActiveTexture"/> - <function name="MultiTexCoord4f" template="MultiTexCoord" gltype="GLfloat" vector_size="4" expand_vector="true"/> + <function name="MultiTexCoord4f" external="true" template="MultiTexCoord" gltype="GLfloat" vector_size="4" expand_vector="true"/> <function name="SampleCoverage" template="SampleCoverage" gltype="GLclampf"/> <function name="SampleCoveragex" template="SampleCoverage" gltype="GLclampx"/> @@ -4227,14 +4227,14 @@ <function name="BlendFuncSeparate" template="BlendFuncSeparate"/> - <function name="VertexAttrib1f" template="VertexAttrib" gltype="GLfloat" vector_size="1" expand_vector="true"/> - <function name="VertexAttrib2f" template="VertexAttrib" gltype="GLfloat" vector_size="2" expand_vector="true"/> - <function name="VertexAttrib3f" template="VertexAttrib" gltype="GLfloat" vector_size="3" expand_vector="true"/> - <function name="VertexAttrib4f" template="VertexAttrib" gltype="GLfloat" vector_size="4" expand_vector="true"/> - <function name="VertexAttrib1fv" template="VertexAttrib" gltype="GLfloat" vector_size="1"/> - <function name="VertexAttrib2fv" template="VertexAttrib" gltype="GLfloat" vector_size="2"/> - <function name="VertexAttrib3fv" template="VertexAttrib" gltype="GLfloat" vector_size="3"/> - <function name="VertexAttrib4fv" template="VertexAttrib" gltype="GLfloat" vector_size="4"/> + <function name="VertexAttrib1f" external="true" template="VertexAttrib" gltype="GLfloat" vector_size="1" expand_vector="true"/> + <function name="VertexAttrib2f" external="true" template="VertexAttrib" gltype="GLfloat" vector_size="2" expand_vector="true"/> + <function name="VertexAttrib3f" external="true" template="VertexAttrib" gltype="GLfloat" vector_size="3" expand_vector="true"/> + <function name="VertexAttrib4f" external="true" template="VertexAttrib" gltype="GLfloat" vector_size="4" expand_vector="true"/> + <function name="VertexAttrib1fv" external="true" template="VertexAttrib" gltype="GLfloat" vector_size="1"/> + <function name="VertexAttrib2fv" external="true" template="VertexAttrib" gltype="GLfloat" vector_size="2"/> + <function name="VertexAttrib3fv" external="true" template="VertexAttrib" gltype="GLfloat" vector_size="3"/> + <function name="VertexAttrib4fv" external="true" template="VertexAttrib" gltype="GLfloat" vector_size="4"/> <function name="VertexAttribPointer" template="VertexAttribPointer"/> diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h index 800eb839005..5557a3b5cb5 100644 --- a/src/mesa/main/compiler.h +++ b/src/mesa/main/compiler.h @@ -358,6 +358,10 @@ static INLINE GLuint CPU_TO_LE32(GLuint x) #define M_E (2.7182818284590452354) #endif +#ifndef M_LOG2E +#define M_LOG2E (1.4426950408889634074) +#endif + #ifndef ONE_DIV_LN2 #define ONE_DIV_LN2 (1.442695040888963456) #endif diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h index 0f2d1a8f8da..fffb1a7d2ec 100644 --- a/src/mesa/main/config.h +++ b/src/mesa/main/config.h @@ -97,17 +97,20 @@ /** Max texture palette / color table size */ #define MAX_COLOR_TABLE_SIZE 256 +/** Max memory to allow for a single texture image (in megabytes) */ +#define MAX_TEXTURE_MBYTES 1024 + /** Number of 1D/2D texture mipmap levels */ -#define MAX_TEXTURE_LEVELS 13 +#define MAX_TEXTURE_LEVELS 15 /** Number of 3D texture mipmap levels */ -#define MAX_3D_TEXTURE_LEVELS 9 +#define MAX_3D_TEXTURE_LEVELS 15 /** Number of cube texture mipmap levels - GL_ARB_texture_cube_map */ -#define MAX_CUBE_TEXTURE_LEVELS 13 +#define MAX_CUBE_TEXTURE_LEVELS 15 /** Maximum rectangular texture size - GL_NV_texture_rectangle */ -#define MAX_TEXTURE_RECT_SIZE 4096 +#define MAX_TEXTURE_RECT_SIZE 16384 /** Maximum number of layers in a 1D or 2D array texture - GL_MESA_texture_array */ #define MAX_ARRAY_TEXTURE_LAYERS 64 @@ -140,11 +143,28 @@ */ #ifndef MAX_WIDTH -# define MAX_WIDTH 4096 +# define MAX_WIDTH 16384 #endif /** Maximum viewport/image height */ #ifndef MAX_HEIGHT -# define MAX_HEIGHT 4096 +# define MAX_HEIGHT 16384 +#endif + +/* XXX: hack to prevent stack overflow on windows until all temporary arrays + * [MAX_WIDTH] are allocated from the heap */ +#ifdef WIN32 +#undef MAX_TEXTURE_LEVELS +#undef MAX_3D_TEXTURE_LEVELS +#undef MAX_CUBE_TEXTURE_LEVELS +#undef MAX_TEXTURE_RECT_SIZE +#undef MAX_WIDTH +#undef MAX_HEIGHT +#define MAX_TEXTURE_LEVELS 13 +#define MAX_3D_TEXTURE_LEVELS 9 +#define MAX_CUBE_TEXTURE_LEVELS 13 +#define MAX_TEXTURE_RECT_SIZE 4096 +#define MAX_WIDTH 4096 +#define MAX_HEIGHT 4096 #endif /** Maxmimum size for CVA. May be overridden by the drivers. */ @@ -168,7 +188,7 @@ #define MAX_TEXTURE_MAX_ANISOTROPY 16.0 /** For GL_EXT_texture_lod_bias (typically MAX_TEXTURE_LEVELS - 1) */ -#define MAX_TEXTURE_LOD_BIAS 12.0 +#define MAX_TEXTURE_LOD_BIAS 14.0 /** For any program target/extension */ /*@{*/ diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index b132030b9b1..f42a566c302 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -535,6 +535,7 @@ _mesa_init_constants(struct gl_context *ctx) assert(ctx); /* Constants, may be overriden (usually only reduced) by device drivers */ + ctx->Const.MaxTextureMbytes = MAX_TEXTURE_MBYTES; ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS; ctx->Const.Max3DTextureLevels = MAX_3D_TEXTURE_LEVELS; ctx->Const.MaxCubeTextureLevels = MAX_CUBE_TEXTURE_LEVELS; @@ -1399,6 +1400,8 @@ _mesa_make_current( struct gl_context *newCtx, struct gl_framebuffer *drawBuffer, struct gl_framebuffer *readBuffer ) { + GET_CURRENT_CONTEXT(curCtx); + if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(newCtx, "_mesa_make_current()\n"); @@ -1419,6 +1422,11 @@ _mesa_make_current( struct gl_context *newCtx, } } + if (curCtx && + (curCtx->WinSysDrawBuffer || curCtx->WinSysReadBuffer) && /* make sure this context is valid for flushing */ + curCtx != newCtx) + _mesa_flush(curCtx); + /* We used to call _glapi_check_multithread() here. Now do it in drivers */ _glapi_set_context((void *) newCtx); ASSERT(_mesa_get_current_context() == newCtx); @@ -1822,7 +1830,7 @@ _mesa_valid_to_render(struct gl_context *ctx, const char *where) #ifdef DEBUG if (ctx->Shader.Flags & GLSL_LOG) { struct gl_shader_program *shProg[MESA_SHADER_TYPES]; - unsigned i; + gl_shader_type i; shProg[MESA_SHADER_VERTEX] = ctx->Shader.CurrentVertexProgram; shProg[MESA_SHADER_GEOMETRY] = ctx->Shader.CurrentGeometryProgram; diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index 3d5830c01cc..b71afdd61f3 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -873,8 +873,12 @@ make_extension_string_es2(const struct gl_context *ctx, GLubyte *str) if (ctx->Extensions.ARB_vertex_buffer_object) len += append_extension(&str, "GL_OES_mapbuffer"); +#if 0 + /* disabled because of missing GLSL support */ if (ctx->Extensions.EXT_texture3D) len += append_extension(&str, "GL_OES_texture_3D"); +#endif + if (ctx->Extensions.ARB_texture_non_power_of_two) len += append_extension(&str, "GL_OES_texture_npot"); if (ctx->Extensions.EXT_texture_filter_anisotropic) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 7c3357043fa..975063d0d78 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2014,7 +2014,7 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, switch (pname) { case GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE_EXT: - *params = att->Type; + *params = buffer->Name == 0 ? GL_FRAMEBUFFER_DEFAULT : att->Type; return; case GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME_EXT: if (att->Type == GL_RENDERBUFFER_EXT) { @@ -2024,8 +2024,8 @@ _mesa_GetFramebufferAttachmentParameterivEXT(GLenum target, GLenum attachment, *params = att->Texture->Name; } else { - _mesa_error(ctx, GL_INVALID_ENUM, - "glGetFramebufferAttachmentParameterivEXT(pname)"); + assert(att->Type == GL_NONE); + *params = 0; } return; case GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL_EXT: @@ -2146,6 +2146,7 @@ _mesa_GenerateMipmapEXT(GLenum target) /* OK, legal value */ break; default: + /* XXX need to implement GL_TEXTURE_1D_ARRAY and GL_TEXTURE_2D_ARRAY */ _mesa_error(ctx, GL_INVALID_ENUM, "glGenerateMipmapEXT(target)"); return; } @@ -2157,6 +2158,13 @@ _mesa_GenerateMipmapEXT(GLenum target) return; } + if (texObj->Target == GL_TEXTURE_CUBE_MAP && + !_mesa_cube_complete(texObj)) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glGenerateMipmap(incomplete cube map)"); + return; + } + _mesa_lock_texture(ctx, texObj); if (target == GL_TEXTURE_CUBE_MAP) { GLuint face; diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c index 88a04e888e4..cd9eb81852f 100644 --- a/src/mesa/main/formats.c +++ b/src/mesa/main/formats.c @@ -1057,11 +1057,12 @@ _mesa_format_image_size(gl_format format, GLsizei width, const struct gl_format_info *info = _mesa_get_format_info(format); /* Strictly speaking, a conditional isn't needed here */ if (info->BlockWidth > 1 || info->BlockHeight > 1) { - /* compressed format */ + /* compressed format (2D only for now) */ const GLuint bw = info->BlockWidth, bh = info->BlockHeight; const GLuint wblocks = (width + bw - 1) / bw; const GLuint hblocks = (height + bh - 1) / bh; const GLuint sz = wblocks * hblocks * info->BytesPerBlock; + assert(depth == 1); return sz; } else { @@ -1072,6 +1073,36 @@ _mesa_format_image_size(gl_format format, GLsizei width, } +/** + * Same as _mesa_format_image_size() but returns a 64-bit value to + * accomodate very large textures. + */ +uint64_t +_mesa_format_image_size64(gl_format format, GLsizei width, + GLsizei height, GLsizei depth) +{ + const struct gl_format_info *info = _mesa_get_format_info(format); + /* Strictly speaking, a conditional isn't needed here */ + if (info->BlockWidth > 1 || info->BlockHeight > 1) { + /* compressed format (2D only for now) */ + const uint64_t bw = info->BlockWidth, bh = info->BlockHeight; + const uint64_t wblocks = (width + bw - 1) / bw; + const uint64_t hblocks = (height + bh - 1) / bh; + const uint64_t sz = wblocks * hblocks * info->BytesPerBlock; + assert(depth == 1); + return sz; + } + else { + /* non-compressed */ + const uint64_t sz = ((uint64_t) width * + (uint64_t) height * + (uint64_t) depth * + info->BytesPerBlock); + return sz; + } +} + + GLint _mesa_format_row_stride(gl_format format, GLsizei width) diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h index eeb460dabe7..997229bf9f4 100644 --- a/src/mesa/main/formats.h +++ b/src/mesa/main/formats.h @@ -209,6 +209,10 @@ extern GLuint _mesa_format_image_size(gl_format format, GLsizei width, GLsizei height, GLsizei depth); +extern uint64_t +_mesa_format_image_size64(gl_format format, GLsizei width, + GLsizei height, GLsizei depth); + extern GLint _mesa_format_row_stride(gl_format format, GLsizei width); diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index b54af6ee86b..5ae35b868e3 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -314,6 +314,7 @@ EXTRA_EXT2(ARB_vertex_program, NV_vertex_program); EXTRA_EXT2(ARB_vertex_program, ARB_fragment_program); EXTRA_EXT(ARB_vertex_buffer_object); EXTRA_EXT(ARB_geometry_shader4); +EXTRA_EXT(ARB_copy_buffer); static const int extra_ARB_vertex_program_ARB_fragment_program_NV_vertex_program[] = { @@ -469,6 +470,10 @@ static const struct value_desc values[] = { { GL_ELEMENT_ARRAY_BUFFER_BINDING_ARB, LOC_CUSTOM, TYPE_INT, 0, extra_ARB_vertex_buffer_object }, + /* GL_ARB_copy_buffer */ + { GL_COPY_READ_BUFFER, LOC_CUSTOM, TYPE_INT, 0, extra_ARB_copy_buffer }, + { GL_COPY_WRITE_BUFFER, LOC_CUSTOM, TYPE_INT, 0, extra_ARB_copy_buffer }, + /* GL_OES_read_format */ { GL_IMPLEMENTATION_COLOR_READ_TYPE_OES, LOC_CUSTOM, TYPE_INT, 0, extra_new_buffers_OES_read_format }, @@ -700,12 +705,9 @@ static const struct value_desc values[] = { #if FEATURE_ES2 /* Enums unique to OpenGL ES 2.0 */ { 0, 0, TYPE_API_MASK, API_OPENGLES2_BIT, NO_EXTRA }, - { GL_MAX_FRAGMENT_UNIFORM_VECTORS, LOC_CUSTOM, TYPE_INT, - offsetof(struct gl_context, Const.FragmentProgram.MaxUniformComponents), NO_EXTRA }, - { GL_MAX_VARYING_VECTORS, LOC_CUSTOM, TYPE_INT, - offsetof(struct gl_context, Const.MaxVarying), NO_EXTRA }, - { GL_MAX_VERTEX_UNIFORM_VECTORS, LOC_CUSTOM, TYPE_INT, - offsetof(struct gl_context, Const.VertexProgram.MaxUniformComponents), NO_EXTRA }, + { GL_MAX_FRAGMENT_UNIFORM_VECTORS, LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA }, + { GL_MAX_VARYING_VECTORS, CONTEXT_INT(Const.MaxVarying), NO_EXTRA }, + { GL_MAX_VERTEX_UNIFORM_VECTORS, LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA }, { GL_SHADER_COMPILER, CONST(1), NO_EXTRA }, /* OES_get_program_binary */ { GL_NUM_SHADER_BINARY_FORMATS, CONST(0), NO_EXTRA }, @@ -1112,6 +1114,14 @@ static const struct value_desc values[] = { extra_valid_draw_buffer }, { GL_DRAW_BUFFER3_ARB, BUFFER_ENUM(ColorDrawBuffer[3]), extra_valid_draw_buffer }, + { GL_DRAW_BUFFER4_ARB, BUFFER_ENUM(ColorDrawBuffer[4]), + extra_valid_draw_buffer }, + { GL_DRAW_BUFFER5_ARB, BUFFER_ENUM(ColorDrawBuffer[5]), + extra_valid_draw_buffer }, + { GL_DRAW_BUFFER6_ARB, BUFFER_ENUM(ColorDrawBuffer[6]), + extra_valid_draw_buffer }, + { GL_DRAW_BUFFER7_ARB, BUFFER_ENUM(ColorDrawBuffer[7]), + extra_valid_draw_buffer }, /* GL_ATI_fragment_shader */ { GL_NUM_FRAGMENT_REGISTERS_ATI, CONST(6), extra_ATI_fragment_shader }, @@ -1564,6 +1574,14 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu v->value_int = ctx->Array.ElementArrayBufferObj->Name; break; + /* ARB_copy_buffer */ + case GL_COPY_READ_BUFFER: + v->value_int = ctx->CopyReadBuffer->Name; + break; + case GL_COPY_WRITE_BUFFER: + v->value_int = ctx->CopyWriteBuffer->Name; + break; + case GL_FRAGMENT_PROGRAM_BINDING_NV: v->value_int = ctx->FragmentProgram.Current ? ctx->FragmentProgram.Current->Base.Id : 0; @@ -1604,6 +1622,14 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu case GL_POINT_SIZE_ARRAY_BUFFER_BINDING_OES: v->value_int = ctx->Array.ArrayObj->PointSize.BufferObj->Name; break; + + case GL_MAX_VERTEX_UNIFORM_VECTORS: + v->value_int = ctx->Const.VertexProgram.MaxUniformComponents / 4; + break; + + case GL_MAX_FRAGMENT_UNIFORM_VECTORS: + v->value_int = ctx->Const.FragmentProgram.MaxUniformComponents / 4; + break; } } @@ -1733,16 +1759,18 @@ find_value(const char *func, GLenum pname, void **p, union value *v) hash = (pname * prime_factor); while (1) { d = &values[table[hash & mask]]; - if (likely(d->pname == pname)) - break; /* If the enum isn't valid, the hash walk ends with index 0, * which is the API mask entry at the beginning of values[]. */ - if (d->type == TYPE_API_MASK) { + if (unlikely(d->type == TYPE_API_MASK)) { _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", func, _mesa_lookup_enum_by_nr(pname)); return &error_value; } + + if (likely(d->pname == pname)) + break; + hash += prime_step; } diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c index df1527b47f1..f9f2ed73077 100644 --- a/src/mesa/main/image.c +++ b/src/mesa/main/image.c @@ -154,6 +154,8 @@ _mesa_sizeof_type( GLenum type ) return sizeof(GLdouble); case GL_HALF_FLOAT_ARB: return sizeof(GLhalfARB); + case GL_FIXED: + return sizeof(GLfixed); default: return -1; } diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index bcca4edc1aa..cefbf4d8c98 100644 --- a/src/mesa/main/imports.c +++ b/src/mesa/main/imports.c @@ -88,7 +88,8 @@ _mesa_align_malloc(size_t bytes, unsigned long alignment) #if defined(HAVE_POSIX_MEMALIGN) void *mem; int err = posix_memalign(& mem, alignment, bytes); - (void) err; + if (err) + return NULL; return mem; #elif defined(_WIN32) && defined(_MSC_VER) return _aligned_malloc(bytes, alignment); diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 87b96489dbf..1c549a8e247 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -41,14 +41,6 @@ #include "math/m_matrix.h" /* GLmatrix */ #include "main/simple_list.h" /* struct simple_node */ -/* Shader stages. Note that these will become 5 with tessellation. - * These MUST have the same values as PIPE_SHADER_* - */ -#define MESA_SHADER_VERTEX 0 -#define MESA_SHADER_FRAGMENT 1 -#define MESA_SHADER_GEOMETRY 2 -#define MESA_SHADER_TYPES 3 - /** * Color channel data type. @@ -130,6 +122,20 @@ struct st_context; /** + * Shader stages. Note that these will become 5 with tessellation. + * These MUST have the same values as gallium's PIPE_SHADER_* + */ +typedef enum +{ + MESA_SHADER_VERTEX = 0, + MESA_SHADER_FRAGMENT = 1, + MESA_SHADER_GEOMETRY = 2, + MESA_SHADER_TYPES = 3 +} gl_shader_type; + + + +/** * Indexes for vertex program attributes. * GL_NV_vertex_program aliases generic attributes over the conventional * attributes. In GL_ARB_vertex_program shader the aliasing is optional. @@ -2191,6 +2197,7 @@ struct gl_shader_compiler_options GLboolean EmitNoCont; /**< Emit CONT opcode? */ GLboolean EmitNoMainReturn; /**< Emit CONT/RET opcodes? */ GLboolean EmitNoNoise; /**< Emit NOISE opcodes? */ + GLboolean EmitNoPow; /**< Emit POW opcodes? */ /** * \name Forms of indirect addressing the driver cannot do. @@ -2560,6 +2567,7 @@ struct gl_program_constants */ struct gl_constants { + GLint MaxTextureMbytes; /**< Max memory per image, in MB */ GLint MaxTextureLevels; /**< Max mipmap levels. */ GLint Max3DTextureLevels; /**< Max mipmap levels for 3D textures */ GLint MaxCubeTextureLevels; /**< Max mipmap levels for cube textures */ diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index fdb647c7ea8..6d524e64908 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -484,11 +484,25 @@ _mesa_pack_rgba_span_float(struct gl_context *ctx, GLuint n, GLfloat rgba[][4], const struct gl_pixelstore_attrib *dstPacking, GLbitfield transferOps) { - GLfloat luminance[MAX_WIDTH]; + GLfloat *luminance; const GLint comps = _mesa_components_in_format(dstFormat); const GLboolean intDstFormat = _mesa_is_integer_format(dstFormat); GLuint i; + if (dstFormat == GL_LUMINANCE || + dstFormat == GL_LUMINANCE_ALPHA || + dstFormat == GL_LUMINANCE_INTEGER_EXT || + dstFormat == GL_LUMINANCE_ALPHA_INTEGER_EXT) { + luminance = (GLfloat *) malloc(n * sizeof(GLfloat)); + if (!luminance) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel packing"); + return; + } + } + else { + luminance = NULL; + } + /* XXX * This test should probably go away. Have the caller set/clear the * IMAGE_CLAMP_BIT as needed. @@ -1907,6 +1921,8 @@ _mesa_pack_rgba_span_float(struct gl_context *ctx, GLuint n, GLfloat rgba[][4], } } } + + free(luminance); } @@ -3462,7 +3478,12 @@ _mesa_unpack_color_span_chan( struct gl_context *ctx, { GLint dstComponents; GLint rDst, gDst, bDst, aDst, lDst, iDst; - GLfloat rgba[MAX_WIDTH][4]; + GLfloat (*rgba)[4] = (GLfloat (*)[4]) malloc(4 * n * sizeof(GLfloat)); + + if (!rgba) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking"); + return; + } dstComponents = _mesa_components_in_format( dstFormat ); /* source & dest image formats should have been error checked by now */ @@ -3471,9 +3492,14 @@ _mesa_unpack_color_span_chan( struct gl_context *ctx, /* * Extract image data and convert to RGBA floats */ - assert(n <= MAX_WIDTH); if (srcFormat == GL_COLOR_INDEX) { - GLuint indexes[MAX_WIDTH]; + GLuint *indexes = (GLuint *) malloc(n * sizeof(GLuint)); + + if (!indexes) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking"); + return; + } + extract_uint_indexes(n, indexes, srcFormat, srcType, source, srcPacking); @@ -3484,6 +3510,8 @@ _mesa_unpack_color_span_chan( struct gl_context *ctx, for (i = 0; i < n; i++) { dest[i] = (GLchan) (indexes[i] & 0xff); } + free(indexes); + free(rgba); return; } else { @@ -3498,6 +3526,8 @@ _mesa_unpack_color_span_chan( struct gl_context *ctx, * with color indexes. */ transferOps &= ~(IMAGE_SCALE_BIAS_BIT | IMAGE_MAP_COLOR_BIT); + + free(indexes); } else { /* non-color index data */ @@ -3575,6 +3605,8 @@ _mesa_unpack_color_span_chan( struct gl_context *ctx, dst += dstComponents; } } + + free(rgba); } } @@ -3652,7 +3684,12 @@ _mesa_unpack_color_span_float( struct gl_context *ctx, { GLint dstComponents; GLint rDst, gDst, bDst, aDst, lDst, iDst; - GLfloat rgba[MAX_WIDTH][4]; + GLfloat (*rgba)[4] = (GLfloat (*)[4]) malloc(4 * n * sizeof(GLfloat)); + + if (!rgba) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking"); + return; + } dstComponents = _mesa_components_in_format( dstFormat ); /* source & dest image formats should have been error checked by now */ @@ -3661,9 +3698,15 @@ _mesa_unpack_color_span_float( struct gl_context *ctx, /* * Extract image data and convert to RGBA floats */ - assert(n <= MAX_WIDTH); if (srcFormat == GL_COLOR_INDEX) { - GLuint indexes[MAX_WIDTH]; + GLuint *indexes = (GLuint *) malloc(n * sizeof(GLuint)); + + if (!indexes) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking"); + free(rgba); + return; + } + extract_uint_indexes(n, indexes, srcFormat, srcType, source, srcPacking); @@ -3674,6 +3717,8 @@ _mesa_unpack_color_span_float( struct gl_context *ctx, for (i = 0; i < n; i++) { dest[i] = (GLchan) (indexes[i] & 0xff); } + free(indexes); + free(rgba); return; } else { @@ -3688,6 +3733,8 @@ _mesa_unpack_color_span_float( struct gl_context *ctx, * with color indexes. */ transferOps &= ~(IMAGE_SCALE_BIAS_BIT | IMAGE_MAP_COLOR_BIT); + + free(indexes); } else { /* non-color index data */ @@ -3760,6 +3807,8 @@ _mesa_unpack_color_span_float( struct gl_context *ctx, dst += dstComponents; } } + + free(rgba); } } @@ -3776,9 +3825,12 @@ _mesa_unpack_color_span_uint(struct gl_context *ctx, const GLvoid *source, const struct gl_pixelstore_attrib *srcPacking) { - GLuint rgba[MAX_WIDTH][4]; + GLuint (*rgba)[4] = (GLuint (*)[4]) malloc(n * 4 * sizeof(GLfloat)); - ASSERT(n <= MAX_WIDTH); + if (!rgba) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking"); + return; + } ASSERT(dstFormat == GL_ALPHA || dstFormat == GL_LUMINANCE || @@ -3912,6 +3964,8 @@ _mesa_unpack_color_span_uint(struct gl_context *ctx, } } } + + free(rgba); } @@ -3943,9 +3997,14 @@ _mesa_unpack_dudv_span_byte( struct gl_context *ctx, /* general solution */ { GLint dstComponents; - GLfloat rgba[MAX_WIDTH][4]; GLbyte *dst = dest; GLuint i; + GLfloat (*rgba)[4] = (GLfloat (*)[4]) malloc(4 * n * sizeof(GLfloat)); + + if (!rgba) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking"); + return; + } dstComponents = _mesa_components_in_format( dstFormat ); /* source & dest image formats should have been error checked by now */ @@ -3954,7 +4013,6 @@ _mesa_unpack_dudv_span_byte( struct gl_context *ctx, /* * Extract image data and convert to RGBA floats */ - assert(n <= MAX_WIDTH); extract_float_rgba(n, rgba, srcFormat, srcType, source, srcPacking->SwapBytes); @@ -3970,6 +4028,8 @@ _mesa_unpack_dudv_span_byte( struct gl_context *ctx, dst[1] = FLOAT_TO_BYTE(rgba[i][GCOMP]); dst += dstComponents; } + + free(rgba); } } @@ -3988,7 +4048,7 @@ _mesa_unpack_dudv_span_byte( struct gl_context *ctx, * transferOps - the pixel transfer operations to apply */ void -_mesa_unpack_index_span( const struct gl_context *ctx, GLuint n, +_mesa_unpack_index_span( struct gl_context *ctx, GLuint n, GLenum dstType, GLvoid *dest, GLenum srcType, const GLvoid *source, const struct gl_pixelstore_attrib *srcPacking, @@ -4026,8 +4086,12 @@ _mesa_unpack_index_span( const struct gl_context *ctx, GLuint n, /* * general solution */ - GLuint indexes[MAX_WIDTH]; - assert(n <= MAX_WIDTH); + GLuint *indexes = (GLuint *) malloc(n * sizeof(GLuint)); + + if (!indexes) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking"); + return; + } extract_uint_indexes(n, indexes, GL_COLOR_INDEX, srcType, source, srcPacking); @@ -4061,19 +4125,24 @@ _mesa_unpack_index_span( const struct gl_context *ctx, GLuint n, default: _mesa_problem(ctx, "bad dstType in _mesa_unpack_index_span"); } + + free(indexes); } } void -_mesa_pack_index_span( const struct gl_context *ctx, GLuint n, +_mesa_pack_index_span( struct gl_context *ctx, GLuint n, GLenum dstType, GLvoid *dest, const GLuint *source, const struct gl_pixelstore_attrib *dstPacking, GLbitfield transferOps ) { - GLuint indexes[MAX_WIDTH]; + GLuint *indexes = (GLuint *) malloc(n * sizeof(GLuint)); - ASSERT(n <= MAX_WIDTH); + if (!indexes) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel packing"); + return; + } transferOps &= (IMAGE_MAP_COLOR_BIT | IMAGE_SHIFT_OFFSET_BIT); @@ -4178,6 +4247,8 @@ _mesa_pack_index_span( const struct gl_context *ctx, GLuint n, default: _mesa_problem(ctx, "bad type in _mesa_pack_index_span"); } + + free(indexes); } @@ -4196,7 +4267,7 @@ _mesa_pack_index_span( const struct gl_context *ctx, GLuint n, * transferOps - apply offset/bias/lookup ops? */ void -_mesa_unpack_stencil_span( const struct gl_context *ctx, GLuint n, +_mesa_unpack_stencil_span( struct gl_context *ctx, GLuint n, GLenum dstType, GLvoid *dest, GLenum srcType, const GLvoid *source, const struct gl_pixelstore_attrib *srcPacking, @@ -4240,8 +4311,12 @@ _mesa_unpack_stencil_span( const struct gl_context *ctx, GLuint n, /* * general solution */ - GLuint indexes[MAX_WIDTH]; - assert(n <= MAX_WIDTH); + GLuint *indexes = (GLuint *) malloc(n * sizeof(GLuint)); + + if (!indexes) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "stencil unpacking"); + return; + } extract_uint_indexes(n, indexes, GL_STENCIL_INDEX, srcType, source, srcPacking); @@ -4286,18 +4361,23 @@ _mesa_unpack_stencil_span( const struct gl_context *ctx, GLuint n, default: _mesa_problem(ctx, "bad dstType in _mesa_unpack_stencil_span"); } + + free(indexes); } } void -_mesa_pack_stencil_span( const struct gl_context *ctx, GLuint n, +_mesa_pack_stencil_span( struct gl_context *ctx, GLuint n, GLenum dstType, GLvoid *dest, const GLstencil *source, const struct gl_pixelstore_attrib *dstPacking ) { - GLstencil stencil[MAX_WIDTH]; + GLstencil *stencil = (GLstencil *) malloc(n * sizeof(GLstencil)); - ASSERT(n <= MAX_WIDTH); + if (!stencil) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "stencil packing"); + return; + } if (ctx->Pixel.IndexShift || ctx->Pixel.IndexOffset || ctx->Pixel.MapStencilFlag) { @@ -4436,6 +4516,8 @@ _mesa_pack_stencil_span( const struct gl_context *ctx, GLuint n, default: _mesa_problem(ctx, "bad type in _mesa_pack_index_span"); } + + free(stencil); } #define DEPTH_VALUES(GLTYPE, GLTYPE2FLOAT) \ @@ -4466,12 +4548,12 @@ _mesa_pack_stencil_span( const struct gl_context *ctx, GLuint n, * (ignored for GLfloat). */ void -_mesa_unpack_depth_span( const struct gl_context *ctx, GLuint n, +_mesa_unpack_depth_span( struct gl_context *ctx, GLuint n, GLenum dstType, GLvoid *dest, GLuint depthMax, GLenum srcType, const GLvoid *source, const struct gl_pixelstore_attrib *srcPacking ) { - GLfloat depthTemp[MAX_WIDTH], *depthValues; + GLfloat *depthTemp, *depthValues; GLboolean needClamp = GL_FALSE; /* Look for special cases first. @@ -4517,6 +4599,12 @@ _mesa_unpack_depth_span( const struct gl_context *ctx, GLuint n, /* general case path follows */ + depthTemp = (GLfloat *) malloc(n * sizeof(GLfloat)); + if (!depthTemp) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking"); + return; + } + if (dstType == GL_FLOAT) { depthValues = (GLfloat *) dest; } @@ -4599,6 +4687,7 @@ _mesa_unpack_depth_span( const struct gl_context *ctx, GLuint n, break; default: _mesa_problem(NULL, "bad type in _mesa_unpack_depth_span()"); + free(depthTemp); return; } @@ -4658,6 +4747,8 @@ _mesa_unpack_depth_span( const struct gl_context *ctx, GLuint n, ASSERT(dstType == GL_FLOAT); /*ASSERT(depthMax == 1.0F);*/ } + + free(depthTemp); } @@ -4665,13 +4756,15 @@ _mesa_unpack_depth_span( const struct gl_context *ctx, GLuint n, * Pack an array of depth values. The values are floats in [0,1]. */ void -_mesa_pack_depth_span( const struct gl_context *ctx, GLuint n, GLvoid *dest, +_mesa_pack_depth_span( struct gl_context *ctx, GLuint n, GLvoid *dest, GLenum dstType, const GLfloat *depthSpan, const struct gl_pixelstore_attrib *dstPacking ) { - GLfloat depthCopy[MAX_WIDTH]; - - ASSERT(n <= MAX_WIDTH); + GLfloat *depthCopy = (GLfloat *) malloc(n * sizeof(GLfloat)); + if (!depthCopy) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel packing"); + return; + } if (ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0) { memcpy(depthCopy, depthSpan, n * sizeof(GLfloat)); @@ -4773,6 +4866,8 @@ _mesa_pack_depth_span( const struct gl_context *ctx, GLuint n, GLvoid *dest, default: _mesa_problem(ctx, "bad type in _mesa_pack_depth_span"); } + + free(depthCopy); } @@ -4781,16 +4876,21 @@ _mesa_pack_depth_span( const struct gl_context *ctx, GLuint n, GLvoid *dest, * Pack depth and stencil values as GL_DEPTH_STENCIL/GL_UNSIGNED_INT_24_8. */ void -_mesa_pack_depth_stencil_span(const struct gl_context *ctx, GLuint n, GLuint *dest, +_mesa_pack_depth_stencil_span(struct gl_context *ctx, GLuint n, GLuint *dest, const GLfloat *depthVals, const GLstencil *stencilVals, const struct gl_pixelstore_attrib *dstPacking) { - GLfloat depthCopy[MAX_WIDTH]; - GLstencil stencilCopy[MAX_WIDTH]; + GLfloat *depthCopy = (GLfloat *) malloc(n * sizeof(GLfloat)); + GLstencil *stencilCopy = (GLstencil *) malloc(n * sizeof(GLstencil)); GLuint i; - ASSERT(n <= MAX_WIDTH); + if (!depthCopy || !stencilCopy) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel packing"); + free(depthCopy); + free(stencilCopy); + return; + } if (ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0) { memcpy(depthCopy, depthVals, n * sizeof(GLfloat)); @@ -4814,6 +4914,9 @@ _mesa_pack_depth_stencil_span(const struct gl_context *ctx, GLuint n, GLuint *de if (dstPacking->SwapBytes) { _mesa_swap4(dest, n); } + + free(depthCopy); + free(stencilCopy); } diff --git a/src/mesa/main/pack.h b/src/mesa/main/pack.h index 65d3f7a0fb2..78238ea5839 100644 --- a/src/mesa/main/pack.h +++ b/src/mesa/main/pack.h @@ -90,7 +90,7 @@ _mesa_unpack_dudv_span_byte(struct gl_context *ctx, GLbitfield transferOps); extern void -_mesa_unpack_index_span(const struct gl_context *ctx, GLuint n, +_mesa_unpack_index_span(struct gl_context *ctx, GLuint n, GLenum dstType, GLvoid *dest, GLenum srcType, const GLvoid *source, const struct gl_pixelstore_attrib *srcPacking, @@ -98,39 +98,39 @@ _mesa_unpack_index_span(const struct gl_context *ctx, GLuint n, extern void -_mesa_pack_index_span(const struct gl_context *ctx, GLuint n, +_mesa_pack_index_span(struct gl_context *ctx, GLuint n, GLenum dstType, GLvoid *dest, const GLuint *source, const struct gl_pixelstore_attrib *dstPacking, GLbitfield transferOps); extern void -_mesa_unpack_stencil_span(const struct gl_context *ctx, GLuint n, +_mesa_unpack_stencil_span(struct gl_context *ctx, GLuint n, GLenum dstType, GLvoid *dest, GLenum srcType, const GLvoid *source, const struct gl_pixelstore_attrib *srcPacking, GLbitfield transferOps); extern void -_mesa_pack_stencil_span(const struct gl_context *ctx, GLuint n, +_mesa_pack_stencil_span(struct gl_context *ctx, GLuint n, GLenum dstType, GLvoid *dest, const GLstencil *source, const struct gl_pixelstore_attrib *dstPacking); extern void -_mesa_unpack_depth_span(const struct gl_context *ctx, GLuint n, +_mesa_unpack_depth_span(struct gl_context *ctx, GLuint n, GLenum dstType, GLvoid *dest, GLuint depthMax, GLenum srcType, const GLvoid *source, const struct gl_pixelstore_attrib *srcPacking); extern void -_mesa_pack_depth_span(const struct gl_context *ctx, GLuint n, GLvoid *dest, +_mesa_pack_depth_span(struct gl_context *ctx, GLuint n, GLvoid *dest, GLenum dstType, const GLfloat *depthSpan, const struct gl_pixelstore_attrib *dstPacking); extern void -_mesa_pack_depth_stencil_span(const struct gl_context *ctx, +_mesa_pack_depth_stencil_span(struct gl_context *ctx, GLuint n, GLuint *dest, const GLfloat *depthVals, const GLstencil *stencilVals, diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 030236e7350..96df58d35c2 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -95,7 +95,7 @@ _mesa_init_shader_state(struct gl_context *ctx) * are generated by the GLSL compiler. */ struct gl_shader_compiler_options options; - GLuint i; + gl_shader_type sh; memset(&options, 0, sizeof(options)); options.MaxUnrollIterations = 32; @@ -103,8 +103,8 @@ _mesa_init_shader_state(struct gl_context *ctx) /* Default pragma settings */ options.DefaultPragmas.Optimize = GL_TRUE; - for(i = 0; i < MESA_SHADER_TYPES; ++i) - memcpy(&ctx->ShaderCompilerOptions[i], &options, sizeof(options)); + for (sh = 0; sh < MESA_SHADER_TYPES; ++sh) + memcpy(&ctx->ShaderCompilerOptions[sh], &options, sizeof(options)); ctx->Shader.Flags = get_shader_flags(); } diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c index b6594cbe6f0..216bbce0032 100644 --- a/src/mesa/main/shaderobj.c +++ b/src/mesa/main/shaderobj.c @@ -291,6 +291,7 @@ _mesa_free_shader_program_data(struct gl_context *ctx, struct gl_shader_program *shProg) { GLuint i; + gl_shader_type sh; assert(shProg->Type == GL_SHADER_PROGRAM_MESA); @@ -326,10 +327,10 @@ _mesa_free_shader_program_data(struct gl_context *ctx, shProg->TransformFeedback.NumVarying = 0; - for (i = 0; i < MESA_SHADER_TYPES; i++) { - if (shProg->_LinkedShaders[i] != NULL) { - ctx->Driver.DeleteShader(ctx, shProg->_LinkedShaders[i]); - shProg->_LinkedShaders[i] = NULL; + for (sh = 0; sh < MESA_SHADER_TYPES; sh++) { + if (shProg->_LinkedShaders[sh] != NULL) { + ctx->Driver.DeleteShader(ctx, shProg->_LinkedShaders[sh]); + shProg->_LinkedShaders[sh] = NULL; } } } diff --git a/src/mesa/main/shaderobj.h b/src/mesa/main/shaderobj.h index 346a5b75175..de7c998cf0e 100644 --- a/src/mesa/main/shaderobj.h +++ b/src/mesa/main/shaderobj.h @@ -98,7 +98,7 @@ extern void _mesa_free_shader_state(struct gl_context *ctx); -static INLINE GLuint +static INLINE gl_shader_type _mesa_shader_type_to_index(GLenum v) { switch (v) { @@ -110,7 +110,7 @@ _mesa_shader_type_to_index(GLenum v) return MESA_SHADER_GEOMETRY; default: ASSERT(0 && "bad value in _mesa_shader_type_to_index()"); - return ~0; + return MESA_SHADER_TYPES; } } diff --git a/src/mesa/main/syncobj.h b/src/mesa/main/syncobj.h index f3c0046cf3d..51de9bf4809 100644 --- a/src/mesa/main/syncobj.h +++ b/src/mesa/main/syncobj.h @@ -31,9 +31,13 @@ #ifndef SYNCOBJ_H #define SYNCOBJ_H -#include "main/mtypes.h" +#include "glheader.h" +#include "mfeatures.h" +struct _glapi_table; struct dd_function_table; +struct gl_context; +struct gl_sync_object; #if FEATURE_ARB_sync diff --git a/src/mesa/main/texcompress.h b/src/mesa/main/texcompress.h index 83856429c54..19b08bbadf6 100644 --- a/src/mesa/main/texcompress.h +++ b/src/mesa/main/texcompress.h @@ -25,8 +25,11 @@ #ifndef TEXCOMPRESS_H #define TEXCOMPRESS_H -#include "mtypes.h" #include "formats.h" +#include "glheader.h" +#include "mfeatures.h" + +struct gl_context; #if _HAVE_FULL_GL diff --git a/src/mesa/main/texcompress_s3tc.h b/src/mesa/main/texcompress_s3tc.h index d0a5b186b71..74a0343b9b9 100644 --- a/src/mesa/main/texcompress_s3tc.h +++ b/src/mesa/main/texcompress_s3tc.h @@ -25,9 +25,13 @@ #ifndef TEXCOMPRESS_S3TC_H #define TEXCOMPRESS_S3TC_H -#include "main/mtypes.h" +#include "compiler.h" +#include "glheader.h" +#include "mfeatures.h" #include "texstore.h" +struct gl_context; +struct gl_texture_image; #if FEATURE_texture_s3tc diff --git a/src/mesa/main/texenvprogram.h b/src/mesa/main/texenvprogram.h index abfb916d21b..22e30a51943 100644 --- a/src/mesa/main/texenvprogram.h +++ b/src/mesa/main/texenvprogram.h @@ -27,7 +27,7 @@ #define TEXENVPROGRAM_H -#include "mtypes.h" +struct gl_context; extern struct gl_fragment_program * _mesa_get_fixed_func_fragment_program(struct gl_context *ctx); diff --git a/src/mesa/main/texformat.h b/src/mesa/main/texformat.h index 8bd15076753..3cf09213ac4 100644 --- a/src/mesa/main/texformat.h +++ b/src/mesa/main/texformat.h @@ -27,9 +27,9 @@ #define TEXFORMAT_H -#include "mtypes.h" #include "formats.h" +struct gl_context; extern gl_format _mesa_choose_tex_format( struct gl_context *ctx, GLint internalFormat, diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index 879ac529a01..c94f88e16e6 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -120,10 +120,15 @@ get_tex_depth(struct gl_context *ctx, GLuint dimensions, const GLint height = texImage->Height; const GLint depth = texImage->Depth; GLint img, row, col; + GLfloat *depthRow = (GLfloat *) malloc(width * sizeof(GLfloat)); + + if (!depthRow) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage"); + return; + } for (img = 0; img < depth; img++) { for (row = 0; row < height; row++) { - GLfloat depthRow[MAX_WIDTH]; void *dest = _mesa_image_address(dimensions, &ctx->Pack, pixels, width, height, format, type, img, row, 0); @@ -135,6 +140,8 @@ get_tex_depth(struct gl_context *ctx, GLuint dimensions, _mesa_pack_depth_span(ctx, width, dest, type, depthRow, &ctx->Pack); } } + + free(depthRow); } @@ -244,6 +251,12 @@ get_tex_srgb(struct gl_context *ctx, GLuint dimensions, const GLint depth = texImage->Depth; const GLbitfield transferOps = 0x0; GLint img, row; + GLfloat (*rgba)[4] = (GLfloat (*)[4]) malloc(4 * width * sizeof(GLfloat)); + + if (!rgba) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage"); + return; + } for (img = 0; img < depth; img++) { for (row = 0; row < height; row++) { @@ -251,7 +264,6 @@ get_tex_srgb(struct gl_context *ctx, GLuint dimensions, width, height, format, type, img, row, 0); - GLfloat rgba[MAX_WIDTH][4]; GLint col; /* convert row to RGBA format */ @@ -279,6 +291,8 @@ get_tex_srgb(struct gl_context *ctx, GLuint dimensions, &ctx->Pack, transferOps); } } + + free(rgba); } @@ -314,13 +328,18 @@ get_tex_rgba(struct gl_context *ctx, GLuint dimensions, */ GLbitfield transferOps = 0x0; GLint img, row; + GLfloat (*rgba)[4] = (GLfloat (*)[4]) malloc(4 * width * sizeof(GLfloat)); + + if (!rgba) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage"); + return; + } for (img = 0; img < depth; img++) { for (row = 0; row < height; row++) { void *dest = _mesa_image_address(dimensions, &ctx->Pack, pixels, width, height, format, type, img, row, 0); - GLfloat rgba[MAX_WIDTH][4]; GLint col; GLenum dataType = _mesa_get_format_datatype(texImage->TexFormat); @@ -364,6 +383,8 @@ get_tex_rgba(struct gl_context *ctx, GLuint dimensions, &ctx->Pack, transferOps); } } + + free(rgba); } diff --git a/src/mesa/main/texgetimage.h b/src/mesa/main/texgetimage.h index 81a3bbbd9a7..ef420ddabf5 100644 --- a/src/mesa/main/texgetimage.h +++ b/src/mesa/main/texgetimage.h @@ -27,7 +27,11 @@ #ifndef TEXGETIMAGE_H #define TEXGETIMAGE_H -#include "mtypes.h" +#include "glheader.h" + +struct gl_context; +struct gl_texture_image; +struct gl_texture_object; extern void _mesa_get_teximage(struct gl_context *ctx, GLenum target, GLint level, diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 060f34b7f97..c5ae63052a7 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -636,6 +636,47 @@ _mesa_is_proxy_texture(GLenum target) /** + * Return the proxy target which corresponds to the given texture target + */ +static GLenum +get_proxy_target(GLenum target) +{ + switch (target) { + case GL_TEXTURE_1D: + case GL_PROXY_TEXTURE_1D: + return GL_PROXY_TEXTURE_1D; + case GL_TEXTURE_2D: + case GL_PROXY_TEXTURE_2D: + return GL_PROXY_TEXTURE_2D; + case GL_TEXTURE_3D: + case GL_PROXY_TEXTURE_3D: + return GL_PROXY_TEXTURE_3D; + case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: + case GL_TEXTURE_CUBE_MAP_ARB: + case GL_PROXY_TEXTURE_CUBE_MAP_ARB: + return GL_PROXY_TEXTURE_CUBE_MAP_ARB; + case GL_TEXTURE_RECTANGLE_NV: + case GL_PROXY_TEXTURE_RECTANGLE_NV: + return GL_PROXY_TEXTURE_RECTANGLE_NV; + case GL_TEXTURE_1D_ARRAY_EXT: + case GL_PROXY_TEXTURE_1D_ARRAY_EXT: + return GL_PROXY_TEXTURE_1D_ARRAY_EXT; + case GL_TEXTURE_2D_ARRAY_EXT: + case GL_PROXY_TEXTURE_2D_ARRAY_EXT: + return GL_PROXY_TEXTURE_2D_ARRAY_EXT; + default: + _mesa_problem(NULL, "unexpected target in get_proxy_target()"); + return 0; + } +} + + +/** * Get the texture object that corresponds to the target of the given * texture unit. * @@ -1178,94 +1219,110 @@ _mesa_test_proxy_teximage(struct gl_context *ctx, GLenum target, GLint level, switch (target) { case GL_PROXY_TEXTURE_1D: maxSize = 1 << (ctx->Const.MaxTextureLevels - 1); - if (width < 2 * border || width > 2 + maxSize || - (!ctx->Extensions.ARB_texture_non_power_of_two && - width >0 && !_mesa_is_pow_two(width - 2 * border)) || - level >= ctx->Const.MaxTextureLevels) { - /* bad width or level */ + if (width < 2 * border || width > 2 + maxSize) return GL_FALSE; + if (level >= ctx->Const.MaxTextureLevels) + return GL_FALSE; + if (!ctx->Extensions.ARB_texture_non_power_of_two) { + if (width > 0 && !_mesa_is_pow_two(width - 2 * border)) + return GL_FALSE; } return GL_TRUE; + case GL_PROXY_TEXTURE_2D: maxSize = 1 << (ctx->Const.MaxTextureLevels - 1); - if (width < 2 * border || width > 2 + maxSize || - (!ctx->Extensions.ARB_texture_non_power_of_two && - width > 0 && !_mesa_is_pow_two(width - 2 * border)) || - height < 2 * border || height > 2 + maxSize || - (!ctx->Extensions.ARB_texture_non_power_of_two && - height > 0 && !_mesa_is_pow_two(height - 2 * border)) || - level >= ctx->Const.MaxTextureLevels) { - /* bad width or height or level */ + if (width < 2 * border || width > 2 + maxSize) + return GL_FALSE; + if (height < 2 * border || height > 2 + maxSize) + return GL_FALSE; + if (level >= ctx->Const.MaxTextureLevels) return GL_FALSE; + if (!ctx->Extensions.ARB_texture_non_power_of_two) { + if (width > 0 && !_mesa_is_pow_two(width - 2 * border)) + return GL_FALSE; + if (height > 0 && !_mesa_is_pow_two(height - 2 * border)) + return GL_FALSE; } return GL_TRUE; + case GL_PROXY_TEXTURE_3D: maxSize = 1 << (ctx->Const.Max3DTextureLevels - 1); - if (width < 2 * border || width > 2 + maxSize || - (!ctx->Extensions.ARB_texture_non_power_of_two && - width > 0 && !_mesa_is_pow_two(width - 2 * border)) || - height < 2 * border || height > 2 + maxSize || - (!ctx->Extensions.ARB_texture_non_power_of_two && - height > 0 && !_mesa_is_pow_two(height - 2 * border)) || - depth < 2 * border || depth > 2 + maxSize || - (!ctx->Extensions.ARB_texture_non_power_of_two && - depth > 0 && !_mesa_is_pow_two(depth - 2 * border)) || - level >= ctx->Const.Max3DTextureLevels) { - /* bad width or height or depth or level */ + if (width < 2 * border || width > 2 + maxSize) + return GL_FALSE; + if (height < 2 * border || height > 2 + maxSize) + return GL_FALSE; + if (depth < 2 * border || depth > 2 + maxSize) return GL_FALSE; + if (level >= ctx->Const.Max3DTextureLevels) + return GL_FALSE; + if (!ctx->Extensions.ARB_texture_non_power_of_two) { + if (width > 0 && !_mesa_is_pow_two(width - 2 * border)) + return GL_FALSE; + if (height > 0 && !_mesa_is_pow_two(height - 2 * border)) + return GL_FALSE; + if (depth > 0 && !_mesa_is_pow_two(depth - 2 * border)) + return GL_FALSE; } return GL_TRUE; + case GL_PROXY_TEXTURE_RECTANGLE_NV: - if (width < 0 || width > ctx->Const.MaxTextureRectSize || - height < 0 || height > ctx->Const.MaxTextureRectSize || - level != 0) { - /* bad width or height or level */ + maxSize = ctx->Const.MaxTextureRectSize; + if (width < 0 || width > maxSize) + return GL_FALSE; + if (height < 0 || height > maxSize) + return GL_FALSE; + if (level != 0) return GL_FALSE; - } return GL_TRUE; + case GL_PROXY_TEXTURE_CUBE_MAP_ARB: maxSize = 1 << (ctx->Const.MaxCubeTextureLevels - 1); - if (width < 2 * border || width > 2 + maxSize || - (!ctx->Extensions.ARB_texture_non_power_of_two && - width > 0 && !_mesa_is_pow_two(width - 2 * border)) || - height < 2 * border || height > 2 + maxSize || - (!ctx->Extensions.ARB_texture_non_power_of_two && - height > 0 && !_mesa_is_pow_two(height - 2 * border)) || - level >= ctx->Const.MaxCubeTextureLevels) { - /* bad width or height */ + if (width < 2 * border || width > 2 + maxSize) + return GL_FALSE; + if (height < 2 * border || height > 2 + maxSize) return GL_FALSE; + if (level >= ctx->Const.MaxCubeTextureLevels) + return GL_FALSE; + if (!ctx->Extensions.ARB_texture_non_power_of_two) { + if (width > 0 && !_mesa_is_pow_two(width - 2 * border)) + return GL_FALSE; + if (height > 0 && !_mesa_is_pow_two(height - 2 * border)) + return GL_FALSE; } return GL_TRUE; + case GL_PROXY_TEXTURE_1D_ARRAY_EXT: maxSize = 1 << (ctx->Const.MaxTextureLevels - 1); - if (width < 2 * border || width > 2 + maxSize || - (!ctx->Extensions.ARB_texture_non_power_of_two && - width > 0 && !_mesa_is_pow_two(width - 2 * border)) || - level >= ctx->Const.MaxTextureLevels) { - /* bad width or level */ + if (width < 2 * border || width > 2 + maxSize) return GL_FALSE; - } - - if (height < 1 || height > ctx->Const.MaxArrayTextureLayers) { + if (height < 1 || height > ctx->Const.MaxArrayTextureLayers) + return GL_FALSE; + if (level >= ctx->Const.MaxTextureLevels) return GL_FALSE; + if (!ctx->Extensions.ARB_texture_non_power_of_two) { + if (width > 0 && !_mesa_is_pow_two(width - 2 * border)) + return GL_FALSE; } return GL_TRUE; + case GL_PROXY_TEXTURE_2D_ARRAY_EXT: maxSize = 1 << (ctx->Const.MaxTextureLevels - 1); - if (width < 2 * border || width > 2 + maxSize || - (!ctx->Extensions.ARB_texture_non_power_of_two && - width > 0 && !_mesa_is_pow_two(width - 2 * border)) || - height < 2 * border || height > 2 + maxSize || - (!ctx->Extensions.ARB_texture_non_power_of_two && - height > 0 && !_mesa_is_pow_two(height - 2 * border)) || - level >= ctx->Const.MaxTextureLevels) { - /* bad width or height or level */ + if (width < 2 * border || width > 2 + maxSize) return GL_FALSE; - } - if (depth < 1 || depth > ctx->Const.MaxArrayTextureLayers) { + if (height < 2 * border || height > 2 + maxSize) + return GL_FALSE; + if (depth < 1 || depth > ctx->Const.MaxArrayTextureLayers) + return GL_FALSE; + if (level >= ctx->Const.MaxTextureLevels) return GL_FALSE; + if (!ctx->Extensions.ARB_texture_non_power_of_two) { + if (width > 0 && !_mesa_is_pow_two(width - 2 * border)) + return GL_FALSE; + if (height > 0 && !_mesa_is_pow_two(height - 2 * border)) + return GL_FALSE; } return GL_TRUE; + default: _mesa_problem(ctx, "Invalid target in _mesa_test_proxy_teximage"); return GL_FALSE; @@ -1274,15 +1331,37 @@ _mesa_test_proxy_teximage(struct gl_context *ctx, GLenum target, GLint level, /** - * Helper function to determine whether a target supports compressed textures + * Check if the memory used by the texture would exceed the driver's limit. + * This lets us support a max 3D texture size of 8K (for example) but + * prevents allocating a full 8K x 8K x 8K texture. + * XXX this could be rolled into the proxy texture size test (above) but + * we don't have the actual texture internal format at that point. + */ +static GLboolean +legal_texture_size(struct gl_context *ctx, gl_format format, + GLint width, GLint height, GLint depth) +{ + uint64_t bytes = _mesa_format_image_size64(format, width, height, depth); + uint64_t mbytes = bytes / (1024 * 1024); /* convert to MB */ + return mbytes <= (uint64_t) ctx->Const.MaxTextureMbytes; +} + + + +/** + * Helper function to determine whether a target and specific compression + * format are supported. */ static GLboolean -target_can_be_compressed(struct gl_context *ctx, GLenum target) +target_can_be_compressed(const struct gl_context *ctx, GLenum target, + GLenum intFormat) { + (void) intFormat; /* not used yet */ + switch (target) { case GL_TEXTURE_2D: case GL_PROXY_TEXTURE_2D: - return GL_TRUE; + return GL_TRUE; /* true for any compressed format so far */ case GL_PROXY_TEXTURE_CUBE_MAP: case GL_TEXTURE_CUBE_MAP_POSITIVE_X: case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: @@ -1301,6 +1380,109 @@ target_can_be_compressed(struct gl_context *ctx, GLenum target) /** + * Check if the given texture target value is legal for a + * glTexImage1/2/3D call. + */ +static GLboolean +legal_teximage_target(struct gl_context *ctx, GLuint dims, GLenum target) +{ + switch (dims) { + case 1: + switch (target) { + case GL_TEXTURE_1D: + case GL_PROXY_TEXTURE_1D: + return GL_TRUE; + default: + return GL_FALSE; + } + case 2: + switch (target) { + case GL_TEXTURE_2D: + case GL_PROXY_TEXTURE_2D: + return GL_TRUE; + case GL_PROXY_TEXTURE_CUBE_MAP: + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + return ctx->Extensions.ARB_texture_cube_map; + case GL_TEXTURE_RECTANGLE_NV: + case GL_PROXY_TEXTURE_RECTANGLE_NV: + return ctx->Extensions.NV_texture_rectangle; + case GL_TEXTURE_1D_ARRAY_EXT: + case GL_PROXY_TEXTURE_1D_ARRAY_EXT: + return ctx->Extensions.MESA_texture_array; + default: + return GL_FALSE; + } + case 3: + switch (target) { + case GL_TEXTURE_3D: + case GL_PROXY_TEXTURE_3D: + return GL_TRUE; + case GL_TEXTURE_2D_ARRAY_EXT: + case GL_PROXY_TEXTURE_2D_ARRAY_EXT: + return ctx->Extensions.MESA_texture_array; + default: + return GL_FALSE; + } + default: + _mesa_problem(ctx, "invalid dims=%u in legal_teximage_target()", dims); + return GL_FALSE; + } +} + + +/** + * Check if the given texture target value is legal for a + * glTexSubImage, glCopyTexSubImage or glCopyTexImage call. + * The difference compared to legal_teximage_target() above is that + * proxy targets are not supported. + */ +static GLboolean +legal_texsubimage_target(struct gl_context *ctx, GLuint dims, GLenum target) +{ + switch (dims) { + case 1: + return target == GL_TEXTURE_1D; + case 2: + switch (target) { + case GL_TEXTURE_2D: + return GL_TRUE; + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + return ctx->Extensions.ARB_texture_cube_map; + case GL_TEXTURE_RECTANGLE_NV: + return ctx->Extensions.NV_texture_rectangle; + case GL_TEXTURE_1D_ARRAY_EXT: + return ctx->Extensions.MESA_texture_array; + default: + return GL_FALSE; + } + case 3: + switch (target) { + case GL_TEXTURE_3D: + return GL_TRUE; + case GL_TEXTURE_2D_ARRAY_EXT: + return ctx->Extensions.MESA_texture_array; + default: + return GL_FALSE; + } + default: + _mesa_problem(ctx, "invalid dims=%u in legal_texsubimage_target()", + dims); + return GL_FALSE; + } +} + + +/** * Test the glTexImage[123]D() parameters for errors. * * \param ctx GL context. @@ -1329,10 +1511,10 @@ texture_error_check( struct gl_context *ctx, GLint width, GLint height, GLint depth, GLint border ) { - const GLboolean isProxy = _mesa_is_proxy_texture(target); + const GLenum proxyTarget = get_proxy_target(target); + const GLboolean isProxy = target == proxyTarget; GLboolean sizeOK = GL_TRUE; GLboolean colorFormat, indexFormat; - GLenum proxy_target; /* Basic level check (more checking in ctx->Driver.TestProxyTexImage) */ if (level < 0 || level >= MAX_TEXTURE_LEVELS) { @@ -1362,71 +1544,16 @@ texture_error_check( struct gl_context *ctx, return GL_TRUE; } - /* Check target and call ctx->Driver.TestProxyTexImage() to check the - * level, width, height and depth. - */ - if (dimensions == 1) { - if (target == GL_PROXY_TEXTURE_1D || target == GL_TEXTURE_1D) { - proxy_target = GL_PROXY_TEXTURE_1D; - height = 1; - depth = 1; - } - else { - _mesa_error( ctx, GL_INVALID_ENUM, "glTexImage1D(target)" ); - return GL_TRUE; - } - } - else if (dimensions == 2) { - depth = 1; - if (target == GL_PROXY_TEXTURE_2D || target == GL_TEXTURE_2D) { - proxy_target = GL_PROXY_TEXTURE_2D; - } - else if (target == GL_PROXY_TEXTURE_CUBE_MAP_ARB || - (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB && - target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB)) { - if (!ctx->Extensions.ARB_texture_cube_map) { - _mesa_error(ctx, GL_INVALID_ENUM, "glTexImage2D(target)"); - return GL_TRUE; - } - proxy_target = GL_PROXY_TEXTURE_CUBE_MAP_ARB; - sizeOK = (width == height); - } - else if (target == GL_PROXY_TEXTURE_RECTANGLE_NV || - target == GL_TEXTURE_RECTANGLE_NV) { - if (!ctx->Extensions.NV_texture_rectangle) { - _mesa_error(ctx, GL_INVALID_ENUM, "glTexImage2D(target)"); - return GL_TRUE; - } - proxy_target = GL_PROXY_TEXTURE_RECTANGLE_NV; - } - else if (target == GL_PROXY_TEXTURE_1D_ARRAY_EXT || - target == GL_TEXTURE_1D_ARRAY_EXT) { - proxy_target = GL_PROXY_TEXTURE_1D_ARRAY_EXT; - } - else { - _mesa_error(ctx, GL_INVALID_ENUM, "glTexImage2D(target)"); - return GL_TRUE; - } - } - else if (dimensions == 3) { - if (target == GL_PROXY_TEXTURE_3D || target == GL_TEXTURE_3D) { - proxy_target = GL_PROXY_TEXTURE_3D; - } - else if (target == GL_PROXY_TEXTURE_2D_ARRAY_EXT || - target == GL_TEXTURE_2D_ARRAY_EXT) { - proxy_target = GL_PROXY_TEXTURE_2D_ARRAY_EXT; - } - else { - _mesa_error( ctx, GL_INVALID_ENUM, "glTexImage3D(target)" ); - return GL_TRUE; - } - } - else { - _mesa_problem( ctx, "bad dims in texture_error_check" ); - return GL_TRUE; + /* Do this simple check before calling the TestProxyTexImage() function */ + if (proxyTarget == GL_PROXY_TEXTURE_CUBE_MAP_ARB) { + sizeOK = (width == height); } - sizeOK = sizeOK && ctx->Driver.TestProxyTexImage(ctx, proxy_target, level, + /* + * Use the proxy texture driver hook to see if the size/level/etc are + * legal. + */ + sizeOK = sizeOK && ctx->Driver.TestProxyTexImage(ctx, proxyTarget, level, internalFormat, format, type, width, height, depth, border); @@ -1531,9 +1658,10 @@ texture_error_check( struct gl_context *ctx, /* additional checks for compressed textures */ if (_mesa_is_compressed_format(ctx, internalFormat)) { - if (!target_can_be_compressed(ctx, target) && !isProxy) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glTexImage%dD(target)", dimensions); + if (!target_can_be_compressed(ctx, target, internalFormat)) { + if (!isProxy) + _mesa_error(ctx, GL_INVALID_ENUM, + "glTexImage%dD(target)", dimensions); return GL_TRUE; } if (border != 0) { @@ -1591,61 +1719,13 @@ subtexture_error_check( struct gl_context *ctx, GLuint dimensions, GLint width, GLint height, GLint depth, GLenum format, GLenum type ) { - /* Check target */ - if (dimensions == 1) { - if (target != GL_TEXTURE_1D) { - _mesa_error( ctx, GL_INVALID_ENUM, "glTexSubImage1D(target)" ); - return GL_TRUE; - } - } - else if (dimensions == 2) { - if (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB && - target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB) { - if (!ctx->Extensions.ARB_texture_cube_map) { - _mesa_error( ctx, GL_INVALID_ENUM, "glTexSubImage2D(target)" ); - return GL_TRUE; - } - } - else if (target == GL_TEXTURE_RECTANGLE_NV) { - if (!ctx->Extensions.NV_texture_rectangle) { - _mesa_error( ctx, GL_INVALID_ENUM, "glTexSubImage2D(target)" ); - return GL_TRUE; - } - } - else if (target == GL_TEXTURE_1D_ARRAY_EXT) { - if (!ctx->Extensions.MESA_texture_array) { - _mesa_error( ctx, GL_INVALID_ENUM, "glTexSubImage2D(target)" ); - return GL_TRUE; - } - } - else if (target != GL_TEXTURE_2D) { - _mesa_error( ctx, GL_INVALID_ENUM, "glTexSubImage2D(target)" ); - return GL_TRUE; - } - } - else if (dimensions == 3) { - if (target == GL_TEXTURE_2D_ARRAY_EXT) { - if (!ctx->Extensions.MESA_texture_array) { - _mesa_error( ctx, GL_INVALID_ENUM, "glTexSubImage3D(target)" ); - return GL_TRUE; - } - } - else if (target != GL_TEXTURE_3D) { - _mesa_error( ctx, GL_INVALID_ENUM, "glTexSubImage3D(target)" ); - return GL_TRUE; - } - } - else { - _mesa_problem( ctx, "invalid dims in texture_error_check" ); - return GL_TRUE; - } - /* Basic level check */ if (level < 0 || level >= MAX_TEXTURE_LEVELS) { _mesa_error(ctx, GL_INVALID_ENUM, "glTexSubImage2D(level=%d)", level); return GL_TRUE; } + /* Check for negative sizes */ if (width < 0) { _mesa_error(ctx, GL_INVALID_VALUE, "glTexSubImage%dD(width=%d)", dimensions, width); @@ -1732,13 +1812,6 @@ subtexture_error_check2( struct gl_context *ctx, GLuint dimensions, if (_mesa_is_format_compressed(destTex->TexFormat)) { GLuint bw, bh; - if (!target_can_be_compressed(ctx, target)) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glTexSubImage%dD(target=%s)", dimensions, - _mesa_lookup_enum_by_nr(target)); - return GL_TRUE; - } - /* do tests which depend on compression block size */ _mesa_get_format_block_size(destTex->TexFormat, &bw, &bh); @@ -1789,10 +1862,18 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions, GLenum target, GLint level, GLint internalFormat, GLint width, GLint height, GLint border ) { - GLenum type; + const GLenum proxyTarget = get_proxy_target(target); + const GLenum type = GL_FLOAT; GLboolean sizeOK; GLint format; + /* check target */ + if (!legal_texsubimage_target(ctx, dimensions, target)) { + _mesa_error(ctx, GL_INVALID_ENUM, "glCopyTexImage%uD(target=%s)", + dimensions, _mesa_lookup_enum_by_nr(target)); + return GL_TRUE; + } + /* Basic level check (more checking in ctx->Driver.TestProxyTexImage) */ if (level < 0 || level >= MAX_TEXTURE_LEVELS) { _mesa_error(ctx, GL_INVALID_VALUE, @@ -1830,75 +1911,14 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions, return GL_TRUE; } - /* NOTE: the format and type aren't really significant for - * TestProxyTexImage(). Only the internalformat really matters. - */ - type = GL_FLOAT; + /* Do size, level checking */ + sizeOK = (proxyTarget == GL_PROXY_TEXTURE_CUBE_MAP_ARB) + ? (width == height) : 1; - /* Check target and call ctx->Driver.TestProxyTexImage() to check the - * level, width, height and depth. - */ - if (dimensions == 1) { - if (target == GL_TEXTURE_1D) { - sizeOK = ctx->Driver.TestProxyTexImage(ctx, GL_PROXY_TEXTURE_1D, - level, internalFormat, - format, type, - width, 1, 1, border); - } - else { - _mesa_error( ctx, GL_INVALID_ENUM, "glCopyTexImage1D(target)" ); - return GL_TRUE; - } - } - else if (dimensions == 2) { - if (target == GL_TEXTURE_2D) { - sizeOK = ctx->Driver.TestProxyTexImage(ctx, GL_PROXY_TEXTURE_2D, - level, internalFormat, - format, type, - width, height, 1, border); - } - else if (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB && - target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB) { - if (!ctx->Extensions.ARB_texture_cube_map) { - _mesa_error( ctx, GL_INVALID_ENUM, "glCopyTexImage2D(target)" ); - return GL_TRUE; - } - sizeOK = (width == height) && - ctx->Driver.TestProxyTexImage(ctx, GL_PROXY_TEXTURE_CUBE_MAP_ARB, - level, internalFormat, format, type, - width, height, 1, border); - } - else if (target == GL_TEXTURE_RECTANGLE_NV) { - if (!ctx->Extensions.NV_texture_rectangle) { - _mesa_error( ctx, GL_INVALID_ENUM, "glCopyTexImage2D(target)" ); - return GL_TRUE; - } - sizeOK = ctx->Driver.TestProxyTexImage(ctx, - GL_PROXY_TEXTURE_RECTANGLE_NV, - level, internalFormat, - format, type, - width, height, 1, border); - } - else if (target == GL_TEXTURE_1D_ARRAY_EXT) { - if (!ctx->Extensions.MESA_texture_array) { - _mesa_error(ctx, GL_INVALID_ENUM, "glCopyTexImage2D(target)"); - return GL_TRUE; - } - sizeOK = ctx->Driver.TestProxyTexImage(ctx, - GL_PROXY_TEXTURE_1D_ARRAY_EXT, - level, internalFormat, - format, type, - width, height, 1, border); - } - else { - _mesa_error( ctx, GL_INVALID_ENUM, "glCopyTexImage2D(target)" ); - return GL_TRUE; - } - } - else { - _mesa_problem(ctx, "invalid dimensions in copytexture_error_check"); - return GL_TRUE; - } + sizeOK = sizeOK && ctx->Driver.TestProxyTexImage(ctx, proxyTarget, level, + internalFormat, format, + type, width, height, + 1, border); if (!sizeOK) { if (dimensions == 1) { @@ -1914,7 +1934,7 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions, } if (_mesa_is_compressed_format(ctx, internalFormat)) { - if (!target_can_be_compressed(ctx, target)) { + if (!target_can_be_compressed(ctx, target, internalFormat)) { _mesa_error(ctx, GL_INVALID_ENUM, "glCopyTexImage%dD(target)", dimensions); return GL_TRUE; @@ -1973,45 +1993,11 @@ copytexsubimage_error_check1( struct gl_context *ctx, GLuint dimensions, } } - /* Check target */ - if (dimensions == 1) { - if (target != GL_TEXTURE_1D) { - _mesa_error( ctx, GL_INVALID_ENUM, "glCopyTexSubImage1D(target)" ); - return GL_TRUE; - } - } - else if (dimensions == 2) { - if (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB && - target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB) { - if (!ctx->Extensions.ARB_texture_cube_map) { - _mesa_error( ctx, GL_INVALID_ENUM, "glCopyTexSubImage2D(target)" ); - return GL_TRUE; - } - } - else if (target == GL_TEXTURE_RECTANGLE_NV) { - if (!ctx->Extensions.NV_texture_rectangle) { - _mesa_error( ctx, GL_INVALID_ENUM, "glCopyTexSubImage2D(target)" ); - return GL_TRUE; - } - } - else if (target == GL_TEXTURE_1D_ARRAY_EXT) { - if (!ctx->Extensions.MESA_texture_array) { - _mesa_error( ctx, GL_INVALID_ENUM, "glCopyTexSubImage2D(target)" ); - return GL_TRUE; - } - } - else if (target != GL_TEXTURE_2D) { - _mesa_error( ctx, GL_INVALID_ENUM, "glCopyTexSubImage2D(target)" ); - return GL_TRUE; - } - } - else if (dimensions == 3) { - if (((target != GL_TEXTURE_2D_ARRAY_EXT) || - (!ctx->Extensions.MESA_texture_array)) - && (target != GL_TEXTURE_3D)) { - _mesa_error( ctx, GL_INVALID_ENUM, "glCopyTexSubImage3D(target)" ); - return GL_TRUE; - } + /* check target (proxies not allowed) */ + if (!legal_texsubimage_target(ctx, dimensions, target)) { + _mesa_error(ctx, GL_INVALID_ENUM, "glCopyTexSubImage%uD(target=%s)", + dimensions, _mesa_lookup_enum_by_nr(target)); + return GL_TRUE; } /* Check level */ @@ -2100,11 +2086,6 @@ copytexsubimage_error_check2( struct gl_context *ctx, GLuint dimensions, } if (_mesa_is_format_compressed(teximage->TexFormat)) { - if (!target_can_be_compressed(ctx, target)) { - _mesa_error(ctx, GL_INVALID_ENUM, - "glCopyTexSubImage%dD(target)", dimensions); - return GL_TRUE; - } /* offset must be multiple of 4 */ if ((xoffset & 3) || (yoffset & 3)) { _mesa_error(ctx, GL_INVALID_VALUE, @@ -2337,89 +2318,48 @@ _mesa_choose_texture_format(struct gl_context *ctx, } - -/* - * Called from the API. Note that width includes the border. +/** + * Common code to implement all the glTexImage1D/2D/3D functions. */ -void GLAPIENTRY -_mesa_TexImage1D( GLenum target, GLint level, GLint internalFormat, - GLsizei width, GLint border, GLenum format, - GLenum type, const GLvoid *pixels ) +static void +teximage(struct gl_context *ctx, GLuint dims, + GLenum target, GLint level, GLint internalFormat, + GLsizei width, GLsizei height, GLsizei depth, + GLint border, GLenum format, GLenum type, + const GLvoid *pixels) { - GET_CURRENT_CONTEXT(ctx); + GLboolean error; + ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) - _mesa_debug(ctx, "glTexImage1D %s %d %s %d %d %s %s %p\n", + _mesa_debug(ctx, "glTexImage%uD %s %d %s %d %d %d %d %s %s %p\n", + dims, _mesa_lookup_enum_by_nr(target), level, - _mesa_lookup_enum_by_nr(internalFormat), width, border, + _mesa_lookup_enum_by_nr(internalFormat), + width, height, depth, border, _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type), pixels); - internalFormat = override_internal_format(internalFormat, width, 1); - - if (target == GL_TEXTURE_1D) { - /* non-proxy target */ - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; - const GLuint face = _mesa_tex_target_to_face(target); - - if (texture_error_check(ctx, 1, target, level, internalFormat, - format, type, width, 1, 1, border)) { - return; /* error was recorded */ - } - - if (ctx->NewState & _MESA_NEW_TRANSFER_STATE) - _mesa_update_state(ctx); - - texObj = _mesa_get_current_tex_object(ctx, target); - _mesa_lock_texture(ctx, texObj); - { - texImage = _mesa_get_tex_image(ctx, texObj, target, level); - if (!texImage) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D"); - } - else { - gl_format texFormat; - - if (texImage->Data) { - ctx->Driver.FreeTexImageData( ctx, texImage ); - } - - ASSERT(texImage->Data == NULL); - - texFormat = _mesa_choose_texture_format(ctx, texObj, target, level, - internalFormat, format, - type); - - _mesa_init_teximage_fields(ctx, target, texImage, - width, 1, 1, - border, internalFormat, - texFormat); + internalFormat = override_internal_format(internalFormat, width, height); - /* Give the texture to the driver. <pixels> may be null. */ - ASSERT(ctx->Driver.TexImage1D); - ctx->Driver.TexImage1D(ctx, target, level, internalFormat, - width, border, format, type, pixels, - &ctx->Unpack, texObj, texImage); + /* target error checking */ + if (!legal_teximage_target(ctx, dims, target)) { + _mesa_error(ctx, GL_INVALID_ENUM, "glTexImage%uD(target=%s)", + dims, _mesa_lookup_enum_by_nr(target)); + return; + } - check_gen_mipmap(ctx, target, texObj, level); + /* general error checking */ + error = texture_error_check(ctx, dims, target, level, internalFormat, + format, type, width, height, depth, border); - update_fbo_texture(ctx, texObj, face, level); + if (_mesa_is_proxy_texture(target)) { + /* Proxy texture: just clear or set state depending on error checking */ + struct gl_texture_image *texImage = + _mesa_get_proxy_tex_image(ctx, target, level); - /* state update */ - texObj->_Complete = GL_FALSE; - ctx->NewState |= _NEW_TEXTURE; - } - } - _mesa_unlock_texture(ctx, texObj); - } - else if (target == GL_PROXY_TEXTURE_1D) { - /* Proxy texture: check for errors and update proxy state */ - struct gl_texture_image *texImage; - texImage = _mesa_get_proxy_tex_image(ctx, target, level); - if (texture_error_check(ctx, 1, target, level, internalFormat, - format, type, width, 1, 1, border)) { + if (error) { /* when error, clear all proxy texture image parameters */ if (texImage) clear_teximage_fields(texImage); @@ -2428,54 +2368,28 @@ _mesa_TexImage1D( GLenum target, GLint level, GLint internalFormat, /* no error, set the tex image parameters */ struct gl_texture_object *texObj = _mesa_get_current_tex_object(ctx, target); - gl_format texFormat = _mesa_choose_texture_format(ctx, texObj, target, - level, + gl_format texFormat = _mesa_choose_texture_format(ctx, texObj, + target, level, internalFormat, format, type); - _mesa_init_teximage_fields(ctx, target, texImage, width, 1, 1, - border, internalFormat, texFormat); + + if (legal_texture_size(ctx, texFormat, width, height, depth)) { + _mesa_init_teximage_fields(ctx, target, texImage, width, height, + depth, border, internalFormat, + texFormat); + } + else if (texImage) { + clear_teximage_fields(texImage); + } } } else { - _mesa_error( ctx, GL_INVALID_ENUM, "glTexImage1D(target)" ); - return; - } -} - - -void GLAPIENTRY -_mesa_TexImage2D( GLenum target, GLint level, GLint internalFormat, - GLsizei width, GLsizei height, GLint border, - GLenum format, GLenum type, - const GLvoid *pixels ) -{ - GET_CURRENT_CONTEXT(ctx); - ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); - - if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) - _mesa_debug(ctx, "glTexImage2D %s %d %s %d %d %d %s %s %p\n", - _mesa_lookup_enum_by_nr(target), level, - _mesa_lookup_enum_by_nr(internalFormat), width, height, - border, _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(type), pixels); - - internalFormat = override_internal_format(internalFormat, width, height); - - if (target == GL_TEXTURE_2D || - (ctx->Extensions.ARB_texture_cube_map && - target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB && - target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB) || - (ctx->Extensions.NV_texture_rectangle && - target == GL_TEXTURE_RECTANGLE_NV) || - (ctx->Extensions.MESA_texture_array && - target == GL_TEXTURE_1D_ARRAY_EXT)) { /* non-proxy target */ + const GLuint face = _mesa_tex_target_to_face(target); struct gl_texture_object *texObj; struct gl_texture_image *texImage; - const GLuint face = _mesa_tex_target_to_face(target); - if (texture_error_check(ctx, 2, target, level, internalFormat, - format, type, width, height, 1, border)) { + if (error) { return; /* error was recorded */ } @@ -2483,11 +2397,13 @@ _mesa_TexImage2D( GLenum target, GLint level, GLint internalFormat, _mesa_update_state(ctx); texObj = _mesa_get_current_tex_object(ctx, target); + _mesa_lock_texture(ctx, texObj); { texImage = _mesa_get_tex_image(ctx, texObj, target, level); + if (!texImage) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D"); + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage%uD", dims); } else { gl_format texFormat; @@ -2497,63 +2413,81 @@ _mesa_TexImage2D( GLenum target, GLint level, GLint internalFormat, } ASSERT(texImage->Data == NULL); - texFormat = _mesa_choose_texture_format(ctx, texObj, target, level, internalFormat, format, type); - _mesa_init_teximage_fields(ctx, target, texImage, width, height, 1, - border, internalFormat, texFormat); - - /* Give the texture to the driver. <pixels> may be null. */ - ASSERT(ctx->Driver.TexImage2D); - ctx->Driver.TexImage2D(ctx, target, level, internalFormat, - width, height, border, format, type, - pixels, &ctx->Unpack, texObj, texImage); + if (legal_texture_size(ctx, texFormat, width, height, depth)) { + _mesa_init_teximage_fields(ctx, target, texImage, + width, height, depth, + border, internalFormat, texFormat); + + /* Give the texture to the driver. <pixels> may be null. */ + ASSERT(ctx->Driver.TexImage3D); + switch (dims) { + case 1: + ctx->Driver.TexImage1D(ctx, target, level, internalFormat, + width, border, format, + type, pixels, &ctx->Unpack, texObj, + texImage); + break; + case 2: + ctx->Driver.TexImage2D(ctx, target, level, internalFormat, + width, height, border, format, + type, pixels, &ctx->Unpack, texObj, + texImage); + break; + case 3: + ctx->Driver.TexImage3D(ctx, target, level, internalFormat, + width, height, depth, border, format, + type, pixels, &ctx->Unpack, texObj, + texImage); + break; + default: + _mesa_problem(ctx, "invalid dims=%u in teximage()", dims); + } - check_gen_mipmap(ctx, target, texObj, level); + check_gen_mipmap(ctx, target, texObj, level); - update_fbo_texture(ctx, texObj, face, level); + update_fbo_texture(ctx, texObj, face, level); - /* state update */ - texObj->_Complete = GL_FALSE; - ctx->NewState |= _NEW_TEXTURE; + /* state update */ + texObj->_Complete = GL_FALSE; + ctx->NewState |= _NEW_TEXTURE; + } + else { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage%uD", dims); + } } } _mesa_unlock_texture(ctx, texObj); } - else if (target == GL_PROXY_TEXTURE_2D || - (target == GL_PROXY_TEXTURE_CUBE_MAP_ARB && - ctx->Extensions.ARB_texture_cube_map) || - (target == GL_PROXY_TEXTURE_RECTANGLE_NV && - ctx->Extensions.NV_texture_rectangle) || - (ctx->Extensions.MESA_texture_array && - target == GL_PROXY_TEXTURE_1D_ARRAY_EXT)) { - /* Proxy texture: check for errors and update proxy state */ - struct gl_texture_image *texImage; - texImage = _mesa_get_proxy_tex_image(ctx, target, level); - if (texture_error_check(ctx, 2, target, level, internalFormat, - format, type, width, height, 1, border)) { - /* when error, clear all proxy texture image parameters */ - if (texImage) - clear_teximage_fields(texImage); - } - else { - /* no error, set the tex image parameters */ - struct gl_texture_object *texObj = - _mesa_get_current_tex_object(ctx, target); - gl_format texFormat = _mesa_choose_texture_format(ctx, texObj, - target, level, - internalFormat, - format, type); - _mesa_init_teximage_fields(ctx, target, texImage, width, height, 1, - border, internalFormat, texFormat); - } - } - else { - _mesa_error( ctx, GL_INVALID_ENUM, "glTexImage2D(target)" ); - return; - } +} + + +/* + * Called from the API. Note that width includes the border. + */ +void GLAPIENTRY +_mesa_TexImage1D( GLenum target, GLint level, GLint internalFormat, + GLsizei width, GLint border, GLenum format, + GLenum type, const GLvoid *pixels ) +{ + GET_CURRENT_CONTEXT(ctx); + teximage(ctx, 1, target, level, internalFormat, width, 1, 1, + border, format, type, pixels); +} + + +void GLAPIENTRY +_mesa_TexImage2D( GLenum target, GLint level, GLint internalFormat, + GLsizei width, GLsizei height, GLint border, + GLenum format, GLenum type, + const GLvoid *pixels ) +{ + GET_CURRENT_CONTEXT(ctx); + teximage(ctx, 2, target, level, internalFormat, width, height, 1, + border, format, type, pixels); } @@ -2568,100 +2502,8 @@ _mesa_TexImage3D( GLenum target, GLint level, GLint internalFormat, const GLvoid *pixels ) { GET_CURRENT_CONTEXT(ctx); - ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); - - if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) - _mesa_debug(ctx, "glTexImage3D %s %d %s %d %d %d %d %s %s %p\n", - _mesa_lookup_enum_by_nr(target), level, - _mesa_lookup_enum_by_nr(internalFormat), width, height, - depth, border, _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(type), pixels); - - internalFormat = override_internal_format(internalFormat, width, height); - - if (target == GL_TEXTURE_3D || - (ctx->Extensions.MESA_texture_array && - target == GL_TEXTURE_2D_ARRAY_EXT)) { - /* non-proxy target */ - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; - const GLuint face = _mesa_tex_target_to_face(target); - - if (texture_error_check(ctx, 3, target, level, (GLint) internalFormat, - format, type, width, height, depth, border)) { - return; /* error was recorded */ - } - - if (ctx->NewState & _MESA_NEW_TRANSFER_STATE) - _mesa_update_state(ctx); - - texObj = _mesa_get_current_tex_object(ctx, target); - _mesa_lock_texture(ctx, texObj); - { - texImage = _mesa_get_tex_image(ctx, texObj, target, level); - if (!texImage) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D"); - } - else { - gl_format texFormat; - - if (texImage->Data) { - ctx->Driver.FreeTexImageData( ctx, texImage ); - } - - ASSERT(texImage->Data == NULL); - texFormat = _mesa_choose_texture_format(ctx, texObj, target, level, - internalFormat, format, - type); - _mesa_init_teximage_fields(ctx, target, texImage, - width, height, depth, - border, internalFormat, texFormat); - - /* Give the texture to the driver. <pixels> may be null. */ - ASSERT(ctx->Driver.TexImage3D); - ctx->Driver.TexImage3D(ctx, target, level, internalFormat, - width, height, depth, border, format, type, - pixels, &ctx->Unpack, texObj, texImage); - - check_gen_mipmap(ctx, target, texObj, level); - - update_fbo_texture(ctx, texObj, face, level); - - /* state update */ - texObj->_Complete = GL_FALSE; - ctx->NewState |= _NEW_TEXTURE; - } - } - _mesa_unlock_texture(ctx, texObj); - } - else if (target == GL_PROXY_TEXTURE_3D || - (ctx->Extensions.MESA_texture_array && - target == GL_PROXY_TEXTURE_2D_ARRAY_EXT)) { - /* Proxy texture: check for errors and update proxy state */ - struct gl_texture_image *texImage; - texImage = _mesa_get_proxy_tex_image(ctx, target, level); - if (texture_error_check(ctx, 3, target, level, internalFormat, - format, type, width, height, depth, border)) { - /* when error, clear all proxy texture image parameters */ - if (texImage) - clear_teximage_fields(texImage); - } - else { - /* no error, set the tex image parameters */ - struct gl_texture_object *texObj = - _mesa_get_current_tex_object(ctx, target); - gl_format texFormat = _mesa_choose_texture_format(ctx, texObj, - target, level, - internalFormat, - format, type); - _mesa_init_teximage_fields(ctx, target, texImage, width, height, - depth, border, internalFormat, texFormat); - } - } - else { - _mesa_error( ctx, GL_INVALID_ENUM, "glTexImage3D(target)" ); - return; - } + teximage(ctx, 3, target, level, internalFormat, width, height, depth, + border, format, type, pixels); } @@ -2724,51 +2566,92 @@ _mesa_EGLImageTargetTexture2DOES (GLenum target, GLeglImageOES image) #endif -void GLAPIENTRY -_mesa_TexSubImage1D( GLenum target, GLint level, - GLint xoffset, GLsizei width, - GLenum format, GLenum type, - const GLvoid *pixels ) + +/** + * Implement all the glTexSubImage1/2/3D() functions. + */ +static void +texsubimage(struct gl_context *ctx, GLuint dims, GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, const GLvoid *pixels ) { struct gl_texture_object *texObj; struct gl_texture_image *texImage; - GET_CURRENT_CONTEXT(ctx); + ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) - _mesa_debug(ctx, "glTexSubImage1D %s %d %d %d %s %s %p\n", + _mesa_debug(ctx, "glTexSubImage%uD %s %d %d %d %d %d %d %d %s %s %p\n", + dims, _mesa_lookup_enum_by_nr(target), level, - xoffset, width, _mesa_lookup_enum_by_nr(format), + xoffset, yoffset, zoffset, width, height, depth, + _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type), pixels); + /* check target (proxies not allowed) */ + if (!legal_texsubimage_target(ctx, dims, target)) { + _mesa_error(ctx, GL_INVALID_ENUM, "glTexSubImage%uD(target=%s)", + dims, _mesa_lookup_enum_by_nr(target)); + return; + } + if (ctx->NewState & _MESA_NEW_TRANSFER_STATE) _mesa_update_state(ctx); - if (subtexture_error_check(ctx, 1, target, level, xoffset, 0, 0, - width, 1, 1, format, type)) { + if (subtexture_error_check(ctx, dims, target, level, xoffset, yoffset, zoffset, + width, height, depth, format, type)) { return; /* error was detected */ } - texObj = _mesa_get_current_tex_object(ctx, target); - assert(texObj); _mesa_lock_texture(ctx, texObj); { texImage = _mesa_select_tex_image(ctx, texObj, target, level); - if (subtexture_error_check2(ctx, 1, target, level, xoffset, 0, 0, - width, 1, 1, format, type, texImage)) { + if (subtexture_error_check2(ctx, dims, target, level, + xoffset, yoffset, zoffset, + width, height, depth, + format, type, texImage)) { /* error was recorded */ } - else if (width > 0) { - /* If we have a border, xoffset=-1 is legal. Bias by border width */ - xoffset += texImage->Border; + else if (width > 0 && height > 0 && height > 0) { + /* If we have a border, offset=-1 is legal. Bias by border width. */ + switch (dims) { + case 3: + zoffset += texImage->Border; + /* fall-through */ + case 2: + yoffset += texImage->Border; + /* fall-through */ + case 1: + xoffset += texImage->Border; + } - ASSERT(ctx->Driver.TexSubImage1D); - ctx->Driver.TexSubImage1D(ctx, target, level, xoffset, width, - format, type, pixels, &ctx->Unpack, - texObj, texImage); + switch (dims) { + case 1: + ctx->Driver.TexSubImage1D(ctx, target, level, + xoffset, width, + format, type, pixels, + &ctx->Unpack, texObj, texImage ); + break; + case 2: + ctx->Driver.TexSubImage2D(ctx, target, level, + xoffset, yoffset, width, height, + format, type, pixels, + &ctx->Unpack, texObj, texImage ); + break; + case 3: + ctx->Driver.TexSubImage3D(ctx, target, level, + xoffset, yoffset, zoffset, + width, height, depth, + format, type, pixels, + &ctx->Unpack, texObj, texImage ); + break; + default: + _mesa_problem(ctx, "unexpected dims in subteximage()"); + } check_gen_mipmap(ctx, target, texObj, level); @@ -2780,58 +2663,31 @@ _mesa_TexSubImage1D( GLenum target, GLint level, void GLAPIENTRY +_mesa_TexSubImage1D( GLenum target, GLint level, + GLint xoffset, GLsizei width, + GLenum format, GLenum type, + const GLvoid *pixels ) +{ + GET_CURRENT_CONTEXT(ctx); + texsubimage(ctx, 1, target, level, + xoffset, 0, 0, + width, 1, 1, + format, type, pixels); +} + + +void GLAPIENTRY _mesa_TexSubImage2D( GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels ) { - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; GET_CURRENT_CONTEXT(ctx); - ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); - - if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) - _mesa_debug(ctx, "glTexSubImage2D %s %d %d %d %d %d %s %s %p\n", - _mesa_lookup_enum_by_nr(target), level, - xoffset, yoffset, width, height, - _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(type), pixels); - - if (ctx->NewState & _MESA_NEW_TRANSFER_STATE) - _mesa_update_state(ctx); - - if (subtexture_error_check(ctx, 2, target, level, xoffset, yoffset, 0, - width, height, 1, format, type)) { - return; /* error was detected */ - } - - texObj = _mesa_get_current_tex_object(ctx, target); - - _mesa_lock_texture(ctx, texObj); - { - texImage = _mesa_select_tex_image(ctx, texObj, target, level); - - if (subtexture_error_check2(ctx, 2, target, level, xoffset, yoffset, 0, - width, height, 1, format, type, texImage)) { - /* error was recorded */ - } - else if (width > 0 && height >= 0) { - /* If we have a border, xoffset=-1 is legal. Bias by border width */ - xoffset += texImage->Border; - yoffset += texImage->Border; - - ASSERT(ctx->Driver.TexSubImage2D); - ctx->Driver.TexSubImage2D(ctx, target, level, xoffset, yoffset, - width, height, format, type, pixels, - &ctx->Unpack, texObj, texImage); - - check_gen_mipmap(ctx, target, texObj, level); - - ctx->NewState |= _NEW_TEXTURE; - } - } - _mesa_unlock_texture(ctx, texObj); + texsubimage(ctx, 2, target, level, + xoffset, yoffset, 0, + width, height, 1, + format, type, pixels); } @@ -2843,84 +2699,41 @@ _mesa_TexSubImage3D( GLenum target, GLint level, GLenum format, GLenum type, const GLvoid *pixels ) { - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; GET_CURRENT_CONTEXT(ctx); - ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); - - if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) - _mesa_debug(ctx, "glTexSubImage3D %s %d %d %d %d %d %d %d %s %s %p\n", - _mesa_lookup_enum_by_nr(target), level, - xoffset, yoffset, zoffset, width, height, depth, - _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(type), pixels); - - if (ctx->NewState & _MESA_NEW_TRANSFER_STATE) - _mesa_update_state(ctx); - - if (subtexture_error_check(ctx, 3, target, level, xoffset, yoffset, zoffset, - width, height, depth, format, type)) { - return; /* error was detected */ - } - - texObj = _mesa_get_current_tex_object(ctx, target); - - _mesa_lock_texture(ctx, texObj); - { - texImage = _mesa_select_tex_image(ctx, texObj, target, level); - - if (subtexture_error_check2(ctx, 3, target, level, - xoffset, yoffset, zoffset, - width, height, depth, - format, type, texImage)) { - /* error was recorded */ - } - else if (width > 0 && height > 0 && height > 0) { - /* If we have a border, xoffset=-1 is legal. Bias by border width */ - xoffset += texImage->Border; - yoffset += texImage->Border; - zoffset += texImage->Border; - - ASSERT(ctx->Driver.TexSubImage3D); - ctx->Driver.TexSubImage3D(ctx, target, level, - xoffset, yoffset, zoffset, - width, height, depth, - format, type, pixels, - &ctx->Unpack, texObj, texImage ); - - check_gen_mipmap(ctx, target, texObj, level); - - ctx->NewState |= _NEW_TEXTURE; - } - } - _mesa_unlock_texture(ctx, texObj); + texsubimage(ctx, 3, target, level, + xoffset, yoffset, zoffset, + width, height, depth, + format, type, pixels); } -void GLAPIENTRY -_mesa_CopyTexImage1D( GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, - GLsizei width, GLint border ) +/** + * Implement the glCopyTexImage1/2D() functions. + */ +static void +copyteximage(struct gl_context *ctx, GLuint dims, + GLenum target, GLint level, GLenum internalFormat, + GLint x, GLint y, GLsizei width, GLsizei height, GLint border ) { struct gl_texture_object *texObj; struct gl_texture_image *texImage; const GLuint face = _mesa_tex_target_to_face(target); - GET_CURRENT_CONTEXT(ctx); + ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) - _mesa_debug(ctx, "glCopyTexImage1D %s %d %s %d %d %d %d\n", + _mesa_debug(ctx, "glCopyTexImage%uD %s %d %s %d %d %d %d %d\n", + dims, _mesa_lookup_enum_by_nr(target), level, _mesa_lookup_enum_by_nr(internalFormat), - x, y, width, border); + x, y, width, height, border); if (ctx->NewState & NEW_COPY_TEX_STATE) _mesa_update_state(ctx); - if (copytexture_error_check(ctx, 1, target, level, internalFormat, - width, 1, border)) + if (copytexture_error_check(ctx, dims, target, level, internalFormat, + width, height, border)) return; texObj = _mesa_get_current_tex_object(ctx, target); @@ -2928,8 +2741,9 @@ _mesa_CopyTexImage1D( GLenum target, GLint level, _mesa_lock_texture(ctx, texObj); { texImage = _mesa_get_tex_image(ctx, texObj, target, level); + if (!texImage) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage1D"); + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims); } else { gl_format texFormat; @@ -2944,20 +2758,29 @@ _mesa_CopyTexImage1D( GLenum target, GLint level, internalFormat, GL_NONE, GL_NONE); - _mesa_init_teximage_fields(ctx, target, texImage, width, 1, 1, - border, internalFormat, texFormat); + if (legal_texture_size(ctx, texFormat, width, height, 1)) { + _mesa_init_teximage_fields(ctx, target, texImage, width, height, 1, + border, internalFormat, texFormat); - ASSERT(ctx->Driver.CopyTexImage1D); - ctx->Driver.CopyTexImage1D(ctx, target, level, internalFormat, - x, y, width, border); + ASSERT(ctx->Driver.CopyTexImage2D); + if (dims == 1) + ctx->Driver.CopyTexImage1D(ctx, target, level, internalFormat, + x, y, width, border); + else + ctx->Driver.CopyTexImage2D(ctx, target, level, internalFormat, + x, y, width, height, border); - check_gen_mipmap(ctx, target, texObj, level); + check_gen_mipmap(ctx, target, texObj, level); - update_fbo_texture(ctx, texObj, face, level); + update_fbo_texture(ctx, texObj, face, level); - /* state update */ - texObj->_Complete = GL_FALSE; - ctx->NewState |= _NEW_TEXTURE; + /* state update */ + texObj->_Complete = GL_FALSE; + ctx->NewState |= _NEW_TEXTURE; + } + else { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims); + } } } _mesa_unlock_texture(ctx, texObj); @@ -2966,92 +2789,52 @@ _mesa_CopyTexImage1D( GLenum target, GLint level, void GLAPIENTRY -_mesa_CopyTexImage2D( GLenum target, GLint level, GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border ) +_mesa_CopyTexImage1D( GLenum target, GLint level, + GLenum internalFormat, + GLint x, GLint y, + GLsizei width, GLint border ) { - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; - const GLuint face = _mesa_tex_target_to_face(target); GET_CURRENT_CONTEXT(ctx); - ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); - - if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) - _mesa_debug(ctx, "glCopyTexImage2D %s %d %s %d %d %d %d %d\n", - _mesa_lookup_enum_by_nr(target), level, - _mesa_lookup_enum_by_nr(internalFormat), - x, y, width, height, border); - - if (ctx->NewState & NEW_COPY_TEX_STATE) - _mesa_update_state(ctx); - - if (copytexture_error_check(ctx, 2, target, level, internalFormat, - width, height, border)) - return; - - texObj = _mesa_get_current_tex_object(ctx, target); - - _mesa_lock_texture(ctx, texObj); - { - texImage = _mesa_get_tex_image(ctx, texObj, target, level); - - if (!texImage) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage2D"); - } - else { - gl_format texFormat; - - if (texImage->Data) { - ctx->Driver.FreeTexImageData( ctx, texImage ); - } - - ASSERT(texImage->Data == NULL); - - texFormat = _mesa_choose_texture_format(ctx, texObj, target, level, - internalFormat, GL_NONE, - GL_NONE); - - _mesa_init_teximage_fields(ctx, target, texImage, width, height, 1, - border, internalFormat, texFormat); + copyteximage(ctx, 1, target, level, internalFormat, x, y, width, 1, border); +} - ASSERT(ctx->Driver.CopyTexImage2D); - ctx->Driver.CopyTexImage2D(ctx, target, level, internalFormat, - x, y, width, height, border); - check_gen_mipmap(ctx, target, texObj, level); - update_fbo_texture(ctx, texObj, face, level); - - /* state update */ - texObj->_Complete = GL_FALSE; - ctx->NewState |= _NEW_TEXTURE; - } - } - _mesa_unlock_texture(ctx, texObj); +void GLAPIENTRY +_mesa_CopyTexImage2D( GLenum target, GLint level, GLenum internalFormat, + GLint x, GLint y, GLsizei width, GLsizei height, + GLint border ) +{ + GET_CURRENT_CONTEXT(ctx); + copyteximage(ctx, 2, target, level, internalFormat, + x, y, width, height, border); } -void GLAPIENTRY -_mesa_CopyTexSubImage1D( GLenum target, GLint level, - GLint xoffset, GLint x, GLint y, GLsizei width ) + +/** + * Implementation for glCopyTexSubImage1/2/3D() functions. + */ +static void +copytexsubimage(struct gl_context *ctx, GLuint dims, GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLint x, GLint y, GLsizei width, GLsizei height) { struct gl_texture_object *texObj; struct gl_texture_image *texImage; - GLint yoffset = 0; - GLsizei height = 1; - GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) - _mesa_debug(ctx, "glCopyTexSubImage1D %s %d %d %d %d %d\n", + _mesa_debug(ctx, "glCopyTexSubImage%uD %s %d %d %d %d %d %d %d %d\n", + dims, _mesa_lookup_enum_by_nr(target), - level, xoffset, x, y, width); + level, xoffset, yoffset, zoffset, x, y, width, height); if (ctx->NewState & NEW_COPY_TEX_STATE) _mesa_update_state(ctx); - if (copytexsubimage_error_check1(ctx, 1, target, level)) + if (copytexsubimage_error_check1(ctx, dims, target, level)) return; texObj = _mesa_get_current_tex_object(ctx, target); @@ -3060,19 +2843,43 @@ _mesa_CopyTexSubImage1D( GLenum target, GLint level, { texImage = _mesa_select_tex_image(ctx, texObj, target, level); - if (copytexsubimage_error_check2(ctx, 1, target, level, - xoffset, 0, 0, width, 1, texImage)) { - /* error was recorded */ + if (copytexsubimage_error_check2(ctx, dims, target, level, xoffset, yoffset, + zoffset, width, height, texImage)) { + /* error was recored */ } else { - /* If we have a border, xoffset=-1 is legal. Bias by border width */ - xoffset += texImage->Border; + /* If we have a border, offset=-1 is legal. Bias by border width. */ + switch (dims) { + case 3: + zoffset += texImage->Border; + /* fall-through */ + case 2: + yoffset += texImage->Border; + /* fall-through */ + case 1: + xoffset += texImage->Border; + } if (_mesa_clip_copytexsubimage(ctx, &xoffset, &yoffset, &x, &y, &width, &height)) { - ASSERT(ctx->Driver.CopyTexSubImage1D); - ctx->Driver.CopyTexSubImage1D(ctx, target, level, - xoffset, x, y, width); + switch (dims) { + case 1: + ctx->Driver.CopyTexSubImage1D(ctx, target, level, + xoffset, x, y, width); + break; + case 2: + ctx->Driver.CopyTexSubImage2D(ctx, target, level, + xoffset, yoffset, + x, y, width, height); + break; + case 3: + ctx->Driver.CopyTexSubImage3D(ctx, target, level, + xoffset, yoffset, zoffset, + x, y, width, height); + break; + default: + _mesa_problem(ctx, "bad dims in copytexsubimage()"); + } check_gen_mipmap(ctx, target, texObj, level); @@ -3084,57 +2891,24 @@ _mesa_CopyTexSubImage1D( GLenum target, GLint level, } +void GLAPIENTRY +_mesa_CopyTexSubImage1D( GLenum target, GLint level, + GLint xoffset, GLint x, GLint y, GLsizei width ) +{ + GET_CURRENT_CONTEXT(ctx); + copytexsubimage(ctx, 1, target, level, xoffset, 0, 0, x, y, width, 1); +} + + void GLAPIENTRY _mesa_CopyTexSubImage2D( GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height ) { - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; GET_CURRENT_CONTEXT(ctx); - ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); - - if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) - _mesa_debug(ctx, "glCopyTexSubImage2D %s %d %d %d %d %d %d %d\n", - _mesa_lookup_enum_by_nr(target), - level, xoffset, yoffset, x, y, width, height); - - if (ctx->NewState & NEW_COPY_TEX_STATE) - _mesa_update_state(ctx); - - if (copytexsubimage_error_check1(ctx, 2, target, level)) - return; - - texObj = _mesa_get_current_tex_object(ctx, target); - - _mesa_lock_texture(ctx, texObj); - { - texImage = _mesa_select_tex_image(ctx, texObj, target, level); - - if (copytexsubimage_error_check2(ctx, 2, target, level, - xoffset, yoffset, 0, - width, height, texImage)) { - /* error was recorded */ - } - else { - /* If we have a border, xoffset=-1 is legal. Bias by border width */ - xoffset += texImage->Border; - yoffset += texImage->Border; - - if (_mesa_clip_copytexsubimage(ctx, &xoffset, &yoffset, &x, &y, - &width, &height)) { - ASSERT(ctx->Driver.CopyTexSubImage2D); - ctx->Driver.CopyTexSubImage2D(ctx, target, level, xoffset, yoffset, - x, y, width, height); - - check_gen_mipmap(ctx, target, texObj, level); - - ctx->NewState |= _NEW_TEXTURE; - } - } - } - _mesa_unlock_texture(ctx, texObj); + copytexsubimage(ctx, 2, target, level, xoffset, yoffset, 0, x, y, + width, height); } @@ -3144,52 +2918,9 @@ _mesa_CopyTexSubImage3D( GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height ) { - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; GET_CURRENT_CONTEXT(ctx); - ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); - - if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) - _mesa_debug(ctx, "glCopyTexSubImage3D %s %d %d %d %d %d %d %d %d\n", - _mesa_lookup_enum_by_nr(target), - level, xoffset, yoffset, zoffset, x, y, width, height); - - if (ctx->NewState & NEW_COPY_TEX_STATE) - _mesa_update_state(ctx); - - if (copytexsubimage_error_check1(ctx, 3, target, level)) - return; - - texObj = _mesa_get_current_tex_object(ctx, target); - - _mesa_lock_texture(ctx, texObj); - { - texImage = _mesa_select_tex_image(ctx, texObj, target, level); - - if (copytexsubimage_error_check2(ctx, 3, target, level, xoffset, yoffset, - zoffset, width, height, texImage)) { - /* error was recored */ - } - else { - /* If we have a border, xoffset=-1 is legal. Bias by border width */ - xoffset += texImage->Border; - yoffset += texImage->Border; - zoffset += texImage->Border; - - if (_mesa_clip_copytexsubimage(ctx, &xoffset, &yoffset, &x, &y, - &width, &height)) { - ASSERT(ctx->Driver.CopyTexSubImage3D); - ctx->Driver.CopyTexSubImage3D(ctx, target, level, - xoffset, yoffset, zoffset, - x, y, width, height); - - check_gen_mipmap(ctx, target, texObj, level); - - ctx->NewState |= _NEW_TEXTURE; - } - } - } - _mesa_unlock_texture(ctx, texObj); + copytexsubimage(ctx, 3, target, level, xoffset, yoffset, zoffset, + x, y, width, height); } @@ -3234,45 +2965,18 @@ compressed_texture_error_check(struct gl_context *ctx, GLint dimensions, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize) { - GLint expectedSize, maxLevels = 0, maxTextureSize; + const GLenum proxyTarget = get_proxy_target(target); + const GLint maxLevels = _mesa_max_texture_levels(ctx, target); + GLint expectedSize; - if (dimensions == 1) { - /* 1D compressed textures not allowed */ - return GL_INVALID_ENUM; - } - else if (dimensions == 2) { - if (target == GL_PROXY_TEXTURE_2D) { - maxLevels = ctx->Const.MaxTextureLevels; - } - else if (target == GL_TEXTURE_2D) { - maxLevels = ctx->Const.MaxTextureLevels; - } - else if (target == GL_PROXY_TEXTURE_CUBE_MAP_ARB) { - if (!ctx->Extensions.ARB_texture_cube_map) - return GL_INVALID_ENUM; /*target*/ - maxLevels = ctx->Const.MaxCubeTextureLevels; - } - else if (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB && - target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB) { - if (!ctx->Extensions.ARB_texture_cube_map) - return GL_INVALID_ENUM; /*target*/ - maxLevels = ctx->Const.MaxCubeTextureLevels; - } - else { - return GL_INVALID_ENUM; /*target*/ - } - } - else if (dimensions == 3) { - /* 3D compressed textures not allowed */ - return GL_INVALID_ENUM; - } - else { - assert(0); + /* check level */ + if (level < 0 || level >= maxLevels) + return GL_INVALID_VALUE; + + if (!target_can_be_compressed(ctx, target, internalFormat)) { return GL_INVALID_ENUM; } - maxTextureSize = 1 << (maxLevels - 1); - /* This will detect any invalid internalFormat value */ if (!_mesa_is_compressed_format(ctx, internalFormat)) return GL_INVALID_ENUM; @@ -3281,47 +2985,51 @@ compressed_texture_error_check(struct gl_context *ctx, GLint dimensions, if (_mesa_base_tex_format(ctx, internalFormat) < 0) return GL_INVALID_ENUM; + /* No compressed formats support borders at this time */ if (border != 0) return GL_INVALID_VALUE; - /* - * XXX We should probably use the proxy texture error check function here. - */ - if (width < 1 || width > maxTextureSize || - (!ctx->Extensions.ARB_texture_non_power_of_two && !_mesa_is_pow_two(width))) - return GL_INVALID_VALUE; - - if ((height < 1 || height > maxTextureSize || - (!ctx->Extensions.ARB_texture_non_power_of_two && !_mesa_is_pow_two(height))) - && dimensions > 1) - return GL_INVALID_VALUE; - - if ((depth < 1 || depth > maxTextureSize || - (!ctx->Extensions.ARB_texture_non_power_of_two && !_mesa_is_pow_two(depth))) - && dimensions > 2) - return GL_INVALID_VALUE; - /* For cube map, width must equal height */ if (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB && target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB && width != height) return GL_INVALID_VALUE; - if (level < 0 || level >= maxLevels) - return GL_INVALID_VALUE; + /* check image size against compression block size */ + { + gl_format texFormat = + ctx->Driver.ChooseTextureFormat(ctx, internalFormat, + GL_NONE, GL_NONE); + GLuint bw, bh; - expectedSize = compressed_tex_size(width, height, depth, internalFormat); - if (expectedSize != imageSize) - return GL_INVALID_VALUE; + _mesa_get_format_block_size(texFormat, &bw, &bh); + if ((width > bw && width % bw > 0) || + (height > bh && height % bh > 0)) { + /* + * Per GL_ARB_texture_compression: GL_INVALID_OPERATION is + * generated [...] if any parameter combinations are not + * supported by the specific compressed internal format. + */ + return GL_INVALID_OPERATION; + } + } -#if FEATURE_EXT_texture_sRGB - if ((internalFormat == GL_COMPRESSED_SRGB_S3TC_DXT1_EXT || - internalFormat == GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT || - internalFormat == GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT || - internalFormat == GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT) - && border != 0) { + /* check image sizes */ + if (!ctx->Driver.TestProxyTexImage(ctx, proxyTarget, level, + internalFormat, GL_NONE, GL_NONE, + width, height, depth, border)) { + /* See error comment above */ return GL_INVALID_OPERATION; } -#endif + + /* check image size in bytes */ + expectedSize = compressed_tex_size(width, height, depth, internalFormat); + if (expectedSize != imageSize) { + /* Per GL_ARB_texture_compression: GL_INVALID_VALUE is generated [...] + * if <imageSize> is not consistent with the format, dimensions, and + * contents of the specified image. + */ + return GL_INVALID_VALUE; + } return GL_NO_ERROR; } @@ -3463,159 +3171,101 @@ compressed_subtexture_error_check2(struct gl_context *ctx, GLuint dims, } - -void GLAPIENTRY -_mesa_CompressedTexImage1DARB(GLenum target, GLint level, - GLenum internalFormat, GLsizei width, - GLint border, GLsizei imageSize, - const GLvoid *data) +/** + * Implementation of the glCompressedTexImage1/2/3D() functions. + */ +static void +compressedteximage(struct gl_context *ctx, GLuint dims, + GLenum target, GLint level, + GLenum internalFormat, GLsizei width, + GLsizei height, GLsizei depth, GLint border, + GLsizei imageSize, const GLvoid *data) { - GET_CURRENT_CONTEXT(ctx); + GLenum error; + ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) - _mesa_debug(ctx, "glCompressedTexImage1DARB %s %d %s %d %d %d %p\n", + _mesa_debug(ctx, + "glCompressedTexImage%uDARB %s %d %s %d %d %d %d %d %p\n", + dims, _mesa_lookup_enum_by_nr(target), level, _mesa_lookup_enum_by_nr(internalFormat), - width, border, imageSize, data); - - if (target == GL_TEXTURE_1D) { - /* non-proxy target */ - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; - GLenum error = compressed_texture_error_check(ctx, 1, target, level, - internalFormat, width, 1, 1, border, imageSize); - if (error) { - _mesa_error(ctx, error, "glCompressedTexImage1D"); - return; - } - - texObj = _mesa_get_current_tex_object(ctx, target); - - _mesa_lock_texture(ctx, texObj); - { - texImage = _mesa_get_tex_image(ctx, texObj, target, level); - if (!texImage) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage1D"); - } - else { - gl_format texFormat; - - if (texImage->Data) { - ctx->Driver.FreeTexImageData( ctx, texImage ); - } - ASSERT(texImage->Data == NULL); - - texFormat = _mesa_choose_texture_format(ctx, texObj, target, level, - internalFormat, GL_NONE, - GL_NONE); + width, height, depth, border, imageSize, data); - _mesa_init_teximage_fields(ctx, target, texImage, width, 1, 1, - border, internalFormat, texFormat); + /* check target */ + if (!legal_teximage_target(ctx, dims, target)) { + _mesa_error(ctx, GL_INVALID_ENUM, "glCompressedTexImage%uD(target=%s)", + dims, _mesa_lookup_enum_by_nr(target)); + return; + } - ASSERT(ctx->Driver.CompressedTexImage1D); - ctx->Driver.CompressedTexImage1D(ctx, target, level, - internalFormat, width, border, - imageSize, data, - texObj, texImage); + error = compressed_texture_error_check(ctx, dims, target, level, + internalFormat, width, height, depth, + border, imageSize); - check_gen_mipmap(ctx, target, texObj, level); +#if FEATURE_ES + /* XXX this is kind of a hack */ + if (error) { + _mesa_error(ctx, error, "glTexImage2D"); + return; + } - /* state update */ - texObj->_Complete = GL_FALSE; - ctx->NewState |= _NEW_TEXTURE; - } + if (dims == 2) { + switch (internalFormat) { + case GL_PALETTE4_RGB8_OES: + case GL_PALETTE4_RGBA8_OES: + case GL_PALETTE4_R5_G6_B5_OES: + case GL_PALETTE4_RGBA4_OES: + case GL_PALETTE4_RGB5_A1_OES: + case GL_PALETTE8_RGB8_OES: + case GL_PALETTE8_RGBA8_OES: + case GL_PALETTE8_R5_G6_B5_OES: + case GL_PALETTE8_RGBA4_OES: + case GL_PALETTE8_RGB5_A1_OES: + _mesa_cpal_compressed_teximage2d(target, level, internalFormat, + width, height, imageSize, data); + return; } - _mesa_unlock_texture(ctx, texObj); } - else if (target == GL_PROXY_TEXTURE_1D) { - /* Proxy texture: check for errors and update proxy state */ - GLenum error = compressed_texture_error_check(ctx, 1, target, level, - internalFormat, width, 1, 1, border, imageSize); +#endif + + if (_mesa_is_proxy_texture(target)) { + /* Proxy texture: just check for errors and update proxy state */ + struct gl_texture_image *texImage; + if (!error) { - ASSERT(ctx->Driver.TestProxyTexImage); - error = !(*ctx->Driver.TestProxyTexImage)(ctx, target, level, - internalFormat, GL_NONE, GL_NONE, - width, 1, 1, border); - } - if (error) { - /* if error, clear all proxy texture image parameters */ - struct gl_texture_image *texImage; - texImage = _mesa_get_proxy_tex_image(ctx, target, level); - if (texImage) - clear_teximage_fields(texImage); + struct gl_texture_object *texObj = + _mesa_get_current_tex_object(ctx, target); + gl_format texFormat = + _mesa_choose_texture_format(ctx, texObj, target, level, + internalFormat, GL_NONE, GL_NONE); + if (!legal_texture_size(ctx, texFormat, width, height, depth)) { + error = GL_OUT_OF_MEMORY; + } } - else { - /* store the teximage parameters */ - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; - gl_format texFormat; - texObj = _mesa_get_current_tex_object(ctx, target); - - _mesa_lock_texture(ctx, texObj); - { - texImage = _mesa_select_tex_image(ctx, texObj, target, level); - texFormat = _mesa_choose_texture_format(ctx, texObj, target, level, - internalFormat, GL_NONE, - GL_NONE); - _mesa_init_teximage_fields(ctx, target, texImage, width, 1, 1, - border, internalFormat, texFormat); - } - _mesa_unlock_texture(ctx, texObj); + texImage = _mesa_get_proxy_tex_image(ctx, target, level); + if (texImage) { + if (error) { + /* if error, clear all proxy texture image parameters */ + clear_teximage_fields(texImage); + } + else { + /* no error: store the teximage parameters */ + _mesa_init_teximage_fields(ctx, target, texImage, width, height, + depth, border, internalFormat, + MESA_FORMAT_NONE); + } } } else { - _mesa_error(ctx, GL_INVALID_ENUM, "glCompressedTexImage1D(target)"); - return; - } -} - -void GLAPIENTRY -_mesa_CompressedTexImage2DARB(GLenum target, GLint level, - GLenum internalFormat, GLsizei width, - GLsizei height, GLint border, GLsizei imageSize, - const GLvoid *data) -{ - GET_CURRENT_CONTEXT(ctx); - ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); - - if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) - _mesa_debug(ctx, "glCompressedTexImage2DARB %s %d %s %d %d %d %d %p\n", - _mesa_lookup_enum_by_nr(target), level, - _mesa_lookup_enum_by_nr(internalFormat), - width, height, border, imageSize, data); - -#if FEATURE_ES - switch (internalFormat) { - case GL_PALETTE4_RGB8_OES: - case GL_PALETTE4_RGBA8_OES: - case GL_PALETTE4_R5_G6_B5_OES: - case GL_PALETTE4_RGBA4_OES: - case GL_PALETTE4_RGB5_A1_OES: - case GL_PALETTE8_RGB8_OES: - case GL_PALETTE8_RGBA8_OES: - case GL_PALETTE8_R5_G6_B5_OES: - case GL_PALETTE8_RGBA4_OES: - case GL_PALETTE8_RGB5_A1_OES: - _mesa_cpal_compressed_teximage2d(target, level, internalFormat, - width, height, imageSize, data); - return; - } -#endif - - if (target == GL_TEXTURE_2D || - (ctx->Extensions.ARB_texture_cube_map && - target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB && - target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB)) { /* non-proxy target */ struct gl_texture_object *texObj; struct gl_texture_image *texImage; - GLenum error = compressed_texture_error_check(ctx, 2, target, level, - internalFormat, width, height, 1, border, imageSize); if (error) { - _mesa_error(ctx, error, "glCompressedTexImage2D"); + _mesa_error(ctx, error, "glCompressedTexImage%uD", dims); return; } @@ -3625,7 +3275,8 @@ _mesa_CompressedTexImage2DARB(GLenum target, GLint level, { texImage = _mesa_get_tex_image(ctx, texObj, target, level); if (!texImage) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D"); + _mesa_error(ctx, GL_OUT_OF_MEMORY, + "glCompressedTexImage%uD", dims); } else { gl_format texFormat; @@ -3639,67 +3290,78 @@ _mesa_CompressedTexImage2DARB(GLenum target, GLint level, internalFormat, GL_NONE, GL_NONE); - _mesa_init_teximage_fields(ctx, target, texImage, width, height, 1, - border, internalFormat, texFormat); - - ASSERT(ctx->Driver.CompressedTexImage2D); - ctx->Driver.CompressedTexImage2D(ctx, target, level, - internalFormat, width, height, - border, imageSize, data, - texObj, texImage); + if (legal_texture_size(ctx, texFormat, width, height, depth)) { + _mesa_init_teximage_fields(ctx, target, texImage, + width, height, depth, + border, internalFormat, texFormat); + + switch (dims) { + case 1: + ASSERT(ctx->Driver.CompressedTexImage1D); + ctx->Driver.CompressedTexImage1D(ctx, target, level, + internalFormat, + width, + border, imageSize, data, + texObj, texImage); + break; + case 2: + ASSERT(ctx->Driver.CompressedTexImage2D); + ctx->Driver.CompressedTexImage2D(ctx, target, level, + internalFormat, + width, height, + border, imageSize, data, + texObj, texImage); + break; + case 3: + ASSERT(ctx->Driver.CompressedTexImage3D); + ctx->Driver.CompressedTexImage3D(ctx, target, level, + internalFormat, + width, height, depth, + border, imageSize, data, + texObj, texImage); + break; + default: + _mesa_problem(ctx, "bad dims in compressedteximage"); + } - check_gen_mipmap(ctx, target, texObj, level); + check_gen_mipmap(ctx, target, texObj, level); - /* state update */ - texObj->_Complete = GL_FALSE; - ctx->NewState |= _NEW_TEXTURE; + /* state update */ + texObj->_Complete = GL_FALSE; + ctx->NewState |= _NEW_TEXTURE; + } + else { + _mesa_error(ctx, GL_OUT_OF_MEMORY, + "glCompressedTexImage%uD", dims); + } } } _mesa_unlock_texture(ctx, texObj); } - else if (target == GL_PROXY_TEXTURE_2D || - (target == GL_PROXY_TEXTURE_CUBE_MAP_ARB && - ctx->Extensions.ARB_texture_cube_map)) { - /* Proxy texture: check for errors and update proxy state */ - GLenum error = compressed_texture_error_check(ctx, 2, target, level, - internalFormat, width, height, 1, border, imageSize); - if (!error) { - ASSERT(ctx->Driver.TestProxyTexImage); - error = !(*ctx->Driver.TestProxyTexImage)(ctx, target, level, - internalFormat, GL_NONE, GL_NONE, - width, height, 1, border); - } - if (error) { - /* if error, clear all proxy texture image parameters */ - struct gl_texture_image *texImage; - texImage = _mesa_get_proxy_tex_image(ctx, target, level); - if (texImage) - clear_teximage_fields(texImage); - } - else { - /* store the teximage parameters */ - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; - gl_format texFormat; +} - texObj = _mesa_get_current_tex_object(ctx, target); - _mesa_lock_texture(ctx, texObj); - { - texImage = _mesa_select_tex_image(ctx, texObj, target, level); - texFormat = _mesa_choose_texture_format(ctx, texObj, target, level, - internalFormat, GL_NONE, - GL_NONE); - _mesa_init_teximage_fields(ctx, target, texImage, width, height, 1, - border, internalFormat, texFormat); - } - _mesa_unlock_texture(ctx, texObj); - } - } - else { - _mesa_error(ctx, GL_INVALID_ENUM, "glCompressedTexImage2D(target)"); - return; - } +void GLAPIENTRY +_mesa_CompressedTexImage1DARB(GLenum target, GLint level, + GLenum internalFormat, GLsizei width, + GLint border, GLsizei imageSize, + const GLvoid *data) +{ + GET_CURRENT_CONTEXT(ctx); + compressedteximage(ctx, 1, target, level, internalFormat, + width, 1, 1, border, imageSize, data); +} + + +void GLAPIENTRY +_mesa_CompressedTexImage2DARB(GLenum target, GLint level, + GLenum internalFormat, GLsizei width, + GLsizei height, GLint border, GLsizei imageSize, + const GLvoid *data) +{ + GET_CURRENT_CONTEXT(ctx); + compressedteximage(ctx, 2, target, level, internalFormat, + width, height, 1, border, imageSize, data); } @@ -3710,107 +3372,8 @@ _mesa_CompressedTexImage3DARB(GLenum target, GLint level, GLsizei imageSize, const GLvoid *data) { GET_CURRENT_CONTEXT(ctx); - ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); - - if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) - _mesa_debug(ctx, "glCompressedTexImage3DARB %s %d %s %d %d %d %d %d %p\n", - _mesa_lookup_enum_by_nr(target), level, - _mesa_lookup_enum_by_nr(internalFormat), - width, height, depth, border, imageSize, data); - - if (target == GL_TEXTURE_3D) { - /* non-proxy target */ - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; - GLenum error = compressed_texture_error_check(ctx, 3, target, level, - internalFormat, width, height, depth, border, imageSize); - if (error) { - _mesa_error(ctx, error, "glCompressedTexImage3D"); - return; - } - - texObj = _mesa_get_current_tex_object(ctx, target); - - _mesa_lock_texture(ctx, texObj); - { - texImage = _mesa_get_tex_image(ctx, texObj, target, level); - if (!texImage) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage3D"); - } - else { - gl_format texFormat; - - if (texImage->Data) { - ctx->Driver.FreeTexImageData( ctx, texImage ); - } - ASSERT(texImage->Data == NULL); - - texFormat = _mesa_choose_texture_format(ctx, texObj, target, level, - internalFormat, GL_NONE, - GL_NONE); - - _mesa_init_teximage_fields(ctx, target, texImage, - width, height, depth, - border, internalFormat, texFormat); - - ASSERT(ctx->Driver.CompressedTexImage3D); - ctx->Driver.CompressedTexImage3D(ctx, target, level, - internalFormat, - width, height, depth, - border, imageSize, data, - texObj, texImage); - - check_gen_mipmap(ctx, target, texObj, level); - - /* state update */ - texObj->_Complete = GL_FALSE; - ctx->NewState |= _NEW_TEXTURE; - } - } - _mesa_unlock_texture(ctx, texObj); - } - else if (target == GL_PROXY_TEXTURE_3D) { - /* Proxy texture: check for errors and update proxy state */ - GLenum error = compressed_texture_error_check(ctx, 3, target, level, - internalFormat, width, height, depth, border, imageSize); - if (!error) { - ASSERT(ctx->Driver.TestProxyTexImage); - error = !(*ctx->Driver.TestProxyTexImage)(ctx, target, level, - internalFormat, GL_NONE, GL_NONE, - width, height, depth, border); - } - if (error) { - /* if error, clear all proxy texture image parameters */ - struct gl_texture_image *texImage; - texImage = _mesa_get_proxy_tex_image(ctx, target, level); - if (texImage) - clear_teximage_fields(texImage); - } - else { - /* store the teximage parameters */ - struct gl_texture_object *texObj; - struct gl_texture_image *texImage; - gl_format texFormat; - - texObj = _mesa_get_current_tex_object(ctx, target); - - _mesa_lock_texture(ctx, texObj); - { - texImage = _mesa_select_tex_image(ctx, texObj, target, level); - texFormat = _mesa_choose_texture_format(ctx, texObj, target, level, - internalFormat, GL_NONE, - GL_NONE); - _mesa_init_teximage_fields(ctx, target, texImage, width, height, - depth, border, internalFormat, - texFormat); - } - _mesa_unlock_texture(ctx, texObj); - } - } - else { - _mesa_error(ctx, GL_INVALID_ENUM, "glCompressedTexImage3D(target)"); - return; - } + compressedteximage(ctx, 3, target, level, internalFormat, + width, height, depth, border, imageSize, data); } diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index 72dbf10cc4b..5bc5639dbf7 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -372,15 +372,12 @@ _mesa_reference_texobj(struct gl_texture_object **ptr, /** - * Report why a texture object is incomplete. - * - * \param t texture object. - * \param why string describing why it's incomplete. - * - * \note For debug purposes only. + * Mark a texture object as incomplete. + * \param t texture object + * \param fmt... string describing why it's incomplete (for debugging). */ static void -incomplete(const struct gl_texture_object *t, const char *fmt, ...) +incomplete(struct gl_texture_object *t, const char *fmt, ...) { #if 0 va_list args; @@ -392,6 +389,7 @@ incomplete(const struct gl_texture_object *t, const char *fmt, ...) printf("Texture Obj %d incomplete because: %s\n", t->Name, s); #endif + t->_Complete = GL_FALSE; } @@ -421,14 +419,12 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx, */ if ((baseLevel < 0) || (baseLevel >= MAX_TEXTURE_LEVELS)) { incomplete(t, "base level = %d is invalid", baseLevel); - t->_Complete = GL_FALSE; return; } /* Always need the base level image */ if (!t->Image[0][baseLevel]) { incomplete(t, "Image[baseLevel=%d] == NULL", baseLevel); - t->_Complete = GL_FALSE; return; } @@ -437,7 +433,6 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx, t->Image[0][baseLevel]->Height == 0 || t->Image[0][baseLevel]->Depth == 0) { incomplete(t, "texture width = 0"); - t->_Complete = GL_FALSE; return; } @@ -491,7 +486,6 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx, if (t->Image[face][baseLevel] == NULL || t->Image[face][baseLevel]->Width2 != w || t->Image[face][baseLevel]->Height2 != h) { - t->_Complete = GL_FALSE; incomplete(t, "Cube face missing or mismatched size"); return; } @@ -508,7 +502,6 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx, GLint maxLevel = t->_MaxLevel; if (minLevel > maxLevel) { - t->_Complete = GL_FALSE; incomplete(t, "minLevel > maxLevel"); return; } @@ -517,12 +510,10 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx, for (i = minLevel; i <= maxLevel; i++) { if (t->Image[0][i]) { if (t->Image[0][i]->TexFormat != t->Image[0][baseLevel]->TexFormat) { - t->_Complete = GL_FALSE; incomplete(t, "Format[i] != Format[baseLevel]"); return; } if (t->Image[0][i]->Border != t->Image[0][baseLevel]->Border) { - t->_Complete = GL_FALSE; incomplete(t, "Border[i] != Border[baseLevel]"); return; } @@ -540,12 +531,10 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx, } if (i >= minLevel && i <= maxLevel) { if (!t->Image[0][i]) { - t->_Complete = GL_FALSE; incomplete(t, "1D Image[0][i] == NULL"); return; } if (t->Image[0][i]->Width2 != width ) { - t->_Complete = GL_FALSE; incomplete(t, "1D Image[0][i] bad width"); return; } @@ -569,17 +558,14 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx, } if (i >= minLevel && i <= maxLevel) { if (!t->Image[0][i]) { - t->_Complete = GL_FALSE; incomplete(t, "2D Image[0][i] == NULL"); return; } if (t->Image[0][i]->Width2 != width) { - t->_Complete = GL_FALSE; incomplete(t, "2D Image[0][i] bad width"); return; } if (t->Image[0][i]->Height2 != height) { - t->_Complete = GL_FALSE; incomplete(t, "2D Image[0][i] bad height"); return; } @@ -607,26 +593,21 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx, if (i >= minLevel && i <= maxLevel) { if (!t->Image[0][i]) { incomplete(t, "3D Image[0][i] == NULL"); - t->_Complete = GL_FALSE; return; } if (t->Image[0][i]->_BaseFormat == GL_DEPTH_COMPONENT) { - t->_Complete = GL_FALSE; incomplete(t, "GL_DEPTH_COMPONENT only works with 1/2D tex"); return; } if (t->Image[0][i]->Width2 != width) { - t->_Complete = GL_FALSE; incomplete(t, "3D Image[0][i] bad width"); return; } if (t->Image[0][i]->Height2 != height) { - t->_Complete = GL_FALSE; incomplete(t, "3D Image[0][i] bad height"); return; } if (t->Image[0][i]->Depth2 != depth) { - t->_Complete = GL_FALSE; incomplete(t, "3D Image[0][i] bad depth"); return; } @@ -652,20 +633,17 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx, for (face = 0; face < 6; face++) { /* check that we have images defined */ if (!t->Image[face][i]) { - t->_Complete = GL_FALSE; incomplete(t, "CubeMap Image[n][i] == NULL"); return; } /* Don't support GL_DEPTH_COMPONENT for cube maps */ if (t->Image[face][i]->_BaseFormat == GL_DEPTH_COMPONENT) { - t->_Complete = GL_FALSE; incomplete(t, "GL_DEPTH_COMPONENT only works with 1/2D tex"); return; } /* check that all six images have same size */ if (t->Image[face][i]->Width2 != width || t->Image[face][i]->Height2 != height) { - t->_Complete = GL_FALSE; incomplete(t, "CubeMap Image[n][i] bad size"); return; } @@ -688,6 +666,44 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx, /** + * Check if the given cube map texture is "cube complete" as defined in + * the OpenGL specification. + */ +GLboolean +_mesa_cube_complete(const struct gl_texture_object *texObj) +{ + const GLint baseLevel = texObj->BaseLevel; + const struct gl_texture_image *img0, *img; + GLuint face; + + if (texObj->Target != GL_TEXTURE_CUBE_MAP) + return GL_FALSE; + + if ((baseLevel < 0) || (baseLevel >= MAX_TEXTURE_LEVELS)) + return GL_FALSE; + + /* check first face */ + img0 = texObj->Image[0][baseLevel]; + if (!img0 || + img0->Width < 1 || + img0->Width != img0->Height) + return GL_FALSE; + + /* check remaining faces vs. first face */ + for (face = 1; face < 6; face++) { + img = texObj->Image[face][baseLevel]; + if (!img || + img->Width != img0->Width || + img->Height != img0->Height || + img->TexFormat != img0->TexFormat) + return GL_FALSE; + } + + return GL_TRUE; +} + + +/** * Mark a texture object dirty. It forces the object to be incomplete * and optionally forces the context to re-validate its state. * diff --git a/src/mesa/main/texobj.h b/src/mesa/main/texobj.h index 821b35caa36..2461b063efd 100644 --- a/src/mesa/main/texobj.h +++ b/src/mesa/main/texobj.h @@ -32,8 +32,9 @@ #define TEXTOBJ_H -#include "mtypes.h" +#include "glheader.h" +struct gl_context; /** * \name Internal functions @@ -68,6 +69,9 @@ extern void _mesa_test_texobj_completeness( const struct gl_context *ctx, struct gl_texture_object *obj ); +extern GLboolean +_mesa_cube_complete(const struct gl_texture_object *texObj); + extern void _mesa_dirty_texobj(struct gl_context *ctx, struct gl_texture_object *texObj, GLboolean invalidate_state); diff --git a/src/mesa/main/texrender.h b/src/mesa/main/texrender.h index 5e68fb03b57..cacd091160e 100644 --- a/src/mesa/main/texrender.h +++ b/src/mesa/main/texrender.h @@ -1,7 +1,9 @@ #ifndef TEXRENDER_H #define TEXRENDER_H -#include "mtypes.h" +struct gl_context; +struct gl_framebuffer; +struct gl_renderbuffer_attachment; extern void _mesa_render_texture(struct gl_context *ctx, diff --git a/src/mesa/main/transformfeedback.h b/src/mesa/main/transformfeedback.h index 752cd4e201f..b0d5b70f2b7 100644 --- a/src/mesa/main/transformfeedback.h +++ b/src/mesa/main/transformfeedback.h @@ -25,8 +25,13 @@ #ifndef TRANSFORM_FEEDBACK_H #define TRANSFORM_FEEDBACK_H -#include "main/mtypes.h" +#include "compiler.h" +#include "glheader.h" +#include "mfeatures.h" +struct _glapi_table; +struct dd_function_table; +struct gl_context; extern void _mesa_init_transform_feedback(struct gl_context *ctx); diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c index 340c3fe1d39..32bf95e3ed1 100644 --- a/src/mesa/main/varray.c +++ b/src/mesa/main/varray.c @@ -127,8 +127,8 @@ update_array(struct gl_context *ctx, GLsizei elementSize; GLenum format = GL_RGBA; - if (ctx->API != API_OPENGLES) { - /* fixed point arrays / data is only allowed with OpenGL ES 1.x */ + if (ctx->API != API_OPENGLES && ctx->API != API_OPENGLES2) { + /* fixed point arrays / data is only allowed with OpenGL ES 1.x/2.0 */ legalTypesMask &= ~FIXED_BIT; } @@ -297,7 +297,8 @@ _mesa_TexCoordPointer(GLint size, GLenum type, GLsizei stride, const GLvoid *ptr) { GLbitfield legalTypes = (SHORT_BIT | INT_BIT | - HALF_BIT | FLOAT_BIT | DOUBLE_BIT); + HALF_BIT | FLOAT_BIT | DOUBLE_BIT | + FIXED_BIT); GET_CURRENT_CONTEXT(ctx); const GLuint unit = ctx->Array.ActiveTexture; ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); diff --git a/src/mesa/main/varray.h b/src/mesa/main/varray.h index fb96478cfc3..af9324134ec 100644 --- a/src/mesa/main/varray.h +++ b/src/mesa/main/varray.h @@ -28,7 +28,11 @@ #define VARRAY_H -#include "mtypes.h" +#include "glheader.h" +#include "mfeatures.h" + +struct gl_client_array; +struct gl_context; #if _HAVE_FULL_GL diff --git a/src/mesa/main/viewport.h b/src/mesa/main/viewport.h index ccfa37588b8..909ff92eee5 100644 --- a/src/mesa/main/viewport.h +++ b/src/mesa/main/viewport.h @@ -28,7 +28,8 @@ #define VIEWPORT_H #include "glheader.h" -#include "mtypes.h" + +struct gl_context; extern void GLAPIENTRY _mesa_Viewport(GLint x, GLint y, GLsizei width, GLsizei height); diff --git a/src/mesa/math/m_debug_clip.c b/src/mesa/math/m_debug_clip.c index e97afafac3c..bbad6ef024b 100644 --- a/src/mesa/math/m_debug_clip.c +++ b/src/mesa/math/m_debug_clip.c @@ -208,6 +208,24 @@ ALIGN16(static GLfloat, d[TEST_COUNT][4]); ALIGN16(static GLfloat, r[TEST_COUNT][4]); +/** + * Check if X, Y or Z component of the coordinate is close to W, in terms + * of the clip test. + */ +static GLboolean +xyz_close_to_w(const GLfloat c[4]) +{ + float k = 0.0001; + return (fabs(c[0] - c[3]) < k || + fabs(c[1] - c[3]) < k || + fabs(c[2] - c[3]) < k || + fabs(-c[0] - c[3]) < k || + fabs(-c[1] - c[3]) < k || + fabs(-c[2] - c[3]) < k); +} + + + static int test_cliptest_function( clip_func func, int np, int psize, long *cycles ) { @@ -281,9 +299,18 @@ static int test_cliptest_function( clip_func func, int np, } for ( i = 0 ; i < TEST_COUNT ; i++ ) { if ( dm[i] != rm[i] ) { + GLfloat *c = source->start; + STRIDE_F(c, source->stride * i); + if (psize == 4 && xyz_close_to_w(c)) { + /* The coordinate is very close to the clip plane. The clipmask + * may vary depending on code path, but that's OK. + */ + continue; + } printf( "\n-----------------------------\n" ); - printf( "(i = %i)\n", i ); - printf( "dm = 0x%02x rm = 0x%02x\n", dm[i], rm[i] ); + printf( "mask[%d] = 0x%02x ref mask[%d] = 0x%02x\n", i, dm[i], i,rm[i] ); + printf(" coord = %f, %f, %f, %f\n", + c[0], c[1], c[2], c[3]); return 0; } } diff --git a/src/mesa/program/arbprogparse.h b/src/mesa/program/arbprogparse.h index 08e25a1c168..4c0c3007205 100644 --- a/src/mesa/program/arbprogparse.h +++ b/src/mesa/program/arbprogparse.h @@ -26,7 +26,11 @@ #ifndef ARBPROGPARSE_H #define ARBPROGPARSE_H -#include "main/mtypes.h" +#include "main/glheader.h" + +struct gl_context; +struct gl_fragment_program; +struct gl_vertex_program; extern void _mesa_parse_arb_vertex_program(struct gl_context *ctx, GLenum target, diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 98da90d359a..b274a961b28 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -65,7 +65,7 @@ static int swizzle_for_size(int size); typedef struct ir_to_mesa_src_reg { ir_to_mesa_src_reg(int file, int index, const glsl_type *type) { - this->file = file; + this->file = (gl_register_file) file; this->index = index; if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) this->swizzle = swizzle_for_size(type->vector_elements); @@ -84,7 +84,7 @@ typedef struct ir_to_mesa_src_reg { this->reladdr = NULL; } - int file; /**< PROGRAM_* from Mesa */ + gl_register_file file; /**< PROGRAM_* from Mesa */ int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ int negate; /**< NEGATE_XYZW mask from mesa */ @@ -133,13 +133,13 @@ public: class variable_storage : public exec_node { public: - variable_storage(ir_variable *var, int file, int index) + variable_storage(ir_variable *var, gl_register_file file, int index) : file(file), index(index), var(var) { /* empty */ } - int file; + gl_register_file file; int index; ir_variable *var; /* variable that maps to this, if any */ }; @@ -2166,9 +2166,14 @@ ir_to_mesa_visitor::visit(ir_discard *ir) { struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; - assert(ir->condition == NULL); /* FINISHME */ + if (ir->condition) { + ir->condition->accept(this); + this->result.negate = ~this->result.negate; + ir_to_mesa_emit_op1(ir, OPCODE_KIL, ir_to_mesa_undef_dst, this->result); + } else { + ir_to_mesa_emit_op0(ir, OPCODE_KIL_NV); + } - ir_to_mesa_emit_op0(ir, OPCODE_KIL_NV); fp->UsesKill = GL_TRUE; } @@ -2239,7 +2244,7 @@ mesa_src_reg_from_ir_src_reg(ir_to_mesa_src_reg reg) struct prog_src_register mesa_reg; mesa_reg.File = reg.file; - assert(reg.index < (1 << INST_INDEX_BITS) - 1); + assert(reg.index < (1 << INST_INDEX_BITS)); mesa_reg.Index = reg.index; mesa_reg.Swizzle = reg.swizzle; mesa_reg.RelAddr = reg.reladdr != NULL; @@ -2609,8 +2614,9 @@ set_uniform_initializers(struct gl_context *ctx, /** * Convert a shader's GLSL IR into a Mesa gl_program. */ -struct gl_program * -get_mesa_program(struct gl_context *ctx, struct gl_shader_program *shader_program, +static struct gl_program * +get_mesa_program(struct gl_context *ctx, + struct gl_shader_program *shader_program, struct gl_shader *shader) { ir_to_mesa_visitor v; @@ -2633,6 +2639,10 @@ get_mesa_program(struct gl_context *ctx, struct gl_shader_program *shader_progra target = GL_FRAGMENT_PROGRAM_ARB; target_string = "fragment"; break; + case GL_GEOMETRY_SHADER: + target = GL_GEOMETRY_PROGRAM_NV; + target_string = "geometry"; + break; default: assert(!"should not be reached"); return NULL; @@ -2756,6 +2766,15 @@ get_mesa_program(struct gl_context *ctx, struct gl_shader_program *shader_progra mesa_inst++; i++; + + if (!shader_program->LinkStatus) + break; + } + + if (!shader_program->LinkStatus) { + free(mesa_instructions); + _mesa_reference_program(ctx, &shader->Program, NULL); + return NULL; } set_branchtargets(&v, mesa_instructions, num_instructions); @@ -2830,8 +2849,9 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) /* Lowering */ do_mat_op_to_vec(ir); - lower_instructions(ir, MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 - | LOG_TO_LOG2); + lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 + | LOG_TO_LOG2 + | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; @@ -2839,8 +2859,10 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) progress = lower_quadop_vector(ir, true) || progress; - if (options->EmitNoIfs) + if (options->EmitNoIfs) { + progress = lower_discard(ir) || progress; progress = do_if_to_cond_assign(ir) || progress; + } if (options->EmitNoNoise) progress = lower_noise(ir) || progress; @@ -2866,30 +2888,40 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { struct gl_program *linked_prog; - bool ok = true; if (prog->_LinkedShaders[i] == NULL) continue; linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); - switch (prog->_LinkedShaders[i]->Type) { - case GL_VERTEX_SHADER: - _mesa_reference_vertprog(ctx, &prog->VertexProgram, - (struct gl_vertex_program *)linked_prog); - ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, - linked_prog); - break; - case GL_FRAGMENT_SHADER: - _mesa_reference_fragprog(ctx, &prog->FragmentProgram, - (struct gl_fragment_program *)linked_prog); - ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB, - linked_prog); - break; - } - if (!ok) { - return GL_FALSE; + if (linked_prog) { + bool ok = true; + + switch (prog->_LinkedShaders[i]->Type) { + case GL_VERTEX_SHADER: + _mesa_reference_vertprog(ctx, &prog->VertexProgram, + (struct gl_vertex_program *)linked_prog); + ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, + linked_prog); + break; + case GL_FRAGMENT_SHADER: + _mesa_reference_fragprog(ctx, &prog->FragmentProgram, + (struct gl_fragment_program *)linked_prog); + ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB, + linked_prog); + break; + case GL_GEOMETRY_SHADER: + _mesa_reference_geomprog(ctx, &prog->GeometryProgram, + (struct gl_geometry_program *)linked_prog); + ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV, + linked_prog); + break; + } + if (!ok) { + return GL_FALSE; + } } + _mesa_reference_program(ctx, &linked_prog, NULL); } @@ -3007,6 +3039,7 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) prog->Varying = _mesa_new_parameter_list(); _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL); _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL); + _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL); if (prog->LinkStatus) { link_shaders(ctx, prog); diff --git a/src/mesa/program/prog_instruction.h b/src/mesa/program/prog_instruction.h index ca90de7ce1c..a383828e344 100644 --- a/src/mesa/program/prog_instruction.h +++ b/src/mesa/program/prog_instruction.h @@ -247,7 +247,7 @@ typedef enum prog_opcode { * Number of bits for the src/dst register Index field. * This limits the size of temp/uniform register files. */ -#define INST_INDEX_BITS 10 +#define INST_INDEX_BITS 11 /** diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c index 79c01020eb2..abebf392c0a 100644 --- a/src/mesa/program/prog_print.c +++ b/src/mesa/program/prog_print.c @@ -42,8 +42,8 @@ /** * Return string name for given program/register file. */ -static const char * -file_string(gl_register_file f, gl_prog_print_mode mode) +const char * +_mesa_register_file_name(gl_register_file f) { switch (f) { case PROGRAM_TEMPORARY: @@ -275,7 +275,8 @@ reg_string(gl_register_file f, GLint index, gl_prog_print_mode mode, switch (mode) { case PROG_PRINT_DEBUG: - sprintf(str, "%s[%s%d]", file_string(f, mode), addr, index); + sprintf(str, "%s[%s%d]", + _mesa_register_file_name(f), addr, index); if (hasIndex2) { int offset = strlen(str); const char *addr2 = relAddr2 ? "ADDR+" : ""; @@ -497,7 +498,7 @@ fprint_dst_reg(FILE * f, #if 0 fprintf(f, "%s[%d]%s", - file_string((gl_register_file) dstReg->File, mode), + _mesa_register_file_name((gl_register_file) dstReg->File), dstReg->Index, _mesa_writemask_string(dstReg->WriteMask)); #endif @@ -522,7 +523,7 @@ fprint_src_reg(FILE *f, abs); #if 0 fprintf(f, "%s[%d]%s", - file_string((gl_register_file) srcReg->File, mode), + _mesa_register_file_name((gl_register_file) srcReg->File), srcReg->Index, _mesa_swizzle_string(srcReg->Swizzle, srcReg->Negate, GL_FALSE)); @@ -615,8 +616,7 @@ _mesa_fprint_instruction_opt(FILE *f, if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) { fprintf(f, ", "); fprintf(f, "%s[%d]%s", - file_string((gl_register_file) inst->SrcReg[0].File, - mode), + _mesa_register_file_name((gl_register_file) inst->SrcReg[0].File), inst->SrcReg[0].Index, _mesa_swizzle_string(inst->SrcReg[0].Swizzle, inst->SrcReg[0].Negate, GL_FALSE)); @@ -632,8 +632,7 @@ _mesa_fprint_instruction_opt(FILE *f, fprintf(f, " "); fprint_dst_reg(f, &inst->DstReg, mode, prog); fprintf(f, ", %s[%d], %s", - file_string((gl_register_file) inst->SrcReg[0].File, - mode), + _mesa_register_file_name((gl_register_file) inst->SrcReg[0].File), inst->SrcReg[0].Index, _mesa_swizzle_string(inst->SrcReg[0].Swizzle, inst->SrcReg[0].Negate, GL_TRUE)); @@ -964,7 +963,6 @@ static void _mesa_fprint_parameter_list(FILE *f, const struct gl_program_parameter_list *list) { - const gl_prog_print_mode mode = PROG_PRINT_DEBUG; GLuint i; if (!list) @@ -978,7 +976,7 @@ _mesa_fprint_parameter_list(FILE *f, const GLfloat *v = list->ParameterValues[i]; fprintf(f, "param[%d] sz=%d %s %s = {%.3g, %.3g, %.3g, %.3g}", i, param->Size, - file_string(list->Parameters[i].Type, mode), + _mesa_register_file_name(list->Parameters[i].Type), param->Name, v[0], v[1], v[2], v[3]); if (param->Flags & PROG_PARAM_BIT_CENTROID) fprintf(f, " Centroid"); diff --git a/src/mesa/program/prog_print.h b/src/mesa/program/prog_print.h index f080b3fd2e6..d962087db38 100644 --- a/src/mesa/program/prog_print.h +++ b/src/mesa/program/prog_print.h @@ -47,6 +47,9 @@ typedef enum { } gl_prog_print_mode; +extern const char * +_mesa_register_file_name(gl_register_file f); + extern void _mesa_print_vp_inputs(GLbitfield inputs); diff --git a/src/mesa/program/prog_statevars.c b/src/mesa/program/prog_statevars.c index baac29ff0dd..c310acb01d4 100644 --- a/src/mesa/program/prog_statevars.c +++ b/src/mesa/program/prog_statevars.c @@ -572,6 +572,24 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index state[], value[3] = 0.0F; return; + case STATE_FB_WPOS_Y_TRANSFORM: + /* A driver may negate this conditional by using ZW swizzle + * instead of XY (based on e.g. some other state). */ + if (ctx->DrawBuffer->Name != 0) { + /* Identity (XY) followed by flipping Y upside down (ZW). */ + value[0] = 1.0F; + value[1] = 0.0F; + value[2] = -1.0F; + value[3] = (GLfloat) (ctx->DrawBuffer->Height - 1); + } else { + /* Flipping Y upside down (XY) followed by identity (ZW). */ + value[0] = -1.0F; + value[1] = (GLfloat) (ctx->DrawBuffer->Height - 1); + value[2] = 1.0F; + value[3] = 0.0F; + } + return; + case STATE_ROT_MATRIX_0: { const int unit = (int) state[2]; @@ -695,6 +713,7 @@ _mesa_program_state_flags(const gl_state_index state[STATE_LENGTH]) return _NEW_PIXEL; case STATE_FB_SIZE: + case STATE_FB_WPOS_Y_TRANSFORM: return _NEW_BUFFERS; default: @@ -900,6 +919,9 @@ append_token(char *dst, gl_state_index k) case STATE_FB_SIZE: append(dst, "FbSize"); break; + case STATE_FB_WPOS_Y_TRANSFORM: + append(dst, "FbWposYTransform"); + break; case STATE_ROT_MATRIX_0: append(dst, "rotMatrixRow0"); break; @@ -1046,7 +1068,9 @@ _mesa_program_state_string(const gl_state_index state[STATE_LENGTH]) * Loop over all the parameters in a parameter list. If the parameter * is a GL state reference, look up the current value of that state * variable and put it into the parameter's Value[4] array. - * This would be called at glBegin time when using a fragment program. + * Other parameter types never change or are explicitly set by the user + * with glUniform() or glProgramParameter(), etc. + * This would be called at glBegin time. */ void _mesa_load_state_parameters(struct gl_context *ctx, @@ -1057,12 +1081,10 @@ _mesa_load_state_parameters(struct gl_context *ctx, if (!paramList) return; - /*assert(ctx->Driver.NeedFlush == 0);*/ - for (i = 0; i < paramList->NumParameters; i++) { if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) { _mesa_fetch_state(ctx, - (gl_state_index *) paramList->Parameters[i].StateIndexes, + paramList->Parameters[i].StateIndexes, paramList->ParameterValues[i]); } } diff --git a/src/mesa/program/prog_statevars.h b/src/mesa/program/prog_statevars.h index 6e5be53630c..009ebde0012 100644 --- a/src/mesa/program/prog_statevars.h +++ b/src/mesa/program/prog_statevars.h @@ -117,6 +117,7 @@ typedef enum gl_state_index_ { STATE_PT_BIAS, /**< Pixel transfer RGBA bias */ STATE_SHADOW_AMBIENT, /**< ARB_shadow_ambient fail value; token[2] is texture unit index */ STATE_FB_SIZE, /**< (width-1, height-1, 0, 0) */ + STATE_FB_WPOS_Y_TRANSFORM, /**< (1, 0, -1, height-1) if a FBO is bound, (-1, height-1, 1, 0) otherwise */ STATE_ROT_MATRIX_0, /**< ATI_envmap_bumpmap, rot matrix row 0 */ STATE_ROT_MATRIX_1, /**< ATI_envmap_bumpmap, rot matrix row 1 */ STATE_INTERNAL_DRIVER /* first available state index for drivers (must be last) */ diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index 4cacde9aed1..9ffa49bb013 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -917,6 +917,103 @@ _mesa_find_free_register(const GLboolean used[], } + +/** + * Check if the given register index is valid (doesn't exceed implementation- + * dependent limits). + * \return GL_TRUE if OK, GL_FALSE if bad index + */ +GLboolean +_mesa_valid_register_index(const struct gl_context *ctx, + gl_shader_type shaderType, + gl_register_file file, GLint index) +{ + const struct gl_program_constants *c; + + switch (shaderType) { + case MESA_SHADER_VERTEX: + c = &ctx->Const.VertexProgram; + break; + case MESA_SHADER_FRAGMENT: + c = &ctx->Const.FragmentProgram; + break; + case MESA_SHADER_GEOMETRY: + c = &ctx->Const.GeometryProgram; + break; + default: + _mesa_problem(ctx, + "unexpected shader type in _mesa_valid_register_index()"); + return GL_FALSE; + } + + switch (file) { + case PROGRAM_UNDEFINED: + return GL_TRUE; /* XXX or maybe false? */ + + case PROGRAM_TEMPORARY: + return index >= 0 && index < c->MaxTemps; + + case PROGRAM_ENV_PARAM: + return index >= 0 && index < c->MaxEnvParams; + + case PROGRAM_LOCAL_PARAM: + return index >= 0 && index < c->MaxLocalParams; + + case PROGRAM_NAMED_PARAM: + return index >= 0 && index < c->MaxParameters; + + case PROGRAM_UNIFORM: + case PROGRAM_STATE_VAR: + /* aka constant buffer */ + return index >= 0 && index < c->MaxUniformComponents / 4; + + case PROGRAM_CONSTANT: + /* constant buffer w/ possible relative negative addressing */ + return (index > (int) c->MaxUniformComponents / -4 && + index < c->MaxUniformComponents / 4); + + case PROGRAM_INPUT: + if (index < 0) + return GL_FALSE; + + switch (shaderType) { + case MESA_SHADER_VERTEX: + return index < VERT_ATTRIB_GENERIC0 + c->MaxAttribs; + case MESA_SHADER_FRAGMENT: + return index < FRAG_ATTRIB_VAR0 + ctx->Const.MaxVarying; + case MESA_SHADER_GEOMETRY: + return index < GEOM_ATTRIB_VAR0 + ctx->Const.MaxVarying; + default: + return GL_FALSE; + } + + case PROGRAM_OUTPUT: + if (index < 0) + return GL_FALSE; + + switch (shaderType) { + case MESA_SHADER_VERTEX: + return index < VERT_RESULT_VAR0 + ctx->Const.MaxVarying; + case MESA_SHADER_FRAGMENT: + return index < FRAG_RESULT_DATA0 + ctx->Const.MaxDrawBuffers; + case MESA_SHADER_GEOMETRY: + return index < GEOM_RESULT_VAR0 + ctx->Const.MaxVarying; + default: + return GL_FALSE; + } + + case PROGRAM_ADDRESS: + return index >= 0 && index < c->MaxAddressRegs; + + default: + _mesa_problem(ctx, + "unexpected register file in _mesa_valid_register_index()"); + return GL_FALSE; + } +} + + + /** * "Post-process" a GPU program. This is intended to be used for debugging. * Example actions include no-op'ing instructions or changing instruction diff --git a/src/mesa/program/program.h b/src/mesa/program/program.h index 70cc2c3aaed..ce37b95bf82 100644 --- a/src/mesa/program/program.h +++ b/src/mesa/program/program.h @@ -165,6 +165,12 @@ extern GLint _mesa_find_free_register(const GLboolean used[], GLuint maxRegs, GLuint firstReg); + +extern GLboolean +_mesa_valid_register_index(const struct gl_context *ctx, + gl_shader_type shaderType, + gl_register_file file, GLint index); + extern void _mesa_postprocess_program(struct gl_context *ctx, struct gl_program *prog); diff --git a/src/mesa/program/symbol_table.c b/src/mesa/program/symbol_table.c index 09e7cb44ef3..004f1f8fa7b 100644 --- a/src/mesa/program/symbol_table.c +++ b/src/mesa/program/symbol_table.c @@ -336,12 +336,12 @@ _mesa_symbol_table_add_symbol(struct _mesa_symbol_table *table, check_symbol_table(table); if (hdr == NULL) { - hdr = calloc(1, sizeof(*hdr)); - hdr->name = strdup(name); + hdr = calloc(1, sizeof(*hdr)); + hdr->name = strdup(name); - hash_table_insert(table->ht, hdr, hdr->name); - hdr->next = table->hdr; - table->hdr = hdr; + hash_table_insert(table->ht, hdr, hdr->name); + hdr->next = table->hdr; + table->hdr = hdr; } check_symbol_table(table); @@ -376,6 +376,81 @@ _mesa_symbol_table_add_symbol(struct _mesa_symbol_table *table, } +int +_mesa_symbol_table_add_global_symbol(struct _mesa_symbol_table *table, + int name_space, const char *name, + void *declaration) +{ + struct symbol_header *hdr; + struct symbol *sym; + struct symbol *curr; + struct scope_level *top_scope; + + check_symbol_table(table); + + hdr = find_symbol(table, name); + + check_symbol_table(table); + + if (hdr == NULL) { + hdr = calloc(1, sizeof(*hdr)); + hdr->name = strdup(name); + + hash_table_insert(table->ht, hdr, hdr->name); + hdr->next = table->hdr; + table->hdr = hdr; + } + + check_symbol_table(table); + + /* If the symbol already exists in this namespace at this scope, it cannot + * be added to the table. + */ + for (sym = hdr->symbols + ; (sym != NULL) && (sym->name_space != name_space) + ; sym = sym->next_with_same_name) { + /* empty */ + } + + if (sym && sym->depth == 0) + return -1; + + /* Find the top-level scope */ + for (top_scope = table->current_scope + ; top_scope->next != NULL + ; top_scope = top_scope->next) { + /* empty */ + } + + sym = calloc(1, sizeof(*sym)); + sym->next_with_same_scope = top_scope->symbols; + sym->hdr = hdr; + sym->name_space = name_space; + sym->data = declaration; + + assert(sym->hdr == hdr); + + /* Since next_with_same_name is ordered by scope, we need to append the + * new symbol to the _end_ of the list. + */ + if (hdr->symbols == NULL) { + hdr->symbols = sym; + } else { + for (curr = hdr->symbols + ; curr->next_with_same_name != NULL + ; curr = curr->next_with_same_name) { + /* empty */ + } + curr->next_with_same_name = sym; + } + top_scope->symbols = sym; + + check_symbol_table(table); + return 0; +} + + + struct _mesa_symbol_table * _mesa_symbol_table_ctor(void) { diff --git a/src/mesa/program/symbol_table.h b/src/mesa/program/symbol_table.h index 1d570fc1a09..f9d91649bbc 100644 --- a/src/mesa/program/symbol_table.h +++ b/src/mesa/program/symbol_table.h @@ -33,6 +33,10 @@ extern void _mesa_symbol_table_pop_scope(struct _mesa_symbol_table *table); extern int _mesa_symbol_table_add_symbol(struct _mesa_symbol_table *symtab, int name_space, const char *name, void *declaration); +extern int _mesa_symbol_table_add_global_symbol( + struct _mesa_symbol_table *symtab, int name_space, const char *name, + void *declaration); + extern int _mesa_symbol_table_symbol_scope(struct _mesa_symbol_table *table, int name_space, const char *name); diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c index 8d1dc792bc8..f1d08a3e166 100644 --- a/src/mesa/state_tracker/st_atom_constbuf.c +++ b/src/mesa/state_tracker/st_atom_constbuf.c @@ -66,6 +66,11 @@ void st_upload_constants( struct st_context *st, if (params && params->NumParameters) { const uint paramBytes = params->NumParameters * sizeof(GLfloat) * 4; + /* Update the constants which come from fixed-function state, such as + * transformation matrices, fog factors, etc. The rest of the values in + * the parameters list are explicitly set by the user with glUniform, + * glProgramParameter(), etc. + */ _mesa_load_state_parameters(st->ctx, params); /* We always need to get a new buffer, to keep the drivers simple and diff --git a/src/mesa/state_tracker/st_atom_framebuffer.c b/src/mesa/state_tracker/st_atom_framebuffer.c index 036bc60049a..2843b7b1764 100644 --- a/src/mesa/state_tracker/st_atom_framebuffer.c +++ b/src/mesa/state_tracker/st_atom_framebuffer.c @@ -51,7 +51,7 @@ static void update_renderbuffer_surface(struct st_context *st, struct st_renderbuffer *strb) { - struct pipe_screen *screen = st->pipe->screen; + struct pipe_context *pipe = st->pipe; struct pipe_resource *resource = strb->rtt->pt; int rtt_width = strb->Base.Width; int rtt_height = strb->Base.Height; @@ -65,15 +65,19 @@ update_renderbuffer_surface(struct st_context *st, for (level = 0; level <= resource->last_level; level++) { if (u_minify(resource->width0, level) == rtt_width && u_minify(resource->height0, level) == rtt_height) { + struct pipe_surface surf_tmpl; + memset(&surf_tmpl, 0, sizeof(surf_tmpl)); + surf_tmpl.format = resource->format; + surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; + surf_tmpl.u.tex.level = level; + surf_tmpl.u.tex.first_layer = strb->rtt_face + strb->rtt_slice; + surf_tmpl.u.tex.last_layer = strb->rtt_face + strb->rtt_slice; pipe_surface_reference(&strb->surface, NULL); - strb->surface = screen->get_tex_surface(screen, - resource, - strb->rtt_face, - level, - strb->rtt_slice, - PIPE_BIND_RENDER_TARGET); + strb->surface = pipe->create_surface(pipe, + resource, + &surf_tmpl); #if 0 printf("-- alloc new surface %d x %d into tex %p\n", strb->surface->width, strb->surface->height, diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index 6be03376d01..378b30e57cc 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -122,8 +122,8 @@ load_color_map_texture(struct gl_context *ctx, struct pipe_resource *pt) uint i, j; transfer = pipe_get_transfer(st_context(ctx)->pipe, - pt, 0, 0, 0, PIPE_TRANSFER_WRITE, - 0, 0, texSize, texSize); + pt, 0, 0, PIPE_TRANSFER_WRITE, + 0, 0, texSize, texSize); dest = (uint *) pipe_transfer_map(pipe, transfer); /* Pack four 1D maps into a 2D texture: diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c index f147d768084..b67068df373 100644 --- a/src/mesa/state_tracker/st_atom_sampler.c +++ b/src/mesa/state_tracker/st_atom_sampler.c @@ -121,6 +121,18 @@ static void xlate_border_color(const GLfloat *colorIn, GLenum baseFormat, GLfloat *colorOut) { switch (baseFormat) { + case GL_RED: + colorOut[0] = colorIn[0]; + colorOut[1] = 0.0F; + colorOut[2] = 0.0F; + colorOut[3] = 1.0F; + break; + case GL_RG: + colorOut[0] = colorIn[0]; + colorOut[1] = colorIn[1]; + colorOut[2] = 0.0F; + colorOut[3] = 1.0F; + break; case GL_RGB: colorOut[0] = colorIn[0]; colorOut[1] = colorIn[1]; diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index 6c5caf42e35..a76ae92dc3d 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -138,10 +138,10 @@ accum_accum(struct st_context *st, GLfloat value, debug_printf("%s: fallback processing\n", __FUNCTION__); color_trans = pipe_get_transfer(st->pipe, - color_strb->texture, - 0, 0, 0, - PIPE_TRANSFER_READ, xpos, ypos, - width, height); + color_strb->texture, + 0, 0, + PIPE_TRANSFER_READ, xpos, ypos, + width, height); buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); @@ -187,9 +187,9 @@ accum_load(struct st_context *st, GLfloat value, debug_printf("%s: fallback processing\n", __FUNCTION__); color_trans = pipe_get_transfer(st->pipe, color_strb->texture, - 0, 0, 0, - PIPE_TRANSFER_READ, xpos, ypos, - width, height); + 0, 0, + PIPE_TRANSFER_READ, xpos, ypos, + width, height); buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); @@ -241,12 +241,12 @@ accum_return(struct gl_context *ctx, GLfloat value, usage = PIPE_TRANSFER_READ_WRITE; else usage = PIPE_TRANSFER_WRITE; - + color_trans = pipe_get_transfer(st_context(ctx)->pipe, - color_strb->texture, 0, 0, 0, - usage, - xpos, ypos, - width, height); + color_strb->texture, 0, 0, + usage, + xpos, ypos, + width, height); if (usage & PIPE_TRANSFER_READ) pipe_get_tile_rgba(pipe, color_trans, 0, 0, width, height, buf); diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 3c0ee6c2883..f08697fe23b 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -283,9 +283,9 @@ make_bitmap_texture(struct gl_context *ctx, GLsizei width, GLsizei height, return NULL; } - transfer = pipe_get_transfer(st->pipe, pt, 0, 0, 0, - PIPE_TRANSFER_WRITE, - 0, 0, width, height); + transfer = pipe_get_transfer(st->pipe, pt, 0, 0, + PIPE_TRANSFER_WRITE, + 0, 0, width, height); dest = pipe_transfer_map(pipe, transfer); @@ -585,10 +585,10 @@ create_cache_trans(struct st_context *st) /* Map the texture transfer. * Subsequent glBitmap calls will write into the texture image. */ - cache->trans = pipe_get_transfer(st->pipe, cache->texture, 0, 0, 0, - PIPE_TRANSFER_WRITE, 0, 0, - BITMAP_CACHE_WIDTH, - BITMAP_CACHE_HEIGHT); + cache->trans = pipe_get_transfer(st->pipe, cache->texture, 0, 0, + PIPE_TRANSFER_WRITE, 0, 0, + BITMAP_CACHE_WIDTH, + BITMAP_CACHE_HEIGHT); cache->buffer = pipe_transfer_map(pipe, cache->trans); /* init image to all 0xff */ diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c index af41835326a..06cee520b37 100644 --- a/src/mesa/state_tracker/st_cb_blit.c +++ b/src/mesa/state_tracker/st_cb_blit.c @@ -115,17 +115,14 @@ st_BlitFramebuffer(struct gl_context *ctx, st_texture_object(srcAtt->Texture); struct st_renderbuffer *dstRb = st_renderbuffer(drawFB->_ColorDrawBuffers[0]); - struct pipe_subresource srcSub; struct pipe_surface *dstSurf = dstRb->surface; if (!srcObj->pt) return; - srcSub.face = srcAtt->CubeMapFace; - srcSub.level = srcAtt->TextureLevel; - - util_blit_pixels(st->blit, srcObj->pt, srcSub, - srcX0, srcY0, srcX1, srcY1, srcAtt->Zoffset, + util_blit_pixels(st->blit, srcObj->pt, srcAtt->TextureLevel, + srcX0, srcY0, srcX1, srcY1, + srcAtt->Zoffset + srcAtt->CubeMapFace, dstSurf, dstX0, dstY0, dstX1, dstY1, 0.0, pFilter); } @@ -136,14 +133,11 @@ st_BlitFramebuffer(struct gl_context *ctx, st_renderbuffer(drawFB->_ColorDrawBuffers[0]); struct pipe_surface *srcSurf = srcRb->surface; struct pipe_surface *dstSurf = dstRb->surface; - struct pipe_subresource srcSub; - - srcSub.face = srcSurf->face; - srcSub.level = srcSurf->level; util_blit_pixels(st->blit, - srcRb->texture, srcSub, srcX0, srcY0, srcX1, srcY1, - srcSurf->zslice, + srcRb->texture, srcSurf->u.tex.level, + srcX0, srcY0, srcX1, srcY1, + srcSurf->u.tex.first_layer, dstSurf, dstX0, dstY0, dstX1, dstY1, 0.0, pFilter); } @@ -176,11 +170,11 @@ st_BlitFramebuffer(struct gl_context *ctx, /* Blitting depth and stencil values between combined * depth/stencil buffers. This is the ideal case for such buffers. */ - util_blit_pixels(st->blit, srcDepthRb->texture, - u_subresource(srcDepthRb->surface->face, - srcDepthRb->surface->level), + util_blit_pixels(st->blit, + srcDepthRb->texture, + srcDepthRb->surface->u.tex.level, srcX0, srcY0, srcX1, srcY1, - srcDepthRb->surface->zslice, + srcDepthRb->surface->u.tex.first_layer, dstDepthSurf, dstX0, dstY0, dstX1, dstY1, 0.0, pFilter); } @@ -189,10 +183,9 @@ st_BlitFramebuffer(struct gl_context *ctx, if (mask & GL_DEPTH_BUFFER_BIT) { util_blit_pixels(st->blit, srcDepthRb->texture, - u_subresource(srcDepthRb->surface->face, - srcDepthRb->surface->level), + srcDepthRb->surface->u.tex.level, srcX0, srcY0, srcX1, srcY1, - srcDepthRb->surface->zslice, + srcDepthRb->surface->u.tex.first_layer, dstDepthSurf, dstX0, dstY0, dstX1, dstY1, 0.0, pFilter); } diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c index 27540c36ce7..8b60f9040d0 100644 --- a/src/mesa/state_tracker/st_cb_bufferobjects.c +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c @@ -211,6 +211,13 @@ st_bufferobj_data(struct gl_context *ctx, /** + * Dummy data whose's pointer is used for zero size buffers or ranges. + */ +static long st_bufferobj_zero_length = 0; + + + +/** * Called via glMapBufferARB(). */ static void * @@ -233,10 +240,16 @@ st_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access, break; } - obj->Pointer = pipe_buffer_map(st_context(ctx)->pipe, - st_obj->buffer, - flags, - &st_obj->transfer); + /* Handle zero-size buffers here rather than in drivers */ + if (obj->Size == 0) { + obj->Pointer = &st_bufferobj_zero_length; + } + else { + obj->Pointer = pipe_buffer_map(st_context(ctx)->pipe, + st_obj->buffer, + flags, + &st_obj->transfer); + } if (obj->Pointer) { obj->Offset = 0; @@ -247,13 +260,6 @@ st_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access, /** - * Dummy data whose's pointer is used for zero length ranges. - */ -static long -st_bufferobj_zero_length_range = 0; - - -/** * Called via glMapBufferRange(). */ static void * @@ -273,6 +279,12 @@ st_bufferobj_map_range(struct gl_context *ctx, GLenum target, if (access & GL_MAP_FLUSH_EXPLICIT_BIT) flags |= PIPE_TRANSFER_FLUSH_EXPLICIT; + + if (access & GL_MAP_INVALIDATE_RANGE_BIT) + flags |= PIPE_TRANSFER_DISCARD; + + if (access & GL_MAP_INVALIDATE_BUFFER_BIT) + flags |= PIPE_TRANSFER_DISCARD; if (access & GL_MAP_UNSYNCHRONIZED_BIT) flags |= PIPE_TRANSFER_UNSYNCHRONIZED; @@ -293,7 +305,7 @@ st_bufferobj_map_range(struct gl_context *ctx, GLenum target, * length range from the pipe driver. */ if (!length) { - obj->Pointer = &st_bufferobj_zero_length_range; + obj->Pointer = &st_bufferobj_zero_length; } else { obj->Pointer = pipe_buffer_map_range(pipe, diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index d80c068ea81..c9786024575 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -427,9 +427,9 @@ make_texture(struct st_context *st, /* we'll do pixel transfer in a fragment shader */ ctx->_ImageTransferState = 0x0; - transfer = pipe_get_transfer(st->pipe, pt, 0, 0, 0, - PIPE_TRANSFER_WRITE, 0, 0, - width, height); + transfer = pipe_get_transfer(st->pipe, pt, 0, 0, + PIPE_TRANSFER_WRITE, 0, 0, + width, height); /* map texture transfer */ dest = pipe_transfer_map(pipe, transfer); @@ -763,9 +763,9 @@ draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, else usage = PIPE_TRANSFER_WRITE; - pt = pipe_get_transfer(st_context(ctx)->pipe, strb->texture, 0, 0, 0, - usage, x, y, - width, height); + pt = pipe_get_transfer(st_context(ctx)->pipe, strb->texture, 0, 0, + usage, x, y, + width, height); stmap = pipe_transfer_map(pipe, pt); @@ -1025,15 +1025,15 @@ copy_stencil_pixels(struct gl_context *ctx, GLint srcx, GLint srcy, usage = PIPE_TRANSFER_READ_WRITE; else usage = PIPE_TRANSFER_WRITE; - + if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { dsty = rbDraw->Base.Height - dsty - height; } ptDraw = pipe_get_transfer(st_context(ctx)->pipe, - rbDraw->texture, 0, 0, 0, - usage, dstx, dsty, - width, height); + rbDraw->texture, 0, 0, + usage, dstx, dsty, + width, height); assert(util_format_get_blockwidth(ptDraw->resource->format) == 1); assert(util_format_get_blockheight(ptDraw->resource->format) == 1); @@ -1209,27 +1209,24 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, /* Make temporary texture which is a copy of the src region. */ if (srcFormat == texFormat) { - struct pipe_subresource srcsub, dstsub; - srcsub.face = 0; - srcsub.level = 0; - dstsub.face = 0; - dstsub.level = 0; - /* copy source framebuffer surface into mipmap/texture */ + struct pipe_box src_box; + u_box_2d(readX, readY, readW, readH, &src_box); + /* copy source framebuffer surface into mipmap/texture */ pipe->resource_copy_region(pipe, pt, /* dest tex */ - dstsub, + 0, pack.SkipPixels, pack.SkipRows, 0, /* dest pos */ rbRead->texture, /* src tex */ - srcsub, - readX, readY, 0, readW, readH); /* src region */ + 0, + &src_box); } else { /* CPU-based fallback/conversion */ struct pipe_transfer *ptRead = - pipe_get_transfer(st->pipe, rbRead->texture, 0, 0, 0, - PIPE_TRANSFER_READ, - readX, readY, readW, readH); + pipe_get_transfer(st->pipe, rbRead->texture, 0, 0, + PIPE_TRANSFER_READ, + readX, readY, readW, readH); struct pipe_transfer *ptTex; enum pipe_transfer_usage transfer_usage; @@ -1241,8 +1238,8 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, else transfer_usage = PIPE_TRANSFER_WRITE; - ptTex = pipe_get_transfer(st->pipe, pt, 0, 0, 0, transfer_usage, - 0, 0, width, height); + ptTex = pipe_get_transfer(st->pipe, pt, 0, 0, transfer_usage, + 0, 0, width, height); /* copy image from ptRead surface to ptTex surface */ if (type == GL_COLOR) { diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 9425f07aee6..cd718a31a14 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -52,6 +52,7 @@ #include "util/u_format.h" #include "util/u_inlines.h" +#include "util/u_surface.h" /** @@ -65,9 +66,11 @@ st_renderbuffer_alloc_storage(struct gl_context * ctx, struct gl_renderbuffer *r GLuint width, GLuint height) { struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; struct pipe_screen *screen = st->pipe->screen; struct st_renderbuffer *strb = st_renderbuffer(rb); enum pipe_format format; + struct pipe_surface surf_tmpl; if (strb->format != PIPE_FORMAT_NONE) format = strb->format; @@ -113,6 +116,7 @@ st_renderbuffer_alloc_storage(struct gl_context * ctx, struct gl_renderbuffer *r template.width0 = width; template.height0 = height; template.depth0 = 1; + template.array_size = 1; template.last_level = 0; template.nr_samples = rb->NumSamples; if (util_format_is_depth_or_stencil(format)) { @@ -120,7 +124,7 @@ st_renderbuffer_alloc_storage(struct gl_context * ctx, struct gl_renderbuffer *r } else { template.bind = (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_RENDER_TARGET); + PIPE_BIND_RENDER_TARGET); } strb->texture = screen->resource_create(screen, &template); @@ -128,10 +132,11 @@ st_renderbuffer_alloc_storage(struct gl_context * ctx, struct gl_renderbuffer *r if (!strb->texture) return FALSE; - strb->surface = screen->get_tex_surface(screen, - strb->texture, - 0, 0, 0, - template.bind); + memset(&surf_tmpl, 0, sizeof(surf_tmpl)); + u_surface_default_template(&surf_tmpl, strb->texture, template.bind); + strb->surface = pipe->create_surface(pipe, + strb->texture, + &surf_tmpl); if (strb->surface) { assert(strb->surface->texture); assert(strb->surface->format); @@ -327,12 +332,12 @@ st_render_texture(struct gl_context *ctx, { struct st_context *st = st_context(ctx); struct pipe_context *pipe = st->pipe; - struct pipe_screen *screen = pipe->screen; struct st_renderbuffer *strb; struct gl_renderbuffer *rb; struct pipe_resource *pt = st_get_texobj_resource(att->Texture); struct st_texture_object *stObj; const struct gl_texture_image *texImage; + struct pipe_surface surf_tmpl; /* When would this fail? Perhaps assert? */ if (!pt) @@ -381,12 +386,15 @@ st_render_texture(struct gl_context *ctx, assert(strb->rtt_level <= strb->texture->last_level); /* new surface for rendering into the texture */ - strb->surface = screen->get_tex_surface(screen, - strb->texture, - strb->rtt_face, - strb->rtt_level, - strb->rtt_slice, - PIPE_BIND_RENDER_TARGET); + memset(&surf_tmpl, 0, sizeof(surf_tmpl)); + surf_tmpl.format = strb->texture->format; + surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; + surf_tmpl.u.tex.level = strb->rtt_level; + surf_tmpl.u.tex.first_layer = strb->rtt_face + strb->rtt_slice; + surf_tmpl.u.tex.last_layer = strb->rtt_face + strb->rtt_slice; + strb->surface = pipe->create_surface(pipe, + strb->texture, + &surf_tmpl); strb->format = pt->format; diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index bcd46ac9d54..0507be74578 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -80,7 +80,7 @@ st_read_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, /* Create a read transfer from the renderbuffer's texture */ pt = pipe_get_transfer(pipe, strb->texture, - 0, 0, 0, /* face, level, zslice */ + 0, 0, PIPE_TRANSFER_READ, x, y, width, height); @@ -236,7 +236,7 @@ st_fast_readpixels(struct gl_context *ctx, struct st_renderbuffer *strb, } trans = pipe_get_transfer(pipe, strb->texture, - 0, 0, 0, /* face, level, zslice */ + 0, 0, PIPE_TRANSFER_READ, x, y, width, height); if (!trans) { @@ -328,7 +328,7 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GLsizei h { struct st_context *st = st_context(ctx); struct pipe_context *pipe = st->pipe; - GLfloat temp[MAX_WIDTH][4]; + GLfloat (*temp)[4]; const GLbitfield transferOps = ctx->_ImageTransferState; GLsizei i, j; GLint yStep, dfStride; @@ -381,6 +381,13 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GLsizei h return; } + /* allocate temp pixel row buffer */ + temp = (GLfloat (*)[4]) malloc(4 * width * sizeof(GLfloat)); + if (!temp) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glReadPixels"); + return; + } + if (format == GL_RGBA && type == GL_FLOAT) { /* write tile(row) directly into user's buffer */ df = (GLfloat *) _mesa_image_address2d(&clippedPacking, dest, width, @@ -400,7 +407,7 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GLsizei h /* Create a read transfer from the renderbuffer's texture */ trans = pipe_get_transfer(pipe, strb->texture, - 0, 0, 0, /* face, level, zslice */ + 0, 0, PIPE_TRANSFER_READ, x, y, width, height); @@ -533,6 +540,8 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GLsizei h } } + free(temp); + pipe->transfer_destroy(pipe, trans); _mesa_unmap_pbo_dest(ctx, &clippedPacking); diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 15e69e1fa07..866426a7549 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -63,7 +63,7 @@ #include "util/u_surface.h" #include "util/u_sampler.h" #include "util/u_math.h" - +#include "util/u_box.h" #define DBG if (0) printf @@ -431,7 +431,7 @@ compress_with_blit(struct gl_context * ctx, struct pipe_resource *src_tex; struct pipe_sampler_view view_templ; struct pipe_sampler_view *src_view; - struct pipe_surface *dst_surface; + struct pipe_surface *dst_surface, surf_tmpl; struct pipe_transfer *tex_xfer; void *map; @@ -441,9 +441,13 @@ compress_with_blit(struct gl_context * ctx, } /* get destination surface (in the compressed texture) */ - dst_surface = screen->get_tex_surface(screen, stImage->pt, - stImage->face, stImage->level, 0, - 0 /* flags */); + memset(&surf_tmpl, 0, sizeof(surf_tmpl)); + surf_tmpl.format = stImage->pt->format; + surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; + surf_tmpl.u.tex.level = stImage->level; + surf_tmpl.u.tex.first_layer = stImage->face; + surf_tmpl.u.tex.last_layer = stImage->face; + dst_surface = pipe->create_surface(pipe, stImage->pt, &surf_tmpl); if (!dst_surface) { /* can't render into this format (or other problem) */ return GL_FALSE; @@ -464,6 +468,7 @@ compress_with_blit(struct gl_context * ctx, templ.width0 = width; templ.height0 = height; templ.depth0 = 1; + templ.array_size = 1; templ.last_level = 0; templ.usage = PIPE_USAGE_DEFAULT; templ.bind = PIPE_BIND_SAMPLER_VIEW; @@ -475,9 +480,9 @@ compress_with_blit(struct gl_context * ctx, /* Put user's tex data into the temporary texture */ tex_xfer = pipe_get_transfer(st_context(ctx)->pipe, src_tex, - 0, 0, 0, /* face, level are zero */ - PIPE_TRANSFER_WRITE, - 0, 0, width, height); /* x, y, w, h */ + 0, 0, /* layer, level are zero */ + PIPE_TRANSFER_WRITE, + 0, 0, width, height); /* x, y, w, h */ map = pipe_transfer_map(pipe, tex_xfer); _mesa_texstore(ctx, 2, GL_RGBA, mesa_format, @@ -857,7 +862,6 @@ decompress_with_blit(struct gl_context * ctx, GLenum target, GLint level, { struct st_context *st = st_context(ctx); struct pipe_context *pipe = st->pipe; - struct pipe_screen *screen = pipe->screen; struct st_texture_image *stImage = st_texture_image(texImage); struct st_texture_object *stObj = st_texture_object(texObj); struct pipe_sampler_view *src_view = @@ -871,7 +875,7 @@ decompress_with_blit(struct gl_context * ctx, GLenum target, GLint level, PIPE_BIND_TRANSFER_READ); /* create temp / dest surface */ - if (!util_create_rgba_surface(screen, width, height, bind, + if (!util_create_rgba_surface(pipe, width, height, bind, &dst_texture, &dst_surface)) { _mesa_problem(ctx, "util_create_rgba_surface() failed " "in decompress_with_blit()"); @@ -891,9 +895,9 @@ decompress_with_blit(struct gl_context * ctx, GLenum target, GLint level, /* map the dst_surface so we can read from it */ tex_xfer = pipe_get_transfer(st_context(ctx)->pipe, - dst_texture, 0, 0, 0, - PIPE_TRANSFER_READ, - 0, 0, width, height); + dst_texture, 0, 0, + PIPE_TRANSFER_READ, + 0, 0, width, height); pixels = _mesa_map_pbo_dest(ctx, &ctx->Pack, pixels); @@ -1310,7 +1314,7 @@ fallback_copy_texsubimage(struct gl_context *ctx, GLenum target, GLint level, struct pipe_transfer *src_trans; GLvoid *texDest; enum pipe_transfer_usage transfer_usage; - + if (ST_DEBUG & DEBUG_FALLBACK) debug_printf("%s: fallback processing\n", __FUNCTION__); @@ -1321,11 +1325,11 @@ fallback_copy_texsubimage(struct gl_context *ctx, GLenum target, GLint level, } src_trans = pipe_get_transfer(st_context(ctx)->pipe, - strb->texture, - 0, 0, 0, - PIPE_TRANSFER_READ, - srcX, srcY, - width, height); + strb->texture, + 0, 0, + PIPE_TRANSFER_READ, + srcX, srcY, + width, height); if ((baseFormat == GL_DEPTH_COMPONENT || baseFormat == GL_DEPTH_STENCIL) && @@ -1334,7 +1338,8 @@ fallback_copy_texsubimage(struct gl_context *ctx, GLenum target, GLint level, else transfer_usage = PIPE_TRANSFER_WRITE; - texDest = st_texture_image_map(st, stImage, 0, transfer_usage, + /* XXX this used to ignore destZ param */ + texDest = st_texture_image_map(st, stImage, destZ, transfer_usage, destX, destY, width, height); if (baseFormat == GL_DEPTH_COMPONENT || @@ -1592,27 +1597,23 @@ st_copy_texsubimage(struct gl_context *ctx, if (matching_base_formats && src_format == dest_format && - !do_flip) + !do_flip) { /* use surface_copy() / blit */ - struct pipe_subresource subdst, subsrc; - subdst.face = stImage->face; - subdst.level = stImage->level; - subsrc.face = strb->surface->face; - subsrc.level = strb->surface->level; + struct pipe_box src_box; + u_box_2d_zslice(srcX, srcY, strb->surface->u.tex.first_layer, + width, height, &src_box); /* for resource_copy_region(), y=0=top, always */ pipe->resource_copy_region(pipe, /* dest */ stImage->pt, - subdst, - destX, destY, destZ, + stImage->level, + destX, destY, destZ + stImage->face, /* src */ strb->texture, - subsrc, - srcX, srcY, strb->surface->zslice, - /* size */ - width, height); + strb->surface->u.tex.level, + &src_box); use_fallback = GL_FALSE; } else if (format_writemask && @@ -1628,12 +1629,16 @@ st_copy_texsubimage(struct gl_context *ctx, 0)) { /* draw textured quad to do the copy */ GLint srcY0, srcY1; - struct pipe_subresource subsrc; + struct pipe_surface surf_tmpl; + memset(&surf_tmpl, 0, sizeof(surf_tmpl)); + surf_tmpl.format = stImage->pt->format; + surf_tmpl.usage = PIPE_BIND_RENDER_TARGET; + surf_tmpl.u.tex.level = stImage->level; + surf_tmpl.u.tex.first_layer = stImage->face + destZ; + surf_tmpl.u.tex.last_layer = stImage->face + destZ; - dest_surface = screen->get_tex_surface(screen, stImage->pt, - stImage->face, stImage->level, - destZ, - PIPE_BIND_RENDER_TARGET); + dest_surface = pipe->create_surface(pipe, stImage->pt, + &surf_tmpl); if (do_flip) { srcY1 = strb->Base.Height - srcY - height; @@ -1643,15 +1648,13 @@ st_copy_texsubimage(struct gl_context *ctx, srcY0 = srcY; srcY1 = srcY0 + height; } - subsrc.face = strb->surface->face; - subsrc.level = strb->surface->level; util_blit_pixels_writemask(st->blit, strb->texture, - subsrc, + strb->surface->u.tex.level, srcX, srcY0, srcX + width, srcY1, - strb->surface->zslice, + strb->surface->u.tex.first_layer, dest_surface, destX, destY, destX + width, destY + height, @@ -1852,8 +1855,9 @@ st_finalize_texture(struct gl_context *ctx, * will match. */ if (firstImage->pt && + stObj->pt && firstImage->pt != stObj->pt && - firstImage->pt->last_level >= stObj->lastLevel) { + firstImage->pt->last_level >= stObj->pt->last_level) { pipe_resource_reference(&stObj->pt, firstImage->pt); pipe_sampler_view_reference(&stObj->sampler_view, NULL); } diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index d0dcdd4e29b..6ec9c699a26 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -170,6 +170,11 @@ struct st_context *st_create_context(gl_api api, struct pipe_context *pipe, struct gl_context *shareCtx = share ? share->ctx : NULL; struct dd_function_table funcs; + /* Sanity checks */ + assert(MESA_SHADER_VERTEX == PIPE_SHADER_VERTEX); + assert(MESA_SHADER_FRAGMENT == PIPE_SHADER_FRAGMENT); + assert(MESA_SHADER_GEOMETRY == PIPE_SHADER_GEOMETRY); + memset(&funcs, 0, sizeof(funcs)); st_init_driver_functions(&funcs); @@ -239,8 +244,8 @@ void st_destroy_context( struct st_context *st ) pipe->set_index_buffer(pipe, NULL); for (i = 0; i < PIPE_SHADER_TYPES; i++) { - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, NULL); - pipe_resource_reference(&st->state.constants[PIPE_SHADER_VERTEX], NULL); + pipe->set_constant_buffer(pipe, i, 0, NULL); + pipe_resource_reference(&st->state.constants[i], NULL); } _mesa_delete_program_cache(st->ctx, st->pixel_xfer.cache); diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 132749130af..930b60ade2d 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -67,7 +67,7 @@ void st_init_limits(struct st_context *st) { struct pipe_screen *screen = st->pipe->screen; struct gl_constants *c = &st->ctx->Const; - unsigned i; + gl_shader_type sh; c->MaxTextureLevels = _min(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS), @@ -137,11 +137,12 @@ void st_init_limits(struct st_context *st) /* Quads always follow GL provoking rules. */ c->QuadsFollowProvokingVertexConvention = GL_FALSE; - for(i = 0; i < MESA_SHADER_TYPES; ++i) { - struct gl_shader_compiler_options *options = &st->ctx->ShaderCompilerOptions[i]; + for (sh = 0; sh < MESA_SHADER_TYPES; ++sh) { + struct gl_shader_compiler_options *options = + &st->ctx->ShaderCompilerOptions[sh]; struct gl_program_constants *pc; - switch(i) - { + + switch (sh) { case PIPE_SHADER_FRAGMENT: pc = &c->FragmentProgram; break; @@ -156,36 +157,37 @@ void st_init_limits(struct st_context *st) continue; } - pc->MaxNativeInstructions = screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_INSTRUCTIONS); - pc->MaxNativeAluInstructions = screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS); - pc->MaxNativeTexInstructions = screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS); - pc->MaxNativeTexIndirections = screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS); - pc->MaxNativeAttribs = screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_INPUTS); - pc->MaxNativeTemps = screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_TEMPS); - pc->MaxNativeAddressRegs = screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_ADDRS); - pc->MaxNativeParameters = screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_CONSTS); + pc->MaxNativeInstructions = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_INSTRUCTIONS); + pc->MaxNativeAluInstructions = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS); + pc->MaxNativeTexInstructions = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS); + pc->MaxNativeTexIndirections = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS); + pc->MaxNativeAttribs = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_INPUTS); + pc->MaxNativeTemps = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_TEMPS); + pc->MaxNativeAddressRegs = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_ADDRS); + pc->MaxNativeParameters = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_CONSTS); + pc->MaxUniformComponents = 4 * MIN2(pc->MaxNativeParameters, MAX_UNIFORMS); options->EmitNoNoise = TRUE; /* TODO: make these more fine-grained if anyone needs it */ - options->EmitNoIfs = !screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH); - options->EmitNoFunctions = !screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH); - options->EmitNoLoops = !screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH); - options->EmitNoMainReturn = !screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH); + options->EmitNoIfs = !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH); + options->EmitNoLoops = !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH); + options->EmitNoFunctions = !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_SUBROUTINES); + options->EmitNoMainReturn = !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_SUBROUTINES); - options->EmitNoCont = !screen->get_shader_param(screen, i, PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED); + options->EmitNoCont = !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED); - options->EmitNoIndirectInput = !screen->get_shader_param(screen, i, + options->EmitNoIndirectInput = !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR); - options->EmitNoIndirectOutput = !screen->get_shader_param(screen, i, + options->EmitNoIndirectOutput = !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR); - options->EmitNoIndirectTemp = !screen->get_shader_param(screen, i, + options->EmitNoIndirectTemp = !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR); - options->EmitNoIndirectUniform = !screen->get_shader_param(screen, i, + options->EmitNoIndirectUniform = !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_INDIRECT_CONST_ADDR); if(options->EmitNoLoops) - options->MaxUnrollIterations = MIN2(screen->get_shader_param(screen, i, PIPE_SHADER_CAP_MAX_INSTRUCTIONS), 65536); + options->MaxUnrollIterations = MIN2(screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_INSTRUCTIONS), 65536); } /* PIPE_CAP_MAX_FS_INPUTS specifies the number of COLORn + GENERICn inputs @@ -298,6 +300,8 @@ void st_init_extensions(struct st_context *st) ctx->Extensions.ARB_vertex_shader = GL_TRUE; ctx->Extensions.ARB_shader_objects = GL_TRUE; ctx->Extensions.ARB_shading_language_100 = GL_TRUE; + ctx->Extensions.ARB_explicit_attrib_location = GL_TRUE; + ctx->Extensions.EXT_separate_shader_objects = GL_TRUE; } if (screen->get_param(screen, PIPE_CAP_TEXTURE_MIRROR_REPEAT) > 0) { diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 1fc4f40488f..c5f6008a222 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -79,11 +79,15 @@ st_render_mipmap(struct st_context *st, const uint face = _mesa_tex_target_to_face(target); assert(psv->texture == stObj->pt); - assert(target != GL_TEXTURE_3D); /* not done yet */ +#if 0 + assert(target != GL_TEXTURE_3D); /* implemented but untested */ +#endif /* check if we can render in the texture's format */ - if (!screen->is_format_supported(screen, psv->format, psv->texture->target, 0, - PIPE_BIND_RENDER_TARGET, 0)) { + /* XXX should probably kill this and always use util_gen_mipmap + since this implements a sw fallback as well */ + if (!screen->is_format_supported(screen, psv->format, psv->texture->target, + 0, PIPE_BIND_RENDER_TARGET, 0)) { return FALSE; } @@ -161,12 +165,12 @@ fallback_generate_mipmap(struct gl_context *ctx, GLenum target, struct pipe_resource *pt = st_get_texobj_resource(texObj); const uint baseLevel = texObj->BaseLevel; const uint lastLevel = pt->last_level; - const uint face = _mesa_tex_target_to_face(target), zslice = 0; + const uint face = _mesa_tex_target_to_face(target); uint dstLevel; GLenum datatype; GLuint comps; GLboolean compressed; - + if (ST_DEBUG & DEBUG_FALLBACK) debug_printf("%s: fallback processing\n", __FUNCTION__); @@ -198,16 +202,15 @@ fallback_generate_mipmap(struct gl_context *ctx, GLenum target, ubyte *dstData; int srcStride, dstStride; - srcTrans = pipe_get_transfer(st_context(ctx)->pipe, pt, face, - srcLevel, zslice, - PIPE_TRANSFER_READ, 0, 0, - srcWidth, srcHeight); - + srcTrans = pipe_get_transfer(st_context(ctx)->pipe, pt, srcLevel, + face, + PIPE_TRANSFER_READ, 0, 0, + srcWidth, srcHeight); - dstTrans = pipe_get_transfer(st_context(ctx)->pipe, pt, face, - dstLevel, zslice, - PIPE_TRANSFER_WRITE, 0, 0, - dstWidth, dstHeight); + dstTrans = pipe_get_transfer(st_context(ctx)->pipe, pt, dstLevel, + face, + PIPE_TRANSFER_WRITE, 0, 0, + dstWidth, dstHeight); srcData = (ubyte *) pipe_transfer_map(pipe, srcTrans); dstData = (ubyte *) pipe_transfer_map(pipe, dstTrans); @@ -215,6 +218,8 @@ fallback_generate_mipmap(struct gl_context *ctx, GLenum target, srcStride = srcTrans->stride / util_format_get_blocksize(srcTrans->resource->format); dstStride = dstTrans->stride / util_format_get_blocksize(dstTrans->resource->format); + /* this cannot work correctly for 3d since it does + not respect layerStride. */ if (compressed) { const enum pipe_format format = pt->format; const uint bw = util_format_get_blockwidth(format); @@ -365,6 +370,12 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target, pt = stObj->pt; } + else { + /* Make sure that the base texture image data is present in the + * texture buffer. + */ + st_finalize_texture(ctx, st->pipe, texObj); + } assert(pt->last_level >= lastLevel); @@ -372,6 +383,8 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target, * use the software fallback. */ if (!st_render_mipmap(st, target, stObj, baseLevel, lastLevel)) { + /* since the util code actually also has a fallback, should + probably make it never fail and kill this */ fallback_generate_mipmap(ctx, target, texObj); } diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c index 05733e818ea..0307b48978b 100644 --- a/src/mesa/state_tracker/st_manager.c +++ b/src/mesa/state_tracker/st_manager.c @@ -34,6 +34,7 @@ #include "util/u_pointer.h" #include "util/u_inlines.h" #include "util/u_atomic.h" +#include "util/u_surface.h" #include "main/mtypes.h" #include "main/context.h" @@ -142,7 +143,7 @@ buffer_index_to_attachment(gl_buffer_index index) static void st_framebuffer_validate(struct st_framebuffer *stfb, struct st_context *st) { - struct pipe_screen *screen = st->pipe->screen; + struct pipe_context *pipe = st->pipe; struct pipe_resource *textures[ST_ATTACHMENT_COUNT]; uint width, height; unsigned i; @@ -160,7 +161,7 @@ st_framebuffer_validate(struct st_framebuffer *stfb, struct st_context *st) for (i = 0; i < stfb->num_statts; i++) { struct st_renderbuffer *strb; - struct pipe_surface *ps; + struct pipe_surface *ps, surf_tmpl; gl_buffer_index idx; if (!textures[i]) @@ -179,8 +180,10 @@ st_framebuffer_validate(struct st_framebuffer *stfb, struct st_context *st) continue; } - ps = screen->get_tex_surface(screen, textures[i], 0, 0, 0, - PIPE_BIND_RENDER_TARGET); + memset(&surf_tmpl, 0, sizeof(surf_tmpl)); + u_surface_default_template(&surf_tmpl, textures[i], + PIPE_BIND_RENDER_TARGET); + ps = pipe->create_surface(pipe, textures[i], &surf_tmpl); if (ps) { pipe_surface_reference(&strb->surface, ps); pipe_resource_reference(&strb->texture, ps->texture); @@ -813,6 +816,7 @@ st_manager_flush_frontbuffer(struct st_context *st) /** * Return the surface of an EGLImage. + * FIXME: I think this should operate on resources, not surfaces */ struct pipe_surface * st_manager_get_egl_image_surface(struct st_context *st, @@ -821,7 +825,7 @@ st_manager_get_egl_image_surface(struct st_context *st, struct st_manager *smapi = (struct st_manager *) st->iface.st_context_private; struct st_egl_image stimg; - struct pipe_surface *ps; + struct pipe_surface *ps, surf_tmpl; if (!smapi || !smapi->get_egl_image) return NULL; @@ -830,8 +834,13 @@ st_manager_get_egl_image_surface(struct st_context *st, if (!smapi->get_egl_image(smapi, eglimg, &stimg)) return NULL; - ps = smapi->screen->get_tex_surface(smapi->screen, - stimg.texture, stimg.face, stimg.level, stimg.zslice, usage); + memset(&surf_tmpl, 0, sizeof(surf_tmpl)); + surf_tmpl.format = stimg.texture->format; + surf_tmpl.usage = usage; + surf_tmpl.u.tex.level = stimg.level; + surf_tmpl.u.tex.first_layer = stimg.layer; + surf_tmpl.u.tex.last_layer = stimg.layer; + ps = st->pipe->create_surface(st->pipe, stimg.texture, &surf_tmpl); pipe_resource_reference(&stimg.texture, NULL); return ps; diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index c5c239b2c95..f848462310e 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -760,10 +760,13 @@ emit_adjusted_wpos( struct st_translate *t, /** * Emit the TGSI instructions for inverting the WPOS y coordinate. + * This code is unavoidable because it also depends on whether + * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). */ static void -emit_inverted_wpos( struct st_translate *t, - const struct gl_program *program ) +emit_wpos_inversion( struct st_translate *t, + const struct gl_program *program, + boolean invert) { struct ureg_program *ureg = t->ureg; @@ -771,17 +774,17 @@ emit_inverted_wpos( struct st_translate *t, * Need to replace instances of INPUT[WPOS] with temp T * where T = INPUT[WPOS] by y is inverted. */ - static const gl_state_index winSizeState[STATE_LENGTH] - = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 }; + static const gl_state_index wposTransformState[STATE_LENGTH] + = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0 }; /* XXX: note we are modifying the incoming shader here! Need to * do this before emitting the constant decls below, or this * will be missed: */ - unsigned winHeightConst = _mesa_add_state_reference(program->Parameters, - winSizeState); + unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, + wposTransformState); - struct ureg_src winsize = ureg_DECL_constant( ureg, winHeightConst ); + struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); struct ureg_dst wpos_temp; struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; @@ -794,12 +797,23 @@ emit_inverted_wpos( struct st_translate *t, ureg_MOV( ureg, wpos_temp, wpos_input ); } - /* SUB wpos_temp.y, winsize_const, wpos_input - */ - ureg_SUB( ureg, - ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), - winsize, - wpos_input); + if (invert) { + /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy + */ + ureg_MAD( ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), + wpos_input, + ureg_scalar(wpostrans, 0), + ureg_scalar(wpostrans, 1)); + } else { + /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww + */ + ureg_MAD( ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), + wpos_input, + ureg_scalar(wpostrans, 2), + ureg_scalar(wpostrans, 3)); + } /* Use wpos_temp as position input from here on: */ @@ -861,8 +875,7 @@ emit_wpos(struct st_context *st, /* we invert after adjustment so that we avoid the MOV to temporary, * and reuse the adjustment ADD instead */ - if (invert) - emit_inverted_wpos(t, program); + emit_wpos_inversion(t, program, invert); } diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 76799287fe1..aae2913c202 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -216,6 +216,8 @@ st_translate_vertex_program(struct st_context *st, return NULL; } + vpv->key = *key; + vpv->num_inputs = stvp->num_inputs; num_outputs = stvp->num_outputs; if (key->passthrough_edgeflags) { diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index c6cf2ba061b..155ea39f18c 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -84,6 +84,7 @@ st_texture_create(struct st_context *st, pt.width0 = width0; pt.height0 = height0; pt.depth0 = depth0; + pt.array_size = (target == PIPE_TEXTURE_CUBE ? 6 : 1); pt.usage = PIPE_USAGE_DEFAULT; pt.bind = bind; pt.flags = 0; @@ -136,7 +137,7 @@ st_texture_match_image(const struct pipe_resource *pt, */ GLubyte * st_texture_image_map(struct st_context *st, struct st_texture_image *stImage, - GLuint zoffset, enum pipe_transfer_usage usage, + GLuint zoffset, enum pipe_transfer_usage usage, GLuint x, GLuint y, GLuint w, GLuint h) { struct pipe_context *pipe = st->pipe; @@ -144,9 +145,9 @@ st_texture_image_map(struct st_context *st, struct st_texture_image *stImage, DBG("%s \n", __FUNCTION__); - stImage->transfer = pipe_get_transfer(st->pipe, pt, stImage->face, - stImage->level, zoffset, - usage, x, y, w, h); + stImage->transfer = pipe_get_transfer(st->pipe, pt, stImage->level, + stImage->face + zoffset, + usage, x, y, w, h); if (stImage->transfer) return pipe_transfer_map(pipe, stImage->transfer); @@ -219,10 +220,10 @@ st_texture_image_data(struct st_context *st, DBG("%s\n", __FUNCTION__); for (i = 0; i < depth; i++) { - dst_transfer = pipe_get_transfer(st->pipe, dst, face, level, i, - PIPE_TRANSFER_WRITE, 0, 0, - u_minify(dst->width0, level), - u_minify(dst->height0, level)); + dst_transfer = pipe_get_transfer(st->pipe, dst, level, face + i, + PIPE_TRANSFER_WRITE, 0, 0, + u_minify(dst->width0, level), + u_minify(dst->height0, level)); st_surface_data(pipe, dst_transfer, 0, 0, /* dstx, dsty */ @@ -230,7 +231,7 @@ st_texture_image_data(struct st_context *st, src_row_stride, 0, 0, /* source x, y */ u_minify(dst->width0, level), - u_minify(dst->height0, level)); /* width, height */ + u_minify(dst->height0, level)); /* width, height */ pipe->transfer_destroy(pipe, dst_transfer); @@ -245,14 +246,10 @@ st_texture_image_data(struct st_context *st, static void print_center_pixel(struct pipe_context *pipe, struct pipe_resource *src) { - struct pipe_subresource rect; struct pipe_transfer *xfer; struct pipe_box region; ubyte *map; - rect.face = 0; - rect.level = 0; - region.x = src->width0 / 2; region.y = src->height0 / 2; region.z = 0; @@ -260,7 +257,7 @@ print_center_pixel(struct pipe_context *pipe, struct pipe_resource *src) region.height = 1; region.depth = 1; - xfer = pipe->get_transfer(pipe, src, rect, PIPE_TRANSFER_READ, ®ion); + xfer = pipe->get_transfer(pipe, src, 0, PIPE_TRANSFER_READ, ®ion); map = pipe->transfer_map(pipe, xfer); printf("center pixel: %d %d %d %d\n", map[0], map[1], map[2], map[3]); @@ -282,22 +279,26 @@ st_texture_image_copy(struct pipe_context *pipe, struct pipe_resource *src, GLuint srcLevel, GLuint face) { - GLuint width = u_minify(dst->width0, dstLevel); - GLuint height = u_minify(dst->height0, dstLevel); - GLuint depth = u_minify(dst->depth0, dstLevel); - struct pipe_subresource dstsub, srcsub; + GLuint width = u_minify(dst->width0, dstLevel); + GLuint height = u_minify(dst->height0, dstLevel); + GLuint depth = u_minify(dst->depth0, dstLevel); + struct pipe_box src_box; GLuint i; assert(u_minify(src->width0, srcLevel) == width); assert(u_minify(src->height0, srcLevel) == height); assert(u_minify(src->depth0, srcLevel) == depth); - dstsub.face = face; - dstsub.level = dstLevel; - srcsub.face = face; - srcsub.level = srcLevel; + src_box.x = 0; + src_box.y = 0; + src_box.width = width; + src_box.height = height; + src_box.depth = 1; /* Loop over 3D image slices */ - for (i = 0; i < depth; i++) { + /* could (and probably should) use "true" 3d box here - + but drivers can't quite handle it yet */ + for (i = face; i < face + depth; i++) { + src_box.z = i; if (0) { print_center_pixel(pipe, src); @@ -305,12 +306,11 @@ st_texture_image_copy(struct pipe_context *pipe, pipe->resource_copy_region(pipe, dst, - dstsub, + dstLevel, 0, 0, i,/* destX, Y, Z */ src, - srcsub, - 0, 0, i,/* srcX, Y, Z */ - width, height); + srcLevel, + &src_box); } } diff --git a/src/mesa/swrast/s_blend.c b/src/mesa/swrast/s_blend.c index 1a550c445d3..d61baba0f33 100644 --- a/src/mesa/swrast/s_blend.c +++ b/src/mesa/swrast/s_blend.c @@ -819,7 +819,16 @@ static void blend_general(struct gl_context *ctx, GLuint n, const GLubyte mask[], void *src, const void *dst, GLenum chanType) { - GLfloat rgbaF[MAX_WIDTH][4], destF[MAX_WIDTH][4]; + GLfloat (*rgbaF)[4], (*destF)[4]; + + rgbaF = (GLfloat (*)[4]) malloc(4 * n * sizeof(GLfloat)); + destF = (GLfloat (*)[4]) malloc(4 * n * sizeof(GLfloat)); + if (!rgbaF || !destF) { + free(rgbaF); + free(destF); + _mesa_error(ctx, GL_OUT_OF_MEMORY, "blending"); + return; + } if (chanType == GL_UNSIGNED_BYTE) { GLubyte (*rgba)[4] = (GLubyte (*)[4]) src; @@ -883,6 +892,9 @@ blend_general(struct gl_context *ctx, GLuint n, const GLubyte mask[], blend_general_float(ctx, n, mask, (GLfloat (*)[4]) src, (GLfloat (*)[4]) dst, chanType); } + + free(rgbaF); + free(destF); } diff --git a/src/mesa/swrast/s_drawpix.c b/src/mesa/swrast/s_drawpix.c index 4e9b5307cc7..4d0666898b4 100644 --- a/src/mesa/swrast/s_drawpix.c +++ b/src/mesa/swrast/s_drawpix.c @@ -720,13 +720,16 @@ _swrast_DrawPixels( struct gl_context *ctx, if (swrast->NewState) _swrast_validate_derived( ctx ); - pixels = _mesa_map_pbo_source(ctx, unpack, pixels); - if (!pixels) { - swrast_render_finish(ctx); - _mesa_set_vp_override(ctx, save_vp_override); - return; - } + pixels = _mesa_map_pbo_source(ctx, unpack, pixels); + if (!pixels) { + swrast_render_finish(ctx); + _mesa_set_vp_override(ctx, save_vp_override); + return; + } + /* + * By time we get here, all error checking should have been done. + */ switch (format) { case GL_STENCIL_INDEX: draw_stencil_pixels( ctx, x, y, width, height, type, unpack, pixels ); @@ -734,27 +737,12 @@ _swrast_DrawPixels( struct gl_context *ctx, case GL_DEPTH_COMPONENT: draw_depth_pixels( ctx, x, y, width, height, type, unpack, pixels ); break; - case GL_COLOR_INDEX: - case GL_RED: - case GL_GREEN: - case GL_BLUE: - case GL_ALPHA: - case GL_LUMINANCE: - case GL_LUMINANCE_ALPHA: - case GL_RGB: - case GL_BGR: - case GL_RGBA: - case GL_BGRA: - case GL_ABGR_EXT: - draw_rgba_pixels(ctx, x, y, width, height, format, type, unpack, pixels); - break; case GL_DEPTH_STENCIL_EXT: - draw_depth_stencil_pixels(ctx, x, y, width, height, - type, unpack, pixels); + draw_depth_stencil_pixels(ctx, x, y, width, height, type, unpack, pixels); break; default: - _mesa_problem(ctx, "unexpected format 0x%x in _swrast_DrawPixels", format); - /* don't return yet, clean-up */ + /* all other formats should be color formats */ + draw_rgba_pixels(ctx, x, y, width, height, format, type, unpack, pixels); } swrast_render_finish(ctx); diff --git a/src/mesa/swrast/s_readpix.c b/src/mesa/swrast/s_readpix.c index 5e6356c0d54..9fe0752a37f 100644 --- a/src/mesa/swrast/s_readpix.c +++ b/src/mesa/swrast/s_readpix.c @@ -476,49 +476,33 @@ _swrast_ReadPixels( struct gl_context *ctx, _swrast_validate_derived( ctx ); /* Do all needed clipping here, so that we can forget about it later */ - if (!_mesa_clip_readpixels(ctx, &x, &y, &width, &height, &clippedPacking)) { - /* The ReadPixels region is totally outside the window bounds */ - swrast_render_finish(ctx); - return; - } - - pixels = _mesa_map_pbo_dest(ctx, &clippedPacking, pixels); - if (!pixels) - return; - - switch (format) { - case GL_STENCIL_INDEX: - read_stencil_pixels(ctx, x, y, width, height, type, pixels, + if (_mesa_clip_readpixels(ctx, &x, &y, &width, &height, &clippedPacking)) { + + pixels = _mesa_map_pbo_dest(ctx, &clippedPacking, pixels); + + if (pixels) { + switch (format) { + case GL_STENCIL_INDEX: + read_stencil_pixels(ctx, x, y, width, height, type, pixels, + &clippedPacking); + break; + case GL_DEPTH_COMPONENT: + read_depth_pixels(ctx, x, y, width, height, type, pixels, + &clippedPacking); + break; + case GL_DEPTH_STENCIL_EXT: + read_depth_stencil_pixels(ctx, x, y, width, height, type, pixels, + &clippedPacking); + break; + default: + /* all other formats should be color formats */ + read_rgba_pixels(ctx, x, y, width, height, format, type, pixels, &clippedPacking); - break; - case GL_DEPTH_COMPONENT: - read_depth_pixels(ctx, x, y, width, height, type, pixels, - &clippedPacking); - break; - case GL_RED: - case GL_GREEN: - case GL_BLUE: - case GL_ALPHA: - case GL_RGB: - case GL_LUMINANCE: - case GL_LUMINANCE_ALPHA: - case GL_RGBA: - case GL_BGR: - case GL_BGRA: - case GL_ABGR_EXT: - read_rgba_pixels(ctx, x, y, width, height, - format, type, pixels, &clippedPacking); - break; - case GL_DEPTH_STENCIL_EXT: - read_depth_stencil_pixels(ctx, x, y, width, height, - type, pixels, &clippedPacking); - break; - default: - _mesa_problem(ctx, "unexpected format 0x%x in _swrast_ReadPixels", format); - /* don't return yet, clean-up */ + } + + _mesa_unmap_pbo_dest(ctx, &clippedPacking); + } } swrast_render_finish(ctx); - - _mesa_unmap_pbo_dest(ctx, &clippedPacking); } diff --git a/src/mesa/swrast/s_texcombine.c b/src/mesa/swrast/s_texcombine.c index 1836d074aee..99c44413fbe 100644 --- a/src/mesa/swrast/s_texcombine.c +++ b/src/mesa/swrast/s_texcombine.c @@ -86,10 +86,28 @@ texture_combine( struct gl_context *ctx, GLuint unit, GLuint n, const GLfloat scaleA = (GLfloat) (1 << combine->ScaleShiftA); const GLuint numArgsRGB = combine->_NumArgsRGB; const GLuint numArgsA = combine->_NumArgsA; - GLfloat ccolor[MAX_COMBINER_TERMS][MAX_WIDTH][4]; /* temp color buffers */ - GLfloat rgba[MAX_WIDTH][4]; + float4_array ccolor[4], rgba; GLuint i, term; + /* alloc temp pixel buffers */ + rgba = (float4_array) malloc(4 * n * sizeof(GLfloat)); + if (!rgba) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine"); + return; + } + + for (i = 0; i < numArgsRGB || i < numArgsA; i++) { + ccolor[i] = (float4_array) malloc(4 * n * sizeof(GLfloat)); + if (!ccolor[i]) { + while (i) { + free(ccolor[i]); + i--; + } + _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine"); + return; + } + } + for (i = 0; i < n; i++) { rgba[i][RCOMP] = CHAN_TO_FLOAT(rgbaChan[i][RCOMP]); rgba[i][GCOMP] = CHAN_TO_FLOAT(rgbaChan[i][GCOMP]); @@ -163,7 +181,7 @@ texture_combine( struct gl_context *ctx, GLuint unit, GLuint n, const GLuint srcUnit = srcRGB - GL_TEXTURE0; ASSERT(srcUnit < ctx->Const.MaxTextureUnits); if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled) - return; + goto end; argRGB[term] = get_texel_array(swrast, srcUnit); } } @@ -253,7 +271,7 @@ texture_combine( struct gl_context *ctx, GLuint unit, GLuint n, const GLuint srcUnit = srcA - GL_TEXTURE0; ASSERT(srcUnit < ctx->Const.MaxTextureUnits); if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled) - return; + goto end; argA[term] = get_texel_array(swrast, srcUnit); } } @@ -411,7 +429,7 @@ texture_combine( struct gl_context *ctx, GLuint unit, GLuint n, rgba[i][BCOMP] = 0.0; rgba[i][ACOMP] = 1.0; } - return; /* no alpha processing */ + goto end; /* no alpha processing */ default: _mesa_problem(ctx, "invalid combine mode"); } @@ -519,6 +537,12 @@ texture_combine( struct gl_context *ctx, GLuint unit, GLuint n, UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][BCOMP], rgba[i][BCOMP]); UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][ACOMP], rgba[i][ACOMP]); } + +end: + for (i = 0; i < numArgsRGB || i < numArgsA; i++) { + free(ccolor[i]); + } + free(rgba); } @@ -559,9 +583,16 @@ void _swrast_texture_span( struct gl_context *ctx, SWspan *span ) { SWcontext *swrast = SWRAST_CONTEXT(ctx); - GLfloat primary_rgba[MAX_WIDTH][4]; + float4_array primary_rgba; GLuint unit; + primary_rgba = (float4_array) malloc(span->end * 4 * sizeof(GLfloat)); + + if (!primary_rgba) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_span"); + return; + } + ASSERT(span->end <= MAX_WIDTH); /* @@ -706,4 +737,6 @@ _swrast_texture_span( struct gl_context *ctx, SWspan *span ) span->array->rgba ); } } + + free(primary_rgba); } diff --git a/src/mesa/swrast/s_texfilter.c b/src/mesa/swrast/s_texfilter.c index ec281776d0d..539d878ddb4 100644 --- a/src/mesa/swrast/s_texfilter.c +++ b/src/mesa/swrast/s_texfilter.c @@ -1371,6 +1371,7 @@ opt_sample_rgb_2d(struct gl_context *ctx, rgba[k][RCOMP] = UBYTE_TO_FLOAT(texel[2]); rgba[k][GCOMP] = UBYTE_TO_FLOAT(texel[1]); rgba[k][BCOMP] = UBYTE_TO_FLOAT(texel[0]); + rgba[k][ACOMP] = 1.0F; } } diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h index 7b8da8eb843..79f76655354 100644 --- a/src/mesa/vbo/vbo.h +++ b/src/mesa/vbo/vbo.h @@ -40,7 +40,8 @@ struct _mesa_prim { GLuint begin:1; GLuint end:1; GLuint weak:1; - GLuint pad:20; + GLuint no_current_update:1; + GLuint pad:19; GLuint start; GLuint count; @@ -128,42 +129,42 @@ void vbo_set_draw_func(struct gl_context *ctx, vbo_draw_func func); void GLAPIENTRY -_vbo_Color4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a); +_es_Color4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a); void GLAPIENTRY -_vbo_Normal3f(GLfloat x, GLfloat y, GLfloat z); +_es_Normal3f(GLfloat x, GLfloat y, GLfloat z); void GLAPIENTRY -_vbo_MultiTexCoord4f(GLenum target, GLfloat s, GLfloat t, GLfloat r, GLfloat q); +_es_MultiTexCoord4f(GLenum target, GLfloat s, GLfloat t, GLfloat r, GLfloat q); void GLAPIENTRY -_vbo_Materialfv(GLenum face, GLenum pname, const GLfloat *params); +_es_Materialfv(GLenum face, GLenum pname, const GLfloat *params); void GLAPIENTRY -_vbo_Materialf(GLenum face, GLenum pname, GLfloat param); +_es_Materialf(GLenum face, GLenum pname, GLfloat param); void GLAPIENTRY -_vbo_VertexAttrib4f(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +_es_VertexAttrib4f(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); void GLAPIENTRY -_vbo_VertexAttrib1f(GLuint indx, GLfloat x); +_es_VertexAttrib1f(GLuint indx, GLfloat x); void GLAPIENTRY -_vbo_VertexAttrib1fv(GLuint indx, const GLfloat* values); +_es_VertexAttrib1fv(GLuint indx, const GLfloat* values); void GLAPIENTRY -_vbo_VertexAttrib2f(GLuint indx, GLfloat x, GLfloat y); +_es_VertexAttrib2f(GLuint indx, GLfloat x, GLfloat y); void GLAPIENTRY -_vbo_VertexAttrib2fv(GLuint indx, const GLfloat* values); +_es_VertexAttrib2fv(GLuint indx, const GLfloat* values); void GLAPIENTRY -_vbo_VertexAttrib3f(GLuint indx, GLfloat x, GLfloat y, GLfloat z); +_es_VertexAttrib3f(GLuint indx, GLfloat x, GLfloat y, GLfloat z); void GLAPIENTRY -_vbo_VertexAttrib3fv(GLuint indx, const GLfloat* values); +_es_VertexAttrib3fv(GLuint indx, const GLfloat* values); void GLAPIENTRY -_vbo_VertexAttrib4fv(GLuint indx, const GLfloat* values); +_es_VertexAttrib4fv(GLuint indx, const GLfloat* values); #endif diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c index 9b2d59f9e4c..fb981ccc3bc 100644 --- a/src/mesa/vbo/vbo_exec_api.c +++ b/src/mesa/vbo/vbo_exec_api.c @@ -369,8 +369,6 @@ static void vbo_exec_fixup_vertex( struct gl_context *ctx, } -#if FEATURE_beginend - /* */ #define ATTR( A, N, V0, V1, V2, V3 ) \ @@ -411,7 +409,7 @@ do { \ #include "vbo_attrib_tmp.h" - +#if FEATURE_beginend #if FEATURE_evaluators @@ -706,30 +704,6 @@ static void vbo_exec_vtxfmt_init( struct vbo_exec_context *exec ) #else /* FEATURE_beginend */ -#define ATTR( A, N, V0, V1, V2, V3 ) \ -do { \ - struct vbo_exec_context *exec = &vbo_context(ctx)->exec; \ - \ - /* FLUSH_UPDATE_CURRENT needs to be set manually */ \ - exec->ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT; \ - \ - if (exec->vtx.active_sz[A] != N) \ - vbo_exec_fixup_vertex(ctx, A, N); \ - \ - { \ - GLfloat *dest = exec->vtx.attrptr[A]; \ - if (N>0) dest[0] = V0; \ - if (N>1) dest[1] = V1; \ - if (N>2) dest[2] = V2; \ - if (N>3) dest[3] = V3; \ - } \ -} while (0) - -#define ERROR() _mesa_error( ctx, GL_INVALID_ENUM, __FUNCTION__ ) -#define TAG(x) vbo_##x - -#include "vbo_attrib_tmp.h" - static void vbo_exec_vtxfmt_init( struct vbo_exec_context *exec ) { /* silence warnings */ @@ -998,35 +972,35 @@ static void reset_attrfv( struct vbo_exec_context *exec ) void GLAPIENTRY -_vbo_Color4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a) +_es_Color4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a) { vbo_Color4f(r, g, b, a); } void GLAPIENTRY -_vbo_Normal3f(GLfloat x, GLfloat y, GLfloat z) +_es_Normal3f(GLfloat x, GLfloat y, GLfloat z) { vbo_Normal3f(x, y, z); } void GLAPIENTRY -_vbo_MultiTexCoord4f(GLenum target, GLfloat s, GLfloat t, GLfloat r, GLfloat q) +_es_MultiTexCoord4f(GLenum target, GLfloat s, GLfloat t, GLfloat r, GLfloat q) { vbo_MultiTexCoord4f(target, s, t, r, q); } void GLAPIENTRY -_vbo_Materialfv(GLenum face, GLenum pname, const GLfloat *params) +_es_Materialfv(GLenum face, GLenum pname, const GLfloat *params) { vbo_Materialfv(face, pname, params); } void GLAPIENTRY -_vbo_Materialf(GLenum face, GLenum pname, GLfloat param) +_es_Materialf(GLenum face, GLenum pname, GLfloat param) { GLfloat p[4]; p[0] = param; @@ -1035,57 +1009,71 @@ _vbo_Materialf(GLenum face, GLenum pname, GLfloat param) } +/** + * A special version of glVertexAttrib4f that does not treat index 0 as + * VBO_ATTRIB_POS. + */ +static void +VertexAttrib4f_nopos(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w) +{ + GET_CURRENT_CONTEXT(ctx); + if (index < MAX_VERTEX_GENERIC_ATTRIBS) + ATTR(VBO_ATTRIB_GENERIC0 + index, 4, x, y, z, w); + else + ERROR(); +} + void GLAPIENTRY -_vbo_VertexAttrib4f(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w) +_es_VertexAttrib4f(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w) { - vbo_VertexAttrib4fARB(index, x, y, z, w); + VertexAttrib4f_nopos(index, x, y, z, w); } void GLAPIENTRY -_vbo_VertexAttrib1f(GLuint indx, GLfloat x) +_es_VertexAttrib1f(GLuint indx, GLfloat x) { - vbo_VertexAttrib1fARB(indx, x); + VertexAttrib4f_nopos(indx, x, 0.0f, 0.0f, 1.0f); } void GLAPIENTRY -_vbo_VertexAttrib1fv(GLuint indx, const GLfloat* values) +_es_VertexAttrib1fv(GLuint indx, const GLfloat* values) { - vbo_VertexAttrib1fvARB(indx, values); + VertexAttrib4f_nopos(indx, values[0], 0.0f, 0.0f, 1.0f); } void GLAPIENTRY -_vbo_VertexAttrib2f(GLuint indx, GLfloat x, GLfloat y) +_es_VertexAttrib2f(GLuint indx, GLfloat x, GLfloat y) { - vbo_VertexAttrib2fARB(indx, x, y); + VertexAttrib4f_nopos(indx, x, y, 0.0f, 1.0f); } void GLAPIENTRY -_vbo_VertexAttrib2fv(GLuint indx, const GLfloat* values) +_es_VertexAttrib2fv(GLuint indx, const GLfloat* values) { - vbo_VertexAttrib2fvARB(indx, values); + VertexAttrib4f_nopos(indx, values[0], values[1], 0.0f, 1.0f); } void GLAPIENTRY -_vbo_VertexAttrib3f(GLuint indx, GLfloat x, GLfloat y, GLfloat z) +_es_VertexAttrib3f(GLuint indx, GLfloat x, GLfloat y, GLfloat z) { - vbo_VertexAttrib3fARB(indx, x, y, z); + VertexAttrib4f_nopos(indx, x, y, z, 1.0f); } void GLAPIENTRY -_vbo_VertexAttrib3fv(GLuint indx, const GLfloat* values) +_es_VertexAttrib3fv(GLuint indx, const GLfloat* values) { - vbo_VertexAttrib3fvARB(indx, values); + VertexAttrib4f_nopos(indx, values[0], values[1], values[2], 1.0f); } void GLAPIENTRY -_vbo_VertexAttrib4fv(GLuint indx, const GLfloat* values) +_es_VertexAttrib4fv(GLuint indx, const GLfloat* values) { - vbo_VertexAttrib4fvARB(indx, values); + VertexAttrib4f_nopos(indx, values[0], values[1], values[2], values[3]); } diff --git a/src/mesa/vbo/vbo_save.h b/src/mesa/vbo/vbo_save.h index f5a407ced14..23cbea2afc1 100644 --- a/src/mesa/vbo/vbo_save.h +++ b/src/mesa/vbo/vbo_save.h @@ -96,7 +96,9 @@ struct vbo_save_vertex_list { */ #define VBO_SAVE_BUFFER_SIZE (8*1024) /* dwords */ #define VBO_SAVE_PRIM_SIZE 128 -#define VBO_SAVE_PRIM_WEAK 0x40 +#define VBO_SAVE_PRIM_MODE_MASK 0x3f +#define VBO_SAVE_PRIM_WEAK 0x40 +#define VBO_SAVE_PRIM_NO_CURRENT_UPDATE 0x80 #define VBO_SAVE_FALLBACK 0x10000000 diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c index 817d478e2ac..bf5ceda78f8 100644 --- a/src/mesa/vbo/vbo_save_api.c +++ b/src/mesa/vbo/vbo_save_api.c @@ -294,26 +294,30 @@ static void _save_compile_vertex_list( struct gl_context *ctx ) node->vertex_store->refcount++; node->prim_store->refcount++; - - node->current_size = node->vertex_size - node->attrsz[0]; - node->current_data = NULL; - - if (node->current_size) { - /* If the malloc fails, we just pull the data out of the VBO - * later instead. - */ - node->current_data = MALLOC( node->current_size * sizeof(GLfloat) ); - if (node->current_data) { - const char *buffer = (const char *)save->vertex_store->buffer; - unsigned attr_offset = node->attrsz[0] * sizeof(GLfloat); - unsigned vertex_offset = 0; - - if (node->count) - vertex_offset = (node->count-1) * node->vertex_size * sizeof(GLfloat); - - memcpy( node->current_data, - buffer + node->buffer_offset + vertex_offset + attr_offset, - node->current_size * sizeof(GLfloat) ); + if (node->prim[0].no_current_update) { + node->current_size = 0; + node->current_data = NULL; + } else { + node->current_size = node->vertex_size - node->attrsz[0]; + node->current_data = NULL; + + if (node->current_size) { + /* If the malloc fails, we just pull the data out of the VBO + * later instead. + */ + node->current_data = MALLOC( node->current_size * sizeof(GLfloat) ); + if (node->current_data) { + const char *buffer = (const char *)save->vertex_store->buffer; + unsigned attr_offset = node->attrsz[0] * sizeof(GLfloat); + unsigned vertex_offset = 0; + + if (node->count) + vertex_offset = (node->count-1) * node->vertex_size * sizeof(GLfloat); + + memcpy( node->current_data, + buffer + node->buffer_offset + vertex_offset + attr_offset, + node->current_size * sizeof(GLfloat) ); + } } } @@ -397,6 +401,7 @@ static void _save_wrap_buffers( struct gl_context *ctx ) GLint i = save->prim_count - 1; GLenum mode; GLboolean weak; + GLboolean no_current_update; assert(i < (GLint) save->prim_max); assert(i >= 0); @@ -407,6 +412,7 @@ static void _save_wrap_buffers( struct gl_context *ctx ) save->prim[i].start); mode = save->prim[i].mode; weak = save->prim[i].weak; + no_current_update = save->prim[i].no_current_update; /* store the copied vertices, and allocate a new list. */ @@ -416,6 +422,7 @@ static void _save_wrap_buffers( struct gl_context *ctx ) */ save->prim[0].mode = mode; save->prim[0].weak = weak; + save->prim[0].no_current_update = no_current_update; save->prim[0].begin = 0; save->prim[0].end = 0; save->prim[0].pad = 0; @@ -770,10 +777,11 @@ GLboolean vbo_save_NotifyBegin( struct gl_context *ctx, GLenum mode ) GLuint i = save->prim_count++; assert(i < save->prim_max); - save->prim[i].mode = mode & ~VBO_SAVE_PRIM_WEAK; + save->prim[i].mode = mode & VBO_SAVE_PRIM_MODE_MASK; save->prim[i].begin = 1; save->prim[i].end = 0; save->prim[i].weak = (mode & VBO_SAVE_PRIM_WEAK) ? 1 : 0; + save->prim[i].no_current_update = (mode & VBO_SAVE_PRIM_NO_CURRENT_UPDATE) ? 1 : 0; save->prim[i].pad = 0; save->prim[i].start = save->vert_count; save->prim[i].count = 0; @@ -934,7 +942,7 @@ static void GLAPIENTRY _save_OBE_DrawArrays(GLenum mode, GLint start, GLsizei co _ae_map_vbos( ctx ); - vbo_save_NotifyBegin( ctx, mode | VBO_SAVE_PRIM_WEAK ); + vbo_save_NotifyBegin( ctx, mode | VBO_SAVE_PRIM_WEAK | VBO_SAVE_PRIM_NO_CURRENT_UPDATE); for (i = 0; i < count; i++) CALL_ArrayElement(GET_DISPATCH(), (start + i)); @@ -960,7 +968,7 @@ static void GLAPIENTRY _save_OBE_DrawElements(GLenum mode, GLsizei count, GLenum if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) indices = ADD_POINTERS(ctx->Array.ElementArrayBufferObj->Pointer, indices); - vbo_save_NotifyBegin( ctx, mode | VBO_SAVE_PRIM_WEAK ); + vbo_save_NotifyBegin( ctx, mode | VBO_SAVE_PRIM_WEAK | VBO_SAVE_PRIM_NO_CURRENT_UPDATE ); switch (type) { case GL_UNSIGNED_BYTE: |