diff options
author | Keith Whitwell <[email protected]> | 2009-01-09 10:08:06 +0000 |
---|---|---|
committer | Keith Whitwell <[email protected]> | 2009-01-09 10:08:06 +0000 |
commit | e3734593aea202e99e77febea7b86c904080939f (patch) | |
tree | 69856674e2062c55ec5eec95eb0e31225a943084 /src | |
parent | 221352bbd79a0ea92ce31cffb65537f62ee5668e (diff) | |
parent | 5cad143e545775acacee294049345c6a3868c51d (diff) |
Merge commit 'origin/gallium-0.2' into gallium-xlib-rework
Conflicts:
progs/glsl/Makefile
Diffstat (limited to 'src')
117 files changed, 2720 insertions, 1437 deletions
diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c index f34c68728ef..03e842ce082 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_validate.c +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -33,6 +33,7 @@ #include "draw_private.h" #include "draw_pipe.h" #include "draw_context.h" +#include "draw_vbuf.h" static boolean points( unsigned prim ) { @@ -52,16 +53,28 @@ static boolean triangles( unsigned prim ) } /** - * Check if we need any special pipeline stages, or whether - * prims/verts can go through untouched. Don't test for bypass - * clipping or vs modes, this function is just about the primitive - * pipeline stages. + * Default version of a function to check if we need any special + * pipeline stages, or whether prims/verts can go through untouched. + * Don't test for bypass clipping or vs modes, this function is just + * about the primitive pipeline stages. + * + * This can be overridden by the driver. */ boolean draw_need_pipeline(const struct draw_context *draw, const struct pipe_rasterizer_state *rasterizer, unsigned int prim ) { + /* If the driver has overridden this, use that version: + */ + if (draw->render && + draw->render->need_pipeline) + { + return draw->render->need_pipeline( draw->render, + rasterizer, + prim ); + } + /* Don't have to worry about triangles turning into lines/points * and triggering the pipeline, because we have to trigger the * pipeline *anyway* if unfilled mode is active. diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index b0aa2df3099..7e1df88f0b8 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -30,7 +30,7 @@ * Vertex buffer drawing stage. * * \author Keith Whitwell <[email protected]> - * \author Jos� Fonseca <[email protected]> + * \author Jose Fonseca <[email protected]> */ #ifndef DRAW_VBUF_H_ @@ -38,6 +38,7 @@ +struct pipe_rasterizer_state; struct draw_context; struct vertex_info; @@ -55,6 +56,17 @@ struct vbuf_render { unsigned max_vertex_buffer_bytes; /** + * Query if the hardware driver needs assistance for a particular + * combination of rasterizer state and primitive. + * + * Currently optional. + */ + boolean (*need_pipeline)(const struct vbuf_render *render, + const struct pipe_rasterizer_state *rasterizer, + unsigned int prim ); + + + /** * Get the hardware vertex format. * * XXX: have this in draw_context instead? diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index a943607d7ed..c143cf23723 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -81,9 +81,9 @@ struct vertex_info * memcmp() comparisons. */ struct { - ubyte interp_mode:4; /**< INTERP_x */ - ubyte emit:4; /**< EMIT_x */ - ubyte src_index; /**< map to post-xform attribs */ + unsigned interp_mode:4; /**< INTERP_x */ + unsigned emit:4; /**< EMIT_x */ + unsigned src_index:8; /**< map to post-xform attribs */ } attrib[PIPE_MAX_SHADER_INPUTS]; }; diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 7f305304ff7..c057cd67fde 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -50,7 +50,7 @@ void draw_vs_set_constants( struct draw_context *draw, const float (*constants)[4], unsigned size ) { - if (((unsigned)constants) & 0xf) { + if (((uintptr_t)constants) & 0xf) { if (size > draw->vs.const_storage_size) { if (draw->vs.aligned_constant_storage) align_free((void *)draw->vs.aligned_constant_storage); diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index b65bfa7bbdf..e9015ec2eb8 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -261,7 +261,7 @@ emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) inst.inst.vB = vB; inst.inst.op2 = op2; emit_instruction(p, inst.bits); -}; +} union vxr_inst { @@ -287,7 +287,7 @@ emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) inst.inst.rC = 0; inst.inst.op2 = op2; emit_instruction(p, inst.bits); -}; +} union va_inst { @@ -313,7 +313,7 @@ emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) inst.inst.vC = vC; inst.inst.op2 = op2; emit_instruction(p, inst.bits); -}; +} union i_inst { @@ -430,7 +430,7 @@ emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si) inst.inst.ra = ra; inst.inst.si = (unsigned) (si & 0xffff); emit_instruction(p, inst.bits); -}; +} union a_inst { @@ -459,7 +459,7 @@ emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2, inst.inst.op2 = op2; inst.inst.rc = rc; emit_instruction(p, inst.bits); -}; +} union xo_inst { diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index c2a0ac5aff8..2ed8c2bf07b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -180,14 +180,16 @@ _dump_register_ind( uint file, int index, uint ind_file, - int ind_index ) + int ind_index, + uint ind_swizzle ) { ENM( file, file_names ); CHR( '[' ); ENM( ind_file, file_names ); CHR( '[' ); SID( ind_index ); - CHR( ']' ); + TXT( "]." ); + ENM( ind_swizzle, swizzle_names ); if (index != 0) { if (index > 0) CHR( '+' ); @@ -385,7 +387,8 @@ iter_instruction( src->SrcRegister.File, src->SrcRegister.Index, src->SrcRegisterInd.File, - src->SrcRegisterInd.Index ); + src->SrcRegisterInd.Index, + src->SrcRegisterInd.SwizzleX ); } else { _dump_register( diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c index be25cb45a0a..c575b6c3e1f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -646,7 +646,6 @@ tgsi_dump_c( struct tgsi_full_declaration fd; uint ignored = flags & TGSI_DUMP_C_IGNORED; uint deflt = flags & TGSI_DUMP_C_DEFAULT; - uint instno = 0; tgsi_parse_init( &parse, tokens ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index cfc7ea8e898..1239f6c0765 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -124,7 +124,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, /* only first 32 regs will appear in this bitfield */ info->file_mask[file] |= (1 << reg); info->file_count[file]++; - info->file_max[file] = MAX2(info->file_max[file], (int)i); + info->file_max[file] = MAX2(info->file_max[file], (int)reg); if (file == TGSI_FILE_INPUT) { info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName; diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index f5bd308083b..5df932c0f71 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -23,6 +23,7 @@ util = env.ConvenienceLibrary( 'u_stream_wd.c', 'u_tile.c', 'u_time.c', + 'u_timed_winsys.c', ]) auxiliaries.insert(0, util) diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 30f161fe3b3..b5eb896b7a1 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -1198,7 +1198,7 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, { struct pipe_context *pipe = ctx->pipe; struct pipe_screen *screen = pipe->screen; - uint dstLevel, zslice; + uint dstLevel, zslice = 0; assert(pt->block.width == 1); assert(pt->block.height == 1); diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 22d552d8e3d..8f502823f99 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -162,7 +162,7 @@ cell_create_context(struct pipe_screen *screen, */ /* This call only works with SDK 3.0. Anyone still using 2.1??? */ cell->num_cells = spe_cpu_info_get(SPE_COUNT_PHYSICAL_CPU_NODES, -1); - cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, 0); + cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1); if (cell->debug_flags) { printf("Cell: found %d Cell(s) with %u SPUs\n", cell->num_cells, cell->num_spus); diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 96a1743fc10..8f3deb482e6 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -2,6 +2,7 @@ * * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -75,13 +76,25 @@ struct codegen int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */ + int addr_reg; /**< address register, integer values */ + /** Per-instruction temps / intermediate temps */ int num_itemps; int itemps[12]; /** Current IF/ELSE/ENDIF nesting level */ int if_nesting; - /** Index of execution mask register */ + /** Current BGNLOOP/ENDLOOP nesting level */ + int loop_nesting; + /** Location of start of current loop */ + int loop_start; + + /** Index of if/conditional mask register */ + int cond_mask_reg; + /** Index of loop mask register */ + int loop_mask_reg; + + /** Index of master execution mask register */ int exec_mask_reg; /** KIL mask: indicates which fragments have been killed */ @@ -145,10 +158,33 @@ get_const_one_reg(struct codegen *gen) /** - * Return index of the pixel execution mask. + * Return index of the address register. + * Used for indirect register loads/stores. + */ +static int +get_address_reg(struct codegen *gen) +{ + if (gen->addr_reg <= 0) { + gen->addr_reg = spe_allocate_available_register(gen->f); + + spe_indent(gen->f, 4); + spe_comment(gen->f, -4, "INIT CONSTANT 1.0:"); + + /* init addr = {0, 0, 0, 0} */ + spe_zero(gen->f, gen->addr_reg); + + spe_indent(gen->f, -4); + } + + return gen->addr_reg; +} + + +/** + * Return index of the master execution mask. * The register is allocated an initialized upon the first call. * - * The pixel execution mask controls which pixels in a quad are + * The master execution mask controls which pixels in a quad are * modified, according to surrounding conditionals, loops, etc. */ static int @@ -157,19 +193,40 @@ get_exec_mask_reg(struct codegen *gen) if (gen->exec_mask_reg <= 0) { gen->exec_mask_reg = spe_allocate_available_register(gen->f); - spe_indent(gen->f, 4); - spe_comment(gen->f, -4, "INIT EXEC MASK = ~0:"); - - /* exec_mask = {~0, ~0, ~0, ~0} */ + /* XXX this may not be needed */ + spe_comment(gen->f, 0*-4, "initialize master execution mask = ~0"); spe_load_int(gen->f, gen->exec_mask_reg, ~0); - - spe_indent(gen->f, -4); } return gen->exec_mask_reg; } +/** Return index of the conditional (if/else) execution mask register */ +static int +get_cond_mask_reg(struct codegen *gen) +{ + if (gen->cond_mask_reg <= 0) { + gen->cond_mask_reg = spe_allocate_available_register(gen->f); + } + + return gen->cond_mask_reg; +} + + +/** Return index of the loop execution mask register */ +static int +get_loop_mask_reg(struct codegen *gen) +{ + if (gen->loop_mask_reg <= 0) { + gen->loop_mask_reg = spe_allocate_available_register(gen->f); + } + + return gen->loop_mask_reg; +} + + + static boolean is_register_src(struct codegen *gen, int channel, const struct tgsi_full_src_register *src) @@ -231,16 +288,22 @@ get_src_reg(struct codegen *gen, spe_xor(gen->f, reg, reg, reg); } else { + int index = src->SrcRegister.Index; + assert(swizzle < 4); + if (src->SrcRegister.Indirect) { + /* XXX unfinished */ + } + switch (src->SrcRegister.File) { case TGSI_FILE_TEMPORARY: - reg = gen->temp_regs[src->SrcRegister.Index][swizzle]; + reg = gen->temp_regs[index][swizzle]; break; case TGSI_FILE_INPUT: { /* offset is measured in quadwords, not bytes */ - int offset = src->SrcRegister.Index * 4 + swizzle; + int offset = index * 4 + swizzle; reg = get_itemp(gen); reg_is_itemp = TRUE; /* Load: reg = memory[(machine_reg) + offset] */ @@ -248,12 +311,12 @@ get_src_reg(struct codegen *gen, } break; case TGSI_FILE_IMMEDIATE: - reg = gen->imm_regs[src->SrcRegister.Index][swizzle]; + reg = gen->imm_regs[index][swizzle]; break; case TGSI_FILE_CONSTANT: { /* offset is measured in quadwords, not bytes */ - int offset = src->SrcRegister.Index * 4 + swizzle; + int offset = index * 4 + swizzle; reg = get_itemp(gen); reg_is_itemp = TRUE; /* Load: reg = memory[(machine_reg) + offset] */ @@ -322,7 +385,7 @@ get_dst_reg(struct codegen *gen, switch (dest->DstRegister.File) { case TGSI_FILE_TEMPORARY: - if (gen->if_nesting > 0) + if (gen->if_nesting > 0 || gen->loop_nesting > 0) reg = get_itemp(gen); else reg = gen->temp_regs[dest->DstRegister.Index][channel]; @@ -367,7 +430,7 @@ store_dest_reg(struct codegen *gen, switch (dest->DstRegister.File) { case TGSI_FILE_TEMPORARY: - if (gen->if_nesting > 0) { + if (gen->if_nesting > 0 || gen->loop_nesting > 0) { int d_reg = gen->temp_regs[dest->DstRegister.Index][channel]; int exec_reg = get_exec_mask_reg(gen); /* Mix d with new value according to exec mask: @@ -384,7 +447,7 @@ store_dest_reg(struct codegen *gen, { /* offset is measured in quadwords, not bytes */ int offset = dest->DstRegister.Index * 4 + channel; - if (gen->if_nesting > 0) { + if (gen->if_nesting > 0 || gen->loop_nesting > 0) { int exec_reg = get_exec_mask_reg(gen); int curval_reg = get_itemp(gen); /* First read the current value from memory: @@ -488,96 +551,118 @@ emit_epilogue(struct codegen *gen) } +#define FOR_EACH_ENABLED_CHANNEL(inst, ch) \ + for (ch = 0; ch < 4; ch++) \ + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) + + +static boolean +emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch = 0, src_reg, addr_reg; + + spe_comment(gen->f, -4, "ARL:"); + + src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + addr_reg = get_address_reg(gen); + + /* convert float to int */ + spe_cflts(gen->f, addr_reg, src_reg, 0); + + free_itemps(gen); + + return TRUE; +} + + static boolean emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch, src_reg[4], dst_reg[4]; spe_comment(gen->f, -4, "MOV:"); - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); } - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) && - is_memory_dst(gen, ch, &inst->FullDstRegisters[0])) { - /* special-case: register to memory store */ - store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]); - } - else { - spe_move(gen->f, dst_reg[ch], src_reg[ch]); - store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]); - } - free_itemps(gen); + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) && + is_memory_dst(gen, ch, &inst->FullDstRegisters[0])) { + /* special-case: register to memory store */ + store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]); + } + else { + spe_move(gen->f, dst_reg[ch], src_reg[ch]); + store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]); } } - return true; + + free_itemps(gen); + + return TRUE; } /** - * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD - * becomes (up to) four SPU "fa" instructions because we're doing SOA - * processing. + * Emit binary operation */ static boolean -emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst) +emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch, s1_reg[4], s2_reg[4], d_reg[4]; - spe_comment(gen->f, -4, "ADD:"); + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + spe_comment(gen->f, -4, "ADD:"); + break; + case TGSI_OPCODE_SUB: + spe_comment(gen->f, -4, "SUB:"); + break; + case TGSI_OPCODE_MUL: + spe_comment(gen->f, -4, "MUL:"); + break; + default: + assert(0); + } + /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */ - for (ch = 0; ch < 4; ch++) { - /* If the dest R, G, B or A writemask is enabled... */ - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); } - /* Loop over Red/Green/Blue/Alpha channels, do the add, store results */ - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - /* Emit actual SPE instruction: d = s1 + s2 */ + + /* Loop over Red/Green/Blue/Alpha channels, do the op, store results */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + /* Emit actual SPE instruction: d = s1 + s2 */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ADD: spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); - /* Free any intermediate temps we allocated */ - free_itemps(gen); + break; + case TGSI_OPCODE_SUB: + spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + break; + case TGSI_OPCODE_MUL: + spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + break; + default: + ; } } - return true; -} -/** - * Emit subtract. See emit_ADD for comments. - */ -static boolean -emit_SUB(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], s2_reg[4], d_reg[4]; - spe_comment(gen->f, -4, "SUB:"); - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - } + /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); } - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - /* d = s1 - s2 */ - spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); - free_itemps(gen); - } - } - return true; + + /* Free any intermediate temps we allocated */ + free_itemps(gen); + + return TRUE; } + /** * Emit multiply add. See emit_ADD for comments. */ @@ -586,23 +671,20 @@ emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4]; spe_comment(gen->f, -4, "MAD:"); - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); } - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - /* d = s1 * s2 + s3 */ - spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]); - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); - free_itemps(gen); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); } - return true; + free_itemps(gen); + return TRUE; } @@ -615,132 +697,108 @@ emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst) int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4]; spe_comment(gen->f, -4, "LERP:"); /* setup/get src/dst/temp regs */ - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - tmp_reg[ch] = get_itemp(gen); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); } /* d = s3 + s1(s2 - s3) */ /* do all subtracts, then all fma, then all stores to better pipeline */ - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]); } - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]); } - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); } free_itemps(gen); - return true; + return TRUE; } + + /** - * Emit multiply. See emit_ADD for comments. + * Emit reciprocal or recip sqrt. */ static boolean -emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst) +emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst) { - int ch, s1_reg[4], s2_reg[4], d_reg[4]; - spe_comment(gen->f, -4, "MUL:"); - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - } + int ch, s1_reg[4], d_reg[4], tmp_reg[4]; + + if (inst->Instruction.Opcode == TGSI_OPCODE_RCP) { + spe_comment(gen->f, -4, "RCP:"); } - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - /* d = s1 * s2 */ - spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); - free_itemps(gen); - } + else { + assert(inst->Instruction.Opcode == TGSI_OPCODE_RSQ); + spe_comment(gen->f, -4, "RSQ:"); } - return true; -} -/** - * Emit reciprocal. See emit_ADD for comments. - */ -static boolean -emit_RCP(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - spe_comment(gen->f, -4, "RCP:"); - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - /* d = 1/s1 */ - spe_frest(gen->f, d_reg, s1_reg); - spe_fi(gen->f, d_reg, s1_reg, d_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); } - return true; -} -/** - * Emit reciprocal sqrt. See emit_ADD for comments. - */ -static boolean -emit_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - spe_comment(gen->f, -4, "RSQ:"); - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - /* d = 1/s1 */ - spe_frsqest(gen->f, d_reg, s1_reg); - spe_fi(gen->f, d_reg, s1_reg, d_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + if (inst->Instruction.Opcode == TGSI_OPCODE_RCP) { + /* tmp = 1/s1 */ + spe_frest(gen->f, tmp_reg[ch], s1_reg[ch]); + } + else { + /* tmp = 1/sqrt(s1) */ + spe_frsqest(gen->f, tmp_reg[ch], s1_reg[ch]); } } - return true; + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + /* d = float_interp(s1, tmp) */ + spe_fi(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; } + /** * Emit absolute value. See emit_ADD for comments. */ static boolean emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst) { - int ch; + int ch, s1_reg[4], d_reg[4]; + const int bit31mask_reg = get_itemp(gen); + spe_comment(gen->f, -4, "ABS:"); - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - const int bit31mask_reg = get_itemp(gen); - /* mask with bit 31 set, the rest cleared */ - spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); + /* mask with bit 31 set, the rest cleared */ + spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); - /* d = sign bit cleared in s1 */ - spe_andc(gen->f, d_reg, s1_reg, bit31mask_reg); + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); - } + /* d = sign bit cleared in s1 */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_andc(gen->f, d_reg[ch], s1_reg[ch], bit31mask_reg); } - return true; + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; } /** @@ -775,16 +833,14 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) /* t0 = t0 + t1 */ spe_fa(gen->f, t0_reg, t0_reg, t1_reg); - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - spe_move(gen->f, d_reg, t0_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, t0_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); } free_itemps(gen); - return true; + return TRUE; } /** @@ -824,16 +880,14 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) /* t0 = t0 + t1 */ spe_fa(gen->f, t0_reg, t0_reg, t1_reg); - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - spe_move(gen->f, d_reg, t0_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, t0_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); } free_itemps(gen); - return true; + return TRUE; } /** @@ -867,16 +921,14 @@ emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) /* t = w1 + t */ spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg); - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - spe_move(gen->f, d_reg, tmp_reg); - store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, tmp_reg); + store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); } free_itemps(gen); - return true; + return TRUE; } /** @@ -911,17 +963,15 @@ emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_frsqest(gen->f, t1_reg, t0_reg); spe_fi(gen->f, t1_reg, t0_reg, t1_reg); - for (ch = 0; ch < 3; ch++) { /* NOTE: omit W channel */ - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - /* dst = src[ch] * t1 */ - spe_fm(gen->f, d_reg, src_reg[ch], t1_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + /* dst = src[ch] * t1 */ + spe_fm(gen->f, d_reg, src_reg[ch], t1_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); } free_itemps(gen); - return true; + return TRUE; } @@ -978,201 +1028,101 @@ emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst) } free_itemps(gen); - return true; + return TRUE; } + /** - * Emit set-if-greater-than. + * Emit inequality instruction. * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as * the result but OpenGL/TGSI needs 0.0 and 1.0 results. * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND. */ static boolean -emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst) +emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst) { - int ch; + int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg; + bool complement = FALSE; - spe_comment(gen->f, -4, "SGT:"); - - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - - /* d = (s1 > s2) */ - spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); - - /* convert d from 0x0/0xffffffff to 0.0/1.0 */ - /* d = d & one_reg */ - spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); - - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); - } + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_SGT: + spe_comment(gen->f, -4, "SGT:"); + break; + case TGSI_OPCODE_SLT: + spe_comment(gen->f, -4, "SLT:"); + break; + case TGSI_OPCODE_SGE: + spe_comment(gen->f, -4, "SGE:"); + complement = TRUE; + break; + case TGSI_OPCODE_SLE: + spe_comment(gen->f, -4, "SLE:"); + complement = TRUE; + break; + case TGSI_OPCODE_SEQ: + spe_comment(gen->f, -4, "SEQ:"); + break; + case TGSI_OPCODE_SNE: + spe_comment(gen->f, -4, "SNE:"); + complement = TRUE; + break; + default: + ; } - return true; -} + one_reg = get_const_one_reg(gen); -/** - * Emit set-if_less-then. See emit_SGT for comments. - */ -static boolean -emit_SLT(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - - spe_comment(gen->f, -4, "SLT:"); - - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - - /* d = (s1 < s2) */ - spe_fcgt(gen->f, d_reg, s2_reg, s1_reg); - - /* convert d from 0x0/0xffffffff to 0.0/1.0 */ - /* d = d & one_reg */ - spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); - - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); } - return true; -} - -/** - * Emit set-if_greater-then-or-equal. See emit_SGT for comments. - */ -static boolean -emit_SGE(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - - spe_comment(gen->f, -4, "SGE:"); - - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - - /* d = (s1 >= s2) */ - spe_fcgt(gen->f, d_reg, s2_reg, s1_reg); - - /* convert d from 0x0/0xffffffff to 0.0/1.0 */ - /* d = ~d & one_reg */ - spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg); - - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_SGT: + spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + break; + case TGSI_OPCODE_SLT: + spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]); + break; + case TGSI_OPCODE_SGE: + spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]); + break; + case TGSI_OPCODE_SLE: + spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + break; + case TGSI_OPCODE_SEQ: + spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + break; + case TGSI_OPCODE_SNE: + spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + break; + default: + assert(0); } } - return true; -} - -/** - * Emit set-if_less-then-or-equal. See emit_SGT for comments. - */ -static boolean -emit_SLE(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - - spe_comment(gen->f, -4, "SLE:"); - - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - - /* d = (s1 <= s2) */ - spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); - - /* convert d from 0x0/0xffffffff to 0.0/1.0 */ - /* d = ~d & one_reg */ - spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg); - - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); - } + /* convert d from 0x0/0xffffffff to 0.0/1.0 */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + /* d = d & one_reg */ + if (complement) + spe_andc(gen->f, d_reg[ch], one_reg, d_reg[ch]); + else + spe_and(gen->f, d_reg[ch], one_reg, d_reg[ch]); } - return true; -} - -/** - * Emit set-if_equal. See emit_SGT for comments. - */ -static boolean -emit_SEQ(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - - spe_comment(gen->f, -4, "SEQ:"); - - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - - /* d = (s1 == s2) */ - spe_fceq(gen->f, d_reg, s1_reg, s2_reg); - - /* convert d from 0x0/0xffffffff to 0.0/1.0 */ - /* d = d & one_reg */ - spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); - - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); } - return true; + free_itemps(gen); + return TRUE; } -/** - * Emit set-if_not_equal. See emit_SGT for comments. - */ -static boolean -emit_SNE(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - - spe_comment(gen->f, -4, "SNE:"); - - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - - /* d = (s1 != s2) */ - spe_fceq(gen->f, d_reg, s1_reg, s2_reg); - spe_nor(gen->f, d_reg, d_reg, d_reg); - - /* convert d from 0x0/0xffffffff to 0.0/1.0 */ - /* d = d & one_reg */ - spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); - - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); - } - } - - return true; -} /** - * Emit compare. See emit_SGT for comments. + * Emit compare. */ static boolean emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) @@ -1181,26 +1131,24 @@ emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_comment(gen->f, -4, "CMP:"); - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - int zero_reg = get_itemp(gen); + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int zero_reg = get_itemp(gen); - spe_xor(gen->f, zero_reg, zero_reg, zero_reg); + spe_zero(gen->f, zero_reg); - /* d = (s1 < 0) ? s2 : s3 */ - spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); - spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg); + /* d = (s1 < 0) ? s2 : s3 */ + spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); + spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg); - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); - } + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); } - return true; + return TRUE; } /** @@ -1211,29 +1159,34 @@ emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) static boolean emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst) { - int ch; + int ch, s1_reg[4], d_reg[4]; spe_comment(gen->f, -4, "TRUNC:"); - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } - /* Convert float to int */ - spe_cflts(gen->f, d_reg, s1_reg, 0); + /* Convert float to int */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_cflts(gen->f, d_reg[ch], s1_reg[ch], 0); + } - /* Convert int to float */ - spe_csflt(gen->f, d_reg, d_reg, 0); + /* Convert int to float */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_csflt(gen->f, d_reg[ch], d_reg[ch], 0); + } - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); } - return true; + free_itemps(gen); + return TRUE; } + /** * Emit floor. * If negative int subtract one @@ -1243,77 +1196,103 @@ emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst) static boolean emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) { - int ch; + int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg; spe_comment(gen->f, -4, "FLR:"); - int zero_reg = get_itemp(gen); - spe_xor(gen->f, zero_reg, zero_reg, zero_reg); + zero_reg = get_itemp(gen); + spe_zero(gen->f, zero_reg); + one_reg = get_const_one_reg(gen); - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - int tmp_reg = get_itemp(gen); + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); + } - /* If negative, subtract 1.0 */ - spe_fcgt(gen->f, tmp_reg, zero_reg, s1_reg); - spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), tmp_reg); - spe_fs(gen->f, tmp_reg, s1_reg, tmp_reg); + /* If negative, subtract 1.0 */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]); + } - /* Convert float to int */ - spe_cflts(gen->f, tmp_reg, tmp_reg, 0); + /* Convert float to int */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0); + } - /* Convert int to float */ - spe_csflt(gen->f, d_reg, tmp_reg, 0); + /* Convert int to float */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_csflt(gen->f, d_reg[ch], tmp_reg[ch], 0); + } - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); } - return true; + free_itemps(gen); + return TRUE; } + /** * Compute frac = Input - FLR(Input) */ static boolean emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) { - int ch; + int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg; spe_comment(gen->f, -4, "FRC:"); - int zero_reg = get_itemp(gen); - spe_xor(gen->f, zero_reg, zero_reg, zero_reg); + zero_reg = get_itemp(gen); + spe_zero(gen->f, zero_reg); + one_reg = get_const_one_reg(gen); - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - int tmp_reg = get_itemp(gen); + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); + } - /* If negative, subtract 1.0 */ - spe_fcgt(gen->f, tmp_reg, zero_reg, s1_reg); - spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), tmp_reg); - spe_fs(gen->f, tmp_reg, s1_reg, tmp_reg); + /* If negative, subtract 1.0 */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]); + } - /* Convert float to int */ - spe_cflts(gen->f, tmp_reg, tmp_reg, 0); + /* Convert float to int */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0); + } - /* Convert int to float */ - spe_csflt(gen->f, tmp_reg, tmp_reg, 0); + /* Convert int to float */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_csflt(gen->f, tmp_reg[ch], tmp_reg[ch], 0); + } - /* d = s1 - FLR(s1) */ - spe_fs(gen->f, d_reg, s1_reg, tmp_reg); + /* d = s1 - FLR(s1) */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fs(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]); + } - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); - } + /* store result */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); } - return true; + free_itemps(gen); + return TRUE; } @@ -1379,73 +1358,71 @@ emit_function_call(struct codegen *gen, retval_reg = spe_allocate_available_register(gen->f); } - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int d_reg; - ubyte usedRegs[SPE_NUM_REGS]; - uint i, numUsed; - - if (!scalar) { - for (a = 0; a < num_args; a++) { - s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int d_reg; + ubyte usedRegs[SPE_NUM_REGS]; + uint i, numUsed; + + if (!scalar) { + for (a = 0; a < num_args; a++) { + s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]); } + } - d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - if (!scalar || !func_called) { - /* for a scalar function, we'll really only call the function once */ + if (!scalar || !func_called) { + /* for a scalar function, we'll really only call the function once */ - numUsed = spe_get_registers_used(gen->f, usedRegs); - assert(numUsed < gen->frame_size / 16 - 2); + numUsed = spe_get_registers_used(gen->f, usedRegs); + assert(numUsed < gen->frame_size / 16 - 2); - /* save registers to stack */ - for (i = 0; i < numUsed; i++) { - uint reg = usedRegs[i]; - int offset = 2 + i; - spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); - } + /* save registers to stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + int offset = 2 + i; + spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } - /* setup function arguments */ - for (a = 0; a < num_args; a++) { - spe_move(gen->f, 3 + a, s_regs[a]); - } + /* setup function arguments */ + for (a = 0; a < num_args; a++) { + spe_move(gen->f, 3 + a, s_regs[a]); + } - /* branch to function, save return addr */ - spe_brasl(gen->f, SPE_REG_RA, addr); - - /* save function's return value */ - if (scalar) - spe_move(gen->f, retval_reg, 3); - else - spe_move(gen->f, d_reg, 3); - - /* restore registers from stack */ - for (i = 0; i < numUsed; i++) { - uint reg = usedRegs[i]; - if (reg != d_reg && reg != retval_reg) { - int offset = 2 + i; - spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); - } - } + /* branch to function, save return addr */ + spe_brasl(gen->f, SPE_REG_RA, addr); - func_called = TRUE; - } + /* save function's return value */ + if (scalar) + spe_move(gen->f, retval_reg, 3); + else + spe_move(gen->f, d_reg, 3); - if (scalar) { - spe_move(gen->f, d_reg, retval_reg); + /* restore registers from stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + if (reg != d_reg && reg != retval_reg) { + int offset = 2 + i; + spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } } - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); + func_called = TRUE; + } + + if (scalar) { + spe_move(gen->f, d_reg, retval_reg); } + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); } if (scalar) { spe_release_register(gen->f, retval_reg); } - return true; + return TRUE; } @@ -1525,11 +1502,9 @@ emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) } } - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]); - free_itemps(gen); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]); + free_itemps(gen); } return TRUE; @@ -1549,35 +1524,31 @@ emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst) /* zero = {0,0,0,0} */ zero_reg = get_itemp(gen); - spe_load_uint(gen->f, zero_reg, 0); + spe_zero(gen->f, zero_reg); cmp_reg = get_itemp(gen); /* get src regs */ - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); } /* test if any src regs are < 0 */ - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - if (kil_reg >= 0) { - /* cmp = 0 > src ? : ~0 : 0 */ - spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]); - /* kil = kil | cmp */ - spe_or(gen->f, kil_reg, kil_reg, cmp_reg); - } - else { - kil_reg = get_itemp(gen); - /* kil = 0 > src ? : ~0 : 0 */ - spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + if (kil_reg >= 0) { + /* cmp = 0 > src ? : ~0 : 0 */ + spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]); + /* kil = kil | cmp */ + spe_or(gen->f, kil_reg, kil_reg, cmp_reg); + } + else { + kil_reg = get_itemp(gen); + /* kil = 0 > src ? : ~0 : 0 */ + spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]); } } - if (gen->if_nesting) { + if (gen->if_nesting || gen->loop_nesting) { /* may have been a conditional kil */ spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg); } @@ -1599,96 +1570,92 @@ emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst) /** - * Emit max. See emit_SGT for comments. + * Emit min or max. */ static boolean -emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) +emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4]; spe_comment(gen->f, -4, "MAX:"); - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - tmp_reg[ch] = get_itemp(gen); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); } /* d = (s0 > s1) ? s0 : s1 */ - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + if (inst->Instruction.Opcode == TGSI_OPCODE_MAX) spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]); - } + else + spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]); } - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]); } - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); - } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); } free_itemps(gen); - return true; + return TRUE; } + /** - * Emit max. See emit_SGT for comments. + * Emit code to update the execution mask. + * This needs to be done whenever the execution status of a conditional + * or loop is changed. */ -static boolean -emit_MIN(struct codegen *gen, const struct tgsi_full_instruction *inst) +static void +emit_update_exec_mask(struct codegen *gen) { - int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4]; + const int exec_reg = get_exec_mask_reg(gen); + const int cond_reg = gen->cond_mask_reg; + const int loop_reg = gen->loop_mask_reg; - spe_comment(gen->f, -4, "MIN:"); + spe_comment(gen->f, 0, "Update master execution mask"); - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - tmp_reg[ch] = get_itemp(gen); - } + if (gen->if_nesting > 0 && gen->loop_nesting > 0) { + /* exec_mask = cond_mask & loop_mask */ + assert(cond_reg > 0); + assert(loop_reg > 0); + spe_and(gen->f, exec_reg, cond_reg, loop_reg); } - - /* d = (s1 > s0) ? s0 : s1 */ - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]); - } + else if (gen->if_nesting > 0) { + assert(cond_reg > 0); + spe_move(gen->f, exec_reg, cond_reg); } - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]); - } + else if (gen->loop_nesting > 0) { + assert(loop_reg > 0); + spe_move(gen->f, exec_reg, loop_reg); } - - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); - } + else { + spe_load_int(gen->f, exec_reg, ~0x0); } - - free_itemps(gen); - return true; } + static boolean emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) { const int channel = 0; - const int exec_reg = get_exec_mask_reg(gen); + int cond_reg; spe_comment(gen->f, -4, "IF:"); - /* update execution mask with the predicate register */ + cond_reg = get_cond_mask_reg(gen); + + /* XXX push cond exec mask */ + + spe_comment(gen->f, 0, "init conditional exec mask = ~0:"); + spe_load_int(gen->f, cond_reg, ~0); + + /* update conditional execution mask with the predicate register */ int tmp_reg = get_itemp(gen); int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]); @@ -1696,44 +1663,126 @@ emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_ceqi(gen->f, tmp_reg, s1_reg, 0); /* tmp = !tmp */ spe_complement(gen->f, tmp_reg, tmp_reg); - /* exec_mask = exec_mask & tmp */ - spe_and(gen->f, exec_reg, exec_reg, tmp_reg); + /* cond_mask = cond_mask & tmp */ + spe_and(gen->f, cond_reg, cond_reg, tmp_reg); gen->if_nesting++; + /* update the master execution mask */ + emit_update_exec_mask(gen); + free_itemps(gen); - return true; + return TRUE; } static boolean emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst) { - const int exec_reg = get_exec_mask_reg(gen); + const int cond_reg = get_cond_mask_reg(gen); spe_comment(gen->f, -4, "ELSE:"); - /* exec_mask = !exec_mask */ - spe_complement(gen->f, exec_reg, exec_reg); + spe_comment(gen->f, 0, "cond exec mask = !cond exec mask"); + spe_complement(gen->f, cond_reg, cond_reg); + emit_update_exec_mask(gen); - return true; + return TRUE; } static boolean emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst) { + spe_comment(gen->f, -4, "ENDIF:"); + + /* XXX todo: pop cond exec mask */ + + gen->if_nesting--; + + emit_update_exec_mask(gen); + + return TRUE; +} + + +static boolean +emit_BGNLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int exec_reg, loop_reg; + + spe_comment(gen->f, -4, "BGNLOOP:"); + + exec_reg = get_exec_mask_reg(gen); + loop_reg = get_loop_mask_reg(gen); + + /* XXX push loop_exec mask */ + + spe_comment(gen->f, 0*-4, "initialize loop exec mask = ~0"); + spe_load_int(gen->f, loop_reg, ~0x0); + + gen->loop_nesting++; + gen->loop_start = spe_code_size(gen->f); /* in bytes */ + + return TRUE; +} + + +static boolean +emit_ENDLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const int loop_reg = get_loop_mask_reg(gen); + const int tmp_reg = get_itemp(gen); + int offset; + + spe_comment(gen->f, -4, "ENDLOOP:"); + + /* tmp_reg = exec[0] | exec[1] | exec[2] | exec[3] */ + spe_orx(gen->f, tmp_reg, loop_reg); + + offset = gen->loop_start - spe_code_size(gen->f); /* in bytes */ + + /* branch back to top of loop if tmp_reg != 0 */ + spe_brnz(gen->f, tmp_reg, offset / 4); + + /* XXX pop loop_exec mask */ + + gen->loop_nesting--; + + emit_update_exec_mask(gen); + + return TRUE; +} + + +static boolean +emit_BRK(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ const int exec_reg = get_exec_mask_reg(gen); + const int loop_reg = get_loop_mask_reg(gen); - spe_comment(gen->f, -4, "ENDIF:"); + spe_comment(gen->f, -4, "BREAK:"); - /* XXX todo: pop execution mask */ + assert(gen->loop_nesting > 0); - spe_load_int(gen->f, exec_reg, ~0x0); + spe_comment(gen->f, 0, "loop exec mask &= ~master exec mask"); + spe_andc(gen->f, loop_reg, loop_reg, exec_reg); - gen->if_nesting--; - return true; + emit_update_exec_mask(gen); + + return TRUE; +} + + +static boolean +emit_CONT(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + spe_comment(gen->f, -4, "CONT:"); + + assert(gen->loop_nesting > 0); + + return TRUE; } @@ -1745,28 +1794,26 @@ emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst, spe_comment(gen->f, -4, ddx ? "DDX:" : "DDY:"); - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - int t1_reg = get_itemp(gen); - int t2_reg = get_itemp(gen); + int t1_reg = get_itemp(gen); + int t2_reg = get_itemp(gen); - spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */ - if (ddx) { - spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */ - } - else { - spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */ - } - spe_fs(gen->f, d_reg, t2_reg, t1_reg); - - free_itemps(gen); + spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */ + if (ddx) { + spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */ + } + else { + spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */ } + spe_fs(gen->f, d_reg, t2_reg, t1_reg); + + free_itemps(gen); } - return true; + return TRUE; } @@ -1784,7 +1831,7 @@ emit_END(struct codegen *gen) { spe_comment(gen->f, -4, "END:"); emit_epilogue(gen); - return true; + return TRUE; } @@ -1796,15 +1843,15 @@ emit_instruction(struct codegen *gen, const struct tgsi_full_instruction *inst) { switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + return emit_ARL(gen, inst); case TGSI_OPCODE_MOV: case TGSI_OPCODE_SWZ: return emit_MOV(gen, inst); - case TGSI_OPCODE_MUL: - return emit_MUL(gen, inst); case TGSI_OPCODE_ADD: - return emit_ADD(gen, inst); case TGSI_OPCODE_SUB: - return emit_SUB(gen, inst); + case TGSI_OPCODE_MUL: + return emit_binop(gen, inst); case TGSI_OPCODE_MAD: return emit_MAD(gen, inst); case TGSI_OPCODE_LERP: @@ -1820,29 +1867,22 @@ emit_instruction(struct codegen *gen, case TGSI_OPCODE_XPD: return emit_XPD(gen, inst); case TGSI_OPCODE_RCP: - return emit_RCP(gen, inst); case TGSI_OPCODE_RSQ: - return emit_RSQ(gen, inst); + return emit_RCP_RSQ(gen, inst); case TGSI_OPCODE_ABS: return emit_ABS(gen, inst); case TGSI_OPCODE_SGT: - return emit_SGT(gen, inst); case TGSI_OPCODE_SLT: - return emit_SLT(gen, inst); case TGSI_OPCODE_SGE: - return emit_SGE(gen, inst); case TGSI_OPCODE_SLE: - return emit_SLE(gen, inst); case TGSI_OPCODE_SEQ: - return emit_SEQ(gen, inst); case TGSI_OPCODE_SNE: - return emit_SNE(gen, inst); + return emit_inequality(gen, inst); case TGSI_OPCODE_CMP: return emit_CMP(gen, inst); - case TGSI_OPCODE_MAX: - return emit_MAX(gen, inst); case TGSI_OPCODE_MIN: - return emit_MIN(gen, inst); + case TGSI_OPCODE_MAX: + return emit_MIN_MAX(gen, inst); case TGSI_OPCODE_TRUNC: return emit_TRUNC(gen, inst); case TGSI_OPCODE_FLR: @@ -1882,20 +1922,29 @@ emit_instruction(struct codegen *gen, case TGSI_OPCODE_ENDIF: return emit_ENDIF(gen, inst); + case TGSI_OPCODE_BGNLOOP2: + return emit_BGNLOOP(gen, inst); + case TGSI_OPCODE_ENDLOOP2: + return emit_ENDLOOP(gen, inst); + case TGSI_OPCODE_BRK: + return emit_BRK(gen, inst); + case TGSI_OPCODE_CONT: + return emit_CONT(gen, inst); + case TGSI_OPCODE_DDX: - return emit_DDX_DDY(gen, inst, true); + return emit_DDX_DDY(gen, inst, TRUE); case TGSI_OPCODE_DDY: - return emit_DDX_DDY(gen, inst, false); + return emit_DDX_DDY(gen, inst, FALSE); /* XXX lots more cases to do... */ default: fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n", inst->Instruction.Opcode); - return false; + return FALSE; } - return true; + return TRUE; } @@ -1923,10 +1972,14 @@ emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed) gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1]; } else { + char str[100]; int reg = spe_allocate_available_register(gen->f); if (reg < 0) - return false; + return FALSE; + + sprintf(str, "init $%d = %f", reg, val); + spe_comment(gen->f, 0, str); /* update immediate map */ gen->imm_regs[gen->num_imm][ch] = reg; @@ -1938,7 +1991,7 @@ emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed) gen->num_imm++; - return true; + return TRUE; } @@ -1963,7 +2016,7 @@ emit_declaration(struct cell_context *cell, for (ch = 0; ch < 4; ch++) { gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f); if (gen->temp_regs[i][ch] < 0) - return false; /* out of regs */ + return FALSE; /* out of regs */ } /* XXX if we run out of SPE registers, we need to spill @@ -1983,7 +2036,7 @@ emit_declaration(struct cell_context *cell, ; /* ignore */ } - return true; + return TRUE; } @@ -2019,7 +2072,7 @@ cell_gen_fragment_program(struct cell_context *cell, spe_allocate_register(f, gen.constants_reg); if (cell->debug_flags & CELL_DEBUG_ASM) { - spe_print_code(f, true); + spe_print_code(f, TRUE); spe_indent(f, 8); printf("Begin %s\n", __FUNCTION__); tgsi_dump(tokens, 0); @@ -2035,17 +2088,17 @@ cell_gen_fragment_program(struct cell_context *cell, switch (parse.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_IMMEDIATE: if (!emit_immediate(&gen, &parse.FullToken.FullImmediate)) - gen.error = true; + gen.error = TRUE; break; case TGSI_TOKEN_TYPE_DECLARATION: if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration)) - gen.error = true; + gen.error = TRUE; break; case TGSI_TOKEN_TYPE_INSTRUCTION: if (!emit_instruction(&gen, &parse.FullToken.FullInstruction)) - gen.error = true; + gen.error = TRUE; break; default: diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index d2235579507..6fc2257e2a3 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -81,8 +81,12 @@ cell_get_param(struct pipe_screen *screen, int param) return 8; /* max 128x128x128 */ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return CELL_MAX_TEXTURE_LEVELS; + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; /* XXX not really true */ + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + return 0; /* XXX to do */ default: - return 10; + return 0; } } @@ -108,7 +112,7 @@ cell_get_paramf(struct pipe_screen *screen, int param) return 16.0; /* arbitrary */ default: - return 10; + return 0; } } diff --git a/src/gallium/drivers/cell/spu/spu_shuffle.h b/src/gallium/drivers/cell/spu/spu_shuffle.h new file mode 100644 index 00000000000..7cbdb814d28 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_shuffle.h @@ -0,0 +1,186 @@ +#ifndef SPU_SHUFFLE_H +#define SPU_SHUFFLE_H + +/* + * Generate shuffle patterns with minimal fuss. + * + * Based on ideas from + * http://www.insomniacgames.com/tech/articles/0408/files/shuffles.pdf + * + * A-P indicates 0-15th position in first vector + * a-p indicates 0-15th position in second vector + * + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + * |00|01|02|03|04|05|06|07|08|09|0a|0b|0c|0d|0e|0f| + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + * | A| B| C| D| + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | A| B| C| D| E| F| G| H| + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + * | A| B| C| D| E| F| G| H| I| J| K| L| M| N| O| P| + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + * + * x or X indicates 0xff + * 8 indicates 0x80 + * 0 indicates 0x00 + * + * The macros SHUFFLE4() SHUFFLE8() and SHUFFLE16() provide a const vector + * unsigned char literal suitable for use with spu_shuffle(). + * + * The macros SHUFB4() SHUFB8() and SHUFB16() provide a const qword vector + * literal suitable for use with si_shufb(). + * + * + * For example : + * SHUFB4(A,A,A,A) + * expands to : + * ((const qword){0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3}) + * + * SHUFFLE8(A,B,a,b,C,c,8,8) + * expands to : + * ((const vector unsigned char){0x00,0x01,0x02,0x03,0x10,0x11,0x12,0x13, + * 0x04,0x05,0x14,0x15,0xe0,0xe0,0xe0,0xe0}) + * + */ + +#include <spu_intrinsics.h> + +#define SHUFFLE_PATTERN_4_A__ 0x00, 0x01, 0x02, 0x03 +#define SHUFFLE_PATTERN_4_B__ 0x04, 0x05, 0x06, 0x07 +#define SHUFFLE_PATTERN_4_C__ 0x08, 0x09, 0x0a, 0x0b +#define SHUFFLE_PATTERN_4_D__ 0x0c, 0x0d, 0x0e, 0x0f +#define SHUFFLE_PATTERN_4_a__ 0x10, 0x11, 0x12, 0x13 +#define SHUFFLE_PATTERN_4_b__ 0x14, 0x15, 0x16, 0x17 +#define SHUFFLE_PATTERN_4_c__ 0x18, 0x19, 0x1a, 0x1b +#define SHUFFLE_PATTERN_4_d__ 0x1c, 0x1d, 0x1e, 0x1f +#define SHUFFLE_PATTERN_4_X__ 0xc0, 0xc0, 0xc0, 0xc0 +#define SHUFFLE_PATTERN_4_x__ 0xc0, 0xc0, 0xc0, 0xc0 +#define SHUFFLE_PATTERN_4_0__ 0x80, 0x80, 0x80, 0x80 +#define SHUFFLE_PATTERN_4_8__ 0xe0, 0xe0, 0xe0, 0xe0 + +#define SHUFFLE_VECTOR_4__(A, B, C, D) \ + SHUFFLE_PATTERN_4_##A##__, \ + SHUFFLE_PATTERN_4_##B##__, \ + SHUFFLE_PATTERN_4_##C##__, \ + SHUFFLE_PATTERN_4_##D##__ + +#define SHUFFLE4(A, B, C, D) \ + ((const vector unsigned char){ \ + SHUFFLE_VECTOR_4__(A, B, C, D) \ + }) + +#define SHUFB4(A, B, C, D) \ + ((const qword){ \ + SHUFFLE_VECTOR_4__(A, B, C, D) \ + }) + + +#define SHUFFLE_PATTERN_8_A__ 0x00, 0x01 +#define SHUFFLE_PATTERN_8_B__ 0x02, 0x03 +#define SHUFFLE_PATTERN_8_C__ 0x04, 0x05 +#define SHUFFLE_PATTERN_8_D__ 0x06, 0x07 +#define SHUFFLE_PATTERN_8_E__ 0x08, 0x09 +#define SHUFFLE_PATTERN_8_F__ 0x0a, 0x0b +#define SHUFFLE_PATTERN_8_G__ 0x0c, 0x0d +#define SHUFFLE_PATTERN_8_H__ 0x0e, 0x0f +#define SHUFFLE_PATTERN_8_a__ 0x10, 0x11 +#define SHUFFLE_PATTERN_8_b__ 0x12, 0x13 +#define SHUFFLE_PATTERN_8_c__ 0x14, 0x15 +#define SHUFFLE_PATTERN_8_d__ 0x16, 0x17 +#define SHUFFLE_PATTERN_8_e__ 0x18, 0x19 +#define SHUFFLE_PATTERN_8_f__ 0x1a, 0x1b +#define SHUFFLE_PATTERN_8_g__ 0x1c, 0x1d +#define SHUFFLE_PATTERN_8_h__ 0x1e, 0x1f +#define SHUFFLE_PATTERN_8_X__ 0xc0, 0xc0 +#define SHUFFLE_PATTERN_8_x__ 0xc0, 0xc0 +#define SHUFFLE_PATTERN_8_0__ 0x80, 0x80 +#define SHUFFLE_PATTERN_8_8__ 0xe0, 0xe0 + + +#define SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ + SHUFFLE_PATTERN_8_##A##__, \ + SHUFFLE_PATTERN_8_##B##__, \ + SHUFFLE_PATTERN_8_##C##__, \ + SHUFFLE_PATTERN_8_##D##__, \ + SHUFFLE_PATTERN_8_##E##__, \ + SHUFFLE_PATTERN_8_##F##__, \ + SHUFFLE_PATTERN_8_##G##__, \ + SHUFFLE_PATTERN_8_##H##__ + +#define SHUFFLE8(A, B, C, D, E, F, G, H) \ + ((const vector unsigned char){ \ + SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ + }) + +#define SHUFB8(A, B, C, D, E, F, G, H) \ + ((const qword){ \ + SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ + }) + + +#define SHUFFLE_PATTERN_16_A__ 0x00 +#define SHUFFLE_PATTERN_16_B__ 0x01 +#define SHUFFLE_PATTERN_16_C__ 0x02 +#define SHUFFLE_PATTERN_16_D__ 0x03 +#define SHUFFLE_PATTERN_16_E__ 0x04 +#define SHUFFLE_PATTERN_16_F__ 0x05 +#define SHUFFLE_PATTERN_16_G__ 0x06 +#define SHUFFLE_PATTERN_16_H__ 0x07 +#define SHUFFLE_PATTERN_16_I__ 0x08 +#define SHUFFLE_PATTERN_16_J__ 0x09 +#define SHUFFLE_PATTERN_16_K__ 0x0a +#define SHUFFLE_PATTERN_16_L__ 0x0b +#define SHUFFLE_PATTERN_16_M__ 0x0c +#define SHUFFLE_PATTERN_16_N__ 0x0d +#define SHUFFLE_PATTERN_16_O__ 0x0e +#define SHUFFLE_PATTERN_16_P__ 0x0f +#define SHUFFLE_PATTERN_16_a__ 0x10 +#define SHUFFLE_PATTERN_16_b__ 0x11 +#define SHUFFLE_PATTERN_16_c__ 0x12 +#define SHUFFLE_PATTERN_16_d__ 0x13 +#define SHUFFLE_PATTERN_16_e__ 0x14 +#define SHUFFLE_PATTERN_16_f__ 0x15 +#define SHUFFLE_PATTERN_16_g__ 0x16 +#define SHUFFLE_PATTERN_16_h__ 0x17 +#define SHUFFLE_PATTERN_16_i__ 0x18 +#define SHUFFLE_PATTERN_16_j__ 0x19 +#define SHUFFLE_PATTERN_16_k__ 0x1a +#define SHUFFLE_PATTERN_16_l__ 0x1b +#define SHUFFLE_PATTERN_16_m__ 0x1c +#define SHUFFLE_PATTERN_16_n__ 0x1d +#define SHUFFLE_PATTERN_16_o__ 0x1e +#define SHUFFLE_PATTERN_16_p__ 0x1f +#define SHUFFLE_PATTERN_16_X__ 0xc0 +#define SHUFFLE_PATTERN_16_x__ 0xc0 +#define SHUFFLE_PATTERN_16_0__ 0x80 +#define SHUFFLE_PATTERN_16_8__ 0xe0 + +#define SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ + SHUFFLE_PATTERN_16_##A##__, \ + SHUFFLE_PATTERN_16_##B##__, \ + SHUFFLE_PATTERN_16_##C##__, \ + SHUFFLE_PATTERN_16_##D##__, \ + SHUFFLE_PATTERN_16_##E##__, \ + SHUFFLE_PATTERN_16_##F##__, \ + SHUFFLE_PATTERN_16_##G##__, \ + SHUFFLE_PATTERN_16_##H##__, \ + SHUFFLE_PATTERN_16_##I##__, \ + SHUFFLE_PATTERN_16_##J##__, \ + SHUFFLE_PATTERN_16_##K##__, \ + SHUFFLE_PATTERN_16_##L##__, \ + SHUFFLE_PATTERN_16_##M##__, \ + SHUFFLE_PATTERN_16_##N##__, \ + SHUFFLE_PATTERN_16_##O##__, \ + SHUFFLE_PATTERN_16_##P + +#define SHUFFLE16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ + ((const vector unsigned char){ \ + SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ + }) + +#define SHUFB16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ + ((const qword){ \ + SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ + }) + +#endif diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 22e51a86ae5..322be1252e9 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -35,6 +35,7 @@ #include "util/u_math.h" #include "spu_colorpack.h" #include "spu_main.h" +#include "spu_shuffle.h" #include "spu_texture.h" #include "spu_tile.h" #include "spu_tri.h" @@ -76,8 +77,13 @@ struct vertex_header { * Triangle edge info */ struct edge { - float dx; /**< X(v1) - X(v0), used only during setup */ - float dy; /**< Y(v1) - Y(v0), used only during setup */ + union { + struct { + float dx; /**< X(v1) - X(v0), used only during setup */ + float dy; /**< Y(v1) - Y(v0), used only during setup */ + }; + vec_float4 ds; /**< vector accessor for dx and dy */ + }; float dxdy; /**< dx/dy */ float sx, sy; /**< first sample point coord */ int lines; /**< number of lines on this edge */ @@ -102,10 +108,15 @@ struct setup_stage { * turn. Currently fixed at 4 floats, but should change in time. * Codegen will help cope with this. */ - const struct vertex_header *vmax; - const struct vertex_header *vmid; - const struct vertex_header *vmin; - const struct vertex_header *vprovoke; + union { + struct { + const struct vertex_header *vmin; + const struct vertex_header *vmid; + const struct vertex_header *vmax; + const struct vertex_header *vprovoke; + }; + qword vertex_headers; + }; struct edge ebot; struct edge etop; @@ -122,8 +133,7 @@ struct setup_stage { struct interp_coef coef[PIPE_MAX_SHADER_INPUTS]; struct { - int left[2]; /**< [0] = row0, [1] = row1 */ - int right[2]; + vec_int4 quad; /**< [0] = row0, [1] = row1; {left[0],left[1],right[0],right[1]} */ int y; unsigned y_flags; unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ @@ -306,52 +316,35 @@ block(int x) /** - * Compute mask which indicates which pixels in the 2x2 quad are actually inside - * the triangle's bounds. - * The mask is a uint4 vector and each element will be 0 or 0xffffffff. - */ -static INLINE mask_t -calculate_mask(int x) -{ - /* This is a little tricky. - * Use & instead of && to avoid branches. - * Use negation to convert true/false to ~0/0 values. - */ - mask_t mask; - mask = spu_insert(-((x >= setup.span.left[0]) & (x < setup.span.right[0])), mask, 0); - mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1); - mask = spu_insert(-((x >= setup.span.left[1]) & (x < setup.span.right[1])), mask, 2); - mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3); - return mask; -} - - -/** * Render a horizontal span of quads */ static void flush_spans(void) { int minleft, maxright; - int x; + + const int l0 = spu_extract(setup.span.quad, 0); + const int l1 = spu_extract(setup.span.quad, 1); + const int r0 = spu_extract(setup.span.quad, 2); + const int r1 = spu_extract(setup.span.quad, 3); switch (setup.span.y_flags) { case 0x3: /* both odd and even lines written (both quad rows) */ - minleft = MIN2(setup.span.left[0], setup.span.left[1]); - maxright = MAX2(setup.span.right[0], setup.span.right[1]); + minleft = MIN2(l0, l1); + maxright = MAX2(r0, r1); break; case 0x1: /* only even line written (quad top row) */ - minleft = setup.span.left[0]; - maxright = setup.span.right[0]; + minleft = l0; + maxright = r0; break; case 0x2: /* only odd line written (quad bottom row) */ - minleft = setup.span.left[1]; - maxright = setup.span.right[1]; + minleft = l1; + maxright = r1; break; default: @@ -389,17 +382,42 @@ flush_spans(void) ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED); } - /* XXX this loop could be moved into the above switch cases and - * calculate_mask() could be simplified a bit... - */ - for (x = block(minleft); x <= block(maxright); x += 2) { - emit_quad( x, setup.span.y, calculate_mask( x )); + /* XXX this loop could be moved into the above switch cases... */ + + /* Setup for mask calculation */ + const vec_int4 quad_LlRr = setup.span.quad; + const vec_int4 quad_RrLl = spu_rlqwbyte(quad_LlRr, 8); + const vec_int4 quad_LLll = spu_shuffle(quad_LlRr, quad_LlRr, SHUFFLE4(A,A,B,B)); + const vec_int4 quad_RRrr = spu_shuffle(quad_RrLl, quad_RrLl, SHUFFLE4(A,A,B,B)); + + const vec_int4 twos = spu_splats(2); + + const int x = block(minleft); + vec_int4 xs = {x, x+1, x, x+1}; + + for (; spu_extract(xs, 0) <= block(maxright); xs += twos) { + /** + * Computes mask to indicate which pixels in the 2x2 quad are actually + * inside the triangle's bounds. + */ + + /* Calculate ({x,x+1,x,x+1} >= {l[0],l[0],l[1],l[1]}) */ + const mask_t gt_LLll_xs = spu_cmpgt(quad_LLll, xs); + const mask_t gte_xs_LLll = spu_nand(gt_LLll_xs, gt_LLll_xs); + + /* Calculate ({r[0],r[0],r[1],r[1]} > {x,x+1,x,x+1}) */ + const mask_t gt_RRrr_xs = spu_cmpgt(quad_RRrr, xs); + + /* Combine results to create mask */ + const mask_t mask = spu_and(gte_xs_LLll, gt_RRrr_xs); + + emit_quad(spu_extract(xs, 0), setup.span.y, mask); } setup.span.y = 0; setup.span.y_flags = 0; - setup.span.right[0] = 0; - setup.span.right[1] = 0; + /* Zero right elements */ + setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0)); } @@ -444,55 +462,39 @@ setup_sort_vertices(const struct vertex_header *v0, /* determine bottom to top order of vertices */ { - float y0 = spu_extract(v0->data[0], 1); - float y1 = spu_extract(v1->data[0], 1); - float y2 = spu_extract(v2->data[0], 1); - if (y0 <= y1) { - if (y1 <= y2) { - /* y0<=y1<=y2 */ - setup.vmin = v0; - setup.vmid = v1; - setup.vmax = v2; - sign = -1.0f; - } - else if (y2 <= y0) { - /* y2<=y0<=y1 */ - setup.vmin = v2; - setup.vmid = v0; - setup.vmax = v1; - sign = -1.0f; - } - else { - /* y0<=y2<=y1 */ - setup.vmin = v0; - setup.vmid = v2; - setup.vmax = v1; - sign = 1.0f; - } - } - else { - if (y0 <= y2) { - /* y1<=y0<=y2 */ - setup.vmin = v1; - setup.vmid = v0; - setup.vmax = v2; - sign = 1.0f; - } - else if (y2 <= y1) { - /* y2<=y1<=y0 */ - setup.vmin = v2; - setup.vmid = v1; - setup.vmax = v0; - sign = 1.0f; - } - else { - /* y1<=y2<=y0 */ - setup.vmin = v1; - setup.vmid = v2; - setup.vmax = v0; - sign = -1.0f; - } - } + /* A table of shuffle patterns for putting vertex_header pointers into + correct order. Quite magical. */ + const vec_uchar16 sort_order_patterns[] = { + SHUFFLE4(A,B,C,C), + SHUFFLE4(C,A,B,C), + SHUFFLE4(A,C,B,C), + SHUFFLE4(B,C,A,C), + SHUFFLE4(B,A,C,C), + SHUFFLE4(C,B,A,C) }; + + /* The vertex_header pointers, packed for easy shuffling later */ + const vec_uint4 vs = {(unsigned)v0, (unsigned)v1, (unsigned)v2}; + + /* Collate y values into two vectors for comparison. + Using only one shuffle constant! ;) */ + const vec_float4 y_02_ = spu_shuffle(v0->data[0], v2->data[0], SHUFFLE4(0,B,b,C)); + const vec_float4 y_10_ = spu_shuffle(v1->data[0], v0->data[0], SHUFFLE4(0,B,b,C)); + const vec_float4 y_012 = spu_shuffle(y_02_, v1->data[0], SHUFFLE4(0,B,b,C)); + const vec_float4 y_120 = spu_shuffle(y_10_, v2->data[0], SHUFFLE4(0,B,b,C)); + + /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */ + const vec_uint4 compare = spu_cmpgt(y_012, y_120); + /* Compress the result of the comparison into 4 bits */ + const vec_uint4 gather = spu_gather(compare); + /* Subtract one to attain the index into the LUT. Magical. */ + const unsigned int index = spu_extract(gather, 0) - 1; + + /* Load the appropriate pattern and construct the desired vector. */ + setup.vertex_headers = (qword)spu_shuffle(vs, vs, sort_order_patterns[index]); + + /* Using the result of the comparison, set sign. + Very magical. */ + sign = ((si_to_uint(si_cntb((qword)gather)) == 2) ? 1.0f : -1.0f); } /* Check if triangle is completely outside the tile bounds */ @@ -509,12 +511,9 @@ setup_sort_vertices(const struct vertex_header *v0, spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx) return FALSE; - setup.ebot.dx = spu_extract(setup.vmid->data[0], 0) - spu_extract(setup.vmin->data[0], 0); - setup.ebot.dy = spu_extract(setup.vmid->data[0], 1) - spu_extract(setup.vmin->data[0], 1); - setup.emaj.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmin->data[0], 0); - setup.emaj.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmin->data[0], 1); - setup.etop.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmid->data[0], 0); - setup.etop.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmid->data[0], 1); + setup.ebot.ds = spu_sub(setup.vmid->data[0], setup.vmin->data[0]); + setup.emaj.ds = spu_sub(setup.vmax->data[0], setup.vmin->data[0]); + setup.etop.ds = spu_sub(setup.vmax->data[0], setup.vmid->data[0]); /* * Compute triangle's area. Use 1/area to compute partial @@ -535,8 +534,6 @@ setup_sort_vertices(const struct vertex_header *v0, setup.facing = (area * sign > 0.0f) ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW); - setup.vprovoke = v2; - return TRUE; } @@ -746,9 +743,11 @@ subtriangle(struct edge *eleft, struct edge *eright, unsigned lines) setup.span.y = block(_y); } - setup.span.left[_y&1] = left; - setup.span.right[_y&1] = right; - setup.span.y_flags |= 1<<(_y&1); + int offset = _y&1; + vec_int4 quad_LlRr = {left, left, right, right}; + /* Store left and right in 0 or 1 row of quad based on offset */ + setup.span.quad = spu_sel(quad_LlRr, setup.span.quad, spu_maskw(5<<offset)); + setup.span.y_flags |= 1<<offset; } } @@ -790,8 +789,8 @@ tri_draw(const float *v0, const float *v1, const float *v2, setup.span.y = 0; setup.span.y_flags = 0; - setup.span.right[0] = 0; - setup.span.right[1] = 0; + /* Zero right elements */ + setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0)); if (setup.oneOverArea < 0.0) { /* emaj on left */ diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index 4fda1ab64f5..a8e97e7c306 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -197,9 +197,7 @@ i915_vbuf_render_set_primitive( struct vbuf_render *render, i915_render->fallback = 0; return TRUE; default: - assert((int)"Error unkown primtive type" & 0); - /* Actually, can handle a lot more just fine... Fixme. - */ + /* FIXME: Actually, can handle a lot more just fine... */ return FALSE; } } diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c index d40d75f2640..2f974cf5c40 100644 --- a/src/gallium/drivers/nv30/nv30_query.c +++ b/src/gallium/drivers/nv30/nv30_query.c @@ -50,7 +50,7 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) * the existing query to notify completion, but it could be better. */ if (q->object) { - uint64 tmp; + uint64_t tmp; pipe->get_query_result(pipe, pq, 1, &tmp); } @@ -80,7 +80,7 @@ nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq) static boolean nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq, - boolean wait, uint64 *result) + boolean wait, uint64_t *result) { struct nv30_context *nv30 = nv30_context(pipe); struct nv30_query *q = nv30_query(pq); diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index 40fed621b24..9480695d6e5 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -49,7 +49,7 @@ nv30_state_emit(struct nv30_context *nv30) struct nv30_state *state = &nv30->state; struct nv30_screen *screen = nv30->screen; unsigned i, samplers; - uint64 states; + uint64_t states; if (nv30->pctx_id != screen->cur_pctx) { for (i = 0; i < NV30_STATE_MAX; i++) { diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c index 57f39cfab0c..9b9a43f49df 100644 --- a/src/gallium/drivers/nv40/nv40_query.c +++ b/src/gallium/drivers/nv40/nv40_query.c @@ -50,7 +50,7 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) * the existing query to notify completion, but it could be better. */ if (q->object) { - uint64 tmp; + uint64_t tmp; pipe->get_query_result(pipe, pq, 1, &tmp); } @@ -80,7 +80,7 @@ nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq) static boolean nv40_query_result(struct pipe_context *pipe, struct pipe_query *pq, - boolean wait, uint64 *result) + boolean wait, uint64_t *result) { struct nv40_context *nv40 = nv40_context(pipe); struct nv40_query *q = nv40_query(pq); diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index ab88dc416e5..52ec4c044b4 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -65,7 +65,7 @@ nv40_state_emit(struct nv40_context *nv40) struct nv40_state *state = &nv40->state; struct nv40_screen *screen = nv40->screen; unsigned i, samplers; - uint64 states; + uint64_t states; if (nv40->pctx_id != screen->cur_pctx) { for (i = 0; i < NV40_STATE_MAX; i++) { diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 5d377f2d067..0958bba334a 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -70,6 +70,10 @@ struct nv50_rasterizer_stateobj { struct nv50_miptree { struct pipe_texture base; struct pipe_buffer *buffer; + + int *image_offset; + int image_nr; + int total_size; }; static INLINE struct nv50_miptree * diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 28a8bdc0fab..24973712324 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -31,7 +31,8 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) { struct pipe_winsys *ws = pscreen->winsys; struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); - unsigned usage, pitch; + unsigned usage, width = pt->width[0], height = pt->height[0]; + int i; mt->base = *pt; mt->base.refcount = 1; @@ -47,11 +48,31 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) break; } - pitch = ((pt->width[0] + 63) & ~63) * pt->block.size; - /*XXX*/ - pitch *= 2; + switch (pt->target) { + case PIPE_TEXTURE_3D: + mt->image_nr = pt->depth[0]; + break; + case PIPE_TEXTURE_CUBE: + mt->image_nr = 6; + break; + default: + mt->image_nr = 1; + break; + } + mt->image_offset = CALLOC(mt->image_nr, sizeof(int)); - mt->buffer = ws->buffer_create(ws, 256, usage, pitch * pt->height[0]); + for (i = 0; i < mt->image_nr; i++) { + int image_size; + + image_size = align(width, 8) * pt->block.size; + image_size = align(image_size, 64); + image_size *= align(height, 8) * pt->block.size; + + mt->image_offset[i] = mt->total_size; + mt->total_size += image_size; + } + + mt->buffer = ws->buffer_create(ws, 256, usage, mt->total_size); if (!mt->buffer) { FREE(mt); return NULL; @@ -83,6 +104,15 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, struct nv50_miptree *mt = nv50_miptree(pt); struct nv50_surface *s; struct pipe_surface *ps; + int img; + + if (pt->target == PIPE_TEXTURE_CUBE) + img = face; + else + if (pt->target == PIPE_TEXTURE_3D) + img = zslice; + else + img = 0; s = CALLOC_STRUCT(nv50_surface); if (!s) @@ -98,7 +128,7 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ps->nblocksx = pt->nblocksx[level]; ps->nblocksy = pt->nblocksy[level]; ps->stride = ps->width * ps->block.size; - ps->offset = 0; + ps->offset = mt->image_offset[img]; ps->usage = flags; ps->status = PIPE_SURFACE_STATUS_DEFINED; diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index d6fbdd18243..d66e1d0949d 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -179,6 +179,38 @@ free_temp(struct nv50_pc *pc, struct nv50_reg *r) } } +static int +alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx) +{ + int i; + + if ((idx + 4) >= NV50_SU_MAX_TEMP) + return 1; + + if (pc->r_temp[idx] || pc->r_temp[idx + 1] || + pc->r_temp[idx + 2] || pc->r_temp[idx + 3]) + return alloc_temp4(pc, dst, idx + 1); + + for (i = 0; i < 4; i++) { + dst[i] = CALLOC_STRUCT(nv50_reg); + dst[i]->type = P_TEMP; + dst[i]->index = -1; + dst[i]->hw = idx + i; + pc->r_temp[idx + i] = dst[i]; + } + + return 0; +} + +static void +free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4]) +{ + int i; + + for (i = 0; i < 4; i++) + free_temp(pc, reg[i]); +} + static struct nv50_reg * temp_temp(struct nv50_pc *pc) { @@ -902,7 +934,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) { const struct tgsi_full_instruction *inst = &tok->FullInstruction; struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; - unsigned mask, sat; + unsigned mask, sat, unit; int i, c; mask = inst->FullDstRegisters[0].DstRegister.WriteMask; @@ -916,8 +948,13 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; + + if (fs->SrcRegister.File == TGSI_FILE_SAMPLER) + unit = fs->SrcRegister.Index; + for (c = 0; c < 4; c++) - src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]); + src[i][c] = tgsi_src(pc, c, fs); } if (sat) { @@ -1155,35 +1192,30 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } break; case TGSI_OPCODE_TEX: - { - struct nv50_reg *t0, *t1, *t2, *t3; - struct nv50_program_exec *e; + case TGSI_OPCODE_TXP: + { + struct nv50_reg *t[4]; + struct nv50_program_exec *e; - t0 = alloc_temp(pc, NULL); - t0 = alloc_temp(pc, NULL); - t1 = alloc_temp(pc, NULL); - t2 = alloc_temp(pc, NULL); - t3 = alloc_temp(pc, NULL); - emit_mov(pc, t0, src[0][0]); - emit_mov(pc, t1, src[0][1]); + alloc_temp4(pc, t, 0); + emit_mov(pc, t[0], src[0][0]); + emit_mov(pc, t[1], src[0][1]); - e = exec(pc); - e->inst[0] = 0xf6400000; - set_long(pc, e); - e->inst[1] |= 0x0000c004; - set_dst(pc, t0, e); - emit(pc, e); + e = exec(pc); + e->inst[0] = 0xf6400000; + e->inst[0] |= (unit << 9); + set_long(pc, e); + e->inst[1] |= 0x0000c004; + set_dst(pc, t[0], e); + emit(pc, e); - if (mask & (1 << 0)) emit_mov(pc, dst[0], t0); - if (mask & (1 << 1)) emit_mov(pc, dst[1], t1); - if (mask & (1 << 2)) emit_mov(pc, dst[2], t2); - if (mask & (1 << 3)) emit_mov(pc, dst[3], t3); + if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]); + if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]); + if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]); + if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]); - free_temp(pc, t0); - free_temp(pc, t1); - free_temp(pc, t2); - free_temp(pc, t3); - } + free_temp4(pc, t); + } break; case TGSI_OPCODE_XPD: temp = alloc_temp(pc, NULL); @@ -1570,8 +1602,13 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) if (!upload) return; + NOUVEAU_ERR("-------\n"); up = ptr = MALLOC(p->exec_size * 4); for (e = p->exec_head; e; e = e->next) { + NOUVEAU_ERR("0x%08x\n", e->inst[0]); + if (is_long(e)) + NOUVEAU_ERR("0x%08x\n", e->inst[1]); + *(ptr++) = e->inst[0]; if (is_long(e)) *(ptr++) = e->inst[1]; @@ -1687,7 +1724,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) { - struct pipe_winsys *ws = nv50->pipe.winsys; + struct pipe_screen *pscreen = nv50->pipe.screen; while (p->exec_head) { struct nv50_program_exec *e = p->exec_head; @@ -1699,7 +1736,7 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) p->exec_size = 0; if (p->buffer) - pipe_buffer_reference(ws, &p->buffer, NULL); + pipe_buffer_reference(pscreen, &p->buffer, NULL); nv50->screen->nvws->res_free(&p->data); diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 26bd90ccc5e..777e77906d5 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -51,7 +51,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *q) static boolean nv50_query_result(struct pipe_context *pipe, struct pipe_query *q, - boolean wait, uint64 *result) + boolean wait, uint64_t *result) { NOUVEAU_ERR("unimplemented\n"); *result = 0xdeadcafe; diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 7ab12a6d702..e2451c6ecb5 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -94,7 +94,7 @@ struct softpipe_context { /* Counter for occlusion queries. Note this supports overlapping * queries. */ - uint64 occlusion_count; + uint64_t occlusion_count; /* * Mapped vertex buffers diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 40329a95627..5dacbbe55f8 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -171,7 +171,6 @@ static void shade_destroy(struct quad_stage *qs) struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) { struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage); - uint i; /* allocate storage for program inputs/outputs, aligned to 16 bytes */ qss->inputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->inputs) + 16); diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index 2106ee1d235..b0d8e01426d 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -37,8 +37,8 @@ #include "sp_query.h" struct softpipe_query { - uint64 start; - uint64 end; + uint64_t start; + uint64_t end; }; @@ -87,7 +87,7 @@ static boolean softpipe_get_query_result(struct pipe_context *pipe, struct pipe_query *q, boolean wait, - uint64 *result ) + uint64_t *result ) { struct softpipe_query *sq = softpipe_query(q); *result = sq->end - sq->start; diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 12f98c32f53..11b08b3a82d 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -55,9 +55,9 @@ softpipe_get_param(struct pipe_screen *screen, int param) { switch (param) { case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; + return PIPE_MAX_SAMPLERS; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 8; + return PIPE_MAX_SAMPLERS; case PIPE_CAP_NPOT_TEXTURES: return 1; case PIPE_CAP_TWO_SIDED_STENCIL: diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 0cb4b2f03c3..a64dc89f432 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -94,31 +94,50 @@ softpipe_texture_layout(struct pipe_screen *screen, return spt->buffer != NULL; } +/* Hack it up to use the old winsys->surface_alloc_storage() + * method for now: + */ static boolean softpipe_displaytarget_layout(struct pipe_screen *screen, struct softpipe_texture * spt) { struct pipe_winsys *ws = screen->winsys; - size_t tex_size; - unsigned cpp; - - switch (spt->base.format) { - case PIPE_FORMAT_R5G6B5_UNORM: - cpp = 2; - break; - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_A8R8G8B8_UNORM: - default: - cpp = 4; - break; + struct pipe_surface surf; + unsigned flags = (PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE); + int ret; + + + memset(&surf, 0, sizeof(surf)); + + ret =ws->surface_alloc_storage( ws, + &surf, + spt->base.width[0], + spt->base.height[0], + spt->base.format, + flags, + spt->base.tex_usage); + if(ret != 0) + return FALSE; + + if (!surf.buffer) { + /* allocation failed */ + return FALSE; } - tex_size = spt->base.width[0] * cpp * spt->base.height[0]; - spt->buffer = ws->buffer_create(ws, 64, PIPE_BUFFER_USAGE_PIXEL, tex_size); + /* Now extract the goodies: */ spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]); spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); - spt->stride[0] = spt->base.width[0] * cpp; + spt->stride[0] = surf.stride; + + /* Transfer the reference: + */ + spt->buffer = surf.buffer; + surf.buffer = NULL; + return spt->buffer != NULL; } @@ -220,10 +239,8 @@ softpipe_get_tex_surface(struct pipe_screen *screen, ps = CALLOC_STRUCT(pipe_surface); ps->refcount = 1; - ps->winsys = ws; if (ps) { assert(ps->refcount); - assert(ps->winsys); pipe_texture_reference(&ps->texture, pt); pipe_buffer_reference(screen, &ps->buffer, spt->buffer); ps->format = pt->format; diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 1dd77193791..f0d51ad82ef 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -274,11 +274,11 @@ static INLINE boolean trace_context_get_query_result(struct pipe_context *_pipe, struct pipe_query *query, boolean wait, - uint64 *presult) + uint64_t *presult) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; - uint64 result; + uint64_t result; boolean _result; trace_dump_call_begin("pipe_context", "get_query_result"); diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index 7bcebd3d6b6..bc2a0a7ef3a 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -96,7 +96,6 @@ typedef int _Bool; typedef unsigned int uint; typedef unsigned char ubyte; typedef unsigned short ushort; -typedef uint64_t uint64; #if 0 #define boolean bool @@ -112,20 +111,22 @@ typedef unsigned char boolean; /* Function inlining */ -#ifdef __cplusplus -# define INLINE inline -#elif defined(__GNUC__) -# define INLINE __inline__ -#elif defined(_MSC_VER) -# define INLINE __inline -#elif defined(__ICL) -# define INLINE __inline -#elif defined(__INTEL_COMPILER) -# define INLINE inline -#elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) -# define INLINE __inline -#else -# define INLINE +#ifndef INLINE +# ifdef __cplusplus +# define INLINE inline +# elif defined(__GNUC__) +# define INLINE __inline__ +# elif defined(_MSC_VER) +# define INLINE __inline +# elif defined(__ICL) +# define INLINE __inline +# elif defined(__INTEL_COMPILER) +# define INLINE inline +# elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) +# define INLINE __inline +# else +# define INLINE +# endif #endif diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 2646706ff23..166c6b6b7e0 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -109,7 +109,7 @@ struct pipe_context { boolean (*get_query_result)(struct pipe_context *pipe, struct pipe_query *q, boolean wait, - uint64 *result); + uint64_t *result); /*@}*/ /** diff --git a/src/gallium/state_trackers/python/p_context.i b/src/gallium/state_trackers/python/p_context.i index 1fdcec639f7..7b8b64b5923 100644 --- a/src/gallium/state_trackers/python/p_context.i +++ b/src/gallium/state_trackers/python/p_context.i @@ -245,7 +245,7 @@ struct st_context { memcpy(map, vertices, size); pipe_buffer_unmap(screen, vbuf); - util_draw_vertex_buffer(pipe, vbuf, prim, num_verts, num_attribs); + util_draw_vertex_buffer(pipe, vbuf, 0, prim, num_verts, num_attribs); error2: pipe_buffer_reference(screen, &vbuf, NULL); diff --git a/src/gallium/winsys/gdi/SConscript b/src/gallium/winsys/gdi/SConscript index fdcdde101ac..e68d5db7f4a 100644 --- a/src/gallium/winsys/gdi/SConscript +++ b/src/gallium/winsys/gdi/SConscript @@ -19,7 +19,7 @@ if env['platform'] == 'windows': env.Append(LIBS = [ 'gdi32', - 'user32',ss + 'user32', 'kernel32', ]) diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c index e66ce48f2dc..bd5aa10a20f 100644 --- a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c +++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c @@ -320,7 +320,7 @@ gdi_softpipe_flush_frontbuffer(struct pipe_winsys *winsys, memset(&bmi, 0, sizeof(BITMAPINFO)); bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER); bmi.bmiHeader.biWidth = surface->stride / pf_get_size(surface->format); - bmi.bmiHeader.biHeight= -surface->height; + bmi.bmiHeader.biHeight= -(long)surface->height; bmi.bmiHeader.biPlanes = 1; bmi.bmiHeader.biBitCount = pf_get_bits(surface->format); bmi.bmiHeader.biCompression = BI_RGB; @@ -337,8 +337,23 @@ gdi_softpipe_flush_frontbuffer(struct pipe_winsys *winsys, } -const struct stw_winsys stw_winsys = { +static const struct stw_winsys stw_winsys = { &gdi_softpipe_screen_create, &gdi_softpipe_context_create, &gdi_softpipe_flush_frontbuffer }; + + +BOOL WINAPI +DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) +{ + switch (fdwReason) { + case DLL_PROCESS_ATTACH: + return st_init(&stw_winsys); + + case DLL_PROCESS_DETACH: + st_cleanup(); + break; + } + return TRUE; +} diff --git a/src/glu/sgi/glu.exports b/src/glu/sgi/glu.exports index 1d1b6da24fe..aeb72729166 100644 --- a/src/glu/sgi/glu.exports +++ b/src/glu/sgi/glu.exports @@ -57,3 +57,62 @@ gluTessVertex gluUnProject gluUnProject4 + mgluBeginCurve + mgluBeginPolygon + mgluBeginSurface + mgluBeginTrim + mgluBuild1DMipmapLevels + mgluBuild1DMipmaps + mgluBuild2DMipmapLevels + mgluBuild2DMipmaps + mgluBuild3DMipmapLevels + mgluBuild3DMipmaps + mgluCheckExtension + mgluCylinder + mgluDeleteNurbsRenderer + mgluDeleteQuadric + mgluDeleteTess + mgluDisk + mgluEndCurve + mgluEndPolygon + mgluEndSurface + mgluEndTrim + mgluErrorString + mgluGetNurbsProperty + mgluGetString + mgluGetTessProperty + mgluLoadSamplingMatrices + mgluLookAt + mgluNewNurbsRenderer + mgluNewQuadric + mgluNewTess + mgluNextContour + mgluNurbsCallback + mgluNurbsCallbackData + mgluNurbsCallbackDataEXT + mgluNurbsCurve + mgluNurbsProperty + mgluNurbsSurface + mgluOrtho2D + mgluPartialDisk + mgluPerspective + mgluPickMatrix + mgluProject + mgluPwlCurve + mgluQuadricCallback + mgluQuadricDrawStyle + mgluQuadricNormals + mgluQuadricOrientation + mgluQuadricTexture + mgluScaleImage + mgluSphere + mgluTessBeginContour + mgluTessBeginPolygon + mgluTessCallback + mgluTessEndContour + mgluTessEndPolygon + mgluTessNormal + mgluTessProperty + mgluTessVertex + mgluUnProject + mgluUnProject4 diff --git a/src/glut/glx/SConscript b/src/glut/glx/SConscript index 99b3bb7df64..cb442ba027f 100644 --- a/src/glut/glx/SConscript +++ b/src/glut/glx/SConscript @@ -11,7 +11,6 @@ env.Replace(CPPDEFINES = [ 'BUILD_GLUT32', 'GLUT_BUILDING_LIB', 'MESA', - '_DLL', 'NDEBUG', 'GLUT_NO_WARNING_DISABLE', ]) @@ -38,7 +37,6 @@ sources = [ 'glut_dstr.c', 'glut_event.c', 'glut_ext.c', - 'glut_fcb.c', 'glut_fullscrn.c', 'glut_gamemode.c', 'glut_get.c', @@ -51,6 +49,7 @@ sources = [ 'glut_mesa.c', 'glut_modifier.c', 'glut_overlay.c', + 'glut_ppm.c', 'glut_shapes.c', 'glut_space.c', 'glut_stroke.c', diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 274010c54f5..552c20e0b9b 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -34,14 +34,10 @@ libmesa.a: $(MESA_OBJECTS) # Make archive of gl* API dispatcher functions only libglapi.a: $(GLAPI_OBJECTS) - @ $(MKLIB) -o glapi -static $(GLAPI_OBJECTS) - -# Make archive of gl* API dispatcher functions only -$(GLAPI_LIB): $(GLAPI_OBJECTS) @if [ "${WINDOW_SYSTEM}" = "dri" ] ; then \ touch libglapi.a ; \ else \ - $(TOP)/bin/mklib -o glapi -static $(GLAPI_OBJECTS) ; \ + $(MKLIB) -o glapi -static $(GLAPI_OBJECTS) ; \ fi ###################################################################### diff --git a/src/mesa/SConscript b/src/mesa/SConscript index abd64030b63..01620ee6147 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -12,10 +12,11 @@ if env['platform'] != 'winddk': '#/src/mesa', ]) - if gcc: - env.Append(CFLAGS = [ - '-std=c99', - ]) + if env['platform'] == 'windows': + env.Append(CPPDEFINES = [ + '_GDI32_', # prevent gl* being declared __declspec(dllimport) in MS headers + 'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers + ]) # # Source files @@ -165,6 +166,7 @@ if env['platform'] != 'winddk': 'state_tracker/st_context.c', 'state_tracker/st_debug.c', 'state_tracker/st_draw.c', + 'state_tracker/st_draw_feedback.c', 'state_tracker/st_extensions.c', 'state_tracker/st_format.c', 'state_tracker/st_framebuffer.c', diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index a2316e2662b..ae790554055 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -314,10 +314,28 @@ static void driReportDamage(__DRIdrawable *pdp, static void driSwapBuffers(__DRIdrawable *dPriv) { __DRIscreen *psp = dPriv->driScreenPriv; + drm_clip_rect_t *rects; + int i; + + if (!dPriv->numClipRects) + return; psp->DriverAPI.SwapBuffers(dPriv); - driReportDamage(dPriv, dPriv->pClipRects, dPriv->numClipRects); + rects = _mesa_malloc(sizeof(*rects) * dPriv->numClipRects); + + if (!rects) + return; + + for (i = 0; i < dPriv->numClipRects; i++) { + rects[i].x1 = dPriv->pClipRects[i].x1 - dPriv->x; + rects[i].y1 = dPriv->pClipRects[i].y1 - dPriv->y; + rects[i].x2 = dPriv->pClipRects[i].x2 - dPriv->x; + rects[i].y2 = dPriv->pClipRects[i].y2 - dPriv->y; + } + + driReportDamage(dPriv, rects, dPriv->numClipRects); + _mesa_free(rects); } static int driDrawableGetMSC( __DRIscreen *sPriv, __DRIdrawable *dPriv, diff --git a/src/mesa/drivers/dri/common/vblank.c b/src/mesa/drivers/dri/common/vblank.c index d610253fe6f..12aeaa108f7 100644 --- a/src/mesa/drivers/dri/common/vblank.c +++ b/src/mesa/drivers/dri/common/vblank.c @@ -130,9 +130,8 @@ int driWaitForMSC32( __DRIdrawablePrivate *priv, if ( divisor != 0 ) { - unsigned int target = (unsigned int)target_msc; - unsigned int next = target; - unsigned int r; + int64_t next = target_msc; + int64_t r; int dont_wait = (target_msc == 0); do { @@ -154,9 +153,9 @@ int driWaitForMSC32( __DRIdrawablePrivate *priv, *msc = vblank_to_msc(priv, vbl.reply.sequence); - dont_wait = 0; - if (target_msc != 0 && *msc == target) + if (!dont_wait && *msc == next) break; + dont_wait = 0; /* Assuming the wait-done test fails, the next refresh to wait for * will be one that satisfies (MSC % divisor) == remainder. The @@ -165,11 +164,12 @@ int driWaitForMSC32( __DRIdrawablePrivate *priv, * If this refresh has already happened, we add divisor to obtain * the next refresh after the current one that will satisfy it. */ - r = (*msc % (unsigned int)divisor); - next = (*msc - r + (unsigned int)remainder); - if (next <= *msc) next += (unsigned int)divisor; + r = ((uint64_t)*msc % divisor); + next = (*msc - r + remainder); + if (next <= *msc) + next += divisor; - } while ( r != (unsigned int)remainder ); + } while (r != remainder); } else { /* If the \c divisor is zero, just wait until the MSC is greater diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index e0ddc7fd613..9bff74294d8 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -55,6 +55,7 @@ static const struct dri_extension i915_extensions[] = { {"GL_ARB_fragment_program", NULL}, {"GL_ARB_shadow", NULL}, {"GL_ARB_texture_non_power_of_two", NULL}, + {"GL_ATI_texture_env_combine3", NULL}, {"GL_EXT_shadow_funcs", NULL}, {NULL, NULL} }; diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 8bd761ec6a1..4760906a7ed 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -1105,30 +1105,14 @@ i915ValidateFragmentProgram(struct i915_context *i915) EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4); } - if ((inputsRead & (FRAG_BIT_COL1 | FRAG_BIT_FOGC)) || - i915->vertex_fog != I915_FOG_NONE) { - - if (inputsRead & FRAG_BIT_COL1) { - intel->specoffset = offset / 4; - EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, S4_VFMT_SPEC_FOG, 3); - } - else - EMIT_PAD(3); - - if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE) - EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1UB_1F, S4_VFMT_SPEC_FOG, 1); - else - EMIT_PAD(1); + if (inputsRead & FRAG_BIT_COL1) { + intel->specoffset = offset / 4; + EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_4UB_4F_BGRA, S4_VFMT_SPEC_FOG, 4); } - /* XXX this was disabled, but enabling this code helped fix the Glean - * tfragprog1 fog tests. - */ -#if 1 if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE) { EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4); } -#endif for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { if (inputsRead & FRAG_BIT_TEX(i)) { diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 1d6ac2cea68..a415e378fff 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -32,6 +32,7 @@ #include "main/imports.h" #include "main/api_noop.h" +#include "main/macros.h" #include "main/vtxfmt.h" #include "main/simple_list.h" #include "shader/shader_api.h" @@ -128,9 +129,10 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; - ctx->Const.MaxTextureUnits = BRW_MAX_TEX_UNIT; ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT; - ctx->Const.MaxTextureCoordUnits = BRW_MAX_TEX_UNIT; + ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ + ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits, + ctx->Const.MaxTextureImageUnits); ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */ /* Advertise the full hardware capabilities. The new memory diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 77980109cdf..5d3f99e025e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -238,7 +238,7 @@ struct brw_vs_ouput_sizes { }; -#define BRW_MAX_TEX_UNIT 8 +#define BRW_MAX_TEX_UNIT 16 #define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + MAX_DRAW_BUFFERS enum brw_cache_id { diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 49b422ee2ff..9e2b39af9bb 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -129,17 +129,28 @@ static INLINE int type_sz( GLuint type ) } } +/** + * Construct a brw_reg. + * \param file one of the BRW_x_REGISTER_FILE values + * \param nr register number/index + * \param subnr register sub number + * \param type one of BRW_REGISTER_TYPE_x + * \param vstride one of BRW_VERTICAL_STRIDE_x + * \param width one of BRW_WIDTH_x + * \param hstride one of BRW_HORIZONTAL_STRIDE_x + * \param swizzle one of BRW_SWIZZLE_x + * \param writemask WRITEMASK_X/Y/Z/W bitfield + */ static INLINE struct brw_reg brw_reg( GLuint file, - GLuint nr, - GLuint subnr, - GLuint type, - GLuint vstride, - GLuint width, - GLuint hstride, - GLuint swizzle, - GLuint writemask) -{ - + GLuint nr, + GLuint subnr, + GLuint type, + GLuint vstride, + GLuint width, + GLuint hstride, + GLuint swizzle, + GLuint writemask ) +{ struct brw_reg reg; reg.type = type; reg.file = file; @@ -166,6 +177,7 @@ static INLINE struct brw_reg brw_reg( GLuint file, return reg; } +/** Construct float[16] register */ static INLINE struct brw_reg brw_vec16_reg( GLuint file, GLuint nr, GLuint subnr ) @@ -181,6 +193,7 @@ static INLINE struct brw_reg brw_vec16_reg( GLuint file, WRITEMASK_XYZW); } +/** Construct float[8] register */ static INLINE struct brw_reg brw_vec8_reg( GLuint file, GLuint nr, GLuint subnr ) @@ -196,7 +209,7 @@ static INLINE struct brw_reg brw_vec8_reg( GLuint file, WRITEMASK_XYZW); } - +/** Construct float[4] register */ static INLINE struct brw_reg brw_vec4_reg( GLuint file, GLuint nr, GLuint subnr ) @@ -212,7 +225,7 @@ static INLINE struct brw_reg brw_vec4_reg( GLuint file, WRITEMASK_XYZW); } - +/** Construct float[2] register */ static INLINE struct brw_reg brw_vec2_reg( GLuint file, GLuint nr, GLuint subnr ) @@ -228,6 +241,7 @@ static INLINE struct brw_reg brw_vec2_reg( GLuint file, WRITEMASK_XY); } +/** Construct float[1] register */ static INLINE struct brw_reg brw_vec1_reg( GLuint file, GLuint nr, GLuint subnr ) @@ -277,6 +291,7 @@ static INLINE struct brw_reg byte_offset( struct brw_reg reg, } +/** Construct unsigned word[16] register */ static INLINE struct brw_reg brw_uw16_reg( GLuint file, GLuint nr, GLuint subnr ) @@ -284,6 +299,7 @@ static INLINE struct brw_reg brw_uw16_reg( GLuint file, return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); } +/** Construct unsigned word[8] register */ static INLINE struct brw_reg brw_uw8_reg( GLuint file, GLuint nr, GLuint subnr ) @@ -291,6 +307,7 @@ static INLINE struct brw_reg brw_uw8_reg( GLuint file, return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); } +/** Construct unsigned word[1] register */ static INLINE struct brw_reg brw_uw1_reg( GLuint file, GLuint nr, GLuint subnr ) @@ -311,6 +328,7 @@ static INLINE struct brw_reg brw_imm_reg( GLuint type ) 0); } +/** Construct float immediate register */ static INLINE struct brw_reg brw_imm_f( GLfloat f ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); @@ -318,6 +336,7 @@ static INLINE struct brw_reg brw_imm_f( GLfloat f ) return imm; } +/** Construct integer immediate register */ static INLINE struct brw_reg brw_imm_d( GLint d ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); @@ -325,6 +344,7 @@ static INLINE struct brw_reg brw_imm_d( GLint d ) return imm; } +/** Construct uint immediate register */ static INLINE struct brw_reg brw_imm_ud( GLuint ud ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); @@ -332,6 +352,7 @@ static INLINE struct brw_reg brw_imm_ud( GLuint ud ) return imm; } +/** Construct ushort immediate register */ static INLINE struct brw_reg brw_imm_uw( GLushort uw ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); @@ -339,6 +360,7 @@ static INLINE struct brw_reg brw_imm_uw( GLushort uw ) return imm; } +/** Construct short immediate register */ static INLINE struct brw_reg brw_imm_w( GLshort w ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); @@ -350,8 +372,7 @@ static INLINE struct brw_reg brw_imm_w( GLshort w ) * numbers alias with _V and _VF below: */ -/* Vector of eight signed half-byte values: - */ +/** Construct vector of eight signed half-byte values */ static INLINE struct brw_reg brw_imm_v( GLuint v ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); @@ -362,8 +383,7 @@ static INLINE struct brw_reg brw_imm_v( GLuint v ) return imm; } -/* Vector of four 8-bit float values: - */ +/** Construct vector of four 8-bit float values */ static INLINE struct brw_reg brw_imm_vf( GLuint v ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); @@ -400,44 +420,43 @@ static INLINE struct brw_reg brw_address( struct brw_reg reg ) return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); } - -static INLINE struct brw_reg brw_vec1_grf( GLuint nr, - GLuint subnr ) +/** Construct float[1] general-purpose register */ +static INLINE struct brw_reg brw_vec1_grf( GLuint nr, GLuint subnr ) { return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } -static INLINE struct brw_reg brw_vec8_grf( GLuint nr, - GLuint subnr ) +/** Construct float[2] general-purpose register */ +static INLINE struct brw_reg brw_vec2_grf( GLuint nr, GLuint subnr ) { - return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); + return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } -static INLINE struct brw_reg brw_vec4_grf( GLuint nr, - GLuint subnr ) +/** Construct float[4] general-purpose register */ +static INLINE struct brw_reg brw_vec4_grf( GLuint nr, GLuint subnr ) { return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } - -static INLINE struct brw_reg brw_vec2_grf( GLuint nr, - GLuint subnr ) +/** Construct float[8] general-purpose register */ +static INLINE struct brw_reg brw_vec8_grf( GLuint nr, GLuint subnr ) { - return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); + return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } -static INLINE struct brw_reg brw_uw8_grf( GLuint nr, - GLuint subnr ) + +static INLINE struct brw_reg brw_uw8_grf( GLuint nr, GLuint subnr ) { return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } -static INLINE struct brw_reg brw_uw16_grf( GLuint nr, - GLuint subnr ) +static INLINE struct brw_reg brw_uw16_grf( GLuint nr, GLuint subnr ) { return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } + +/** Construct null register (usually used for setting condition codes) */ static INLINE struct brw_reg brw_null_reg( void ) { return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, @@ -524,13 +543,13 @@ static INLINE struct brw_reg stride( struct brw_reg reg, GLuint width, GLuint hstride ) { - reg.vstride = cvt(vstride); reg.width = cvt(width) - 1; reg.hstride = cvt(hstride); return reg; } + static INLINE struct brw_reg vec16( struct brw_reg reg ) { return stride(reg, 16,16,1); @@ -556,6 +575,7 @@ static INLINE struct brw_reg vec1( struct brw_reg reg ) return stride(reg, 0,1,0); } + static INLINE struct brw_reg get_element( struct brw_reg reg, GLuint elt ) { return vec1(suboffset(reg, elt)); @@ -687,7 +707,7 @@ static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset static INLINE struct brw_instruction *current_insn( struct brw_compile *p) { - return &p->store[p->nr_insn]; + return &p->store[p->nr_insn]; } void brw_pop_insn_state( struct brw_compile *p ); @@ -733,6 +753,7 @@ ALU2(ADD) ALU2(MUL) ALU1(FRC) ALU1(RNDD) +ALU1(RNDZ) ALU2(MAC) ALU2(MACH) ALU1(LZD) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index ce4cf46cfa6..4e099b5945c 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -439,6 +439,7 @@ ALU2(ADD) ALU2(MUL) ALU1(FRC) ALU1(RNDD) +ALU1(RNDZ) ALU2(MAC) ALU2(MACH) ALU1(LZD) diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 4a9541378f7..71e2a95bfd9 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -73,8 +73,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->prog_data.curb_read_length = reg - 1; - - /* Allocate input regs: */ c->nr_inputs = 0; @@ -84,8 +82,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0); reg++; } - } - + } /* Allocate outputs: TODO: could organize the non-position outputs * to go straight into message regs. @@ -339,6 +336,7 @@ static void emit_math1( struct brw_vs_compile *c, } } + static void emit_math2( struct brw_vs_compile *c, GLuint function, struct brw_reg dst, @@ -370,7 +368,6 @@ static void emit_math2( struct brw_vs_compile *c, release_tmp(c, tmp); } } - static void emit_exp_noalias( struct brw_vs_compile *c, @@ -521,8 +518,6 @@ static void emit_log_noalias( struct brw_vs_compile *c, } - - /* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1 */ static void emit_dst_noalias( struct brw_vs_compile *c, @@ -544,6 +539,7 @@ static void emit_dst_noalias( struct brw_vs_compile *c, brw_MOV(p, brw_writemask(dst, WRITEMASK_W), arg1); } + static void emit_xpd( struct brw_compile *p, struct brw_reg dst, struct brw_reg t, @@ -554,7 +550,6 @@ static void emit_xpd( struct brw_compile *p, } - static void emit_lit_noalias( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0 ) @@ -596,7 +591,29 @@ static void emit_lit_noalias( struct brw_vs_compile *c, } +/** 3 or 4-component vector normalization */ +static void emit_nrm( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + int num_comps) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = get_tmp(c); + /* tmp = dot(arg0, arg0) */ + if (num_comps == 3) + brw_DP3(p, tmp, arg0, arg0); + else + brw_DP4(p, tmp, arg0, arg0); + + /* tmp = 1 / sqrt(tmp) */ + emit_math1(c, BRW_MATH_FUNCTION_RSQ, tmp, tmp, BRW_MATH_PRECISION_FULL); + + /* dst = arg0 * tmp */ + brw_MUL(p, dst, arg0, tmp); + + release_tmp(c, tmp); +} /* TODO: relative addressing! @@ -634,7 +651,6 @@ static struct brw_reg get_reg( struct brw_vs_compile *c, } - static struct brw_reg deref( struct brw_vs_compile *c, struct brw_reg arg, GLint offset) @@ -728,8 +744,6 @@ static struct brw_reg get_dst( struct brw_vs_compile *c, } - - static void emit_swz( struct brw_vs_compile *c, struct brw_reg dst, struct prog_src_register src ) @@ -801,8 +815,8 @@ static void emit_swz( struct brw_vs_compile *c, } - -/* Post-vertex-program processing. Send the results to the URB. +/** + * Post-vertex-program processing. Send the results to the URB. */ static void emit_vertex_write( struct brw_vs_compile *c) { @@ -817,7 +831,6 @@ static void emit_vertex_write( struct brw_vs_compile *c) get_reg(c, PROGRAM_INPUT, VERT_ATTRIB_EDGEFLAG)); } - /* Build ndc coords */ if (!c->key.know_w_is_one) { ndc = get_tmp(c); @@ -848,7 +861,6 @@ static void emit_vertex_write( struct brw_vs_compile *c) brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8)); } - for (i = 0; i < c->key.nr_userclip; i++) { brw_set_conditionalmod(p, BRW_CONDITIONAL_L); brw_DP4(p, brw_null_reg(), pos, c->userplane[i]); @@ -856,7 +868,6 @@ static void emit_vertex_write( struct brw_vs_compile *c) brw_set_predicate_control(p, BRW_PREDICATE_NONE); } - /* i965 clipping workaround: * 1) Test for -ve rhw * 2) If set, @@ -888,14 +899,12 @@ static void emit_vertex_write( struct brw_vs_compile *c) brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); } - /* Emit the (interleaved) headers for the two vertices - an 8-reg * of zeros followed by two sets of NDC coordinates: */ brw_set_access_mode(p, BRW_ALIGN_1); brw_MOV(p, offset(m0, 2), ndc); brw_MOV(p, offset(m0, 3), pos); - brw_urb_WRITE(p, brw_null_reg(), /* dest */ @@ -909,9 +918,9 @@ static void emit_vertex_write( struct brw_vs_compile *c) 1, /* writes complete */ 0, /* urb destination offset */ BRW_URB_SWIZZLE_INTERLEAVE); - } + static void post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst ) { @@ -1035,6 +1044,12 @@ void brw_vs_emit(struct brw_vs_compile *c ) case OPCODE_DPH: brw_DPH(p, dst, args[0], args[1]); break; + case OPCODE_NRM3: + emit_nrm(c, dst, args[0], 3); + break; + case OPCODE_NRM4: + emit_nrm(c, dst, args[0], 4); + break; case OPCODE_DST: unalias2(c, dst, args[0], args[1], emit_dst_noalias); break; @@ -1102,7 +1117,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) break; case OPCODE_SGT: emit_sgt(p, dst, args[0], args[1]); - break; + break; case OPCODE_SLT: emit_slt(p, dst, args[0], args[1]); break; @@ -1118,6 +1133,10 @@ void brw_vs_emit(struct brw_vs_compile *c ) */ emit_swz(c, dst, inst->SrcReg[0] ); break; + case OPCODE_TRUNC: + /* round toward zero */ + brw_RNDZ(p, dst, args[0]); + break; case OPCODE_XPD: emit_xpd(p, dst, args[0], args[1]); break; @@ -1136,7 +1155,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); brw_set_predicate_control_flag_value(p, 0xff); - break; + break; case OPCODE_CAL: brw_set_access_mode(p, BRW_ALIGN_1); brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); @@ -1145,7 +1164,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) get_addr_reg(stack_index), brw_imm_d(4)); inst->Data = &p->store[p->nr_insn]; brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - break; + break; case OPCODE_RET: brw_ADD(p, get_addr_reg(stack_index), get_addr_reg(stack_index), brw_imm_d(-4)); @@ -1154,17 +1173,17 @@ void brw_vs_emit(struct brw_vs_compile *c ) brw_set_access_mode(p, BRW_ALIGN_16); case OPCODE_END: brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - break; + break; case OPCODE_PRINT: case OPCODE_BGNSUB: case OPCODE_ENDSUB: + /* no-op instructions */ break; default: - _mesa_printf("Unsupported opcode %i (%s) in vertex shader\n", - inst->Opcode, inst->Opcode < MAX_OPCODE ? + _mesa_problem(NULL, "Unsupported opcode %i (%s) in vertex shader", + inst->Opcode, inst->Opcode < MAX_OPCODE ? _mesa_opcode_string(inst->Opcode) : "unknown"); - break; } if ((inst->DstReg.File == PROGRAM_OUTPUT) diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index bad76793af7..5b4ee20ecb5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -36,6 +36,7 @@ #include "brw_state.h" +/** Return number of src args for given instruction */ GLuint brw_wm_nr_args( GLuint opcode ) { switch (opcode) { @@ -58,6 +59,8 @@ GLuint brw_wm_nr_args( GLuint opcode ) case OPCODE_TXP: case OPCODE_KIL: case OPCODE_LIT: + case OPCODE_NRM3: + case OPCODE_NRM4: case WM_CINTERP: case WM_WPOSXY: return 1; diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 58c78c4b2c5..b5050a3e40b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -194,7 +194,7 @@ static void emit_linterp( struct brw_compile *p, interp[2] = brw_vec1_grf(nr+1, 0); interp[3] = brw_vec1_grf(nr+1, 4); - for(i = 0; i < 4; i++ ) { + for (i = 0; i < 4; i++) { if (mask & (1<<i)) { brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); @@ -219,42 +219,40 @@ static void emit_pinterp( struct brw_compile *p, interp[2] = brw_vec1_grf(nr+1, 0); interp[3] = brw_vec1_grf(nr+1, 4); - for(i = 0; i < 4; i++ ) { + for (i = 0; i < 4; i++) { if (mask & (1<<i)) { brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); } } - for(i = 0; i < 4; i++ ) { + for (i = 0; i < 4; i++) { if (mask & (1<<i)) { brw_MUL(p, dst[i], dst[i], w[3]); } } } + static void emit_cinterp( struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0 ) { - struct brw_reg interp[4]; - GLuint nr = arg0[0].nr; - GLuint i; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */ - } - } -} - + struct brw_reg interp[4]; + GLuint nr = arg0[0].nr; + GLuint i; + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */ + } + } +} static void emit_alu1( struct brw_compile *p, @@ -280,6 +278,7 @@ static void emit_alu1( struct brw_compile *p, brw_set_saturate(p, 0); } + static void emit_alu2( struct brw_compile *p, struct brw_instruction *(*func)(struct brw_compile *, struct brw_reg, @@ -351,6 +350,7 @@ static void emit_lrp( struct brw_compile *p, } } } + static void emit_sop( struct brw_compile *p, const struct brw_reg *dst, GLuint mask, @@ -376,7 +376,7 @@ static void emit_slt( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1); + emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1); } static void emit_sle( struct brw_compile *p, @@ -385,7 +385,7 @@ static void emit_sle( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1); + emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1); } static void emit_sgt( struct brw_compile *p, @@ -394,7 +394,7 @@ static void emit_sgt( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1); + emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1); } static void emit_sge( struct brw_compile *p, @@ -403,7 +403,7 @@ static void emit_sge( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1); + emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1); } static void emit_seq( struct brw_compile *p, @@ -412,7 +412,7 @@ static void emit_seq( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1); + emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1); } static void emit_sne( struct brw_compile *p, @@ -421,7 +421,7 @@ static void emit_sne( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1); + emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1); } static void emit_cmp( struct brw_compile *p, @@ -505,7 +505,7 @@ static void emit_dp3( struct brw_compile *p, const struct brw_reg *arg1 ) { if (!(mask & WRITEMASK_XYZW)) - return; /* Do not emit dead code*/ + return; /* Do not emit dead code */ assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); @@ -525,7 +525,7 @@ static void emit_dp4( struct brw_compile *p, const struct brw_reg *arg1 ) { if (!(mask & WRITEMASK_XYZW)) - return; /* Do not emit dead code*/ + return; /* Do not emit dead code */ assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); @@ -546,7 +546,7 @@ static void emit_dph( struct brw_compile *p, const struct brw_reg *arg1 ) { if (!(mask & WRITEMASK_XYZW)) - return; /* Do not emit dead code*/ + return; /* Do not emit dead code */ assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); @@ -592,7 +592,7 @@ static void emit_math1( struct brw_compile *p, const struct brw_reg *arg0 ) { if (!(mask & WRITEMASK_XYZW)) - return; /* Do not emit dead code*/ + return; /* Do not emit dead code */ //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X || // function == BRW_MATH_FUNCTION_SINCOS); @@ -619,7 +619,7 @@ static void emit_math2( struct brw_compile *p, const struct brw_reg *arg1) { if (!(mask & WRITEMASK_XYZW)) - return; /* Do not emit dead code*/ + return; /* Do not emit dead code */ assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); @@ -760,7 +760,6 @@ static void emit_txb( struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(8), arg[3]); msgLength = 9; - brw_SAMPLE(p, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, @@ -772,7 +771,6 @@ static void emit_txb( struct brw_wm_compile *c, 8, /* responseLength */ msgLength, 0); - } @@ -823,7 +821,6 @@ static void emit_kil( struct brw_wm_compile *c, struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); GLuint i; - /* XXX - usually won't need 4 compares! */ for (i = 0; i < 4; i++) { @@ -836,6 +833,7 @@ static void emit_kil( struct brw_wm_compile *c, } } + static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, GLuint nr, @@ -869,6 +867,7 @@ static void fire_fb_write( struct brw_wm_compile *c, eot); } + static void emit_aa( struct brw_wm_compile *c, struct brw_reg *arg1, GLuint reg ) @@ -962,7 +961,6 @@ static void emit_fb_write( struct brw_wm_compile *c, nr += 2; } - if (!c->key.runtime_check_aads_emit) { if (c->key.aa_dest_stencil_reg) emit_aa(c, arg1, 2); @@ -996,8 +994,6 @@ static void emit_fb_write( struct brw_wm_compile *c, } - - /* Post-fragment-program processing. Send the results to the * framebuffer. */ @@ -1022,6 +1018,7 @@ static void emit_spill( struct brw_wm_compile *c, slot); } + static void emit_unspill( struct brw_wm_compile *c, struct brw_reg reg, GLuint slot ) @@ -1047,7 +1044,6 @@ static void emit_unspill( struct brw_wm_compile *c, } - /** * Retrieve upto 4 GEN4 register pairs for the given wm reg: */ @@ -1073,6 +1069,7 @@ static void get_argument_regs( struct brw_wm_compile *c, } } + static void spill_values( struct brw_wm_compile *c, struct brw_wm_value *values, GLuint nr ) @@ -1085,7 +1082,6 @@ static void spill_values( struct brw_wm_compile *c, } - /* Emit the fragment program instructions here. */ void brw_wm_emit( struct brw_wm_compile *c ) @@ -1176,7 +1172,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); break; - case OPCODE_DP3: /* */ + case OPCODE_DP3: emit_dp3(p, dst, dst_flags, args[0], args[1]); break; @@ -1188,7 +1184,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_dph(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_LRP: /* */ + case OPCODE_LRP: emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); break; @@ -1315,8 +1311,3 @@ void brw_wm_emit( struct brw_wm_compile *c ) inst->dst[i]->spill_slot); } } - - - - - diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index baecfdcb799..d43e326f7d1 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -267,7 +267,7 @@ static void emit_trunc( struct brw_wm_compile *c, struct brw_reg src, dst; dst = get_dst_reg(c, inst, i, 1) ; src = get_src_reg(c, &inst->SrcReg[0], i, 1); - brw_RNDD(p, dst, src); + brw_RNDZ(p, dst, src); } } brw_set_saturate(p, 0); diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c index f8f009c6a30..4d036dee42e 100644 --- a/src/mesa/drivers/dri/intel/intel_buffers.c +++ b/src/mesa/drivers/dri/intel/intel_buffers.c @@ -181,7 +181,7 @@ intelUpdatePageFlipping(struct intel_context *intel, intel_fb->pf_current_page = (intel->sarea->pf_current_page >> (intel_fb->pf_planes & 0x2)) & 0x3; - intel_fb->pf_num_pages = intel->intelScreen->third.handle ? 3 : 2; + intel_fb->pf_num_pages = 2; pf_active = pf_planes && (pf_planes & intel->sarea->pf_active) == pf_planes; diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 6c625b428c0..44b276a123e 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -412,6 +412,7 @@ static const struct dri_extension brw_extensions[] = { { "GL_EXT_shadow_funcs", NULL }, { "GL_EXT_texture_sRGB", NULL }, { "GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions }, + { "GL_ATI_texture_env_combine3", NULL }, { NULL, NULL } }; @@ -775,7 +776,6 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) intel_region_release(&intel->front_region); intel_region_release(&intel->back_region); - intel_region_release(&intel->third_region); intel_region_release(&intel->depth_region); driDestroyOptionCache(&intel->optionCache); @@ -825,12 +825,7 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv, intel_renderbuffer_set_region(intel_fb->color_rb[1], intel->back_region); } -#if 0 - if (intel_fb->color_rb[2]) { - intel_renderbuffer_set_region(intel_fb->color_rb[2], - intel->third_region); - } -#endif + if (irbDepth) { intel_renderbuffer_set_region(irbDepth, intel->depth_region); } @@ -867,7 +862,7 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv, driDrawableInitVBlank(driDrawPriv); intel_fb->vbl_waited = driDrawPriv->vblSeq; - for (i = 0; i < (intel->intelScreen->third.handle ? 3 : 2); i++) { + for (i = 0; i < 2; i++) { if (intel_fb->color_rb[i]) intel_fb->color_rb[i]->vbl_pending = driDrawPriv->vblSeq; } diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index ee43ed7e833..048286c196b 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -157,6 +157,19 @@ struct intel_context void (*debug_batch)(struct intel_context *intel); } vtbl; + struct { + struct gl_fragment_program *bitmap_fp; + struct gl_vertex_program *passthrough_vp; + + struct gl_fragment_program *saved_fp; + GLboolean saved_fp_enable; + struct gl_vertex_program *saved_vp; + GLboolean saved_vp_enable; + + GLint saved_vp_x, saved_vp_y; + GLsizei saved_vp_width, saved_vp_height; + } meta; + GLint refcount; GLuint Fallback; GLuint NewGLState; @@ -166,7 +179,6 @@ struct intel_context struct intel_region *front_region; struct intel_region *back_region; - struct intel_region *third_region; struct intel_region *depth_region; /** diff --git a/src/mesa/drivers/dri/intel/intel_pixel.c b/src/mesa/drivers/dri/intel/intel_pixel.c index 5702ad9bb57..cf2f32d3845 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel.c +++ b/src/mesa/drivers/dri/intel/intel_pixel.c @@ -27,7 +27,12 @@ #include "main/enums.h" #include "main/state.h" +#include "main/context.h" +#include "main/enable.h" +#include "main/matrix.h" #include "swrast/swrast.h" +#include "shader/arbprogram.h" +#include "shader/program.h" #include "intel_context.h" #include "intel_pixel.h" @@ -167,6 +172,159 @@ intel_check_blit_format(struct intel_region * region, return GL_FALSE; } +void +intel_meta_set_passthrough_transform(struct intel_context *intel) +{ + GLcontext *ctx = &intel->ctx; + + intel->meta.saved_vp_x = ctx->Viewport.X; + intel->meta.saved_vp_y = ctx->Viewport.Y; + intel->meta.saved_vp_width = ctx->Viewport.Width; + intel->meta.saved_vp_height = ctx->Viewport.Height; + + _mesa_Viewport(0, 0, ctx->DrawBuffer->Width, ctx->DrawBuffer->Height); + + _mesa_MatrixMode(GL_PROJECTION); + _mesa_PushMatrix(); + _mesa_LoadIdentity(); + _mesa_Ortho(0, ctx->DrawBuffer->Width, 0, ctx->DrawBuffer->Height, 1, -1); + + _mesa_MatrixMode(GL_MODELVIEW); + _mesa_PushMatrix(); + _mesa_LoadIdentity(); +} + +void +intel_meta_restore_transform(struct intel_context *intel) +{ + _mesa_MatrixMode(GL_PROJECTION); + _mesa_PopMatrix(); + _mesa_MatrixMode(GL_MODELVIEW); + _mesa_PopMatrix(); + + _mesa_Viewport(intel->meta.saved_vp_x, intel->meta.saved_vp_y, + intel->meta.saved_vp_width, intel->meta.saved_vp_height); +} + +/** + * Set up a vertex program to pass through the position and first texcoord + * for pixel path. + */ +void +intel_meta_set_passthrough_vertex_program(struct intel_context *intel) +{ + GLcontext *ctx = &intel->ctx; + static const char *vp = + "!!ARBvp1.0\n" + "TEMP vertexClip;\n" + "DP4 vertexClip.x, state.matrix.mvp.row[0], vertex.position;\n" + "DP4 vertexClip.y, state.matrix.mvp.row[1], vertex.position;\n" + "DP4 vertexClip.z, state.matrix.mvp.row[2], vertex.position;\n" + "DP4 vertexClip.w, state.matrix.mvp.row[3], vertex.position;\n" + "MOV result.position, vertexClip;\n" + "MOV result.texcoord[0], vertex.texcoord[0];\n" + "MOV result.color, vertex.color;\n" + "END\n"; + + assert(intel->meta.saved_vp == NULL); + + _mesa_reference_vertprog(ctx, &intel->meta.saved_vp, + ctx->VertexProgram.Current); + if (intel->meta.passthrough_vp == NULL) { + GLuint prog_name; + _mesa_GenPrograms(1, &prog_name); + _mesa_BindProgram(GL_VERTEX_PROGRAM_ARB, prog_name); + _mesa_ProgramStringARB(GL_VERTEX_PROGRAM_ARB, + GL_PROGRAM_FORMAT_ASCII_ARB, + strlen(vp), (const GLubyte *)vp); + _mesa_reference_vertprog(ctx, &intel->meta.passthrough_vp, + ctx->VertexProgram.Current); + _mesa_DeletePrograms(1, &prog_name); + } + + FLUSH_VERTICES(ctx, _NEW_PROGRAM); + _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current, + intel->meta.passthrough_vp); + ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB, + &intel->meta.passthrough_vp->Base); + + intel->meta.saved_vp_enable = ctx->VertexProgram.Enabled; + _mesa_Enable(GL_VERTEX_PROGRAM_ARB); +} + +/** + * Restores the previous vertex program after + * intel_meta_set_passthrough_vertex_program() + */ +void +intel_meta_restore_vertex_program(struct intel_context *intel) +{ + GLcontext *ctx = &intel->ctx; + + FLUSH_VERTICES(ctx, _NEW_PROGRAM); + _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current, + intel->meta.saved_vp); + _mesa_reference_vertprog(ctx, &intel->meta.saved_vp, NULL); + ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB, + &ctx->VertexProgram.Current->Base); + + if (!intel->meta.saved_vp_enable) + _mesa_Disable(GL_VERTEX_PROGRAM_ARB); +} + +/** + * Binds the given program string to GL_FRAGMENT_PROGRAM_ARB, caching the + * program object. + */ +void +intel_meta_set_fragment_program(struct intel_context *intel, + struct gl_fragment_program **prog, + const char *prog_string) +{ + GLcontext *ctx = &intel->ctx; + assert(intel->meta.saved_fp == NULL); + + _mesa_reference_fragprog(ctx, &intel->meta.saved_fp, + ctx->FragmentProgram.Current); + if (*prog == NULL) { + GLuint prog_name; + _mesa_GenPrograms(1, &prog_name); + _mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, prog_name); + _mesa_ProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, + GL_PROGRAM_FORMAT_ASCII_ARB, + strlen(prog_string), (const GLubyte *)prog_string); + _mesa_reference_fragprog(ctx, prog, ctx->FragmentProgram.Current); + /* Note that DeletePrograms unbinds the program on us */ + _mesa_DeletePrograms(1, &prog_name); + } + + FLUSH_VERTICES(ctx, _NEW_PROGRAM); + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, *prog); + ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, &((*prog)->Base)); + + intel->meta.saved_fp_enable = ctx->FragmentProgram.Enabled; + _mesa_Enable(GL_FRAGMENT_PROGRAM_ARB); +} + +/** + * Restores the previous fragment program after + * intel_meta_set_fragment_program() + */ +void +intel_meta_restore_fragment_program(struct intel_context *intel) +{ + GLcontext *ctx = &intel->ctx; + + FLUSH_VERTICES(ctx, _NEW_PROGRAM); + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, + intel->meta.saved_fp); + _mesa_reference_fragprog(ctx, &intel->meta.saved_fp, NULL); + ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, + &ctx->FragmentProgram.Current->Base); + + if (!intel->meta.saved_fp_enable) + _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB); +} void intelInitPixelFuncs(struct dd_function_table *functions) @@ -181,3 +339,13 @@ intelInitPixelFuncs(struct dd_function_table *functions) #endif } } + +void +intel_free_pixel_state(struct intel_context *intel) +{ + GLcontext *ctx = &intel->ctx; + + _mesa_reference_vertprog(ctx, &intel->meta.passthrough_vp, NULL); + _mesa_reference_fragprog(ctx, &intel->meta.bitmap_fp, NULL); +} + diff --git a/src/mesa/drivers/dri/intel/intel_pixel.h b/src/mesa/drivers/dri/intel/intel_pixel.h index 6fa6effe835..76b8781316b 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel.h +++ b/src/mesa/drivers/dri/intel/intel_pixel.h @@ -31,6 +31,15 @@ #include "main/mtypes.h" void intelInitPixelFuncs(struct dd_function_table *functions); +void intel_meta_set_passthrough_transform(struct intel_context *intel); +void intel_meta_restore_transform(struct intel_context *intel); +void intel_meta_set_passthrough_vertex_program(struct intel_context *intel); +void intel_meta_restore_vertex_program(struct intel_context *intel); +void intel_meta_set_fragment_program(struct intel_context *intel, + struct gl_fragment_program **prog, + const char *prog_string); +void intel_meta_restore_fragment_program(struct intel_context *intel); +void intel_free_pixel_state(struct intel_context *intel); GLboolean intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index fb1a051cdc8..1d7f15f10a5 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -32,7 +32,18 @@ #include "main/mtypes.h" #include "main/macros.h" #include "main/bufferobj.h" +#include "main/pixelstore.h" #include "main/state.h" +#include "main/teximage.h" +#include "main/texenv.h" +#include "main/texobj.h" +#include "main/texstate.h" +#include "main/texparam.h" +#include "main/varray.h" +#include "main/attrib.h" +#include "main/enable.h" +#include "shader/arbprogram.h" +#include "glapi/dispatch.h" #include "swrast/swrast.h" #include "intel_screen.h" @@ -87,6 +98,11 @@ static GLboolean test_bit( const GLubyte *src, return (src[bit/8] & (1<<(bit % 8))) ? 1 : 0; } +static GLboolean test_msb_bit(const GLubyte *src, GLuint bit) +{ + return (src[bit/8] & (1<<(7 - (bit % 8)))) ? 1 : 0; +} + static void set_bit( GLubyte *dest, GLuint bit ) { @@ -244,8 +260,8 @@ do_blit_bitmap( GLcontext *ctx, /* Clip to drawable cliprect */ if (!_mesa_clip_to_region(cliprects[i].x1, cliprects[i].y1, - cliprects[i].x2 - cliprects[i].x1, - cliprects[i].y2 - cliprects[i].y1, + cliprects[i].x2, + cliprects[i].y2, &box_x, &box_y, &box_w, &box_h)) continue; @@ -317,9 +333,174 @@ out: return GL_TRUE; } +static GLboolean +intel_texture_bitmap(GLcontext * ctx, + GLint dst_x, GLint dst_y, + GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap) +{ + struct intel_context *intel = intel_context(ctx); + static const char *fp = + "!!ARBfp1.0\n" + "TEMP val;\n" + "PARAM color=program.local[0];\n" + "TEX val, fragment.texcoord[0], texture[0], 2D;\n" + "ADD val, val.wwww, {-.5, -.5, -.5, -.5};\n" + "KIL val;\n" + "MOV result.color, color;\n" + "END\n"; + GLuint texname; + GLfloat vertices[4][4]; + GLfloat texcoords[4][2]; + GLint old_active_texture; + GLubyte *unpacked_bitmap; + GLubyte *a8_bitmap; + int x, y; + + /* We need a fragment program for the KIL effect */ + if (!ctx->Extensions.ARB_fragment_program || + !ctx->Extensions.ARB_vertex_program) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, + "glBitmap fallback: No fragment/vertex program support\n"); + return GL_FALSE; + } + + /* We're going to mess with texturing with no regard to existing texture + * state, so if there is some set up we have to bail. + */ + if (ctx->Texture._EnabledUnits != 0) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "glBitmap fallback: texturing enabled\n"); + return GL_FALSE; + } + + /* Can't do textured DrawPixels with a fragment program, unless we were + * to generate a new program that sampled our texture and put the results + * in the fragment color before the user's program started. + */ + if (ctx->FragmentProgram.Enabled) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "glBitmap fallback: fragment program enabled\n"); + return GL_FALSE; + } + + if (ctx->VertexProgram.Enabled) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "glBitmap fallback: vertex program enabled\n"); + return GL_FALSE; + } + + /* Check that we can load in a texture this big. */ + if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) || + height > (1 << (ctx->Const.MaxTextureLevels - 1))) { + if (INTEL_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "glBitmap fallback: bitmap too large (%dx%d)\n", + width, height); + return GL_FALSE; + } + /* Convert the A1 bitmap to an A8 format suitable for glTexImage */ + if (unpack->BufferObj->Name) { + bitmap = map_pbo(ctx, width, height, unpack, bitmap); + if (bitmap == NULL) + return GL_TRUE; /* even though this is an error, we're done */ + } + unpacked_bitmap = _mesa_unpack_bitmap(width, height, bitmap, + unpack); + a8_bitmap = _mesa_calloc(width * height); + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + if (test_msb_bit(unpacked_bitmap, ALIGN(width, 8) * y + x)) + a8_bitmap[y * width + x] = 0xff; + } + } + _mesa_free(unpacked_bitmap); + if (unpack->BufferObj->Name) { + /* done with PBO so unmap it now */ + ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, + unpack->BufferObj); + } + /* Save GL state before we start setting up our drawing */ + _mesa_PushAttrib(GL_ENABLE_BIT | GL_CURRENT_BIT | + GL_VIEWPORT_BIT); + _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT | + GL_CLIENT_PIXEL_STORE_BIT); + old_active_texture = ctx->Texture.CurrentUnit; + + _mesa_Disable(GL_POLYGON_STIPPLE); + + /* Upload our bitmap data to an alpha texture */ + _mesa_ActiveTextureARB(GL_TEXTURE0_ARB); + _mesa_Enable(GL_TEXTURE_2D); + _mesa_GenTextures(1, &texname); + _mesa_BindTexture(GL_TEXTURE_2D, texname); + _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + + _mesa_PixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE); + _mesa_PixelStorei(GL_UNPACK_LSB_FIRST, GL_FALSE); + _mesa_PixelStorei(GL_UNPACK_ROW_LENGTH, 0); + _mesa_PixelStorei(GL_UNPACK_SKIP_PIXELS, 0); + _mesa_PixelStorei(GL_UNPACK_SKIP_ROWS, 0); + _mesa_PixelStorei(GL_UNPACK_ALIGNMENT, 1); + _mesa_TexImage2D(GL_TEXTURE_2D, 0, GL_ALPHA, width, height, 0, + GL_ALPHA, GL_UNSIGNED_BYTE, a8_bitmap); + _mesa_free(a8_bitmap); + + intel_meta_set_fragment_program(intel, &intel->meta.bitmap_fp, fp); + _mesa_ProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 0, + ctx->Current.RasterColor); + intel_meta_set_passthrough_vertex_program(intel); + intel_meta_set_passthrough_transform(intel); + + vertices[0][0] = dst_x; + vertices[0][1] = dst_y; + vertices[0][2] = ctx->Current.RasterPos[2]; + vertices[0][3] = 1.0; + vertices[1][0] = dst_x + width; + vertices[1][1] = dst_y; + vertices[1][2] = ctx->Current.RasterPos[2]; + vertices[1][3] = 1.0; + vertices[2][0] = dst_x + width; + vertices[2][1] = dst_y + height; + vertices[2][2] = ctx->Current.RasterPos[2]; + vertices[2][3] = 1.0; + vertices[3][0] = dst_x; + vertices[3][1] = dst_y + height; + vertices[3][2] = ctx->Current.RasterPos[2]; + vertices[3][3] = 1.0; + + texcoords[0][0] = 0.0; + texcoords[0][1] = 0.0; + texcoords[1][0] = 1.0; + texcoords[1][1] = 0.0; + texcoords[2][0] = 1.0; + texcoords[2][1] = 1.0; + texcoords[3][0] = 0.0; + texcoords[3][1] = 1.0; + + _mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices); + _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords); + _mesa_Enable(GL_VERTEX_ARRAY); + _mesa_Enable(GL_TEXTURE_COORD_ARRAY); + CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4)); + + intel_meta_restore_transform(intel); + intel_meta_restore_fragment_program(intel); + intel_meta_restore_vertex_program(intel); + + _mesa_PopClientAttrib(); + _mesa_Disable(GL_TEXTURE_2D); /* asserted that it was disabled at entry */ + _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); + _mesa_PopAttrib(); + + _mesa_DeleteTextures(1, &texname); + return GL_TRUE; +} /* There are a large number of possible ways to implement bitmap on * this hardware, most of them have some sort of drawback. Here are a @@ -352,6 +533,10 @@ intelBitmap(GLcontext * ctx, unpack, pixels)) return; + if (intel_texture_bitmap(ctx, x, y, width, height, + unpack, pixels)) + return; + if (INTEL_DEBUG & DEBUG_PIXEL) _mesa_printf("%s: fallback to swrast\n", __FUNCTION__); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c index 8ebbc95a1d0..0d66935ad27 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c @@ -36,7 +36,6 @@ #include "main/texobj.h" #include "main/texstate.h" #include "main/texparam.h" -#include "main/matrix.h" #include "main/varray.h" #include "main/attrib.h" #include "main/enable.h" @@ -68,6 +67,7 @@ intel_texture_drawpixels(GLcontext * ctx, const struct gl_pixelstore_attrib *unpack, const GLvoid *pixels) { + struct intel_context *intel = intel_context(ctx); GLuint texname; GLfloat vertices[4][4]; GLfloat texcoords[4][2]; @@ -117,7 +117,7 @@ intel_texture_drawpixels(GLcontext * ctx, return GL_FALSE; } - _mesa_PushAttrib(GL_ENABLE_BIT | GL_TRANSFORM_BIT | GL_TEXTURE_BIT | + _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT | GL_CURRENT_BIT); _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); @@ -138,14 +138,7 @@ intel_texture_drawpixels(GLcontext * ctx, _mesa_TexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, format, type, pixels); - _mesa_MatrixMode(GL_PROJECTION); - _mesa_PushMatrix(); - _mesa_LoadIdentity(); - _mesa_Ortho(0, ctx->DrawBuffer->Width, 0, ctx->DrawBuffer->Height, 1, -1); - - _mesa_MatrixMode(GL_MODELVIEW); - _mesa_PushMatrix(); - _mesa_LoadIdentity(); + intel_meta_set_passthrough_transform(intel); /* Create the vertex buffer based on the current raster pos. The x and y * we're handed are ctx->Current.RasterPos[0,1] rounded to integers. @@ -184,10 +177,7 @@ intel_texture_drawpixels(GLcontext * ctx, _mesa_Enable(GL_TEXTURE_COORD_ARRAY); CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4)); - _mesa_MatrixMode(GL_PROJECTION); - _mesa_PopMatrix(); - _mesa_MatrixMode(GL_MODELVIEW); - _mesa_PopMatrix(); + intel_meta_restore_transform(intel); _mesa_PopClientAttrib(); _mesa_PopAttrib(); @@ -205,6 +195,7 @@ intel_stencil_drawpixels(GLcontext * ctx, const struct gl_pixelstore_attrib *unpack, const GLvoid *pixels) { + struct intel_context *intel = intel_context(ctx); GLuint texname, rb_name, fb_name, old_fb_name; GLfloat vertices[4][2]; GLfloat texcoords[4][2]; @@ -267,7 +258,7 @@ intel_stencil_drawpixels(GLcontext * ctx, return GL_FALSE; } - _mesa_PushAttrib(GL_ENABLE_BIT | GL_TRANSFORM_BIT | GL_TEXTURE_BIT | + _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT | GL_CURRENT_BIT | GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); old_fb_name = ctx->DrawBuffer->Name; @@ -335,14 +326,7 @@ intel_stencil_drawpixels(GLcontext * ctx, ctx->Unpack = old_unpack; _mesa_free(stencil_pixels); - _mesa_MatrixMode(GL_PROJECTION); - _mesa_PushMatrix(); - _mesa_LoadIdentity(); - _mesa_Ortho(0, ctx->DrawBuffer->Width, 0, ctx->DrawBuffer->Height, 1, -1); - - _mesa_MatrixMode(GL_MODELVIEW); - _mesa_PushMatrix(); - _mesa_LoadIdentity(); + intel_meta_set_passthrough_transform(intel); vertices[0][0] = x; vertices[0][1] = y; @@ -368,12 +352,10 @@ intel_stencil_drawpixels(GLcontext * ctx, _mesa_Enable(GL_TEXTURE_COORD_ARRAY); CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4)); + intel_meta_restore_transform(intel); + _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, old_fb_name); - _mesa_MatrixMode(GL_PROJECTION); - _mesa_PopMatrix(); - _mesa_MatrixMode(GL_MODELVIEW); - _mesa_PopMatrix(); _mesa_PopClientAttrib(); _mesa_PopAttrib(); diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 8dbcc3050ee..51ce32a9679 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -550,15 +550,6 @@ intel_recreate_static_regions(struct intel_context *intel) intel->back_region, &intelScreen->back); -#ifdef I915 - if (intelScreen->third.handle) { - intel->third_region = - intel_recreate_static(intel, "third", - intel->third_region, - &intelScreen->third); - } -#endif /* I915 */ - /* Still assumes front.cpp == depth.cpp. We can kill this when we move to * private buffers. */ diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 61b55b97b51..fc4e82b56c9 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -177,13 +177,6 @@ intelUpdateScreenFromSAREA(intelScreenPrivate * intelScreen, intelScreen->back.size = sarea->back_size; intelScreen->back.tiled = sarea->back_tiled; - if (intelScreen->driScrnPriv->ddx_version.minor >= 8) { - intelScreen->third.offset = sarea->third_offset; - intelScreen->third.handle = sarea->third_handle; - intelScreen->third.size = sarea->third_size; - intelScreen->third.tiled = sarea->third_tiled; - } - intelScreen->depth.offset = sarea->depth_offset; intelScreen->depth.handle = sarea->depth_handle; intelScreen->depth.size = sarea->depth_size; @@ -192,12 +185,10 @@ intelUpdateScreenFromSAREA(intelScreenPrivate * intelScreen, if (intelScreen->driScrnPriv->ddx_version.minor >= 9) { intelScreen->front.bo_handle = sarea->front_bo_handle; intelScreen->back.bo_handle = sarea->back_bo_handle; - intelScreen->third.bo_handle = sarea->third_bo_handle; intelScreen->depth.bo_handle = sarea->depth_bo_handle; } else { intelScreen->front.bo_handle = -1; intelScreen->back.bo_handle = -1; - intelScreen->third.bo_handle = -1; intelScreen->depth.bo_handle = -1; } @@ -353,12 +344,6 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT, &intel_fb->color_rb[1]->Base); - if (screen->third.handle) { - struct gl_renderbuffer *tmp_rb = NULL; - - intel_fb->color_rb[2] = intel_create_renderbuffer(rgbFormat); - _mesa_reference_renderbuffer(&tmp_rb, &intel_fb->color_rb[2]->Base); - } } if (mesaVis->depthBits == 24) { diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h index 91f0d6d1ae8..cf5359baaeb 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.h +++ b/src/mesa/drivers/dri/intel/intel_screen.h @@ -56,7 +56,6 @@ typedef struct { intelRegion front; intelRegion back; - intelRegion third; intelRegion depth; intelRegion tex; diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 778db96cc1f..7c6485ef60b 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -64,7 +64,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_SE_VPORT_ZSCALE 0x1DA8 #define R300_SE_VPORT_ZOFFSET 0x1DAC - +#define R300_VAP_PORT_IDX0 0x2040 /* * Vertex Array Processing (VAP) Control */ @@ -3201,9 +3201,9 @@ enum { #define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00 #define R300_PACKET3_INDX_BUFFER 0x00003300 -# define R300_EB_UNK1_SHIFT 24 -# define R300_EB_UNK1 (0x80<<24) -# define R300_EB_UNK2 0x0810 +# define R300_INDX_BUFFER_DST_SHIFT 0 +# define R300_INDX_BUFFER_SKIP_SHIFT 16 +# define R300_INDX_BUFFER_ONE_REG_WR (1<<31) /* Same as R300_PACKET3_3D_DRAW_VBUF but without VAP_VTX_FMT */ #define R300_PACKET3_3D_DRAW_VBUF_2 0x00003400 diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 292f87a2b1b..f9266e44c13 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -208,7 +208,8 @@ static void r300FireEB(r300ContextPtr rmesa, unsigned long addr, e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2), 2); - e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); + e32(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | + (R300_VAP_PORT_IDX0 >> 2)); e32(addr); e32(vertex_count); } diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c index 14612393176..f3f482f8c87 100644 --- a/src/mesa/main/arrayobj.c +++ b/src/mesa/main/arrayobj.c @@ -142,7 +142,7 @@ _mesa_initialize_array_object( GLcontext *ctx, obj->Index.StrideB = 0; obj->Index.Ptr = NULL; obj->Index.Enabled = GL_FALSE; - for (i = 0; i < MAX_TEXTURE_UNITS; i++) { + for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { obj->TexCoord[i].Size = 4; obj->TexCoord[i].Type = GL_FLOAT; obj->TexCoord[i].Stride = 0; @@ -181,7 +181,7 @@ _mesa_initialize_array_object( GLcontext *ctx, obj->SecondaryColor.BufferObj = ctx->Array.NullBufferObj; obj->FogCoord.BufferObj = ctx->Array.NullBufferObj; obj->Index.BufferObj = ctx->Array.NullBufferObj; - for (i = 0; i < MAX_TEXTURE_UNITS; i++) { + for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { obj->TexCoord[i].BufferObj = ctx->Array.NullBufferObj; } obj->EdgeFlag.BufferObj = ctx->Array.NullBufferObj; @@ -335,7 +335,7 @@ _mesa_DeleteVertexArraysAPPLE(GLsizei n, const GLuint *ids) unbind_buffer_object( ctx, obj->SecondaryColor.BufferObj ); unbind_buffer_object( ctx, obj->FogCoord.BufferObj ); unbind_buffer_object( ctx, obj->Index.BufferObj ); - for (i = 0; i < MAX_TEXTURE_UNITS; i++) { + for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { unbind_buffer_object( ctx, obj->TexCoord[i].BufferObj ); } unbind_buffer_object( ctx, obj->EdgeFlag.BufferObj ); diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c index dc85da2518f..825c841ee21 100644 --- a/src/mesa/main/attrib.c +++ b/src/mesa/main/attrib.c @@ -1,8 +1,9 @@ /* * Mesa 3-D graphics library - * Version: 7.2 + * Version: 7.3 * * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -224,7 +225,7 @@ _mesa_PushAttrib(GLbitfield mask) attr->SampleAlphaToOne = ctx->Multisample.SampleAlphaToOne; attr->SampleCoverage = ctx->Multisample.SampleCoverage; attr->SampleCoverageInvert = ctx->Multisample.SampleCoverageInvert; - for (i=0; i<MAX_TEXTURE_UNITS; i++) { + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { attr->Texture[i] = ctx->Texture.Unit[i].Enabled; attr->TexGen[i] = ctx->Texture.Unit[i].TexGenEnabled; attr->TextureColorTable[i] = ctx->Texture.Unit[i].ColorTableEnabled; diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index 190e6ab5641..016543da01f 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -808,7 +808,7 @@ _mesa_DeleteBuffersARB(GLsizei n, const GLuint *ids) unbind(ctx, &ctx->Array.ArrayObj->FogCoord.BufferObj, bufObj); unbind(ctx, &ctx->Array.ArrayObj->Index.BufferObj, bufObj); unbind(ctx, &ctx->Array.ArrayObj->EdgeFlag.BufferObj, bufObj); - for (j = 0; j < MAX_TEXTURE_UNITS; j++) { + for (j = 0; j < MAX_TEXTURE_COORD_UNITS; j++) { unbind(ctx, &ctx->Array.ArrayObj->TexCoord[j].BufferObj, bufObj); } for (j = 0; j < VERT_ATTRIB_MAX; j++) { diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h index e29964a1e86..c3feffda986 100644 --- a/src/mesa/main/config.h +++ b/src/mesa/main/config.h @@ -1,13 +1,9 @@ -/** - * \file config.h - * Tunable configuration parameters. - */ - /* * Mesa 3-D graphics library - * Version: 7.1 + * Version: 7.3 * * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * Copyright (C) 2008 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,6 +23,10 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/** + * \file config.h + * Tunable configuration parameters. + */ #ifndef MESA_CONFIG_H_INCLUDED #define MESA_CONFIG_H_INCLUDED @@ -115,27 +115,28 @@ /** Maximum number of layers in a 1D or 2D array texture - GL_MESA_texture_array */ #define MAX_ARRAY_TEXTURE_LAYERS 64 -/** Number of texture units - GL_ARB_multitexture - * This needs to be the larger of MAX_TEXTURE_COORD_UNITS and - * MAX_TEXTURE_IMAGE_UNITS seen below, since MAX_TEXTURE_UNITS is used - * to dimension some arrays that store both coord and image data. -*/ -#define MAX_TEXTURE_UNITS 8 - -/*@}*/ +/** + * Max number of texture coordinate units. This mainly just applies to + * the fixed-function vertex code. This will be difficult to raise above + * eight because of various vertex attribute bitvectors. + */ +#define MAX_TEXTURE_COORD_UNITS 8 +/** + * Max number of texture image units. Also determines number of texture + * samplers in shaders. + */ +#define MAX_TEXTURE_IMAGE_UNITS 16 /** - * \name Separate numbers of texture coordinates and texture image units. - * - * These values will eventually replace most instances of MAX_TEXTURE_UNITS. - * We should always have MAX_TEXTURE_COORD_UNITS <= MAX_TEXTURE_IMAGE_UNITS. - * And, GL_MAX_TEXTURE_UNITS <= MAX_TEXTURE_COORD_UNITS. + * Larger of MAX_TEXTURE_COORD_UNITS and MAX_TEXTURE_IMAGE_UNITS. + * This value is only used for dimensioning arrays. + * Either MAX_TEXTURE_COORD_UNITS or MAX_TEXTURE_IMAGE_UNITS (or the + * corresponding ctx->Const.MaxTextureCoord/ImageUnits fields) should be + * used almost everywhere else. */ -/*@{*/ -#define MAX_TEXTURE_COORD_UNITS 8 -#define MAX_TEXTURE_IMAGE_UNITS 8 -/*@}*/ +#define MAX_TEXTURE_UNITS ((MAX_TEXTURE_COORD_UNITS > MAX_TEXTURE_IMAGE_UNITS) ? MAX_TEXTURE_COORD_UNITS : MAX_TEXTURE_IMAGE_UNITS) + /** * Maximum viewport/image width. Must accomodate all texture sizes too. @@ -181,16 +182,16 @@ /** For any program target/extension */ /*@{*/ #define MAX_PROGRAM_INSTRUCTIONS (16 * 1024) -#define MAX_PROGRAM_LOCAL_PARAMS 128 /* KW: power of two */ +#define MAX_PROGRAM_LOCAL_PARAMS 256 /**< per-program constants (power of two) */ #define MAX_PROGRAM_ENV_PARAMS 128 #define MAX_PROGRAM_MATRICES 8 #define MAX_PROGRAM_MATRIX_STACK_DEPTH 4 #define MAX_PROGRAM_CALL_DEPTH 8 #define MAX_PROGRAM_TEMPS 128 #define MAX_PROGRAM_ADDRESS_REGS 2 -#define MAX_UNIFORMS 128 /**< number of float components */ +#define MAX_UNIFORMS 256 /**< number of vec4 uniforms */ #define MAX_VARYING 8 /**< number of float[4] vectors */ -#define MAX_SAMPLERS 8 +#define MAX_SAMPLERS MAX_TEXTURE_IMAGE_UNITS #define MAX_PROGRAM_INPUTS 32 #define MAX_PROGRAM_OUTPUTS 32 /*@}*/ @@ -218,8 +219,8 @@ /** For GL_ARB_vertex_shader */ /*@{*/ #define MAX_VERTEX_ATTRIBS 16 -#define MAX_VERTEX_TEXTURE_IMAGE_UNITS MAX_TEXTURE_UNITS -#define MAX_COMBINED_TEXTURE_IMAGE_UNITS (MAX_TEXTURE_IMAGE_UNITS + MAX_VERTEX_TEXTURE_IMAGE_UNITS) +#define MAX_VERTEX_TEXTURE_IMAGE_UNITS MAX_TEXTURE_IMAGE_UNITS +#define MAX_COMBINED_TEXTURE_IMAGE_UNITS MAX_TEXTURE_IMAGE_UNITS /*@}*/ diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 61c0861cbd4..ea52b26f0f7 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -1,14 +1,9 @@ -/** - * \file context.c - * Mesa context/visual/framebuffer management functions. - * \author Brian Paul - */ - /* * Mesa 3-D graphics library - * Version: 7.1 + * Version: 7.3 * * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * Copyright (C) 2008 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -28,6 +23,11 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/** + * \file context.c + * Mesa context/visual/framebuffer management functions. + * \author Brian Paul + */ /** * \mainpage Mesa Main Module @@ -870,9 +870,6 @@ _mesa_init_constants(GLcontext *ctx) assert(MAX_TEXTURE_LEVELS >= MAX_3D_TEXTURE_LEVELS); assert(MAX_TEXTURE_LEVELS >= MAX_CUBE_TEXTURE_LEVELS); - assert(MAX_TEXTURE_UNITS >= MAX_TEXTURE_COORD_UNITS); - assert(MAX_TEXTURE_UNITS >= MAX_TEXTURE_IMAGE_UNITS); - /* Constants, may be overriden (usually only reduced) by device drivers */ ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS; ctx->Const.Max3DTextureLevels = MAX_3D_TEXTURE_LEVELS; @@ -963,6 +960,9 @@ check_context_limits(GLcontext *ctx) assert(ctx->Const.MaxTextureUnits <= MAX_TEXTURE_IMAGE_UNITS); assert(ctx->Const.MaxTextureUnits <= MAX_TEXTURE_COORD_UNITS); + /* number of coord units cannot be greater than number of image units */ + assert(ctx->Const.MaxTextureCoordUnits <= ctx->Const.MaxTextureImageUnits); + assert(ctx->Const.MaxViewportWidth <= MAX_WIDTH); assert(ctx->Const.MaxViewportHeight <= MAX_WIDTH); diff --git a/src/mesa/main/dlopen.c b/src/mesa/main/dlopen.c index becef8173ef..8bc83c094fd 100644 --- a/src/mesa/main/dlopen.c +++ b/src/mesa/main/dlopen.c @@ -48,7 +48,7 @@ _mesa_dlopen(const char *libname, int flags) flags = RTLD_LAZY | RTLD_GLOBAL; /* Overriding flags at this time */ return dlopen(libname, flags); #elif defined(__MINGW32__) - return LoadLibrary(libname); + return LoadLibraryA(libname); #else return NULL; #endif diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c index 248df1badca..72ed50808c5 100644 --- a/src/mesa/main/enable.c +++ b/src/mesa/main/enable.c @@ -201,6 +201,26 @@ _mesa_DisableClientState( GLenum cap ) } + +/** + * Return pointer to current texture unit for setting/getting coordinate + * state. + * Note that we'll set GL_INVALID_OPERATION if the active texture unit is + * higher than the number of supported coordinate units. And we'll return NULL. + */ +static struct gl_texture_unit * +get_texcoord_unit(GLcontext *ctx) +{ + if (ctx->Texture.CurrentUnit >= ctx->Const.MaxTextureCoordUnits) { + _mesa_error(ctx, GL_INVALID_OPERATION, "glEnable/Disable(texcoord unit)"); + return NULL; + } + else { + return &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; + } +} + + /** * Helper function to enable or disable a texture target. */ @@ -612,54 +632,62 @@ _mesa_set_enable(GLcontext *ctx, GLenum cap, GLboolean state) return; } break; - case GL_TEXTURE_GEN_Q: { - GLuint unit = ctx->Texture.CurrentUnit; - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - GLuint newenabled = texUnit->TexGenEnabled & ~Q_BIT; - if (state) - newenabled |= Q_BIT; - if (texUnit->TexGenEnabled == newenabled) - return; - FLUSH_VERTICES(ctx, _NEW_TEXTURE); - texUnit->TexGenEnabled = newenabled; + case GL_TEXTURE_GEN_Q: + { + struct gl_texture_unit *texUnit = get_texcoord_unit(ctx); + if (texUnit) { + GLuint newenabled = texUnit->TexGenEnabled & ~Q_BIT; + if (state) + newenabled |= Q_BIT; + if (texUnit->TexGenEnabled == newenabled) + return; + FLUSH_VERTICES(ctx, _NEW_TEXTURE); + texUnit->TexGenEnabled = newenabled; + } + } break; - } - case GL_TEXTURE_GEN_R: { - GLuint unit = ctx->Texture.CurrentUnit; - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - GLuint newenabled = texUnit->TexGenEnabled & ~R_BIT; - if (state) - newenabled |= R_BIT; - if (texUnit->TexGenEnabled == newenabled) - return; - FLUSH_VERTICES(ctx, _NEW_TEXTURE); - texUnit->TexGenEnabled = newenabled; + case GL_TEXTURE_GEN_R: + { + struct gl_texture_unit *texUnit = get_texcoord_unit(ctx); + if (texUnit) { + GLuint newenabled = texUnit->TexGenEnabled & ~R_BIT; + if (state) + newenabled |= R_BIT; + if (texUnit->TexGenEnabled == newenabled) + return; + FLUSH_VERTICES(ctx, _NEW_TEXTURE); + texUnit->TexGenEnabled = newenabled; + } + } break; - } - case GL_TEXTURE_GEN_S: { - GLuint unit = ctx->Texture.CurrentUnit; - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - GLuint newenabled = texUnit->TexGenEnabled & ~S_BIT; - if (state) - newenabled |= S_BIT; - if (texUnit->TexGenEnabled == newenabled) - return; - FLUSH_VERTICES(ctx, _NEW_TEXTURE); - texUnit->TexGenEnabled = newenabled; + case GL_TEXTURE_GEN_S: + { + struct gl_texture_unit *texUnit = get_texcoord_unit(ctx); + if (texUnit) { + GLuint newenabled = texUnit->TexGenEnabled & ~S_BIT; + if (state) + newenabled |= S_BIT; + if (texUnit->TexGenEnabled == newenabled) + return; + FLUSH_VERTICES(ctx, _NEW_TEXTURE); + texUnit->TexGenEnabled = newenabled; + } + } break; - } - case GL_TEXTURE_GEN_T: { - GLuint unit = ctx->Texture.CurrentUnit; - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - GLuint newenabled = texUnit->TexGenEnabled & ~T_BIT; - if (state) - newenabled |= T_BIT; - if (texUnit->TexGenEnabled == newenabled) - return; - FLUSH_VERTICES(ctx, _NEW_TEXTURE); - texUnit->TexGenEnabled = newenabled; + case GL_TEXTURE_GEN_T: + { + struct gl_texture_unit *texUnit = get_texcoord_unit(ctx); + if (texUnit) { + GLuint newenabled = texUnit->TexGenEnabled & ~T_BIT; + if (state) + newenabled |= T_BIT; + if (texUnit->TexGenEnabled == newenabled) + return; + FLUSH_VERTICES(ctx, _NEW_TEXTURE); + texUnit->TexGenEnabled = newenabled; + } + } break; - } /* * CLIENT STATE!!! @@ -1155,28 +1183,36 @@ _mesa_IsEnabled( GLenum cap ) return is_texture_enabled(ctx, TEXTURE_3D_BIT); case GL_TEXTURE_GEN_Q: { - const struct gl_texture_unit *texUnit; - texUnit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; - return (texUnit->TexGenEnabled & Q_BIT) ? GL_TRUE : GL_FALSE; + const struct gl_texture_unit *texUnit = get_texcoord_unit(ctx); + if (texUnit) { + return (texUnit->TexGenEnabled & Q_BIT) ? GL_TRUE : GL_FALSE; + } } + return GL_FALSE; case GL_TEXTURE_GEN_R: { - const struct gl_texture_unit *texUnit; - texUnit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; - return (texUnit->TexGenEnabled & R_BIT) ? GL_TRUE : GL_FALSE; + const struct gl_texture_unit *texUnit = get_texcoord_unit(ctx); + if (texUnit) { + return (texUnit->TexGenEnabled & R_BIT) ? GL_TRUE : GL_FALSE; + } } + return GL_FALSE; case GL_TEXTURE_GEN_S: { - const struct gl_texture_unit *texUnit; - texUnit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; - return (texUnit->TexGenEnabled & S_BIT) ? GL_TRUE : GL_FALSE; + const struct gl_texture_unit *texUnit = get_texcoord_unit(ctx); + if (texUnit) { + return (texUnit->TexGenEnabled & S_BIT) ? GL_TRUE : GL_FALSE; + } } + return GL_FALSE; case GL_TEXTURE_GEN_T: { - const struct gl_texture_unit *texUnit; - texUnit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; - return (texUnit->TexGenEnabled & T_BIT) ? GL_TRUE : GL_FALSE; + const struct gl_texture_unit *texUnit = get_texcoord_unit(ctx); + if (texUnit) { + return (texUnit->TexGenEnabled & T_BIT) ? GL_TRUE : GL_FALSE; + } } + return GL_FALSE; /* * CLIENT STATE!!! diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c index ec0a5e3896d..d70b78f2586 100644 --- a/src/mesa/main/ffvertex_prog.c +++ b/src/mesa/main/ffvertex_prog.c @@ -26,7 +26,7 @@ **************************************************************************/ /** - * \file ffvertex_prog. + * \file ffvertex_prog.c * * Create a vertex program to execute the current fixed function T&L pipeline. * \author Keith Whitwell @@ -101,6 +101,7 @@ static GLuint translate_fog_mode( GLenum mode ) } } + #define TXG_NONE 0 #define TXG_OBJ_LINEAR 1 #define TXG_EYE_LINEAR 2 @@ -145,6 +146,7 @@ tnl_get_per_vertex_materials(GLcontext *ctx) return mask; } + /** * Should fog be computed per-vertex? */ @@ -159,6 +161,7 @@ tnl_get_per_vertex_fog(GLcontext *ctx) #endif } + static GLboolean check_active_shininess( GLcontext *ctx, const struct state_key *key, GLuint side ) @@ -176,8 +179,6 @@ static GLboolean check_active_shininess( GLcontext *ctx, return GL_FALSE; } - - static void make_state_key( GLcontext *ctx, struct state_key *key ) @@ -278,7 +279,7 @@ static void make_state_key( GLcontext *ctx, struct state_key *key ) ctx->Texture._EnabledUnits) key->texture_enabled_global = 1; - for (i = 0; i < MAX_TEXTURE_UNITS; i++) { + for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; if (texUnit->_ReallyEnabled) @@ -410,11 +411,13 @@ static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w ) return reg; } + static struct ureg swizzle1( struct ureg reg, int x ) { return swizzle(reg, x, x, x, x); } + static struct ureg get_temp( struct tnl_program *p ) { int bit = _mesa_ffs( ~p->temp_in_use ); @@ -430,6 +433,7 @@ static struct ureg get_temp( struct tnl_program *p ) return make_ureg(PROGRAM_TEMPORARY, bit-1); } + static struct ureg reserve_temp( struct tnl_program *p ) { struct ureg temp = get_temp( p ); @@ -437,6 +441,7 @@ static struct ureg reserve_temp( struct tnl_program *p ) return temp; } + static void release_temp( struct tnl_program *p, struct ureg reg ) { if (reg.file == PROGRAM_TEMPORARY) { @@ -493,6 +498,7 @@ static struct ureg register_input( struct tnl_program *p, GLuint input ) } } + /** * \param input one of VERT_RESULT_x tokens. */ @@ -502,6 +508,7 @@ static struct ureg register_output( struct tnl_program *p, GLuint output ) return make_ureg(PROGRAM_OUTPUT, output); } + static struct ureg register_const4f( struct tnl_program *p, GLfloat s0, GLfloat s1, @@ -531,6 +538,7 @@ static GLboolean is_undef( struct ureg reg ) return reg.file == PROGRAM_UNDEFINED; } + static struct ureg get_identity_param( struct tnl_program *p ) { if (is_undef(p->identity)) @@ -571,6 +579,7 @@ static void emit_arg( struct prog_src_register *src, ASSERT(src->Index == reg.idx); } + static void emit_dst( struct prog_dst_register *dst, struct ureg reg, GLuint mask ) { @@ -586,6 +595,7 @@ static void emit_dst( struct prog_dst_register *dst, ASSERT(dst->Index == reg.idx); } + static void debug_insn( struct prog_instruction *inst, const char *fn, GLuint line ) { @@ -696,6 +706,7 @@ static void emit_matrix_transform_vec4( struct tnl_program *p, emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]); } + /* This version is much easier to implement if writemasks are not * supported natively on the target or (like SSE), the target doesn't * have a clean/obvious dotproduct implementation. @@ -721,6 +732,7 @@ static void emit_transpose_matrix_transform_vec4( struct tnl_program *p, release_temp(p, tmp); } + static void emit_matrix_transform_vec3( struct tnl_program *p, struct ureg dest, const struct ureg *mat, @@ -748,6 +760,7 @@ static void emit_normalize_vec3( struct tnl_program *p, #endif } + static void emit_passthrough( struct tnl_program *p, GLuint input, GLuint output ) @@ -756,6 +769,7 @@ static void emit_passthrough( struct tnl_program *p, emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input)); } + static struct ureg get_eye_position( struct tnl_program *p ) { if (is_undef(p->eye_position)) { @@ -803,7 +817,6 @@ static struct ureg get_eye_position_z( struct tnl_program *p ) } - static struct ureg get_eye_position_normalized( struct tnl_program *p ) { if (is_undef(p->eye_position_normalized)) { @@ -865,7 +878,6 @@ static struct ureg get_transformed_normal( struct tnl_program *p ) } - static void build_hpos( struct tnl_program *p ) { struct ureg pos = register_input( p, VERT_ATTRIB_POS ); @@ -891,7 +903,9 @@ static GLuint material_attrib( GLuint side, GLuint property ) side); } -/* Get a bitmask of which material values vary on a per-vertex basis. + +/** + * Get a bitmask of which material values vary on a per-vertex basis. */ static void set_material_flags( struct tnl_program *p ) { @@ -927,7 +941,9 @@ static struct ureg get_material( struct tnl_program *p, GLuint side, MAT_BIT_FRONT_AMBIENT | \ MAT_BIT_FRONT_DIFFUSE) << (side)) -/* Either return a precalculated constant value or emit code to + +/** + * Either return a precalculated constant value or emit code to * calculate these values dynamically in the case where material calls * are present between begin/end pairs. * @@ -970,6 +986,7 @@ static struct ureg get_lightprod( struct tnl_program *p, GLuint light, return register_param4(p, STATE_LIGHTPROD, light, side, property); } + static struct ureg calculate_light_attenuation( struct tnl_program *p, GLuint i, struct ureg VPpli, @@ -1226,7 +1243,6 @@ static void build_lighting( struct tnl_program *p ) struct ureg res0, res1; GLuint mask0, mask1; - if (count == nr_lights) { if (separate) { mask0 = WRITEMASK_XYZ; @@ -1247,7 +1263,6 @@ static void build_lighting( struct tnl_program *p ) res1 = _col1; } - if (!is_undef(att)) { /* light is attenuated by distance */ emit_op1(p, OPCODE_LIT, lit, 0, dots); @@ -1320,7 +1335,6 @@ static void build_lighting( struct tnl_program *p ) emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0); emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1); - /* restore negate flag for next lighting */ dots = negate(dots); @@ -1395,6 +1409,7 @@ static void build_fog( struct tnl_program *p ) emit_op1(p, useabs ? OPCODE_ABS : OPCODE_MOV, fog, WRITEMASK_X, input); } } + static void build_reflect_texgen( struct tnl_program *p, struct ureg dest, @@ -1414,6 +1429,7 @@ static void build_reflect_texgen( struct tnl_program *p, release_temp(p, tmp); } + static void build_sphere_texgen( struct tnl_program *p, struct ureg dest, GLuint writemask ) @@ -1461,7 +1477,7 @@ static void build_texture_transform( struct tnl_program *p ) { GLuint i, j; - for (i = 0; i < MAX_TEXTURE_UNITS; i++) { + for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { if (!(p->state->fragprog_inputs_read & FRAG_BIT_TEX(i))) continue; @@ -1524,10 +1540,8 @@ static void build_texture_transform( struct tnl_program *p ) case TXG_NONE: copy_mask |= WRITEMASK_X << j; } - } - if (sphere_mask) { build_sphere_texgen(p, out_texgen, sphere_mask); } @@ -1610,6 +1624,7 @@ static void build_atten_pointsize( struct tnl_program *p ) release_temp(p, ut); } + /** * Emit constant point size. */ @@ -1620,6 +1635,7 @@ static void build_constant_pointsize( struct tnl_program *p ) emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, state_size); } + /** * Pass-though per-vertex point size, from user's point size array. */ diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index f72aa6a2882..8ce9b0ae69a 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -289,7 +289,7 @@ _mesa_GetBooleanv( GLenum pname, GLboolean *params ) params[0] = INT_TO_BOOLEAN(ctx->DrawBuffer->Visual.depthBits); break; case GL_DEPTH_CLEAR_VALUE: - params[0] = FLOAT_TO_BOOLEAN(ctx->Depth.Clear); + params[0] = FLOAT_TO_BOOLEAN(((GLfloat) ctx->Depth.Clear)); break; case GL_DEPTH_FUNC: params[0] = ENUM_TO_BOOLEAN(ctx->Depth.Func); @@ -2137,7 +2137,7 @@ _mesa_GetFloatv( GLenum pname, GLfloat *params ) params[0] = (GLfloat)(ctx->DrawBuffer->Visual.depthBits); break; case GL_DEPTH_CLEAR_VALUE: - params[0] = ctx->Depth.Clear; + params[0] = ((GLfloat) ctx->Depth.Clear); break; case GL_DEPTH_FUNC: params[0] = ENUM_TO_FLOAT(ctx->Depth.Func); @@ -3985,7 +3985,7 @@ _mesa_GetIntegerv( GLenum pname, GLint *params ) params[0] = ctx->DrawBuffer->Visual.depthBits; break; case GL_DEPTH_CLEAR_VALUE: - params[0] = FLOAT_TO_INT(ctx->Depth.Clear); + params[0] = FLOAT_TO_INT(((GLfloat) ctx->Depth.Clear)); break; case GL_DEPTH_FUNC: params[0] = ENUM_TO_INT(ctx->Depth.Func); diff --git a/src/mesa/main/get_gen.py b/src/mesa/main/get_gen.py index 152e378b4fe..a191b045d33 100644 --- a/src/mesa/main/get_gen.py +++ b/src/mesa/main/get_gen.py @@ -180,7 +180,7 @@ StateVars = [ ( "GL_DEPTH_BIAS", GLfloat, ["ctx->Pixel.DepthBias"], "", None ), ( "GL_DEPTH_BITS", GLint, ["ctx->DrawBuffer->Visual.depthBits"], "", None ), - ( "GL_DEPTH_CLEAR_VALUE", GLfloatN, ["ctx->Depth.Clear"], "", None ), + ( "GL_DEPTH_CLEAR_VALUE", GLfloatN, ["((GLfloat) ctx->Depth.Clear)"], "", None ), ( "GL_DEPTH_FUNC", GLenum, ["ctx->Depth.Func"], "", None ), ( "GL_DEPTH_RANGE", GLfloatN, [ "ctx->Viewport.Near", "ctx->Viewport.Far" ], "", None ), diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c index c205b4b766d..4d86c547775 100644 --- a/src/mesa/main/image.c +++ b/src/mesa/main/image.c @@ -61,7 +61,7 @@ /** * \return GL_TRUE if type is packed pixel type, GL_FALSE otherwise. */ -static GLboolean +GLboolean _mesa_type_is_packed(GLenum type) { switch (type) { diff --git a/src/mesa/main/image.h b/src/mesa/main/image.h index 38e1374c20c..0e0bbd96d85 100644 --- a/src/mesa/main/image.h +++ b/src/mesa/main/image.h @@ -36,6 +36,9 @@ _mesa_swap2( GLushort *p, GLuint n ); extern void _mesa_swap4( GLuint *p, GLuint n ); +extern GLboolean +_mesa_type_is_packed(GLenum type); + extern GLint _mesa_sizeof_type( GLenum type ); diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index 6cfd7ccc723..69d55923c37 100644 --- a/src/mesa/main/imports.c +++ b/src/mesa/main/imports.c @@ -930,6 +930,18 @@ _mesa_sprintf( char *str, const char *fmt, ... ) return r; } +/** Wrapper around vsnprintf() */ +int +_mesa_snprintf( char *str, size_t size, const char *fmt, ... ) +{ + int r; + va_list args; + va_start( args, fmt ); + r = vsnprintf( str, size, fmt, args ); + va_end( args ); + return r; +} + /** Wrapper around printf(), using vsprintf() for the formatting. */ void _mesa_printf( const char *fmtString, ... ) diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h index bab00420710..92d5d0bb4a8 100644 --- a/src/mesa/main/imports.h +++ b/src/mesa/main/imports.h @@ -763,6 +763,9 @@ _mesa_strtod( const char *s, char **end ); extern int _mesa_sprintf( char *str, const char *fmt, ... ); +extern int +_mesa_snprintf( char *str, size_t size, const char *fmt, ... ); + extern void _mesa_printf( const char *fmtString, ... ); diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c index 9e051ace25b..3dd4b3391b9 100644 --- a/src/mesa/main/mipmap.c +++ b/src/mesa/main/mipmap.c @@ -41,7 +41,11 @@ bytes_per_pixel(GLenum datatype, GLuint comps) { GLint b = _mesa_sizeof_packed_type(datatype); assert(b >= 0); - return b * comps; + + if (_mesa_type_is_packed(datatype)) + return b; + else + return b * comps; } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 2a48b7c5179..a17bf9c7b0c 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1,15 +1,9 @@ -/** - * \file mtypes.h - * Main Mesa data structures. - * - * Please try to mark derived values with a leading underscore ('_'). - */ - /* * Mesa 3-D graphics library * Version: 7.3 * * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2008 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -29,7 +23,12 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - +/** + * \file mtypes.h + * Main Mesa data structures. + * + * Please try to mark derived values with a leading underscore ('_'). + */ #ifndef TYPES_H #define TYPES_H @@ -641,7 +640,7 @@ struct gl_current_attrib GLfloat RasterColor[4]; GLfloat RasterSecondaryColor[4]; GLfloat RasterIndex; - GLfloat RasterTexCoords[MAX_TEXTURE_COORD_UNITS][4]; + GLfloat RasterTexCoords[MAX_TEXTURE_UNITS][4]; GLboolean RasterPosValid; /*@}*/ }; @@ -725,10 +724,10 @@ struct gl_enable_attrib GLboolean SampleCoverage; /* GL_ARB_multisample */ GLboolean SampleCoverageInvert; /* GL_ARB_multisample */ GLboolean RasterPositionUnclipped; /* GL_IBM_rasterpos_clip */ - GLuint Texture[MAX_TEXTURE_IMAGE_UNITS]; - GLuint TexGen[MAX_TEXTURE_COORD_UNITS]; + GLuint Texture[MAX_TEXTURE_UNITS]; + GLuint TexGen[MAX_TEXTURE_UNITS]; /* SGI_texture_color_table */ - GLboolean TextureColorTable[MAX_TEXTURE_IMAGE_UNITS]; + GLboolean TextureColorTable[MAX_TEXTURE_UNITS]; /* GL_ARB_vertex_program / GL_NV_vertex_program */ GLboolean VertexProgram; GLboolean VertexProgramPointSize; @@ -1071,7 +1070,7 @@ struct gl_point_attrib GLfloat Threshold; /**< GL_EXT_point_parameters */ GLboolean _Attenuated; /**< True if Params != [1, 0, 0] */ GLboolean PointSprite; /**< GL_NV/ARB_point_sprite */ - GLboolean CoordReplace[MAX_TEXTURE_COORD_UNITS]; /**< GL_ARB_point_sprite */ + GLboolean CoordReplace[MAX_TEXTURE_UNITS]; /**< GL_ARB_point_sprite */ GLenum SpriteRMode; /**< GL_NV_point_sprite (only!) */ GLenum SpriteOrigin; /**< GL_ARB_point_sprite */ }; @@ -1559,7 +1558,7 @@ struct gl_texture_attrib * name multitexture */ /**@{*/ - GLuint CurrentUnit; /**< Active texture unit */ + GLuint CurrentUnit; /**< Active texture unit [0, MaxTextureImageUnits-1] */ GLbitfield _EnabledUnits; /**< one bit set for each really-enabled unit */ GLbitfield _EnabledCoordUnits; /**< one bit per enabled coordinate unit */ GLbitfield _GenFlags; /**< for texgen */ @@ -1876,7 +1875,7 @@ struct gl_program GLbitfield OutputsWritten; /**< Bitmask of which output regs are written to */ GLbitfield InputFlags[MAX_PROGRAM_INPUTS]; /**< PROG_PARAM_BIT_x flags */ GLbitfield OutputFlags[MAX_PROGRAM_OUTPUTS]; /**< PROG_PARAM_BIT_x flags */ - GLbitfield TexturesUsed[MAX_TEXTURE_IMAGE_UNITS]; /**< TEXTURE_x_BIT bitmask */ + GLbitfield TexturesUsed[MAX_TEXTURE_UNITS]; /**< TEXTURE_x_BIT bitmask */ GLbitfield SamplersUsed; /**< Bitfield of which samplers are used */ GLbitfield ShadowSamplers; /**< Texture units used for shadow sampling. */ @@ -2473,9 +2472,9 @@ struct gl_constants GLint MaxTextureRectSize; /* GL_NV_texture_rectangle */ GLuint MaxTextureCoordUnits; GLuint MaxTextureImageUnits; - GLuint MaxTextureUnits; /* = MIN(CoordUnits, ImageUnits) */ - GLfloat MaxTextureMaxAnisotropy; /* GL_EXT_texture_filter_anisotropic */ - GLfloat MaxTextureLodBias; /* GL_EXT_texture_lod_bias */ + GLuint MaxTextureUnits; /**< = MIN(CoordUnits, ImageUnits) */ + GLfloat MaxTextureMaxAnisotropy; /**< GL_EXT_texture_filter_anisotropic */ + GLfloat MaxTextureLodBias; /**< GL_EXT_texture_lod_bias */ GLuint MaxArrayLockSize; GLint SubPixelBits; GLfloat MinPointSize, MaxPointSize; /* aliased */ @@ -2951,7 +2950,7 @@ struct __GLcontextRec struct gl_matrix_stack ModelviewMatrixStack; struct gl_matrix_stack ProjectionMatrixStack; struct gl_matrix_stack ColorMatrixStack; - struct gl_matrix_stack TextureMatrixStack[MAX_TEXTURE_COORD_UNITS]; + struct gl_matrix_stack TextureMatrixStack[MAX_TEXTURE_UNITS]; struct gl_matrix_stack ProgramMatrixStack[MAX_PROGRAM_MATRICES]; struct gl_matrix_stack *CurrentStack; /**< Points to one of the above stacks */ /*@}*/ diff --git a/src/mesa/main/points.c b/src/mesa/main/points.c index 1fe697033f1..4c8fc1f72e0 100644 --- a/src/mesa/main/points.c +++ b/src/mesa/main/points.c @@ -257,7 +257,7 @@ _mesa_init_point(GLcontext *ctx) ctx->Point.PointSprite = GL_FALSE; /* GL_ARB/NV_point_sprite */ ctx->Point.SpriteRMode = GL_ZERO; /* GL_NV_point_sprite (only!) */ ctx->Point.SpriteOrigin = GL_UPPER_LEFT; /* GL_ARB_point_sprite */ - for (i = 0; i < MAX_TEXTURE_UNITS; i++) { + for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { ctx->Point.CoordReplace[i] = GL_FALSE; /* GL_ARB/NV_point_sprite */ } } diff --git a/src/mesa/main/rastpos.c b/src/mesa/main/rastpos.c index 9842172f460..9f309d6ab84 100644 --- a/src/mesa/main/rastpos.c +++ b/src/mesa/main/rastpos.c @@ -500,7 +500,7 @@ void _mesa_init_rastpos( GLcontext * ctx ) ASSIGN_4V( ctx->Current.RasterColor, 1.0, 1.0, 1.0, 1.0 ); ASSIGN_4V( ctx->Current.RasterSecondaryColor, 0.0, 0.0, 0.0, 1.0 ); ctx->Current.RasterIndex = 1.0; - for (i=0; i<MAX_TEXTURE_UNITS; i++) + for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) ASSIGN_4V( ctx->Current.RasterTexCoords[i], 0.0, 0.0, 0.0, 1.0 ); ctx->Current.RasterPosValid = GL_TRUE; } diff --git a/src/mesa/main/texenvprogram.c b/src/mesa/main/texenvprogram.c index 2f90a342202..48abf51d890 100644 --- a/src/mesa/main/texenvprogram.c +++ b/src/mesa/main/texenvprogram.c @@ -38,6 +38,21 @@ #include "texenvprogram.h" +/* + * Note on texture units: + * + * The number of texture units supported by fixed-function fragment + * processing is MAX_TEXTURE_COORD_UNITS, not MAX_TEXTURE_IMAGE_UNITS. + * That's because there's a one-to-one correspondence between texture + * coordinates and samplers in fixed-function processing. + * + * Since fixed-function vertex processing is limited to MAX_TEXTURE_COORD_UNITS + * sets of texcoords, so is fixed-function fragment processing. + * + * We can safely use ctx->Const.MaxTextureUnits for loop bounds. + */ + + struct texenvprog_cache_item { GLuint hash; @@ -50,13 +65,13 @@ struct texenvprog_cache_item /** * Up to nine instructions per tex unit, plus fog, specular color. */ -#define MAX_INSTRUCTIONS ((MAX_TEXTURE_UNITS * 9) + 12) +#define MAX_INSTRUCTIONS ((MAX_TEXTURE_COORD_UNITS * 9) + 12) #define DISASSEM (MESA_VERBOSE & VERBOSE_DISASSEM) struct mode_opt { - GLubyte Source:4; - GLubyte Operand:3; + GLuint Source:4; + GLuint Operand:3; }; struct state_key { @@ -313,7 +328,7 @@ static void make_state_key( GLcontext *ctx, struct state_key *key ) memset(key, 0, sizeof(*key)); - for (i=0;i<MAX_TEXTURE_UNITS;i++) { + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; GLenum format; @@ -416,7 +431,7 @@ struct texenv_fragment_program { GLbitfield temp_in_use; /**< Tracks temporary regs which are in use. */ GLboolean error; - struct ureg src_texture[MAX_TEXTURE_UNITS]; + struct ureg src_texture[MAX_TEXTURE_COORD_UNITS]; /* Reg containing each texture unit's sampled texture color, * else undef. */ @@ -1201,7 +1216,7 @@ create_new_program(GLcontext *ctx, struct state_key *key, p.program->Base.InputsRead = 0; p.program->Base.OutputsWritten = 1 << FRAG_RESULT_COLR; - for (unit = 0; unit < MAX_TEXTURE_UNITS; unit++) + for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) p.src_texture[unit] = undef; p.src_previous = undef; diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c index db3525666a2..50e8d7a3b87 100644 --- a/src/mesa/main/texformat.c +++ b/src/mesa/main/texformat.c @@ -1685,7 +1685,7 @@ _mesa_format_to_type_and_comps(const struct gl_texture_format *format, case MESA_FORMAT_ARGB1555: case MESA_FORMAT_ARGB1555_REV: *datatype = GL_UNSIGNED_SHORT_1_5_5_5_REV; - *comps = 3; + *comps = 4; return; case MESA_FORMAT_AL88: diff --git a/src/mesa/main/texrender.c b/src/mesa/main/texrender.c index 163bda45014..4ae13a7b9b5 100644 --- a/src/mesa/main/texrender.c +++ b/src/mesa/main/texrender.c @@ -49,6 +49,14 @@ texture_get_row(GLcontext *ctx, struct gl_renderbuffer *rb, GLuint count, trb->TexImage->FetchTexelc(trb->TexImage, x + i, y, z, rgbaOut + 4 * i); } } + else if (rb->DataType == GL_UNSIGNED_SHORT) { + GLushort *zValues = (GLushort *) values; + for (i = 0; i < count; i++) { + GLfloat flt; + trb->TexImage->FetchTexelf(trb->TexImage, x + i, y, z, &flt); + zValues[i] = (GLushort) (flt * 0xffff); + } + } else if (rb->DataType == GL_UNSIGNED_INT) { GLuint *zValues = (GLuint *) values; /* @@ -96,6 +104,15 @@ texture_get_values(GLcontext *ctx, struct gl_renderbuffer *rb, GLuint count, z, rgbaOut + 4 * i); } } + else if (rb->DataType == GL_UNSIGNED_SHORT) { + GLushort *zValues = (GLushort *) values; + for (i = 0; i < count; i++) { + GLfloat flt; + trb->TexImage->FetchTexelf(trb->TexImage, x[i], y[i] + trb->Yoffset, + z, &flt); + zValues[i] = (GLushort) (flt * 0xffff); + } + } else if (rb->DataType == GL_UNSIGNED_INT) { GLuint *zValues = (GLuint *) values; for (i = 0; i < count; i++) { @@ -147,6 +164,14 @@ texture_put_row(GLcontext *ctx, struct gl_renderbuffer *rb, GLuint count, rgba += 4; } } + else if (rb->DataType == GL_UNSIGNED_SHORT) { + const GLushort *zValues = (const GLushort *) values; + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + trb->Store(trb->TexImage, x + i, y, z, zValues + i); + } + } + } else if (rb->DataType == GL_UNSIGNED_INT) { const GLuint *zValues = (const GLuint *) values; for (i = 0; i < count; i++) { @@ -189,6 +214,14 @@ texture_put_mono_row(GLcontext *ctx, struct gl_renderbuffer *rb, GLuint count, } } } + else if (rb->DataType == GL_UNSIGNED_SHORT) { + const GLushort zValue = *((const GLushort *) value); + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + trb->Store(trb->TexImage, x + i, y, z, &zValue); + } + } + } else if (rb->DataType == GL_UNSIGNED_INT) { const GLuint zValue = *((const GLuint *) value); for (i = 0; i < count; i++) { @@ -231,12 +264,19 @@ texture_put_values(GLcontext *ctx, struct gl_renderbuffer *rb, GLuint count, rgba += 4; } } + else if (rb->DataType == GL_UNSIGNED_SHORT) { + const GLushort *zValues = (const GLushort *) values; + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + trb->Store(trb->TexImage, x[i], y[i] + trb->Yoffset, z, zValues + i); + } + } + } else if (rb->DataType == GL_UNSIGNED_INT) { const GLuint *zValues = (const GLuint *) values; for (i = 0; i < count; i++) { if (!mask || mask[i]) { - trb->Store(trb->TexImage, x[i], y[i] + trb->Yoffset, z, - zValues + i); + trb->Store(trb->TexImage, x[i], y[i] + trb->Yoffset, z, zValues + i); } } } @@ -281,6 +321,14 @@ texture_put_mono_values(GLcontext *ctx, struct gl_renderbuffer *rb, } } } + else if (rb->DataType == GL_UNSIGNED_SHORT) { + const GLushort zValue = *((const GLushort *) value); + for (i = 0; i < count; i++) { + if (!mask || mask[i]) { + trb->Store(trb->TexImage, x[i], y[i] + trb->Yoffset, z, &zValue); + } + } + } else if (rb->DataType == GL_UNSIGNED_INT_24_8_EXT) { const GLuint zValue = *((const GLuint *) value); const GLfloat flt = (GLfloat) ((zValue >> 8) * (1.0 / 0xffffff)); diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c index f0193770412..29955d76cbe 100644 --- a/src/mesa/main/texstate.c +++ b/src/mesa/main/texstate.c @@ -81,7 +81,7 @@ _mesa_copy_texture_state( const GLcontext *src, GLcontext *dst ) dst->Texture.SharedPalette = src->Texture.SharedPalette; /* per-unit state */ - for (i = 0; i < src->Const.MaxTextureUnits; i++) { + for (i = 0; i < src->Const.MaxTextureImageUnits; i++) { dst->Texture.Unit[i].Enabled = src->Texture.Unit[i].Enabled; dst->Texture.Unit[i].EnvMode = src->Texture.Unit[i].EnvMode; COPY_4V(dst->Texture.Unit[i].EnvColor, src->Texture.Unit[i].EnvColor); @@ -307,8 +307,7 @@ _mesa_ActiveTextureARB(GLenum texture) _mesa_debug(ctx, "glActiveTexture %s\n", _mesa_lookup_enum_by_nr(texture)); - /* XXX error-check against max(coordunits, imageunits) */ - if (texUnit >= ctx->Const.MaxTextureUnits) { + if (texUnit >= ctx->Const.MaxTextureImageUnits) { _mesa_error(ctx, GL_INVALID_ENUM, "glActiveTexture(texture)"); return; } @@ -369,7 +368,7 @@ update_texture_matrices( GLcontext *ctx ) ctx->Texture._TexMatEnabled = 0; - for (i=0; i < ctx->Const.MaxTextureUnits; i++) { + for (i=0; i < ctx->Const.MaxTextureCoordUnits; i++) { if (_math_matrix_is_dirty(ctx->TextureMatrixStack[i].Top)) { _math_matrix_analyse( ctx->TextureMatrixStack[i].Top ); @@ -491,7 +490,7 @@ update_texture_state( GLcontext *ctx ) /* * Update texture unit state. */ - for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) { + for (unit = 0; unit < ctx->Const.MaxTextureImageUnits; unit++) { struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; GLbitfield enableBits; @@ -621,7 +620,7 @@ update_texture_state( GLcontext *ctx ) } /* Setup texgen for those texture coordinate sets that are in use */ - for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) { + for (unit = 0; unit < ctx->Const.MaxTextureCoordUnits; unit++) { struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; if (!(ctx->Texture._EnabledCoordUnits & (1 << unit))) diff --git a/src/mesa/shader/prog_parameter.c b/src/mesa/shader/prog_parameter.c index e6653157d46..8ae961241f6 100644 --- a/src/mesa/shader/prog_parameter.c +++ b/src/mesa/shader/prog_parameter.c @@ -290,7 +290,8 @@ _mesa_use_uniform(struct gl_program_parameter_list *paramList, GLuint i; for (i = 0; i < paramList->NumParameters; i++) { struct gl_program_parameter *p = paramList->Parameters + i; - if (p->Type == PROGRAM_UNIFORM && _mesa_strcmp(p->Name, name) == 0) { + if ((p->Type == PROGRAM_UNIFORM || p->Type == PROGRAM_SAMPLER) && + _mesa_strcmp(p->Name, name) == 0) { p->Used = GL_TRUE; /* Note that large uniforms may occupy several slots so we're * not done searching yet. diff --git a/src/mesa/shader/prog_statevars.h b/src/mesa/shader/prog_statevars.h index 72e51f40314..d5358a1d042 100644 --- a/src/mesa/shader/prog_statevars.h +++ b/src/mesa/shader/prog_statevars.h @@ -79,10 +79,10 @@ typedef enum gl_state_index_ { STATE_SHININESS, STATE_HALF_VECTOR, - STATE_POSITION, - STATE_ATTENUATION, - STATE_SPOT_DIRECTION, - STATE_SPOT_CUTOFF, + STATE_POSITION, /**< xyzw = position */ + STATE_ATTENUATION, /**< xyz = attenuation, w = spot exponent */ + STATE_SPOT_DIRECTION, /**< xyz = direction, w = cos(cutoff) */ + STATE_SPOT_CUTOFF, /**< x = cutoff, yzw = undefined */ STATE_TEXGEN_EYE_S, STATE_TEXGEN_EYE_T, diff --git a/src/mesa/shader/slang/library/slang_common_builtin.gc b/src/mesa/shader/slang/library/slang_common_builtin.gc index a051c53eea3..230c57cea8d 100644 --- a/src/mesa/shader/slang/library/slang_common_builtin.gc +++ b/src/mesa/shader/slang/library/slang_common_builtin.gc @@ -1,8 +1,9 @@ /* * Mesa 3-D graphics library - * Version: 6.5 + * Version: 7.3 * * Copyright (C) 2006 Brian Paul All Rights Reserved. + * Copyright (C) 2008 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -26,8 +27,9 @@ // From Shader Spec, ver. 1.10, rev. 59 // -//bp: XXX these will probably go away since the value needs to be -//determined at runtime and may vary from one GLcontext to another... +// Note: the values assigned to these constants here aren't actually used. +// They're set by the compiler according to the GL context limits. +// See slang_simplify.c const int gl_MaxLights = 8; const int gl_MaxClipPlanes = 6; const int gl_MaxTextureUnits = 8; @@ -98,6 +100,9 @@ struct gl_MaterialParameters { uniform gl_MaterialParameters gl_FrontMaterial; uniform gl_MaterialParameters gl_BackMaterial; +/* NOTE: the order of these fields is significant! + * See the definition of the lighting state vars such as STATE_SPOT_DIRECTION. + */ struct gl_LightSourceParameters { vec4 ambient; vec4 diffuse; @@ -105,12 +110,14 @@ struct gl_LightSourceParameters { vec4 position; vec4 halfVector; vec3 spotDirection; - float spotExponent; - float spotCutoff; float spotCosCutoff; + float constantAttenuation; float linearAttenuation; float quadraticAttenuation; + float spotExponent; + + float spotCutoff; }; uniform gl_LightSourceParameters gl_LightSource[gl_MaxLights]; diff --git a/src/mesa/shader/slang/library/slang_common_builtin_gc.h b/src/mesa/shader/slang/library/slang_common_builtin_gc.h index 46632010815..759bf247d80 100644 --- a/src/mesa/shader/slang/library/slang_common_builtin_gc.h +++ b/src/mesa/shader/slang/library/slang_common_builtin_gc.h @@ -66,58 +66,58 @@ 116,83,111,117,114,99,101,80,97,114,97,109,101,116,101,114,115,0,12,0,97,109,98,105,101,110,116,0, 0,0,1,12,0,100,105,102,102,117,115,101,0,0,0,1,12,0,115,112,101,99,117,108,97,114,0,0,0,1,12,0,112, 111,115,105,116,105,111,110,0,0,0,1,12,0,104,97,108,102,86,101,99,116,111,114,0,0,0,1,11,0,115,112, -111,116,68,105,114,101,99,116,105,111,110,0,0,0,1,9,0,115,112,111,116,69,120,112,111,110,101,110, -116,0,0,0,1,9,0,115,112,111,116,67,117,116,111,102,102,0,0,0,1,9,0,115,112,111,116,67,111,115,67, -117,116,111,102,102,0,0,0,1,9,0,99,111,110,115,116,97,110,116,65,116,116,101,110,117,97,116,105, -111,110,0,0,0,1,9,0,108,105,110,101,97,114,65,116,116,101,110,117,97,116,105,111,110,0,0,0,1,9,0, -113,117,97,100,114,97,116,105,99,65,116,116,101,110,117,97,116,105,111,110,0,0,0,0,0,0,0,2,2,90,95, -4,0,25,103,108,95,76,105,103,104,116,83,111,117,114,99,101,80,97,114,97,109,101,116,101,114,115,0, -0,1,103,108,95,76,105,103,104,116,83,111,117,114,99,101,0,3,18,103,108,95,77,97,120,76,105,103,104, -116,115,0,0,0,2,2,90,95,0,0,24,103,108,95,76,105,103,104,116,77,111,100,101,108,80,97,114,97,109, -101,116,101,114,115,0,12,0,97,109,98,105,101,110,116,0,0,0,0,0,0,0,2,2,90,95,4,0,25,103,108,95,76, -105,103,104,116,77,111,100,101,108,80,97,114,97,109,101,116,101,114,115,0,0,1,103,108,95,76,105, -103,104,116,77,111,100,101,108,0,0,0,2,2,90,95,0,0,24,103,108,95,76,105,103,104,116,77,111,100,101, -108,80,114,111,100,117,99,116,115,0,12,0,115,99,101,110,101,67,111,108,111,114,0,0,0,0,0,0,0,2,2, -90,95,4,0,25,103,108,95,76,105,103,104,116,77,111,100,101,108,80,114,111,100,117,99,116,115,0,0,1, -103,108,95,70,114,111,110,116,76,105,103,104,116,77,111,100,101,108,80,114,111,100,117,99,116,0,0, -0,2,2,90,95,4,0,25,103,108,95,76,105,103,104,116,77,111,100,101,108,80,114,111,100,117,99,116,115, -0,0,1,103,108,95,66,97,99,107,76,105,103,104,116,77,111,100,101,108,80,114,111,100,117,99,116,0,0, -0,2,2,90,95,0,0,24,103,108,95,76,105,103,104,116,80,114,111,100,117,99,116,115,0,12,0,97,109,98, -105,101,110,116,0,0,0,1,12,0,100,105,102,102,117,115,101,0,0,0,1,12,0,115,112,101,99,117,108,97, -114,0,0,0,0,0,0,0,2,2,90,95,4,0,25,103,108,95,76,105,103,104,116,80,114,111,100,117,99,116,115,0,0, -1,103,108,95,70,114,111,110,116,76,105,103,104,116,80,114,111,100,117,99,116,0,3,18,103,108,95,77, -97,120,76,105,103,104,116,115,0,0,0,2,2,90,95,4,0,25,103,108,95,76,105,103,104,116,80,114,111,100, -117,99,116,115,0,0,1,103,108,95,66,97,99,107,76,105,103,104,116,80,114,111,100,117,99,116,0,3,18, -103,108,95,77,97,120,76,105,103,104,116,115,0,0,0,2,2,90,95,4,0,12,0,1,103,108,95,84,101,120,116, -117,114,101,69,110,118,67,111,108,111,114,0,3,18,103,108,95,77,97,120,84,101,120,116,117,114,101, -73,109,97,103,101,85,110,105,116,115,0,0,0,2,2,90,95,4,0,12,0,1,103,108,95,69,121,101,80,108,97, +111,116,68,105,114,101,99,116,105,111,110,0,0,0,1,9,0,115,112,111,116,67,111,115,67,117,116,111, +102,102,0,0,0,1,9,0,99,111,110,115,116,97,110,116,65,116,116,101,110,117,97,116,105,111,110,0,0,0, +1,9,0,108,105,110,101,97,114,65,116,116,101,110,117,97,116,105,111,110,0,0,0,1,9,0,113,117,97,100, +114,97,116,105,99,65,116,116,101,110,117,97,116,105,111,110,0,0,0,1,9,0,115,112,111,116,69,120,112, +111,110,101,110,116,0,0,0,1,9,0,115,112,111,116,67,117,116,111,102,102,0,0,0,0,0,0,0,2,2,90,95,4,0, +25,103,108,95,76,105,103,104,116,83,111,117,114,99,101,80,97,114,97,109,101,116,101,114,115,0,0,1, +103,108,95,76,105,103,104,116,83,111,117,114,99,101,0,3,18,103,108,95,77,97,120,76,105,103,104,116, +115,0,0,0,2,2,90,95,0,0,24,103,108,95,76,105,103,104,116,77,111,100,101,108,80,97,114,97,109,101, +116,101,114,115,0,12,0,97,109,98,105,101,110,116,0,0,0,0,0,0,0,2,2,90,95,4,0,25,103,108,95,76,105, +103,104,116,77,111,100,101,108,80,97,114,97,109,101,116,101,114,115,0,0,1,103,108,95,76,105,103, +104,116,77,111,100,101,108,0,0,0,2,2,90,95,0,0,24,103,108,95,76,105,103,104,116,77,111,100,101,108, +80,114,111,100,117,99,116,115,0,12,0,115,99,101,110,101,67,111,108,111,114,0,0,0,0,0,0,0,2,2,90,95, +4,0,25,103,108,95,76,105,103,104,116,77,111,100,101,108,80,114,111,100,117,99,116,115,0,0,1,103, +108,95,70,114,111,110,116,76,105,103,104,116,77,111,100,101,108,80,114,111,100,117,99,116,0,0,0,2, +2,90,95,4,0,25,103,108,95,76,105,103,104,116,77,111,100,101,108,80,114,111,100,117,99,116,115,0,0, +1,103,108,95,66,97,99,107,76,105,103,104,116,77,111,100,101,108,80,114,111,100,117,99,116,0,0,0,2, +2,90,95,0,0,24,103,108,95,76,105,103,104,116,80,114,111,100,117,99,116,115,0,12,0,97,109,98,105, +101,110,116,0,0,0,1,12,0,100,105,102,102,117,115,101,0,0,0,1,12,0,115,112,101,99,117,108,97,114,0, +0,0,0,0,0,0,2,2,90,95,4,0,25,103,108,95,76,105,103,104,116,80,114,111,100,117,99,116,115,0,0,1,103, +108,95,70,114,111,110,116,76,105,103,104,116,80,114,111,100,117,99,116,0,3,18,103,108,95,77,97,120, +76,105,103,104,116,115,0,0,0,2,2,90,95,4,0,25,103,108,95,76,105,103,104,116,80,114,111,100,117,99, +116,115,0,0,1,103,108,95,66,97,99,107,76,105,103,104,116,80,114,111,100,117,99,116,0,3,18,103,108, +95,77,97,120,76,105,103,104,116,115,0,0,0,2,2,90,95,4,0,12,0,1,103,108,95,84,101,120,116,117,114, +101,69,110,118,67,111,108,111,114,0,3,18,103,108,95,77,97,120,84,101,120,116,117,114,101,73,109,97, +103,101,85,110,105,116,115,0,0,0,2,2,90,95,4,0,12,0,1,103,108,95,69,121,101,80,108,97,110,101,83,0, +3,18,103,108,95,77,97,120,84,101,120,116,117,114,101,67,111,111,114,100,115,0,0,0,2,2,90,95,4,0,12, +0,1,103,108,95,69,121,101,80,108,97,110,101,84,0,3,18,103,108,95,77,97,120,84,101,120,116,117,114, +101,67,111,111,114,100,115,0,0,0,2,2,90,95,4,0,12,0,1,103,108,95,69,121,101,80,108,97,110,101,82,0, +3,18,103,108,95,77,97,120,84,101,120,116,117,114,101,67,111,111,114,100,115,0,0,0,2,2,90,95,4,0,12, +0,1,103,108,95,69,121,101,80,108,97,110,101,81,0,3,18,103,108,95,77,97,120,84,101,120,116,117,114, +101,67,111,111,114,100,115,0,0,0,2,2,90,95,4,0,12,0,1,103,108,95,79,98,106,101,99,116,80,108,97, 110,101,83,0,3,18,103,108,95,77,97,120,84,101,120,116,117,114,101,67,111,111,114,100,115,0,0,0,2,2, -90,95,4,0,12,0,1,103,108,95,69,121,101,80,108,97,110,101,84,0,3,18,103,108,95,77,97,120,84,101,120, -116,117,114,101,67,111,111,114,100,115,0,0,0,2,2,90,95,4,0,12,0,1,103,108,95,69,121,101,80,108,97, -110,101,82,0,3,18,103,108,95,77,97,120,84,101,120,116,117,114,101,67,111,111,114,100,115,0,0,0,2,2, -90,95,4,0,12,0,1,103,108,95,69,121,101,80,108,97,110,101,81,0,3,18,103,108,95,77,97,120,84,101,120, -116,117,114,101,67,111,111,114,100,115,0,0,0,2,2,90,95,4,0,12,0,1,103,108,95,79,98,106,101,99,116, -80,108,97,110,101,83,0,3,18,103,108,95,77,97,120,84,101,120,116,117,114,101,67,111,111,114,100,115, -0,0,0,2,2,90,95,4,0,12,0,1,103,108,95,79,98,106,101,99,116,80,108,97,110,101,84,0,3,18,103,108,95, -77,97,120,84,101,120,116,117,114,101,67,111,111,114,100,115,0,0,0,2,2,90,95,4,0,12,0,1,103,108,95, -79,98,106,101,99,116,80,108,97,110,101,82,0,3,18,103,108,95,77,97,120,84,101,120,116,117,114,101, -67,111,111,114,100,115,0,0,0,2,2,90,95,4,0,12,0,1,103,108,95,79,98,106,101,99,116,80,108,97,110, -101,81,0,3,18,103,108,95,77,97,120,84,101,120,116,117,114,101,67,111,111,114,100,115,0,0,0,2,2,90, -95,0,0,24,103,108,95,70,111,103,80,97,114,97,109,101,116,101,114,115,0,12,0,99,111,108,111,114,0,0, -0,1,9,0,100,101,110,115,105,116,121,0,0,0,1,9,0,115,116,97,114,116,0,0,0,1,9,0,101,110,100,0,0,0,1, -9,0,115,99,97,108,101,0,0,0,0,0,0,0,2,2,90,95,4,0,25,103,108,95,70,111,103,80,97,114,97,109,101, -116,101,114,115,0,0,1,103,108,95,70,111,103,0,0,0,1,90,95,0,0,9,0,0,114,97,100,105,97,110,115,0,1, -1,0,0,9,0,100,101,103,0,0,0,1,3,2,90,95,1,0,9,0,1,99,0,2,17,51,0,49,52,49,53,57,50,54,0,0,17,49,56, -48,0,48,0,0,49,0,0,4,118,101,99,52,95,109,117,108,116,105,112,108,121,0,18,95,95,114,101,116,86,97, -108,0,0,18,100,101,103,0,0,18,99,0,0,0,0,1,90,95,0,0,10,0,0,114,97,100,105,97,110,115,0,1,1,0,0,10, -0,100,101,103,0,0,0,1,3,2,90,95,1,0,9,0,1,99,0,2,17,51,0,49,52,49,53,57,50,54,0,0,17,49,56,48,0,48, -0,0,49,0,0,4,118,101,99,52,95,109,117,108,116,105,112,108,121,0,18,95,95,114,101,116,86,97,108,0, -59,120,121,0,0,18,100,101,103,0,59,120,121,0,0,18,99,0,59,120,120,0,0,0,0,1,90,95,0,0,11,0,0,114, -97,100,105,97,110,115,0,1,1,0,0,11,0,100,101,103,0,0,0,1,3,2,90,95,1,0,9,0,1,99,0,2,17,51,0,49,52, -49,53,57,50,54,0,0,17,49,56,48,0,48,0,0,49,0,0,4,118,101,99,52,95,109,117,108,116,105,112,108,121, -0,18,95,95,114,101,116,86,97,108,0,59,120,121,122,0,0,18,100,101,103,0,59,120,121,122,0,0,18,99,0, -59,120,120,120,0,0,0,0,1,90,95,0,0,12,0,0,114,97,100,105,97,110,115,0,1,1,0,0,12,0,100,101,103,0,0, -0,1,3,2,90,95,1,0,9,0,1,99,0,2,17,51,0,49,52,49,53,57,50,54,0,0,17,49,56,48,0,48,0,0,49,0,0,4,118, +90,95,4,0,12,0,1,103,108,95,79,98,106,101,99,116,80,108,97,110,101,84,0,3,18,103,108,95,77,97,120, +84,101,120,116,117,114,101,67,111,111,114,100,115,0,0,0,2,2,90,95,4,0,12,0,1,103,108,95,79,98,106, +101,99,116,80,108,97,110,101,82,0,3,18,103,108,95,77,97,120,84,101,120,116,117,114,101,67,111,111, +114,100,115,0,0,0,2,2,90,95,4,0,12,0,1,103,108,95,79,98,106,101,99,116,80,108,97,110,101,81,0,3,18, +103,108,95,77,97,120,84,101,120,116,117,114,101,67,111,111,114,100,115,0,0,0,2,2,90,95,0,0,24,103, +108,95,70,111,103,80,97,114,97,109,101,116,101,114,115,0,12,0,99,111,108,111,114,0,0,0,1,9,0,100, +101,110,115,105,116,121,0,0,0,1,9,0,115,116,97,114,116,0,0,0,1,9,0,101,110,100,0,0,0,1,9,0,115,99, +97,108,101,0,0,0,0,0,0,0,2,2,90,95,4,0,25,103,108,95,70,111,103,80,97,114,97,109,101,116,101,114, +115,0,0,1,103,108,95,70,111,103,0,0,0,1,90,95,0,0,9,0,0,114,97,100,105,97,110,115,0,1,1,0,0,9,0, +100,101,103,0,0,0,1,3,2,90,95,1,0,9,0,1,99,0,2,17,51,0,49,52,49,53,57,50,54,0,0,17,49,56,48,0,48,0, +0,49,0,0,4,118,101,99,52,95,109,117,108,116,105,112,108,121,0,18,95,95,114,101,116,86,97,108,0,0, +18,100,101,103,0,0,18,99,0,0,0,0,1,90,95,0,0,10,0,0,114,97,100,105,97,110,115,0,1,1,0,0,10,0,100, +101,103,0,0,0,1,3,2,90,95,1,0,9,0,1,99,0,2,17,51,0,49,52,49,53,57,50,54,0,0,17,49,56,48,0,48,0,0, +49,0,0,4,118,101,99,52,95,109,117,108,116,105,112,108,121,0,18,95,95,114,101,116,86,97,108,0,59, +120,121,0,0,18,100,101,103,0,59,120,121,0,0,18,99,0,59,120,120,0,0,0,0,1,90,95,0,0,11,0,0,114,97, +100,105,97,110,115,0,1,1,0,0,11,0,100,101,103,0,0,0,1,3,2,90,95,1,0,9,0,1,99,0,2,17,51,0,49,52,49, +53,57,50,54,0,0,17,49,56,48,0,48,0,0,49,0,0,4,118,101,99,52,95,109,117,108,116,105,112,108,121,0, +18,95,95,114,101,116,86,97,108,0,59,120,121,122,0,0,18,100,101,103,0,59,120,121,122,0,0,18,99,0,59, +120,120,120,0,0,0,0,1,90,95,0,0,12,0,0,114,97,100,105,97,110,115,0,1,1,0,0,12,0,100,101,103,0,0,0, +1,3,2,90,95,1,0,9,0,1,99,0,2,17,51,0,49,52,49,53,57,50,54,0,0,17,49,56,48,0,48,0,0,49,0,0,4,118, 101,99,52,95,109,117,108,116,105,112,108,121,0,18,95,95,114,101,116,86,97,108,0,0,18,100,101,103,0, 0,18,99,0,59,120,120,120,120,0,0,0,0,1,90,95,0,0,9,0,0,100,101,103,114,101,101,115,0,1,1,0,0,9,0, 114,97,100,0,0,0,1,3,2,90,95,1,0,9,0,1,99,0,2,17,49,56,48,0,48,0,0,17,51,0,49,52,49,53,57,50,54,0, diff --git a/src/mesa/shader/slang/slang_builtin.c b/src/mesa/shader/slang/slang_builtin.c index db00c54b8a2..c0f4c79e136 100644 --- a/src/mesa/shader/slang/slang_builtin.c +++ b/src/mesa/shader/slang/slang_builtin.c @@ -1,8 +1,9 @@ /* * Mesa 3-D graphics library - * Version: 6.5.3 + * Version: 7.3 * * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. + * Copyright (C) 2008 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -39,6 +40,10 @@ #include "shader/slang/slang_builtin.h" +/** special state token (see below) */ +#define STATE_ARRAY ((gl_state_index) 0xfffff) + + /** * Lookup GL state given a variable name, 0, 1 or 2 indexes and a field. * Allocate room for the state in the given param list and return position @@ -132,6 +137,8 @@ lookup_statevar(const char *var, GLint index1, GLint index2, const char *field, } } else if (strcmp(var, "gl_ClipPlane") == 0) { + if (index1 < 0) + return -1; tokens[0] = STATE_CLIPPLANE; tokens[1] = index1; } @@ -196,8 +203,12 @@ lookup_statevar(const char *var, GLint index1, GLint index2, const char *field, } } else if (strcmp(var, "gl_LightSource") == 0) { + if (!field || index1 < 0) + return -1; + tokens[0] = STATE_LIGHT; tokens[1] = index1; + if (strcmp(field, "ambient") == 0) { tokens[2] = STATE_AMBIENT; } @@ -272,6 +283,9 @@ lookup_statevar(const char *var, GLint index1, GLint index2, const char *field, } else if (strcmp(var, "gl_FrontLightProduct") == 0 || strcmp(var, "gl_BackLightProduct") == 0) { + if (index1 < 0 || !field) + return -1; + tokens[0] = STATE_LIGHTPROD; tokens[1] = index1; /* light number */ if (strcmp(var, "gl_FrontLightProduct") == 0) { @@ -294,45 +308,63 @@ lookup_statevar(const char *var, GLint index1, GLint index2, const char *field, } } else if (strcmp(var, "gl_TextureEnvColor") == 0) { + if (index1 < 0) + return -1; tokens[0] = STATE_TEXENV_COLOR; tokens[1] = index1; } else if (strcmp(var, "gl_EyePlaneS") == 0) { + if (index1 < 0) + return -1; tokens[0] = STATE_TEXGEN; tokens[1] = index1; /* tex unit */ tokens[2] = STATE_TEXGEN_EYE_S; } else if (strcmp(var, "gl_EyePlaneT") == 0) { + if (index1 < 0) + return -1; tokens[0] = STATE_TEXGEN; tokens[1] = index1; /* tex unit */ tokens[2] = STATE_TEXGEN_EYE_T; } else if (strcmp(var, "gl_EyePlaneR") == 0) { + if (index1 < 0) + return -1; tokens[0] = STATE_TEXGEN; tokens[1] = index1; /* tex unit */ tokens[2] = STATE_TEXGEN_EYE_R; } else if (strcmp(var, "gl_EyePlaneQ") == 0) { + if (index1 < 0) + return -1; tokens[0] = STATE_TEXGEN; tokens[1] = index1; /* tex unit */ tokens[2] = STATE_TEXGEN_EYE_Q; } else if (strcmp(var, "gl_ObjectPlaneS") == 0) { + if (index1 < 0) + return -1; tokens[0] = STATE_TEXGEN; tokens[1] = index1; /* tex unit */ tokens[2] = STATE_TEXGEN_OBJECT_S; } else if (strcmp(var, "gl_ObjectPlaneT") == 0) { + if (index1 < 0) + return -1; tokens[0] = STATE_TEXGEN; tokens[1] = index1; /* tex unit */ tokens[2] = STATE_TEXGEN_OBJECT_T; } else if (strcmp(var, "gl_ObjectPlaneR") == 0) { + if (index1 < 0) + return -1; tokens[0] = STATE_TEXGEN; tokens[1] = index1; /* tex unit */ tokens[2] = STATE_TEXGEN_OBJECT_R; } else if (strcmp(var, "gl_ObjectPlaneQ") == 0) { + if (index1 < 0) + return -1; tokens[0] = STATE_TEXGEN; tokens[1] = index1; /* tex unit */ tokens[2] = STATE_TEXGEN_OBJECT_Q; @@ -386,13 +418,222 @@ lookup_statevar(const char *var, GLint index1, GLint index2, const char *field, } + +/** + * Given a variable name and datatype, emit uniform/constant buffer + * entries which will store that state variable. + * For example, if name="gl_LightSource" we'll emit 64 state variable + * vectors/references and return position where that data starts. This will + * allow run-time array indexing into the light source array. + * + * Note that this is a recursive function. + * + * \return -1 if error, else index of start of data in the program parameter list + */ +static GLint +emit_statevars(const char *name, int array_len, + const slang_type_specifier *type, + gl_state_index tokens[STATE_LENGTH], + struct gl_program_parameter_list *paramList) +{ + if (type->type == SLANG_SPEC_ARRAY) { + GLint i, pos; + assert(array_len > 0); + if (strcmp(name, "gl_ClipPlane") == 0) { + tokens[0] = STATE_CLIPPLANE; + } + else if (strcmp(name, "gl_LightSource") == 0) { + tokens[0] = STATE_LIGHT; + } + else if (strcmp(name, "gl_FrontLightProduct") == 0) { + tokens[0] = STATE_LIGHTPROD; + tokens[2] = 0; /* front */ + } + else if (strcmp(name, "gl_BackLightProduct") == 0) { + tokens[0] = STATE_LIGHTPROD; + tokens[2] = 1; /* back */ + } + else if (strcmp(name, "gl_TextureEnvColor") == 0) { + tokens[0] = STATE_TEXENV_COLOR; + } + else if (strcmp(name, "gl_EyePlaneS") == 0) { + tokens[0] = STATE_TEXGEN_EYE_S; + } + else if (strcmp(name, "gl_EyePlaneT") == 0) { + tokens[0] = STATE_TEXGEN_EYE_T; + } + else if (strcmp(name, "gl_EyePlaneR") == 0) { + tokens[0] = STATE_TEXGEN_EYE_R; + } + else if (strcmp(name, "gl_EyePlaneQ") == 0) { + tokens[0] = STATE_TEXGEN_EYE_Q; + } + else if (strcmp(name, "gl_ObjectPlaneS") == 0) { + tokens[0] = STATE_TEXGEN_OBJECT_S; + } + else if (strcmp(name, "gl_ObjectPlaneT") == 0) { + tokens[0] = STATE_TEXGEN_OBJECT_T; + } + else if (strcmp(name, "gl_ObjectPlaneR") == 0) { + tokens[0] = STATE_TEXGEN_OBJECT_R; + } + else if (strcmp(name, "gl_ObjectPlaneQ") == 0) { + tokens[0] = STATE_TEXGEN_OBJECT_Q; + } + else { + return -1; /* invalid array name */ + } + for (i = 0; i < array_len; i++) { + GLint p; + tokens[1] = i; + p = emit_statevars(NULL, 0, type->_array, tokens, paramList); + if (i == 0) + pos = p; + } + return pos; + } + else if (type->type == SLANG_SPEC_STRUCT) { + const slang_variable_scope *fields = type->_struct->fields; + GLuint i, pos; + for (i = 0; i < fields->num_variables; i++) { + const slang_variable *var = fields->variables[i]; + GLint p = emit_statevars(var->a_name, 0, &var->type.specifier, + tokens, paramList); + if (i == 0) + pos = p; + } + return pos; + } + else { + GLint pos; + assert(type->type == SLANG_SPEC_VEC4 || + type->type == SLANG_SPEC_VEC3 || + type->type == SLANG_SPEC_VEC2 || + type->type == SLANG_SPEC_FLOAT || + type->type == SLANG_SPEC_IVEC4 || + type->type == SLANG_SPEC_IVEC3 || + type->type == SLANG_SPEC_IVEC2 || + type->type == SLANG_SPEC_INT); + if (name) { + GLint t; + + if (tokens[0] == STATE_LIGHT) + t = 2; + else if (tokens[0] == STATE_LIGHTPROD) + t = 3; + else + return -1; /* invalid array name */ + + if (strcmp(name, "ambient") == 0) { + tokens[t] = STATE_AMBIENT; + } + else if (strcmp(name, "diffuse") == 0) { + tokens[t] = STATE_DIFFUSE; + } + else if (strcmp(name, "specular") == 0) { + tokens[t] = STATE_SPECULAR; + } + else if (strcmp(name, "position") == 0) { + tokens[t] = STATE_POSITION; + } + else if (strcmp(name, "halfVector") == 0) { + tokens[t] = STATE_HALF_VECTOR; + } + else if (strcmp(name, "spotDirection") == 0) { + tokens[t] = STATE_SPOT_DIRECTION; /* xyz components */ + } + else if (strcmp(name, "spotCosCutoff") == 0) { + tokens[t] = STATE_SPOT_DIRECTION; /* w component */ + } + + else if (strcmp(name, "constantAttenuation") == 0) { + tokens[t] = STATE_ATTENUATION; /* x component */ + } + else if (strcmp(name, "linearAttenuation") == 0) { + tokens[t] = STATE_ATTENUATION; /* y component */ + } + else if (strcmp(name, "quadraticAttenuation") == 0) { + tokens[t] = STATE_ATTENUATION; /* z component */ + } + else if (strcmp(name, "spotExponent") == 0) { + tokens[t] = STATE_ATTENUATION; /* w = spot exponent */ + } + + else if (strcmp(name, "spotCutoff") == 0) { + tokens[t] = STATE_SPOT_CUTOFF; /* x component */ + } + + else { + return -1; /* invalid field name */ + } + } + + pos = _mesa_add_state_reference(paramList, tokens); + return pos; + } + + return 1; +} + + +/** + * Unroll the named built-in uniform variable into a sequence of state + * vars in the given parameter list. + */ +static GLint +alloc_state_var_array(const slang_variable *var, + struct gl_program_parameter_list *paramList) +{ + gl_state_index tokens[STATE_LENGTH]; + GLuint i; + GLint pos; + + /* Initialize the state tokens array. This is very important. + * When we call _mesa_add_state_reference() it'll searches the parameter + * list to see if the given statevar token sequence is already present. + * This is normally a good thing since it prevents redundant values in the + * constant buffer. + * + * But when we're building arrays of state this can be bad. For example, + * consider this fragment of GLSL code: + * foo = gl_LightSource[3].diffuse; + * ... + * bar = gl_LightSource[i].diffuse; + * + * When we unroll the gl_LightSource array (for "bar") we want to re-emit + * gl_LightSource[3].diffuse and not re-use the first instance (from "foo") + * since that would upset the array layout. We handle this situation by + * setting the last token in the state var token array to the special + * value STATE_ARRAY. + * This token will only be set for array state. We can hijack the last + * element in the array for this since it's never used for light, clipplane + * or texture env array state. + */ + for (i = 0; i < STATE_LENGTH; i++) + tokens[i] = 0; + tokens[STATE_LENGTH - 1] = STATE_ARRAY; + + pos = emit_statevars(var->a_name, var->array_len, &var->type.specifier, + tokens, paramList); + + return pos; +} + + + /** * Allocate storage for a pre-defined uniform (a GL state variable). * As a memory-saving optimization, we try to only allocate storage for * state vars that are actually used. - * For example, the "gl_LightSource" uniform is huge. If we only use - * a handful of gl_LightSource fields, we don't want to allocate storage - * for all of gl_LightSource. + * + * Arrays such as gl_LightSource are handled specially. For an expression + * like "gl_LightSource[2].diffuse", we can allocate a single uniform/constant + * slot and return the index. In this case, we return direct=TRUE. + * + * Buf for something like "gl_LightSource[i].diffuse" we don't know the value + * of 'i' at compile time so we need to "unroll" the gl_LightSource array + * into a consecutive sequence of uniform/constant slots so it can be indexed + * at runtime. In this case, we return direct=FALSE. * * Currently, all pre-defined uniforms are in one of these forms: * var @@ -401,52 +642,62 @@ lookup_statevar(const char *var, GLint index1, GLint index2, const char *field, * var[i].field * var[i][j] * - * \return -1 upon error, else position in paramList of the state var/data + * \return -1 upon error, else position in paramList of the state variable/data */ GLint _slang_alloc_statevar(slang_ir_node *n, - struct gl_program_parameter_list *paramList) + struct gl_program_parameter_list *paramList, + GLboolean *direct) { slang_ir_node *n0 = n; - const char *field = NULL, *var; - GLint index1 = -1, index2 = -1, pos; + const char *field = NULL; + GLint index1 = -1, index2 = -1; GLuint swizzle; + *direct = GL_TRUE; + if (n->Opcode == IR_FIELD) { field = n->Field; n = n->Children[0]; } if (n->Opcode == IR_ELEMENT) { - /* XXX can only handle constant indexes for now */ if (n->Children[1]->Opcode == IR_FLOAT) { index1 = (GLint) n->Children[1]->Value[0]; - n = n->Children[0]; } else { - return -1; + *direct = GL_FALSE; } + n = n->Children[0]; } if (n->Opcode == IR_ELEMENT) { /* XXX can only handle constant indexes for now */ - assert(n->Children[1]->Opcode == IR_FLOAT); - index2 = (GLint) n->Children[1]->Value[0]; + if (n->Children[1]->Opcode == IR_FLOAT) { + index2 = (GLint) n->Children[1]->Value[0]; + } + else { + *direct = GL_FALSE; + } n = n->Children[0]; } assert(n->Opcode == IR_VAR); - var = (char *) n->Var->a_name; - pos = lookup_statevar(var, index1, index2, field, &swizzle, paramList); - assert(pos >= 0); - if (pos >= 0) { - /* newly resolved storage for the statevar/constant/uniform */ - n0->Store->File = PROGRAM_STATE_VAR; - n0->Store->Index = pos; - n0->Store->Swizzle = swizzle; - n0->Store->Parent = NULL; + if (*direct) { + const char *var = (const char *) n->Var->a_name; + GLint pos = + lookup_statevar(var, index1, index2, field, &swizzle, paramList); + if (pos >= 0) { + /* newly resolved storage for the statevar/constant/uniform */ + n0->Store->File = PROGRAM_STATE_VAR; + n0->Store->Index = pos; + n0->Store->Swizzle = swizzle; + n0->Store->Parent = NULL; + return pos; + } } - return pos; -} + *direct = GL_FALSE; + return alloc_state_var_array(n->Var, paramList); +} diff --git a/src/mesa/shader/slang/slang_builtin.h b/src/mesa/shader/slang/slang_builtin.h index 58629f4f7fe..7f6fe80fcca 100644 --- a/src/mesa/shader/slang/slang_builtin.h +++ b/src/mesa/shader/slang/slang_builtin.h @@ -33,7 +33,8 @@ extern GLint _slang_alloc_statevar(slang_ir_node *n, - struct gl_program_parameter_list *paramList); + struct gl_program_parameter_list *paramList, + GLboolean *direct); #endif /* SLANG_BUILTIN_H */ diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c index 7d764cb5c1d..66615d3afd6 100644 --- a/src/mesa/shader/slang/slang_codegen.c +++ b/src/mesa/shader/slang/slang_codegen.c @@ -391,7 +391,7 @@ _slang_input_index(const char *name, GLenum target, GLuint *swizzleOut) const struct input_info *inputs = (target == GL_VERTEX_PROGRAM_ARB) ? vertInputs : fragInputs; - ASSERT(MAX_TEXTURE_UNITS == 8); /* if this fails, fix vertInputs above */ + ASSERT(MAX_TEXTURE_COORD_UNITS == 8); /* if this fails, fix vertInputs above */ for (i = 0; inputs[i].Name; i++) { if (strcmp(inputs[i].Name, name) == 0) { @@ -1976,7 +1976,7 @@ _slang_make_array_constructor(slang_assemble_ctx *A, slang_operation *oper) */ slang_variable *p = slang_variable_scope_grow(fun->parameters); char name[10]; - snprintf(name, sizeof(name), "p%d", i); + _mesa_snprintf(name, sizeof(name), "p%d", i); p->a_name = slang_atom_pool_atom(A->atoms, name); p->type.qualifier = SLANG_QUAL_CONST; p->type.specifier.type = baseType; @@ -3323,6 +3323,22 @@ is_store_writable(const slang_assemble_ctx *A, const slang_ir_storage *store) /** + * Walk up an IR storage path to compute the final swizzle. + * This is used when we find an expression such as "foo.xz.yx". + */ +static GLuint +root_swizzle(const slang_ir_storage *st) +{ + GLuint swizzle = st->Swizzle; + while (st->Parent) { + st = st->Parent; + swizzle = _slang_swizzle_swizzle(st->Swizzle, swizzle); + } + return swizzle; +} + + +/** * Generate IR tree for an assignment (=). */ static slang_ir_node * @@ -3397,9 +3413,9 @@ _slang_gen_assignment(slang_assemble_ctx * A, slang_operation *oper) rhs = _slang_gen_operation(A, &oper->children[1]); if (lhs && rhs) { /* convert lhs swizzle into writemask */ + const GLuint swizzle = root_swizzle(lhs->Store); GLuint writemask, newSwizzle; - if (!swizzle_to_writemask(A, lhs->Store->Swizzle, - &writemask, &newSwizzle)) { + if (!swizzle_to_writemask(A, swizzle, &writemask, &newSwizzle)) { /* Non-simple writemask, need to swizzle right hand side in * order to put components into the right place. */ diff --git a/src/mesa/shader/slang/slang_compile.c b/src/mesa/shader/slang/slang_compile.c index d8aefd64950..b8044b10600 100644 --- a/src/mesa/shader/slang/slang_compile.c +++ b/src/mesa/shader/slang/slang_compile.c @@ -1470,9 +1470,9 @@ parse_expression(slang_parse_ctx * C, slang_output_ctx * O, RETURN0; } else { + slang_operation array_size; array_constructor = GL_TRUE; /* parse the array constructor size */ - slang_operation array_size; slang_operation_construct(&array_size); if (!parse_expression(C, O, &array_size)) { slang_operation_destruct(&array_size); diff --git a/src/mesa/shader/slang/slang_emit.c b/src/mesa/shader/slang/slang_emit.c index 6587f9cf276..d3b4e64b78d 100644 --- a/src/mesa/shader/slang/slang_emit.c +++ b/src/mesa/shader/slang/slang_emit.c @@ -310,24 +310,22 @@ storage_to_dst_reg(struct prog_dst_register *dst, const slang_ir_storage *st) dst->WriteMask = swizzle_to_writemask(swizzle); } else { - GLuint writemask; switch (size) { case 1: - writemask = WRITEMASK_X << GET_SWZ(st->Swizzle, 0); + dst->WriteMask = WRITEMASK_X << GET_SWZ(st->Swizzle, 0); break; case 2: - writemask = WRITEMASK_XY; + dst->WriteMask = WRITEMASK_XY; break; case 3: - writemask = WRITEMASK_XYZ; + dst->WriteMask = WRITEMASK_XYZ; break; case 4: - writemask = WRITEMASK_XYZW; + dst->WriteMask = WRITEMASK_XYZW; break; default: ; /* error would have been caught above */ } - dst->WriteMask = writemask; } dst->RelAddr = relAddr; @@ -348,6 +346,10 @@ storage_to_src_reg(struct prog_src_register *src, const slang_ir_storage *st) assert(index >= 0); while (st->Parent) { st = st->Parent; + if (st->Index < 0) { + /* an error should have been reported already */ + return; + } assert(st->Index >= 0); index += st->Index; swizzle = _slang_swizzle_swizzle(fix_swizzle(st->Swizzle), swizzle); @@ -1286,6 +1288,7 @@ emit_tex(slang_emit_info *emitInfo, slang_ir_node *n) /* Child[0] is the sampler (a uniform which'll indicate the texture unit) */ assert(n->Children[0]->Store); + assert(n->Children[0]->Store->File == PROGRAM_SAMPLER); /* Store->Index is the sampler index */ assert(n->Children[0]->Store->Index >= 0); /* Store->Size is the texture target */ @@ -1295,6 +1298,10 @@ emit_tex(slang_emit_info *emitInfo, slang_ir_node *n) inst->TexSrcTarget = n->Children[0]->Store->Size; inst->TexSrcUnit = n->Children[0]->Store->Index; /* i.e. uniform's index */ + /* mark the sampler as being used */ + _mesa_use_uniform(emitInfo->prog->Parameters, + (char *) n->Children[0]->Var->a_name); + return inst; } @@ -1841,9 +1848,17 @@ emit_array_element(slang_emit_info *emitInfo, slang_ir_node *n) root = root->Parent; if (root->File == PROGRAM_STATE_VAR) { - GLint index = _slang_alloc_statevar(n, emitInfo->prog->Parameters); - assert(n->Store->Index == index); - return NULL; + GLboolean direct; + GLint index = + _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct); + if (index < 0) { + /* error */ + return NULL; + } + if (direct) { + n->Store->Index = index; + return NULL; /* all done */ + } } } @@ -1967,28 +1982,30 @@ emit_struct_field(slang_emit_info *emitInfo, slang_ir_node *n) * space for the ones that we actually use! */ if (root->File == PROGRAM_STATE_VAR) { - root->Index = _slang_alloc_statevar(n, emitInfo->prog->Parameters); - if (root->Index < 0) { + GLboolean direct; + GLint index = _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct); + if (index < 0) { slang_info_log_error(emitInfo->log, "Error parsing state variable"); return NULL; } - return NULL; - } - else { - /* do codegen for struct */ - emit(emitInfo, n->Children[0]); - assert(n->Children[0]->Store->Index >= 0); + if (direct) { + root->Index = index; + return NULL; /* all done */ + } } + /* do codegen for struct */ + emit(emitInfo, n->Children[0]); + assert(n->Children[0]->Store->Index >= 0); + + fieldOffset = n->Store->Index; fieldSize = n->Store->Size; _slang_copy_ir_storage(n->Store, n->Children[0]->Store); n->Store->Index = n->Children[0]->Store->Index + fieldOffset / 4; - /* XXX test this: - n->Store->Index += fieldOffset / 4; - */ + n->Store->Size = fieldSize; switch (fieldSize) { case 1: @@ -2077,9 +2094,21 @@ emit_var_ref(slang_emit_info *emitInfo, slang_ir_node *n) assert(n->Store->File != PROGRAM_UNDEFINED); if (n->Store->File == PROGRAM_STATE_VAR && n->Store->Index < 0) { - n->Store->Index = _slang_alloc_statevar(n, emitInfo->prog->Parameters); + GLboolean direct; + GLint index = _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct); + if (index < 0) { + /* error */ + char s[100]; + _mesa_snprintf(s, sizeof(s), "Undefined variable '%s'", + (char *) n->Var->a_name); + slang_info_log_error(emitInfo->log, s); + return NULL; + } + + n->Store->Index = index; } - else if (n->Store->File == PROGRAM_UNIFORM) { + else if (n->Store->File == PROGRAM_UNIFORM || + n->Store->File == PROGRAM_SAMPLER) { /* mark var as used */ _mesa_use_uniform(emitInfo->prog->Parameters, (char *) n->Var->a_name); } diff --git a/src/mesa/shader/slang/slang_link.c b/src/mesa/shader/slang/slang_link.c index 3e97c8672b6..3f953d86e74 100644 --- a/src/mesa/shader/slang/slang_link.c +++ b/src/mesa/shader/slang/slang_link.c @@ -1,8 +1,9 @@ /* * Mesa 3-D graphics library - * Version: 7.2 + * Version: 7.3 * * Copyright (C) 2008 Brian Paul All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -136,15 +137,15 @@ link_varying_vars(struct gl_shader_program *shProg, struct gl_program *prog) } if (!bits_agree(var->Flags, v->Flags, PROG_PARAM_BIT_CENTROID)) { char msg[100]; - snprintf(msg, sizeof(msg), - "centroid modifier mismatch for '%s'", var->Name); + _mesa_snprintf(msg, sizeof(msg), + "centroid modifier mismatch for '%s'", var->Name); link_error(shProg, msg); return GL_FALSE; } if (!bits_agree(var->Flags, v->Flags, PROG_PARAM_BIT_INVARIANT)) { char msg[100]; - snprintf(msg, sizeof(msg), - "invariant modifier mismatch for '%s'", var->Name); + _mesa_snprintf(msg, sizeof(msg), + "invariant modifier mismatch for '%s'", var->Name); link_error(shProg, msg); return GL_FALSE; } @@ -206,13 +207,27 @@ link_varying_vars(struct gl_shader_program *shProg, struct gl_program *prog) * Build the shProg->Uniforms list. * This is basically a list/index of all uniforms found in either/both of * the vertex and fragment shaders. + * + * About uniforms: + * Each uniform has two indexes, one that points into the vertex + * program's parameter array and another that points into the fragment + * program's parameter array. When the user changes a uniform's value + * we have to change the value in the vertex and/or fragment program's + * parameter array. + * + * This function will be called twice to set up the two uniform->parameter + * mappings. + * + * If a uniform is only present in the vertex program OR fragment program + * then the fragment/vertex parameter index, respectively, will be -1. */ static GLboolean -link_uniform_vars(struct gl_shader_program *shProg, +link_uniform_vars(GLcontext *ctx, + struct gl_shader_program *shProg, struct gl_program *prog, GLuint *numSamplers) { - GLuint samplerMap[MAX_SAMPLERS]; + GLuint samplerMap[200]; /* max number of samplers declared, not used */ GLuint i; for (i = 0; i < prog->Parameters->NumParameters; i++) { @@ -227,30 +242,41 @@ link_uniform_vars(struct gl_shader_program *shProg, * Furthermore, we'll need to fix the state-var's size/datatype info. */ - if ((p->Type == PROGRAM_UNIFORM && p->Used) || - p->Type == PROGRAM_SAMPLER) { + if ((p->Type == PROGRAM_UNIFORM || p->Type == PROGRAM_SAMPLER) + && p->Used) { + /* add this uniform, indexing into the target's Parameters list */ struct gl_uniform *uniform = _mesa_append_uniform(shProg->Uniforms, p->Name, prog->Target, i); if (uniform) uniform->Initialized = p->Initialized; } - if (p->Type == PROGRAM_SAMPLER) { + /* The samplerMap[] table we build here is used to remap/re-index + * sampler references by TEX instructions. + */ + if (p->Type == PROGRAM_SAMPLER && p->Used) { /* Allocate a new sampler index */ - GLuint sampNum = *numSamplers; GLuint oldSampNum = (GLuint) prog->Parameters->ParameterValues[i][0]; - if (oldSampNum >= MAX_SAMPLERS) { - link_error(shProg, "Too many texture samplers"); + GLuint newSampNum = *numSamplers; + if (newSampNum >= ctx->Const.MaxTextureImageUnits) { + char s[100]; + sprintf(s, "Too many texture samplers (%u, max is %u)", + newSampNum, ctx->Const.MaxTextureImageUnits); + link_error(shProg, s); return GL_FALSE; } - samplerMap[oldSampNum] = sampNum; + /* save old->new mapping in the table */ + if (oldSampNum < Elements(samplerMap)) + samplerMap[oldSampNum] = newSampNum; + /* update parameter's sampler index */ + prog->Parameters->ParameterValues[i][0] = (GLfloat) newSampNum; (*numSamplers)++; } } - - /* OK, now scan the program/shader instructions looking for sampler vars, - * replacing the old index with the new index. + /* OK, now scan the program/shader instructions looking for texture + * instructions using sampler vars. Replace old sampler indexes with + * new ones. */ prog->SamplersUsed = 0x0; for (i = 0; i < prog->NumInstructions; i++) { @@ -261,10 +287,13 @@ link_uniform_vars(struct gl_shader_program *shProg, inst->Sampler, map[ inst->Sampler ]); */ /* here, texUnit is really samplerUnit */ - assert(inst->TexSrcUnit < MAX_SAMPLERS); - inst->TexSrcUnit = samplerMap[inst->TexSrcUnit]; - prog->SamplerTargets[inst->TexSrcUnit] = inst->TexSrcTarget; - prog->SamplersUsed |= (1 << inst->TexSrcUnit); + const GLint oldSampNum = inst->TexSrcUnit; + if (oldSampNum < Elements(samplerMap)) { + const GLuint newSampNum = samplerMap[oldSampNum]; + inst->TexSrcUnit = newSampNum; + prog->SamplerTargets[newSampNum] = inst->TexSrcTarget; + prog->SamplersUsed |= (1 << newSampNum); + } } } @@ -564,13 +593,13 @@ _slang_link(GLcontext *ctx, /* link uniform vars */ if (shProg->VertexProgram) { - if (!link_uniform_vars(shProg, &shProg->VertexProgram->Base, + if (!link_uniform_vars(ctx, shProg, &shProg->VertexProgram->Base, &numSamplers)) { return; } } if (shProg->FragmentProgram) { - if (!link_uniform_vars(shProg, &shProg->FragmentProgram->Base, + if (!link_uniform_vars(ctx, shProg, &shProg->FragmentProgram->Base, &numSamplers)) { return; } diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index 18ffc08c6b1..21f7321f97a 100644 --- a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -53,7 +53,7 @@ update_textures(struct st_context *st) st->state.num_textures = 0; /* loop over sampler units (aka tex image units) */ - for (su = 0; su < st->ctx->Const.MaxTextureCoordUnits; su++) { + for (su = 0; su < st->ctx->Const.MaxTextureImageUnits; su++) { struct pipe_texture *pt = NULL; if (samplersUsed & (1 << su)) { diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c index 3eaccb74e17..8867ca5652e 100644 --- a/src/mesa/state_tracker/st_cb_rasterpos.c +++ b/src/mesa/state_tracker/st_cb_rasterpos.c @@ -162,7 +162,7 @@ rastpos_point(struct draw_stage *stage, struct prim_header *prim) ctx->Current.RasterSecondaryColor, VERT_RESULT_COL1, VERT_ATTRIB_COLOR1); - for (i = 0; i < MAX_TEXTURE_UNITS; i++) { + for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { update_attrib(ctx, outputMapping, prim->v[0], ctx->Current.RasterTexCoords[i], VERT_RESULT_TEX0 + i, VERT_ATTRIB_TEX0 + i); diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 5ff0c611477..8ec2113f074 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -84,14 +84,18 @@ void st_init_limits(struct st_context *st) c->MaxTextureRectSize = _min(1 << (c->MaxTextureLevels - 1), MAX_TEXTURE_RECT_SIZE); - c->MaxTextureUnits - = c->MaxTextureImageUnits - = c->MaxTextureCoordUnits + c->MaxTextureImageUnits = _min(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS), MAX_TEXTURE_IMAGE_UNITS); c->MaxVertexTextureImageUnits - = screen->get_param(screen, PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS); + = _min(screen->get_param(screen, PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS), + MAX_VERTEX_TEXTURE_IMAGE_UNITS); + + c->MaxTextureCoordUnits + = _min(c->MaxTextureImageUnits, MAX_TEXTURE_COORD_UNITS); + + c->MaxTextureUnits = _min(c->MaxTextureImageUnits, c->MaxTextureCoordUnits); c->MaxDrawBuffers = _clamp(screen->get_param(screen, PIPE_CAP_MAX_RENDER_TARGETS), diff --git a/src/mesa/state_tracker/st_framebuffer.c b/src/mesa/state_tracker/st_framebuffer.c index 43ac195e67c..ea22a943039 100644 --- a/src/mesa/state_tracker/st_framebuffer.c +++ b/src/mesa/state_tracker/st_framebuffer.c @@ -170,7 +170,6 @@ st_set_framebuffer_surface(struct st_framebuffer *stfb, uint surfIndex, struct pipe_surface *surf) { GET_CURRENT_CONTEXT(ctx); - struct st_context *st; static const GLuint invalid_size = 9999999; struct st_renderbuffer *strb; GLuint width, height, i; diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index ce8b4411940..5b5707fa1c5 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -745,6 +745,7 @@ find_temporaries(const struct gl_program *program, */ GLuint st_translate_mesa_program( + GLcontext *ctx, uint procType, const struct gl_program *program, GLuint numInputs, @@ -992,7 +993,7 @@ st_translate_mesa_program( } /* texture samplers */ - for (i = 0; i < 8; i++) { + for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { if (program->SamplersUsed & (1 << i)) { struct tgsi_full_declaration fulldecl; diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h index 7b2bee1ab70..b465b3bddcf 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.h +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h @@ -41,6 +41,7 @@ struct gl_program; GLuint st_translate_mesa_program( + GLcontext *ctx, uint procType, const struct gl_program *program, GLuint numInputs, diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index cf4b39cee40..442eeed1470 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -317,7 +317,8 @@ st_translate_vertex_program(struct st_context *st, /* XXX: fix static allocation of tokens: */ - num_tokens = st_translate_mesa_program(TGSI_PROCESSOR_VERTEX, + num_tokens = st_translate_mesa_program(st->ctx, + TGSI_PROCESSOR_VERTEX, &stvp->Base.Base, /* inputs */ vs_num_inputs, @@ -503,7 +504,8 @@ st_translate_fragment_program(struct st_context *st, /* XXX: fix static allocation of tokens: */ - num_tokens = st_translate_mesa_program(TGSI_PROCESSOR_FRAGMENT, + num_tokens = st_translate_mesa_program(st->ctx, + TGSI_PROCESSOR_FRAGMENT, &stfp->Base.Base, /* inputs */ fs_num_inputs, diff --git a/src/mesa/state_tracker/wgl/SConscript b/src/mesa/state_tracker/wgl/SConscript index cceb8264ead..bb579930f52 100644 --- a/src/mesa/state_tracker/wgl/SConscript +++ b/src/mesa/state_tracker/wgl/SConscript @@ -11,8 +11,9 @@ if env['platform'] in ['windows']: ]) env.Append(CPPDEFINES = [ + '_GDI32_', # prevent wgl* being declared __declspec(dllimport) + 'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers '__GL_EXPORTS', - 'BUILD_GL32', '_GNU_H_WINDOWS32_DEFINES', ]) diff --git a/src/mesa/state_tracker/wgl/stw_device.c b/src/mesa/state_tracker/wgl/stw_device.c index 52907f1a792..e2a17d83ac6 100644 --- a/src/mesa/state_tracker/wgl/stw_device.c +++ b/src/mesa/state_tracker/wgl/stw_device.c @@ -37,8 +37,8 @@ struct stw_device *stw_dev = NULL; -static BOOL -st_init(void) +boolean +st_init(const struct stw_winsys *stw_winsys) { static struct stw_device stw_dev_storage; @@ -47,7 +47,9 @@ st_init(void) stw_dev = &stw_dev_storage; memset(stw_dev, 0, sizeof(*stw_dev)); - stw_dev->screen = stw_winsys.create_screen(); + stw_dev->stw_winsys = stw_winsys; + + stw_dev->screen = stw_winsys->create_screen(); if(!stw_dev->screen) goto error1; @@ -61,7 +63,7 @@ error1: } -static void +void st_cleanup(void) { DHGLRC dhglrc; @@ -76,18 +78,3 @@ st_cleanup(void) stw_dev = NULL; } - - -BOOL WINAPI -DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) -{ - switch (fdwReason) { - case DLL_PROCESS_ATTACH: - return st_init(); - - case DLL_PROCESS_DETACH: - st_cleanup(); - break; - } - return TRUE; -} diff --git a/src/mesa/state_tracker/wgl/stw_device.h b/src/mesa/state_tracker/wgl/stw_device.h index 49f79ac9c70..e2020bf0554 100644 --- a/src/mesa/state_tracker/wgl/stw_device.h +++ b/src/mesa/state_tracker/wgl/stw_device.h @@ -44,6 +44,8 @@ struct drv_context struct stw_device { + const struct stw_winsys *stw_winsys; + struct pipe_screen *screen; struct drv_context ctx_array[DRV_CONTEXT_MAX]; diff --git a/src/mesa/state_tracker/wgl/stw_framebuffer.c b/src/mesa/state_tracker/wgl/stw_framebuffer.c index 57b89eee966..1ecafa451ee 100644 --- a/src/mesa/state_tracker/wgl/stw_framebuffer.c +++ b/src/mesa/state_tracker/wgl/stw_framebuffer.c @@ -25,9 +25,8 @@ * **************************************************************************/ -#define _GDI32_ - #include <windows.h> + #include "main/context.h" #include "pipe/p_format.h" #include "state_tracker/st_context.h" diff --git a/src/mesa/state_tracker/wgl/stw_icd.c b/src/mesa/state_tracker/wgl/stw_icd.c index 17bdbd15fac..1dddc242097 100644 --- a/src/mesa/state_tracker/wgl/stw_icd.c +++ b/src/mesa/state_tracker/wgl/stw_icd.c @@ -29,12 +29,12 @@ #include <stdio.h> #include "GL/gl.h" -#include "GL/mesa_wgl.h" #include "pipe/p_debug.h" #include "stw_device.h" #include "stw_icd.h" +#include "stw_wgl.h" static HGLRC diff --git a/src/mesa/state_tracker/wgl/stw_icd.h b/src/mesa/state_tracker/wgl/stw_icd.h index 7e2edca16ec..8e676fb5b74 100644 --- a/src/mesa/state_tracker/wgl/stw_icd.h +++ b/src/mesa/state_tracker/wgl/stw_icd.h @@ -31,9 +31,7 @@ #include <windows.h> - #include "GL/gl.h" -#include "GL/mesa_wgl.h" typedef ULONG DHGLRC; diff --git a/src/mesa/state_tracker/wgl/stw_wgl.c b/src/mesa/state_tracker/wgl/stw_wgl.c index 6cace957458..0528c369fc8 100644 --- a/src/mesa/state_tracker/wgl/stw_wgl.c +++ b/src/mesa/state_tracker/wgl/stw_wgl.c @@ -25,8 +25,6 @@ * **************************************************************************/ -#define _GDI32_ - #include <windows.h> #include "pipe/p_debug.h" diff --git a/src/mesa/state_tracker/wgl/stw_wgl_pixelformat.h b/src/mesa/state_tracker/wgl/stw_wgl.h index ee875c7a1d2..b86cc240f28 100644 --- a/src/mesa/state_tracker/wgl/stw_wgl_pixelformat.h +++ b/src/mesa/state_tracker/wgl/stw_wgl.h @@ -1,8 +1,8 @@ /************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * + * Copyright 2009 VMware, Inc. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,26 +10,54 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ -#ifndef WGL_PIXELFORMAT_H -#define WGL_PIXELFORMAT_H +#ifndef STW_WGL_H_ +#define STW_WGL_H_ -WINGDIAPI int APIENTRY -wglGetPixelFormat( - HDC hdc ); -#endif /* WGL_PIXELFORMAT_H */ +#include <windows.h> + +#include "GL/gl.h" + + +/* + * Undeclared APIs exported by opengl32.dll + */ + +WINGDIAPI BOOL WINAPI +wglSwapBuffers(HDC hdc); + +WINGDIAPI int WINAPI +wglChoosePixelFormat(HDC hdc, + CONST PIXELFORMATDESCRIPTOR *ppfd); + +WINGDIAPI int WINAPI +wglDescribePixelFormat(HDC hdc, + int iPixelFormat, + UINT nBytes, + LPPIXELFORMATDESCRIPTOR ppfd); + +WINGDIAPI int WINAPI +wglGetPixelFormat(HDC hdc); + +WINGDIAPI BOOL WINAPI +wglSetPixelFormat(HDC hdc, + int iPixelFormat, + CONST PIXELFORMATDESCRIPTOR *ppfd); + + +#endif /* STW_WGL_H_ */ diff --git a/src/mesa/state_tracker/wgl/stw_wgl_arbextensionsstring.c b/src/mesa/state_tracker/wgl/stw_wgl_arbextensionsstring.c index fe3a3c1daa9..04865796ec8 100644 --- a/src/mesa/state_tracker/wgl/stw_wgl_arbextensionsstring.c +++ b/src/mesa/state_tracker/wgl/stw_wgl_arbextensionsstring.c @@ -25,9 +25,8 @@ * **************************************************************************/ -#define _GDI32_ - #include <windows.h> + #include "stw_wgl_arbextensionsstring.h" WINGDIAPI const char * APIENTRY diff --git a/src/mesa/state_tracker/wgl/stw_wgl_arbpixelformat.c b/src/mesa/state_tracker/wgl/stw_wgl_arbpixelformat.c index 14a7c5e1e06..344bb15d3c1 100644 --- a/src/mesa/state_tracker/wgl/stw_wgl_arbpixelformat.c +++ b/src/mesa/state_tracker/wgl/stw_wgl_arbpixelformat.c @@ -25,8 +25,6 @@ * **************************************************************************/ -#define _GDI32_ - #include <windows.h> #include "pipe/p_compiler.h" diff --git a/src/mesa/state_tracker/wgl/stw_wgl_context.c b/src/mesa/state_tracker/wgl/stw_wgl_context.c index 59b47200be4..0c13c6b68ab 100644 --- a/src/mesa/state_tracker/wgl/stw_wgl_context.c +++ b/src/mesa/state_tracker/wgl/stw_wgl_context.c @@ -25,8 +25,6 @@ * **************************************************************************/ -#define _GDI32_ - #include <windows.h> #include "main/mtypes.h" @@ -41,7 +39,7 @@ #include "stw_pixelformat.h" #include "stw_wgl_arbmultisample.h" #include "stw_wgl_context.h" -#include "stw_wgl_pixelformat.h" +#include "stw_wgl.h" static struct wgl_context *ctx_head = NULL; @@ -107,7 +105,7 @@ wglCreateContext( return NULL; } - pipe = stw_winsys.create_context( stw_dev->screen ); + pipe = stw_dev->stw_winsys->create_context( stw_dev->screen ); if (!pipe) { _mesa_destroy_visual( visual ); FREE( ctx ); diff --git a/src/mesa/state_tracker/wgl/stw_wgl_getprocaddress.c b/src/mesa/state_tracker/wgl/stw_wgl_getprocaddress.c index ec4f1513cb8..ec92d2dfce2 100644 --- a/src/mesa/state_tracker/wgl/stw_wgl_getprocaddress.c +++ b/src/mesa/state_tracker/wgl/stw_wgl_getprocaddress.c @@ -25,9 +25,8 @@ * **************************************************************************/ -#define _GDI32_ - #include <windows.h> + #include "glapi/glapi.h" #include "stw_wgl_arbextensionsstring.h" #include "stw_wgl_arbpixelformat.h" diff --git a/src/mesa/state_tracker/wgl/stw_wgl_pixelformat.c b/src/mesa/state_tracker/wgl/stw_wgl_pixelformat.c index bfc085093a8..7a8a2e22e49 100644 --- a/src/mesa/state_tracker/wgl/stw_wgl_pixelformat.c +++ b/src/mesa/state_tracker/wgl/stw_wgl_pixelformat.c @@ -25,14 +25,12 @@ * **************************************************************************/ -#define _GDI32_ - #include <windows.h> #include "pipe/p_compiler.h" #include "pipe/p_debug.h" #include "stw_pixelformat.h" -#include "stw_wgl_pixelformat.h" +#include "stw_wgl.h" static uint currentpixelformat = 0; diff --git a/src/mesa/state_tracker/wgl/stw_wgl_swapbuffers.c b/src/mesa/state_tracker/wgl/stw_wgl_swapbuffers.c index a4dffc5fa0e..bd86501ac04 100644 --- a/src/mesa/state_tracker/wgl/stw_wgl_swapbuffers.c +++ b/src/mesa/state_tracker/wgl/stw_wgl_swapbuffers.c @@ -25,9 +25,8 @@ * **************************************************************************/ -#define _GDI32_ - #include <windows.h> + #include "pipe/p_winsys.h" #include "pipe/p_screen.h" #include "pipe/p_context.h" @@ -36,7 +35,7 @@ #include "stw_winsys.h" #include "stw_device.h" #include "stw_framebuffer.h" -#include "stw_wgl_context.h" +#include "stw_wgl.h" WINGDIAPI BOOL APIENTRY wglSwapBuffers( @@ -56,9 +55,9 @@ wglSwapBuffers( surf = st_get_framebuffer_surface( fb->stfb, ST_SURFACE_BACK_LEFT ); - stw_winsys.flush_frontbuffer(stw_dev->screen->winsys, - surf, - hdc ); + stw_dev->stw_winsys->flush_frontbuffer(stw_dev->screen->winsys, + surf, + hdc ); return TRUE; } diff --git a/src/mesa/state_tracker/wgl/stw_winsys.h b/src/mesa/state_tracker/wgl/stw_winsys.h index 68f1c7b16b7..8557327ccdd 100644 --- a/src/mesa/state_tracker/wgl/stw_winsys.h +++ b/src/mesa/state_tracker/wgl/stw_winsys.h @@ -30,9 +30,12 @@ #include <windows.h> /* for HDC */ +#include "pipe/p_compiler.h" + struct pipe_screen; struct pipe_context; struct pipe_winsys; +struct pipe_surface; struct stw_winsys { @@ -48,6 +51,10 @@ struct stw_winsys HDC hDC ); }; -extern const struct stw_winsys stw_winsys; +boolean +st_init(const struct stw_winsys *stw_winsys); + +void +st_cleanup(void); #endif /* STW_WINSYS_H */ |