diff options
Diffstat (limited to 'src')
43 files changed, 18518 insertions, 21 deletions
diff --git a/src/mesa/drivers/dri/r600/Lindent b/src/mesa/drivers/dri/r600/Lindent new file mode 100755 index 00000000000..7d8d8896e30 --- /dev/null +++ b/src/mesa/drivers/dri/r600/Lindent @@ -0,0 +1,2 @@ +#!/bin/sh +indent -npro -kr -i8 -ts8 -sob -l80 -ss -ncs "$@" diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile new file mode 100644 index 00000000000..e1fc4061ad6 --- /dev/null +++ b/src/mesa/drivers/dri/r600/Makefile @@ -0,0 +1,118 @@ +# src/mesa/drivers/dri/r300/Makefile + +TOP = ../../../../.. +include $(TOP)/configs/current + +CFLAGS += $(RADEON_CFLAGS) + +LIBNAME = r600_dri.so + +MINIGLX_SOURCES = server/radeon_dri.c + +ifeq ($(USING_EGL), 1) +EGL_SOURCES = server/radeon_egl.c +endif + +COMMON_SOURCES = \ + ../../common/driverfuncs.c \ + ../common/mm.c \ + ../common/utils.c \ + ../common/texmem.c \ + ../common/vblank.c \ + ../common/xmlconfig.c \ + ../common/dri_util.c + +RADEON_COMMON_SOURCES = \ + radeon_texture.c \ + radeon_common_context.c \ + radeon_common.c \ + radeon_dma.c \ + radeon_lock.c \ + radeon_bo_legacy.c \ + radeon_cs_legacy.c \ + radeon_mipmap_tree.c \ + radeon_span.c \ + radeon_fbo.c + +DRIVER_SOURCES = \ + radeon_screen.c \ + r600_context.c \ + r600_ioctl.c \ + r600_cmdbuf.c \ + r600_state.c \ + r600_render.c \ + r600_tex.c \ + r600_texstate.c \ + radeon_program.c \ + radeon_program_alu.c \ + radeon_program_pair.c \ + radeon_nqssadce.c \ + r600_vertprog.c \ + r600_fragprog.c \ + r600_fragprog_swizzle.c \ + r600_fragprog_emit.c \ + r700_fragprog.c \ + r700_fragprog_emit.c \ + r600_shader.c \ + r600_emit.c \ + r600_swtcl.c \ + $(RADEON_COMMON_SOURCES) \ + $(EGL_SOURCES) + +C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES) + +DRIVER_DEFINES = -DCOMPILE_R600 -DR200_MERGED=0 \ + -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R600 \ +# -DRADEON_BO_TRACK \ + -Wall + +SYMLINKS = \ + server/radeon_dri.c \ + server/radeon_dri.h \ + server/radeon.h \ + server/radeon_macros.h \ + server/radeon_reg.h \ + server/radeon_egl.c + +COMMON_SYMLINKS = \ + radeon_chipset.h \ + radeon_screen.c \ + radeon_screen.h \ + radeon_span.h \ + radeon_span.c \ + radeon_bo_legacy.c \ + radeon_cs_legacy.c \ + radeon_bo_legacy.h \ + radeon_cs_legacy.h \ + radeon_bocs_wrapper.h \ + radeon_lock.c \ + radeon_lock.h \ + radeon_common.c \ + radeon_common.h \ + radeon_common_context.c \ + radeon_common_context.h \ + radeon_cmdbuf.h \ + radeon_dma.c \ + radeon_dma.h \ + radeon_mipmap_tree.c \ + radeon_mipmap_tree.h \ + radeon_texture.c \ + radeon_texture.h \ + radeon_fbo.c + +DRI_LIB_DEPS += $(RADEON_LDFLAGS) + +##### TARGETS ##### + +include ../Makefile.template + +server: + mkdir -p server + +$(SYMLINKS): server + @[ -e $@ ] || ln -sf ../../radeon/$@ server/ + +$(COMMON_SYMLINKS): + @[ -e $@ ] || ln -sf ../radeon/$@ ./ + +symlinks: $(SYMLINKS) $(COMMON_SYMLINKS) diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c new file mode 100644 index 00000000000..1ae6dc82bd4 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -0,0 +1,669 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Nicolai Haehnle <[email protected]> + */ + +#include "main/glheader.h" +#include "main/state.h" +#include "main/imports.h" +#include "main/macros.h" +#include "main/context.h" +#include "main/simple_list.h" +#include "swrast/swrast.h" + +#include "drm.h" +#include "radeon_drm.h" + +#include "r600_context.h" +#include "r600_ioctl.h" +#include "radeon_reg.h" +#include "r600_reg.h" +#include "r600_cmdbuf.h" +#include "r600_emit.h" +#include "radeon_bocs_wrapper.h" +#include "radeon_mipmap_tree.h" +#include "r600_state.h" +#include "radeon_reg.h" + +#define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200 +# define RADEON_ONE_REG_WR (1 << 15) + +/** # of dwords reserved for additional instructions that may need to be written + * during flushing. + */ +#define SPACE_FOR_FLUSHING 4 + +static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt) +{ + if (r300->radeon.radeonScreen->kernel_mm) { + return ((((*pkt) >> 16) & 0x3FFF) + 1); + } else { + drm_r300_cmd_header_t *t = (drm_r300_cmd_header_t*)pkt; + return t->packet0.count; + } + return 0; +} + +#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) +#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) + +void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + drm_r300_cmd_header_t cmd; + uint32_t addr, ndw, i; + + if (!r300->radeon.radeonScreen->kernel_mm) { + uint32_t dwords; + dwords = (*atom->check) (ctx, atom); + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_BATCH_TABLE(atom->cmd, dwords); + END_BATCH(); + return; + } + + cmd.u = atom->cmd[0]; + addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo; + ndw = cmd.vpu.count * 4; + if (ndw) { + + if (r300->vap_flush_needed) { + BEGIN_BATCH_NO_AUTOSTATE(15 + ndw); + + /* flush processing vertices */ + OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0); + OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + OUT_BATCH_REGVAL(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); + OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0xffffff); + OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); + r300->vap_flush_needed = GL_FALSE; + } else { + BEGIN_BATCH_NO_AUTOSTATE(5 + ndw); + } + OUT_BATCH_REGVAL(R300_VAP_PVS_UPLOAD_ADDRESS, addr); + OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR); + for (i = 0; i < ndw; i++) { + OUT_BATCH(atom->cmd[i+1]); + } + OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); + END_BATCH(); + } +} + +void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + drm_r300_cmd_header_t cmd; + uint32_t addr, ndw, i, sz; + int type, clamp, stride; + + if (!r300->radeon.radeonScreen->kernel_mm) { + uint32_t dwords; + dwords = (*atom->check) (ctx, atom); + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_BATCH_TABLE(atom->cmd, dwords); + END_BATCH(); + return; + } + + cmd.u = atom->cmd[0]; + sz = cmd.r500fp.count; + addr = ((cmd.r500fp.adrhi_flags & 1) << 8) | cmd.r500fp.adrlo; + type = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE); + clamp = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP); + + addr |= (type << 16); + addr |= (clamp << 17); + + stride = type ? 4 : 6; + + ndw = sz * stride; + if (ndw) { + + BEGIN_BATCH_NO_AUTOSTATE(3 + ndw); + OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0)); + OUT_BATCH(addr); + OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR); + for (i = 0; i < ndw; i++) { + OUT_BATCH(atom->cmd[i+1]); + } + END_BATCH(); + } +} + +static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd); + int notexture = 0; + + if (numtmus) { + int i; + + for(i = 0; i < numtmus; ++i) { + radeonTexObj *t = r300->hw.textures[i]; + + if (!t) + notexture = 1; + } + + if (r300->radeon.radeonScreen->kernel_mm && notexture) { + return; + } + BEGIN_BATCH_NO_AUTOSTATE(4 * numtmus); + for(i = 0; i < numtmus; ++i) { + radeonTexObj *t = r300->hw.textures[i]; + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + if (t && !t->image_override) { + OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + } else if (!t) { + OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]); + } else { /* override cases */ + if (t->bo) { + OUT_BATCH_RELOC(t->tile_bits, t->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + } else if (!r300->radeon.radeonScreen->kernel_mm) { + OUT_BATCH(t->override_offset); + } + else + OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]); + } + } + END_BATCH(); + } +} + +static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + struct radeon_renderbuffer *rrb; + uint32_t cbpitch; + uint32_t offset = r300->radeon.state.color.draw_offset; + + rrb = radeon_get_colorbuffer(&r300->radeon); + if (!rrb || !rrb->bo) { + fprintf(stderr, "no rrb\n"); + return; + } + + cbpitch = (rrb->pitch / rrb->cpp); + if (rrb->cpp == 4) + cbpitch |= R300_COLOR_FORMAT_ARGB8888; + else + cbpitch |= R300_COLOR_FORMAT_RGB565; + + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) + cbpitch |= R300_COLOR_TILE_ENABLE; + + BEGIN_BATCH_NO_AUTOSTATE(8); + OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); + OUT_BATCH_RELOC(offset, rrb->bo, offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1); + OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + if (r300->radeon.radeonScreen->driScreen->dri2.enabled) { + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + BEGIN_BATCH_NO_AUTOSTATE(3); + OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); + OUT_BATCH(0); + OUT_BATCH((rrb->width << R300_SCISSORS_X_SHIFT) | + (rrb->height << R300_SCISSORS_Y_SHIFT)); + END_BATCH(); + } else { + BEGIN_BATCH_NO_AUTOSTATE(3); + OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); + OUT_BATCH((R300_SCISSORS_OFFSET << R300_SCISSORS_X_SHIFT) | + (R300_SCISSORS_OFFSET << R300_SCISSORS_Y_SHIFT)); + OUT_BATCH(((rrb->width + R300_SCISSORS_OFFSET) << R300_SCISSORS_X_SHIFT) | + ((rrb->height + R300_SCISSORS_OFFSET) << R300_SCISSORS_Y_SHIFT)); + END_BATCH(); + } + } +} + +static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + struct radeon_renderbuffer *rrb; + uint32_t zbpitch; + + rrb = radeon_get_depthbuffer(&r300->radeon); + if (!rrb) + return; + + zbpitch = (rrb->pitch / rrb->cpp); + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) { + zbpitch |= R300_DEPTHMACROTILE_ENABLE; + } + if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){ + zbpitch |= R300_DEPTHMICROTILE_TILED; + } + + BEGIN_BATCH_NO_AUTOSTATE(6); + OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1); + OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, zbpitch); + END_BATCH(); +} + +static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + struct radeon_renderbuffer *rrb; + uint32_t format = 0; + + rrb = radeon_get_depthbuffer(&r300->radeon); + if (!rrb) + format = 0; + else { + if (rrb->cpp == 2) + format = R300_DEPTHFORMAT_16BIT_INT_Z; + else if (rrb->cpp == 4) + format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; + } + + OUT_BATCH(atom->cmd[0]); + atom->cmd[1] &= ~0xf; + atom->cmd[1] |= format; + OUT_BATCH(atom->cmd[1]); + OUT_BATCH(atom->cmd[2]); + OUT_BATCH(atom->cmd[3]); + OUT_BATCH(atom->cmd[4]); +} + +static int check_always(GLcontext *ctx, struct radeon_state_atom *atom) +{ + return atom->cmd_size; +} + +static int check_variable(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int cnt; + if (atom->cmd[0] == CP_PACKET2) { + return 0; + } + cnt = packet0_count(r300, atom->cmd); + return cnt ? cnt + 1 : 0; +} + +int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom) +{ + int cnt; + + cnt = vpu_count(atom->cmd); + return cnt ? (cnt * 4) + 1 : 0; +} + +int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom) +{ + int cnt; + + cnt = r500fp_count(atom->cmd); + return cnt ? (cnt * 6) + 1 : 0; +} + +int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom) +{ + int cnt; + + cnt = r500fp_count(atom->cmd); + return cnt ? (cnt * 4) + 1 : 0; +} + +#define ALLOC_STATE( ATOM, CHK, SZ, IDX ) \ + do { \ + r300->hw.ATOM.cmd_size = (SZ); \ + r300->hw.ATOM.cmd = (uint32_t*)CALLOC((SZ) * sizeof(uint32_t)); \ + r300->hw.ATOM.name = #ATOM; \ + r300->hw.ATOM.idx = (IDX); \ + r300->hw.ATOM.check = check_##CHK; \ + r300->hw.ATOM.dirty = GL_FALSE; \ + r300->radeon.hw.max_state_size += (SZ); \ + insert_at_tail(&r300->radeon.hw.atomlist, &r300->hw.ATOM); \ + } while (0) +/** + * Allocate memory for the command buffer and initialize the state atom + * list. Note that the initial hardware state is set by r300InitState(). + */ +void r300InitCmdBuf(r300ContextPtr r300) +{ + int mtu; + int has_tcl = 1; + int is_r500 = 0; + int i; + + if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + has_tcl = 0; + + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + is_r500 = 1; + + r300->radeon.hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */ + + mtu = r300->radeon.glCtx->Const.MaxTextureUnits; + if (RADEON_DEBUG & DEBUG_TEXTURE) { + fprintf(stderr, "Using %d maximum texture units..\n", mtu); + } + + /* Setup the atom linked list */ + make_empty_list(&r300->radeon.hw.atomlist); + r300->radeon.hw.atomlist.name = "atom-list"; + + /* Initialize state atoms */ + ALLOC_STATE(vpt, always, R300_VPT_CMDSIZE, 0); + r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VPORT_XSCALE, 6); + ALLOC_STATE(vap_cntl, always, R300_VAP_CNTL_SIZE, 0); + r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1); + r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH_1] = 0; + r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL, 1); + if (is_r500) { + ALLOC_STATE(vap_index_offset, always, 2, 0); + r300->hw.vap_index_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_VAP_INDEX_OFFSET, 1); + r300->hw.vap_index_offset.cmd[1] = 0; + } + ALLOC_STATE(vte, always, 3, 0); + r300->hw.vte.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VTE_CNTL, 2); + ALLOC_STATE(vap_vf_max_vtx_indx, always, 3, 0); + r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VF_MAX_VTX_INDX, 2); + ALLOC_STATE(vap_cntl_status, always, 2, 0); + r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL_STATUS, 1); + ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0); + r300->hw.vir[0].cmd[R300_VIR_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_0, 1); + ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1); + r300->hw.vir[1].cmd[R300_VIR_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_EXT_0, 1); + ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0); + r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VTX_STATE_CNTL, 2); + ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0); + r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE); + + if (has_tcl) { + ALLOC_STATE(vap_clip_cntl, always, 2, 0); + r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CLIP_CNTL, 1); + ALLOC_STATE(vap_clip, always, 5, 0); + r300->hw.vap_clip.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_GB_VERT_CLIP_ADJ, 4); + ALLOC_STATE(vap_pvs_vtx_timeout_reg, always, 2, 0); + r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, VAP_PVS_VTX_TIMEOUT_REG, 1); + } + + ALLOC_STATE(vof, always, R300_VOF_CMDSIZE, 0); + r300->hw.vof.cmd[R300_VOF_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_VAP_OUTPUT_VTX_FMT_0, 2); + + if (has_tcl) { + ALLOC_STATE(pvs, always, R300_PVS_CMDSIZE, 0); + r300->hw.pvs.cmd[R300_PVS_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_CODE_CNTL_0, 3); + } + + ALLOC_STATE(gb_enable, always, 2, 0); + r300->hw.gb_enable.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_ENABLE, 1); + ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0); + r300->hw.gb_misc.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_MSPOS0, 5); + ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0); + r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_ENABLE, 1); + ALLOC_STATE(ga_point_s0, always, 5, 0); + r300->hw.ga_point_s0.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_S0, 4); + ALLOC_STATE(ga_triangle_stipple, always, 2, 0); + r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_TRIANGLE_STIPPLE, 1); + ALLOC_STATE(ps, always, R300_PS_CMDSIZE, 0); + r300->hw.ps.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_SIZE, 1); + ALLOC_STATE(ga_point_minmax, always, 4, 0); + r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_MINMAX, 3); + ALLOC_STATE(lcntl, always, 2, 0); + r300->hw.lcntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_CNTL, 1); + ALLOC_STATE(ga_line_stipple, always, 4, 0); + r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_STIPPLE_VALUE, 3); + ALLOC_STATE(shade, always, 5, 0); + r300->hw.shade.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_ENHANCE, 4); + ALLOC_STATE(polygon_mode, always, 4, 0); + r300->hw.polygon_mode.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POLY_MODE, 3); + ALLOC_STATE(fogp, always, 3, 0); + r300->hw.fogp.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_FOG_SCALE, 2); + ALLOC_STATE(zbias_cntl, always, 2, 0); + r300->hw.zbias_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_TEX_WRAP, 1); + ALLOC_STATE(zbs, always, R300_ZBS_CMDSIZE, 0); + r300->hw.zbs.cmd[R300_ZBS_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_FRONT_SCALE, 4); + ALLOC_STATE(occlusion_cntl, always, 2, 0); + r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_ENABLE, 1); + ALLOC_STATE(cul, always, R300_CUL_CMDSIZE, 0); + r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_CULL_MODE, 1); + ALLOC_STATE(su_depth_scale, always, 3, 0); + r300->hw.su_depth_scale.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_DEPTH_SCALE, 2); + ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0); + r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_COUNT, 2); + if (is_r500) { + ALLOC_STATE(ri, always, R500_RI_CMDSIZE, 0); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, 16); + for (i = 0; i < 8; i++) { + r300->hw.ri.cmd[R300_RI_CMD_0 + i +1] = + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT); + } + ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, 1); + } else { + ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, 8); + ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, 1); + } + ALLOC_STATE(sc_hyperz, always, 3, 0); + r300->hw.sc_hyperz.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_HYPERZ, 2); + ALLOC_STATE(sc_screendoor, always, 2, 0); + r300->hw.sc_screendoor.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1); + ALLOC_STATE(us_out_fmt, always, 6, 0); + r300->hw.us_out_fmt.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_OUT_FMT, 5); + + if (is_r500) { + ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0); + r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CONFIG, 2); + r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO; + r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CODE_ADDR, 3); + r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(r300->radeon.radeonScreen, R500_US_FC_CTRL, 1); + r300->hw.fp.cmd[R500_FP_FC_CNTL] = 0; /* FIXME when we add flow control */ + + ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0); + r300->hw.r500fp.cmd[R300_FPI_CMD_0] = + cmdr500fp(r300->radeon.radeonScreen, 0, 0, 0, 0); + r300->hw.r500fp.emit = emit_r500fp; + ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0); + r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = + cmdr500fp(r300->radeon.radeonScreen, 0, 0, 1, 0); + r300->hw.r500fp_const.emit = emit_r500fp; + } else { + ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); + r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CONFIG, 3); + r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CODE_ADDR_0, 4); + + ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0); + r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_TEX_INST_0, 0); + + ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0); + r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, 1); + ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1); + r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, 1); + ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2); + r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, 1); + ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3); + r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, 1); + ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0); + r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_PFS_PARAM_0_X, 0); + } + ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0); + r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_BLEND, 1); + ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0); + r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_COLOR_R, 3); + ALLOC_STATE(at, always, R300_AT_CMDSIZE, 0); + r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_ALPHA_FUNC, 2); + ALLOC_STATE(fg_depth_src, always, 2, 0); + r300->hw.fg_depth_src.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_DEPTH_SRC, 1); + ALLOC_STATE(rb3d_cctl, always, 2, 0); + r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CCTL, 1); + ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0); + r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CBLEND, 2); + ALLOC_STATE(cmk, always, R300_CMK_CMDSIZE, 0); + r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RB3D_COLOR_CHANNEL_MASK, 1); + if (is_r500) { + ALLOC_STATE(blend_color, always, 3, 0); + r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_CONSTANT_COLOR_AR, 2); + } else { + ALLOC_STATE(blend_color, always, 2, 0); + r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_BLEND_COLOR, 1); + } + ALLOC_STATE(rop, always, 2, 0); + r300->hw.rop.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_ROPCNTL, 1); + ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0); + r300->hw.cb.emit = &emit_cb_offset; + ALLOC_STATE(rb3d_dither_ctl, always, 10, 0); + r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9); + ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0); + r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1); + ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0); + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); + ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0); + r300->hw.zs.cmd[R300_ZS_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3); + + ALLOC_STATE(zstencil_format, always, 5, 0); + r300->hw.zstencil_format.cmd[0] = + cmdpacket0(r300->radeon.radeonScreen, R300_ZB_FORMAT, 4); + r300->hw.zstencil_format.emit = emit_zstencil_format; + + ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0); + r300->hw.zb.emit = emit_zb_offset; + ALLOC_STATE(zb_depthclearvalue, always, 2, 0); + r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1); + ALLOC_STATE(unk4F30, always, 3, 0); + r300->hw.unk4F30.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4F30, 2); + ALLOC_STATE(zb_hiz_offset, always, 2, 0); + r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_OFFSET, 1); + ALLOC_STATE(zb_hiz_pitch, always, 2, 0); + r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_PITCH, 1); + + /* VPU only on TCL */ + if (has_tcl) { + int i; + ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0); + r300->hw.vpi.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0); + r300->hw.vpi.emit = emit_vpu; + + if (is_r500) { + ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + r300->hw.vpp.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0); + r300->hw.vpp.emit = emit_vpu; + + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); + r300->hw.vps.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1); + r300->hw.vps.emit = emit_vpu; + + for (i = 0; i < 6; i++) { + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); + r300->hw.vpucp[i].cmd[0] = + cmdvpu(r300->radeon.radeonScreen, + R500_PVS_UCP_START + i, 1); + r300->hw.vpucp[i].emit = emit_vpu; + } + } else { + ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); + r300->hw.vpp.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0); + r300->hw.vpp.emit = emit_vpu; + + ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); + r300->hw.vps.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1); + r300->hw.vps.emit = emit_vpu; + + for (i = 0; i < 6; i++) { + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); + r300->hw.vpucp[i].cmd[0] = + cmdvpu(r300->radeon.radeonScreen, + R300_PVS_UCP_START + i, 1); + r300->hw.vpucp[i].emit = emit_vpu; + } + } + } + + /* Textures */ + ALLOC_STATE(tex.filter, variable, mtu + 1, 0); + r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 0); + + ALLOC_STATE(tex.filter_1, variable, mtu + 1, 0); + r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, 0); + + ALLOC_STATE(tex.size, variable, mtu + 1, 0); + r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, 0); + + ALLOC_STATE(tex.format, variable, mtu + 1, 0); + r300->hw.tex.format.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, 0); + + ALLOC_STATE(tex.pitch, variable, mtu + 1, 0); + r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, 0); + + ALLOC_STATE(tex.offset, variable, 1, 0); + r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, 0); + r300->hw.tex.offset.emit = &emit_tex_offsets; + + ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0); + r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, 0); + + ALLOC_STATE(tex.border_color, variable, mtu + 1, 0); + r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, 0); + + r300->radeon.hw.is_dirty = GL_TRUE; + r300->radeon.hw.all_dirty = GL_TRUE; + + rcommonInitCmdBuf(&r300->radeon); +} diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.h b/src/mesa/drivers/dri/r600/r600_cmdbuf.h new file mode 100644 index 00000000000..eeaca96fdf9 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.h @@ -0,0 +1,50 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Nicolai Haehnle <[email protected]> + */ + +#ifndef __R600_CMDBUF_H__ +#define __R600_CMDBUF_H__ + +#include "r600_context.h" + +extern void r300InitCmdBuf(r300ContextPtr r300); + +void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom); +int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom); + +void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom); +int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom); +int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom); + +#endif /* __R600_CMDBUF_H__ */ diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c new file mode 100644 index 00000000000..0b351b1237f --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -0,0 +1,472 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Keith Whitwell <[email protected]> + * + * \author Nicolai Haehnle <[email protected]> + */ + +#include "main/glheader.h" +#include "main/api_arrayelt.h" +#include "main/context.h" +#include "main/simple_list.h" +#include "main/imports.h" +#include "main/matrix.h" +#include "main/extensions.h" +#include "main/state.h" +#include "main/bufferobj.h" +#include "main/texobj.h" + +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "vbo/vbo.h" + +#include "tnl/tnl.h" +#include "tnl/t_pipeline.h" +#include "tnl/t_vp_build.h" + +#include "drivers/common/driverfuncs.h" + +#include "r600_context.h" +#include "radeon_context.h" +#include "radeon_span.h" +#include "r600_cmdbuf.h" +#include "r600_state.h" +#include "r600_ioctl.h" +#include "r600_tex.h" +#include "r600_emit.h" +#include "r600_swtcl.h" +#include "radeon_bocs_wrapper.h" + + +#include "vblank.h" +#include "utils.h" +#include "xmlpool.h" /* for symbolic values of enum-type options */ + +/* hw_tcl_on derives from future_hw_tcl_on when its safe to change it. */ +int future_hw_tcl_on = 1; +int hw_tcl_on = 1; + +#define need_GL_VERSION_2_0 +#define need_GL_ARB_point_parameters +#define need_GL_ARB_vertex_program +#define need_GL_EXT_blend_equation_separate +#define need_GL_EXT_blend_func_separate +#define need_GL_EXT_blend_minmax +#define need_GL_EXT_framebuffer_object +#define need_GL_EXT_fog_coord +#define need_GL_EXT_gpu_program_parameters +#define need_GL_EXT_secondary_color +#define need_GL_EXT_stencil_two_side +#define need_GL_ATI_separate_stencil +#define need_GL_NV_vertex_program + +#include "extension_helper.h" + + +const struct dri_extension card_extensions[] = { + /* *INDENT-OFF* */ + {"GL_ARB_depth_texture", NULL}, + {"GL_ARB_fragment_program", NULL}, + {"GL_ARB_multitexture", NULL}, + {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, + {"GL_ARB_shadow", NULL}, + {"GL_ARB_shadow_ambient", NULL}, + {"GL_ARB_texture_border_clamp", NULL}, + {"GL_ARB_texture_cube_map", NULL}, + {"GL_ARB_texture_env_add", NULL}, + {"GL_ARB_texture_env_combine", NULL}, + {"GL_ARB_texture_env_crossbar", NULL}, + {"GL_ARB_texture_env_dot3", NULL}, + {"GL_ARB_texture_mirrored_repeat", NULL}, + {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions}, + {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions}, + {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, + {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, + {"GL_EXT_blend_subtract", NULL}, + {"GL_EXT_packed_depth_stencil", NULL}, + {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions}, + {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, + {"GL_EXT_shadow_funcs", NULL}, + {"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions}, + {"GL_EXT_stencil_wrap", NULL}, + {"GL_EXT_texture_edge_clamp", NULL}, + {"GL_EXT_texture_env_combine", NULL}, + {"GL_EXT_texture_env_dot3", NULL}, + {"GL_EXT_texture_filter_anisotropic", NULL}, + {"GL_EXT_texture_lod_bias", NULL}, + {"GL_EXT_texture_mirror_clamp", NULL}, + {"GL_EXT_texture_rectangle", NULL}, + {"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions}, + {"GL_ATI_texture_env_combine3", NULL}, + {"GL_ATI_texture_mirror_once", NULL}, + {"GL_MESA_pack_invert", NULL}, + {"GL_MESA_ycbcr_texture", NULL}, + {"GL_MESAX_texture_float", NULL}, + {"GL_NV_blend_square", NULL}, + {"GL_NV_vertex_program", GL_NV_vertex_program_functions}, + {"GL_SGIS_generate_mipmap", NULL}, + {NULL, NULL} + /* *INDENT-ON* */ +}; + + +const struct dri_extension mm_extensions[] = { + { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, + { NULL, NULL } +}; + +/** + * The GL 2.0 functions are needed to make display lists work with + * functions added by GL_ATI_separate_stencil. + */ +const struct dri_extension gl_20_extension[] = { + {"GL_VERSION_2_0", GL_VERSION_2_0_functions }, +}; + + +extern struct tnl_pipeline_stage _r300_render_stage; +extern const struct tnl_pipeline_stage _r300_tcl_stage; + +static const struct tnl_pipeline_stage *r300_pipeline[] = { + + /* Try and go straight to t&l + */ + &_r300_tcl_stage, + + /* Catch any t&l fallbacks + */ + &_tnl_vertex_transform_stage, + &_tnl_normal_transform_stage, + &_tnl_lighting_stage, + &_tnl_fog_coordinate_stage, + &_tnl_texgen_stage, + &_tnl_texture_transform_stage, + &_tnl_vertex_program_stage, + + /* Try again to go to tcl? + * - no good for asymmetric-twoside (do with multipass) + * - no good for asymmetric-unfilled (do with multipass) + * - good for material + * - good for texgen + * - need to manipulate a bit of state + * + * - worth it/not worth it? + */ + + /* Else do them here. + */ + &_r300_render_stage, + &_tnl_render_stage, /* FALLBACK */ + 0, +}; + +static void r300RunPipeline(GLcontext * ctx) +{ + _mesa_lock_context_textures(ctx); + + if (ctx->NewState) + _mesa_update_state_locked(ctx); + + _tnl_run_pipeline(ctx); + _mesa_unlock_context_textures(ctx); +} + +static void r300_get_lock(radeonContextPtr rmesa) +{ + drm_radeon_sarea_t *sarea = rmesa->sarea; + + if (sarea->ctx_owner != rmesa->dri.hwContext) { + sarea->ctx_owner = rmesa->dri.hwContext; + if (!rmesa->radeonScreen->kernel_mm) + radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom); + } +} + +static void r300_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa) +{ + /* please flush pipe do all pending work */ + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_SC_SCREENDOOR, 1)); + radeon_cs_write_dword(cs, 0x0); + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_SC_SCREENDOOR, 1)); + radeon_cs_write_dword(cs, 0x00FFFFFF); + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_SC_HYPERZ, 1)); + radeon_cs_write_dword(cs, 0x0); + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_US_CONFIG, 1)); + radeon_cs_write_dword(cs, 0x0); + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_ZB_CNTL, 1)); + radeon_cs_write_dword(cs, 0x0); + radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen, R300_WAIT_3D)); + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_RB3D_DSTCACHE_CTLSTAT, 1)); + radeon_cs_write_dword(cs, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_ZB_ZCACHE_CTLSTAT, 1)); + radeon_cs_write_dword(cs, R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE); + radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen, + R300_WAIT_3D | R300_WAIT_3D_CLEAN)); +} + +static void r300_vtbl_pre_emit_atoms(radeonContextPtr radeon) +{ + r300ContextPtr r300 = (r300ContextPtr)radeon; + BATCH_LOCALS(radeon); + + r300->vap_flush_needed = GL_TRUE; + + cp_wait(radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGVAL(R300_TX_INVALTAGS, R300_TX_FLUSH); + END_BATCH(); + end_3d(radeon); +} + +static void r300_fallback(GLcontext *ctx, GLuint bit, GLboolean mode) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + if (mode) + r300->radeon.Fallback |= bit; + else + r300->radeon.Fallback &= ~bit; +} + +static void r300_init_vtbl(radeonContextPtr radeon) +{ + radeon->vtbl.get_lock = r300_get_lock; + radeon->vtbl.update_viewport_offset = r300UpdateViewportOffset; + radeon->vtbl.emit_cs_header = r300_vtbl_emit_cs_header; + radeon->vtbl.swtcl_flush = r300_swtcl_flush; + radeon->vtbl.pre_emit_atoms = r300_vtbl_pre_emit_atoms; + radeon->vtbl.fallback = r300_fallback; +} + + +/* Create the device specific rendering context. + */ +GLboolean r300CreateContext(const __GLcontextModes * glVisual, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate) +{ + __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; + radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); + struct dd_function_table functions; + r300ContextPtr r300; + GLcontext *ctx; + int tcl_mode; + + assert(glVisual); + assert(driContextPriv); + assert(screen); + + /* Allocate the R300 context */ + r300 = (r300ContextPtr) CALLOC(sizeof(*r300)); + if (!r300) + return GL_FALSE; + + if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) + hw_tcl_on = future_hw_tcl_on = 0; + + r300_init_vtbl(&r300->radeon); + /* Parse configuration files. + * Do this here so that initialMaxAnisotropy is set before we create + * the default textures. + */ + driParseConfigFiles(&r300->radeon.optionCache, &screen->optionCache, + screen->driScreen->myNum, "r300"); + r300->radeon.initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache, + "def_max_anisotropy"); + + /* Init default driver functions then plug in our R300-specific functions + * (the texture functions are especially important) + */ + _mesa_init_driver_functions(&functions); + r300InitIoctlFuncs(&functions); + r300InitStateFuncs(&functions); + r300InitTextureFuncs(&functions); + r300InitShaderFuncs(&functions); + + if (!radeonInitContext(&r300->radeon, &functions, + glVisual, driContextPriv, + sharedContextPrivate)) { + FREE(r300); + return GL_FALSE; + } + + /* Init r300 context data */ + /* Set the maximum texture size small enough that we can guarentee that + * all texture units can bind a maximal texture and have them both in + * texturable memory at once. + */ + + ctx = r300->radeon.glCtx; + + ctx->Const.MaxTextureImageUnits = + driQueryOptioni(&r300->radeon.optionCache, "texture_image_units"); + ctx->Const.MaxTextureCoordUnits = + driQueryOptioni(&r300->radeon.optionCache, "texture_coord_units"); + ctx->Const.MaxTextureUnits = + MIN2(ctx->Const.MaxTextureImageUnits, + ctx->Const.MaxTextureCoordUnits); + ctx->Const.MaxTextureMaxAnisotropy = 16.0; + ctx->Const.MaxTextureLodBias = 16.0; + + if (screen->chip_family >= CHIP_FAMILY_RV515) { + ctx->Const.MaxTextureLevels = 13; + ctx->Const.MaxTextureRectSize = 4096; + } + + ctx->Const.MinPointSize = 1.0; + ctx->Const.MinPointSizeAA = 1.0; + ctx->Const.MaxPointSize = R300_POINTSIZE_MAX; + ctx->Const.MaxPointSizeAA = R300_POINTSIZE_MAX; + + ctx->Const.MinLineWidth = 1.0; + ctx->Const.MinLineWidthAA = 1.0; + ctx->Const.MaxLineWidth = R300_LINESIZE_MAX; + ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX; + + /* Needs further modifications */ +#if 0 + ctx->Const.MaxArrayLockSize = + ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4); +#endif + + ctx->Const.MaxDrawBuffers = 1; + + /* Initialize the software rasterizer and helper modules. + */ + _swrast_CreateContext(ctx); + _vbo_CreateContext(ctx); + _tnl_CreateContext(ctx); + _swsetup_CreateContext(ctx); + _swsetup_Wakeup(ctx); + _ae_create_context(ctx); + + /* Install the customized pipeline: + */ + _tnl_destroy_pipeline(ctx); + _tnl_install_pipeline(ctx, r300_pipeline); + + /* Try and keep materials and vertices separate: + */ +/* _tnl_isolate_materials(ctx, GL_TRUE); */ + + /* Configure swrast and TNL to match hardware characteristics: + */ + _swrast_allow_pixel_fog(ctx, GL_FALSE); + _swrast_allow_vertex_fog(ctx, GL_TRUE); + _tnl_allow_pixel_fog(ctx, GL_FALSE); + _tnl_allow_vertex_fog(ctx, GL_TRUE); + + /* currently bogus data */ + if (screen->chip_flags & RADEON_CHIPSET_TCL) { + ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeInstructions = + VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ + ctx->Const.VertexProgram.MaxTemps = 32; + ctx->Const.VertexProgram.MaxNativeTemps = + /*VSF_MAX_FRAGMENT_TEMPS */ 32; + ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + } + + ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS; + ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ + ctx->Const.FragmentProgram.MaxNativeParameters = PFS_NUM_CONST_REGS; + ctx->Const.FragmentProgram.MaxNativeAluInstructions = PFS_MAX_ALU_INST; + ctx->Const.FragmentProgram.MaxNativeTexInstructions = PFS_MAX_TEX_INST; + ctx->Const.FragmentProgram.MaxNativeInstructions = + PFS_MAX_ALU_INST + PFS_MAX_TEX_INST; + ctx->Const.FragmentProgram.MaxNativeTexIndirections = + PFS_MAX_TEX_INDIRECT; + ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */ + ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; + ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; + + driInitExtensions(ctx, card_extensions, GL_TRUE); + if (r300->radeon.radeonScreen->kernel_mm) + driInitExtensions(ctx, mm_extensions, GL_FALSE); + + if (driQueryOptionb + (&r300->radeon.optionCache, "disable_stencil_two_side")) + _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side"); + + if (r300->radeon.glCtx->Mesa_DXTn + && !driQueryOptionb(&r300->radeon.optionCache, "disable_s3tc")) { + _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + _mesa_enable_extension(ctx, "GL_S3_s3tc"); + } else + if (driQueryOptionb(&r300->radeon.optionCache, "force_s3tc_enable")) + { + _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + } + + r300->disable_lowimpact_fallback = + driQueryOptionb(&r300->radeon.optionCache, + "disable_lowimpact_fallback"); + radeon_fbo_init(&r300->radeon); + radeonInitSpanFuncs( ctx ); + r300InitCmdBuf(r300); + r300InitState(r300); + if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) + r300InitSwtcl(ctx); + + TNL_CONTEXT(ctx)->Driver.RunPipeline = r300RunPipeline; + + tcl_mode = driQueryOptioni(&r300->radeon.optionCache, "tcl_mode"); + if (driQueryOptionb(&r300->radeon.optionCache, "no_rast")) { + fprintf(stderr, "disabling 3D acceleration\n"); +#if R200_MERGED + FALLBACK(&r300->radeon, RADEON_FALLBACK_DISABLE, 1); +#endif + } + if (tcl_mode == DRI_CONF_TCL_SW || + !(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) { + if (r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + r300->radeon.radeonScreen->chip_flags &= + ~RADEON_CHIPSET_TCL; + fprintf(stderr, "Disabling HW TCL support\n"); + } + TCL_FALLBACK(r300->radeon.glCtx, + RADEON_TCL_FALLBACK_TCL_DISABLE, 1); + } + + return GL_TRUE; +} + diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h new file mode 100644 index 00000000000..0d713f76351 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -0,0 +1,716 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Keith Whitwell <[email protected]> + * \author Nicolai Haehnle <[email protected]> + */ + +#ifndef __R600_CONTEXT_H__ +#define __R600_CONTEXT_H__ + +#include "tnl/t_vertex.h" +#include "drm.h" +#include "radeon_drm.h" +#include "dri_util.h" +#include "texmem.h" +#include "radeon_common.h" + +#include "main/macros.h" +#include "main/mtypes.h" +#include "main/colormac.h" + +struct r300_context; +typedef struct r300_context r300ContextRec; +typedef struct r300_context *r300ContextPtr; + + +#include "main/mm.h" + +/* From http://gcc. gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html . + I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble + with other compilers ... GLUE! +*/ +#define WARN_ONCE(a, ...) { \ + static int warn##__LINE__=1; \ + if(warn##__LINE__){ \ + fprintf(stderr, "*********************************WARN_ONCE*********************************\n"); \ + fprintf(stderr, "File %s function %s line %d\n", \ + __FILE__, __FUNCTION__, __LINE__); \ + fprintf(stderr, a, ## __VA_ARGS__);\ + fprintf(stderr, "***************************************************************************\n"); \ + warn##__LINE__=0;\ + } \ + } + +#include "r600_vertprog.h" +#include "r700_fragprog.h" + + + +/************ DMA BUFFERS **************/ + +/* The blit width for texture uploads + */ +#define R300_BLIT_WIDTH_BYTES 1024 +#define R300_MAX_TEXTURE_UNITS 8 + +struct r300_texture_state { + int tc_count; /* number of incoming texture coordinates from VAP */ +}; + + +#define R300_VPT_CMD_0 0 +#define R300_VPT_XSCALE 1 +#define R300_VPT_XOFFSET 2 +#define R300_VPT_YSCALE 3 +#define R300_VPT_YOFFSET 4 +#define R300_VPT_ZSCALE 5 +#define R300_VPT_ZOFFSET 6 +#define R300_VPT_CMDSIZE 7 + +#define R300_VIR_CMD_0 0 /* vir is variable size (at least 1) */ +#define R300_VIR_CNTL_0 1 +#define R300_VIR_CNTL_1 2 +#define R300_VIR_CNTL_2 3 +#define R300_VIR_CNTL_3 4 +#define R300_VIR_CNTL_4 5 +#define R300_VIR_CNTL_5 6 +#define R300_VIR_CNTL_6 7 +#define R300_VIR_CNTL_7 8 +#define R300_VIR_CMDSIZE 9 + +#define R300_VIC_CMD_0 0 +#define R300_VIC_CNTL_0 1 +#define R300_VIC_CNTL_1 2 +#define R300_VIC_CMDSIZE 3 + +#define R300_VOF_CMD_0 0 +#define R300_VOF_CNTL_0 1 +#define R300_VOF_CNTL_1 2 +#define R300_VOF_CMDSIZE 3 + +#define R300_PVS_CMD_0 0 +#define R300_PVS_CNTL_1 1 +#define R300_PVS_CNTL_2 2 +#define R300_PVS_CNTL_3 3 +#define R300_PVS_CMDSIZE 4 + +#define R300_GB_MISC_CMD_0 0 +#define R300_GB_MISC_MSPOS_0 1 +#define R300_GB_MISC_MSPOS_1 2 +#define R300_GB_MISC_TILE_CONFIG 3 +#define R300_GB_MISC_SELECT 4 +#define R300_GB_MISC_AA_CONFIG 5 +#define R300_GB_MISC_CMDSIZE 6 + +#define R300_TXE_CMD_0 0 +#define R300_TXE_ENABLE 1 +#define R300_TXE_CMDSIZE 2 + +#define R300_PS_CMD_0 0 +#define R300_PS_POINTSIZE 1 +#define R300_PS_CMDSIZE 2 + +#define R300_ZBS_CMD_0 0 +#define R300_ZBS_T_FACTOR 1 +#define R300_ZBS_T_CONSTANT 2 +#define R300_ZBS_W_FACTOR 3 +#define R300_ZBS_W_CONSTANT 4 +#define R300_ZBS_CMDSIZE 5 + +#define R300_CUL_CMD_0 0 +#define R300_CUL_CULL 1 +#define R300_CUL_CMDSIZE 2 + +#define R300_RC_CMD_0 0 +#define R300_RC_CNTL_0 1 +#define R300_RC_CNTL_1 2 +#define R300_RC_CMDSIZE 3 + +#define R300_RI_CMD_0 0 +#define R300_RI_INTERP_0 1 +#define R300_RI_INTERP_1 2 +#define R300_RI_INTERP_2 3 +#define R300_RI_INTERP_3 4 +#define R300_RI_INTERP_4 5 +#define R300_RI_INTERP_5 6 +#define R300_RI_INTERP_6 7 +#define R300_RI_INTERP_7 8 +#define R300_RI_CMDSIZE 9 + +#define R500_RI_CMDSIZE 17 + +#define R300_RR_CMD_0 0 /* rr is variable size (at least 1) */ +#define R300_RR_INST_0 1 +#define R300_RR_INST_1 2 +#define R300_RR_INST_2 3 +#define R300_RR_INST_3 4 +#define R300_RR_INST_4 5 +#define R300_RR_INST_5 6 +#define R300_RR_INST_6 7 +#define R300_RR_INST_7 8 +#define R300_RR_CMDSIZE 9 + +#define R300_FP_CMD_0 0 +#define R300_FP_CNTL0 1 +#define R300_FP_CNTL1 2 +#define R300_FP_CNTL2 3 +#define R300_FP_CMD_1 4 +#define R300_FP_NODE0 5 +#define R300_FP_NODE1 6 +#define R300_FP_NODE2 7 +#define R300_FP_NODE3 8 +#define R300_FP_CMDSIZE 9 + +#define R500_FP_CMD_0 0 +#define R500_FP_CNTL 1 +#define R500_FP_PIXSIZE 2 +#define R500_FP_CMD_1 3 +#define R500_FP_CODE_ADDR 4 +#define R500_FP_CODE_RANGE 5 +#define R500_FP_CODE_OFFSET 6 +#define R500_FP_CMD_2 7 +#define R500_FP_FC_CNTL 8 +#define R500_FP_CMDSIZE 9 + +#define R300_FPT_CMD_0 0 +#define R300_FPT_INSTR_0 1 +#define R300_FPT_CMDSIZE 65 + +#define R300_FPI_CMD_0 0 +#define R300_FPI_INSTR_0 1 +#define R300_FPI_CMDSIZE 65 +/* R500 has space for 512 instructions - 6 dwords per instruction */ +#define R500_FPI_CMDSIZE (512*6+1) + +#define R300_FPP_CMD_0 0 +#define R300_FPP_PARAM_0 1 +#define R300_FPP_CMDSIZE (32*4+1) +/* R500 has spcae for 256 constants - 4 dwords per constant */ +#define R500_FPP_CMDSIZE (256*4+1) + +#define R300_FOGS_CMD_0 0 +#define R300_FOGS_STATE 1 +#define R300_FOGS_CMDSIZE 2 + +#define R300_FOGC_CMD_0 0 +#define R300_FOGC_R 1 +#define R300_FOGC_G 2 +#define R300_FOGC_B 3 +#define R300_FOGC_CMDSIZE 4 + +#define R300_FOGP_CMD_0 0 +#define R300_FOGP_SCALE 1 +#define R300_FOGP_START 2 +#define R300_FOGP_CMDSIZE 3 + +#define R300_AT_CMD_0 0 +#define R300_AT_ALPHA_TEST 1 +#define R300_AT_UNKNOWN 2 +#define R300_AT_CMDSIZE 3 + +#define R300_BLD_CMD_0 0 +#define R300_BLD_CBLEND 1 +#define R300_BLD_ABLEND 2 +#define R300_BLD_CMDSIZE 3 + +#define R300_CMK_CMD_0 0 +#define R300_CMK_COLORMASK 1 +#define R300_CMK_CMDSIZE 2 + +#define R300_CB_CMD_0 0 +#define R300_CB_OFFSET 1 +#define R300_CB_CMD_1 2 +#define R300_CB_PITCH 3 +#define R300_CB_CMDSIZE 4 + +#define R300_ZS_CMD_0 0 +#define R300_ZS_CNTL_0 1 +#define R300_ZS_CNTL_1 2 +#define R300_ZS_CNTL_2 3 +#define R300_ZS_CMDSIZE 4 + +#define R300_ZB_CMD_0 0 +#define R300_ZB_OFFSET 1 +#define R300_ZB_PITCH 2 +#define R300_ZB_CMDSIZE 3 + +#define R300_VAP_CNTL_FLUSH 0 +#define R300_VAP_CNTL_FLUSH_1 1 +#define R300_VAP_CNTL_CMD 2 +#define R300_VAP_CNTL_INSTR 3 +#define R300_VAP_CNTL_SIZE 4 + +#define R300_VPI_CMD_0 0 +#define R300_VPI_INSTR_0 1 +#define R300_VPI_CMDSIZE 1025 /* 256 16 byte instructions */ + +#define R300_VPP_CMD_0 0 +#define R300_VPP_PARAM_0 1 +#define R300_VPP_CMDSIZE 1025 /* 256 4-component parameters */ + +#define R300_VPUCP_CMD_0 0 +#define R300_VPUCP_X 1 +#define R300_VPUCP_Y 2 +#define R300_VPUCP_Z 3 +#define R300_VPUCP_W 4 +#define R300_VPUCP_CMDSIZE 5 /* 256 4-component parameters */ + +#define R300_VPS_CMD_0 0 +#define R300_VPS_ZERO_0 1 +#define R300_VPS_ZERO_1 2 +#define R300_VPS_POINTSIZE 3 +#define R300_VPS_ZERO_3 4 +#define R300_VPS_CMDSIZE 5 + + /* the layout is common for all fields inside tex */ +#define R300_TEX_CMD_0 0 +#define R300_TEX_VALUE_0 1 +/* We don't really use this, instead specify mtu+1 dynamically +#define R300_TEX_CMDSIZE (MAX_TEXTURE_UNITS+1) +*/ + +/** + * Cache for hardware register state. + */ +struct r300_hw_state { + struct radeon_state_atom vpt; /* viewport (1D98) */ + struct radeon_state_atom vap_cntl; + struct radeon_state_atom vap_index_offset; /* 0x208c r5xx only */ + struct radeon_state_atom vof; /* VAP output format register 0x2090 */ + struct radeon_state_atom vte; /* (20B0) */ + struct radeon_state_atom vap_vf_max_vtx_indx; /* Maximum Vertex Indx Clamp (2134) */ + struct radeon_state_atom vap_cntl_status; + struct radeon_state_atom vir[2]; /* vap input route (2150/21E0) */ + struct radeon_state_atom vic; /* vap input control (2180) */ + struct radeon_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */ + struct radeon_state_atom vap_clip_cntl; + struct radeon_state_atom vap_clip; + struct radeon_state_atom vap_pvs_vtx_timeout_reg; /* Vertex timeout register (2288) */ + struct radeon_state_atom pvs; /* pvs_cntl (22D0) */ + struct radeon_state_atom gb_enable; /* (4008) */ + struct radeon_state_atom gb_misc; /* Multisampling position shifts ? (4010) */ + struct radeon_state_atom ga_point_s0; /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */ + struct radeon_state_atom ga_triangle_stipple; /* (4214) */ + struct radeon_state_atom ps; /* pointsize (421C) */ + struct radeon_state_atom ga_point_minmax; /* (4230) */ + struct radeon_state_atom lcntl; /* line control */ + struct radeon_state_atom ga_line_stipple; /* (4260) */ + struct radeon_state_atom shade; + struct radeon_state_atom polygon_mode; + struct radeon_state_atom fogp; /* fog parameters (4294) */ + struct radeon_state_atom ga_soft_reset; /* (429C) */ + struct radeon_state_atom zbias_cntl; + struct radeon_state_atom zbs; /* zbias (42A4) */ + struct radeon_state_atom occlusion_cntl; + struct radeon_state_atom cul; /* cull cntl (42B8) */ + struct radeon_state_atom su_depth_scale; /* (42C0) */ + struct radeon_state_atom rc; /* rs control (4300) */ + struct radeon_state_atom ri; /* rs interpolators (4310) */ + struct radeon_state_atom rr; /* rs route (4330) */ + struct radeon_state_atom sc_hyperz; /* (43A4) */ + struct radeon_state_atom sc_screendoor; /* (43E8) */ + struct radeon_state_atom fp; /* fragment program cntl + nodes (4600) */ + struct radeon_state_atom fpt; /* texi - (4620) */ + struct radeon_state_atom us_out_fmt; /* (46A4) */ + struct radeon_state_atom r500fp; /* r500 fp instructions */ + struct radeon_state_atom r500fp_const; /* r500 fp constants */ + struct radeon_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */ + struct radeon_state_atom fogs; /* fog state (4BC0) */ + struct radeon_state_atom fogc; /* fog color (4BC8) */ + struct radeon_state_atom at; /* alpha test (4BD4) */ + struct radeon_state_atom fg_depth_src; /* (4BD8) */ + struct radeon_state_atom fpp; /* 0x4C00 and following */ + struct radeon_state_atom rb3d_cctl; /* (4E00) */ + struct radeon_state_atom bld; /* blending (4E04) */ + struct radeon_state_atom cmk; /* colormask (4E0C) */ + struct radeon_state_atom blend_color; /* constant blend color */ + struct radeon_state_atom rop; /* ropcntl */ + struct radeon_state_atom cb; /* colorbuffer (4E28) */ + struct radeon_state_atom rb3d_dither_ctl; /* (4E50) */ + struct radeon_state_atom rb3d_aaresolve_ctl; /* (4E88) */ + struct radeon_state_atom rb3d_discard_src_pixel_lte_threshold; /* (4E88) I saw it only written on RV350 hardware.. */ + struct radeon_state_atom zs; /* zstencil control (4F00) */ + struct radeon_state_atom zstencil_format; + struct radeon_state_atom zb; /* z buffer (4F20) */ + struct radeon_state_atom zb_depthclearvalue; /* (4F28) */ + struct radeon_state_atom unk4F30; /* (4F30) */ + struct radeon_state_atom zb_hiz_offset; /* (4F44) */ + struct radeon_state_atom zb_hiz_pitch; /* (4F54) */ + + struct radeon_state_atom vpi; /* vp instructions */ + struct radeon_state_atom vpp; /* vp parameters */ + struct radeon_state_atom vps; /* vertex point size (?) */ + struct radeon_state_atom vpucp[6]; /* vp user clip plane - 6 */ + /* 8 texture units */ + /* the state is grouped by function and not by + texture unit. This makes single unit updates + really awkward - we are much better off + updating the whole thing at once */ + struct { + struct radeon_state_atom filter; + struct radeon_state_atom filter_1; + struct radeon_state_atom size; + struct radeon_state_atom format; + struct radeon_state_atom pitch; + struct radeon_state_atom offset; + struct radeon_state_atom chroma_key; + struct radeon_state_atom border_color; + } tex; + struct radeon_state_atom txe; /* tex enable (4104) */ + + radeonTexObj *textures[R300_MAX_TEXTURE_UNITS]; +}; + +/** + * State cache + */ + +/* Vertex shader state */ + +/* Perhaps more if we store programs in vmem? */ +/* drm_r300_cmd_header_t->vpu->count is unsigned char */ +#define VSF_MAX_FRAGMENT_LENGTH (255*4) + +/* Can be tested with colormat currently. */ +#define VSF_MAX_FRAGMENT_TEMPS (14) + +#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) +#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1) + +struct r300_vertex_shader_fragment { + int length; + union { + GLuint d[VSF_MAX_FRAGMENT_LENGTH]; + float f[VSF_MAX_FRAGMENT_LENGTH]; + GLuint i[VSF_MAX_FRAGMENT_LENGTH]; + } body; +}; + +struct r300_vertex_shader_state { + struct r300_vertex_shader_fragment program; +}; + +extern int hw_tcl_on; + +#define COLOR_IS_RGBA +#define TAG(x) r300##x +#include "tnl_dd/t_dd_vertex.h" +#undef TAG + +//#define CURRENT_VERTEX_SHADER(ctx) (ctx->VertexProgram._Current) +#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->selected_vp) + +/* Should but doesnt work */ +//#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->curr_vp) + +/* r300_vertex_shader_state and r300_vertex_program should probably be merged together someday. + * Keeping them them seperate for now should ensure fixed pipeline keeps functioning properly. + */ + +struct r300_vertex_program_key { + GLuint InputsRead; + GLuint OutputsWritten; + GLuint OutputsAdded; +}; + +struct r300_vertex_program { + struct r300_vertex_program *next; + struct r300_vertex_program_key key; + int translated; + + struct r300_vertex_shader_fragment program; + + int pos_end; + int num_temporaries; /* Number of temp vars used by program */ + int wpos_idx; + int inputs[VERT_ATTRIB_MAX]; + int outputs[VERT_RESULT_MAX]; + int native; + int ref_count; + int use_ref_count; +}; + +struct r300_vertex_program_cont { + struct gl_vertex_program mesa_program; /* Must be first */ + struct r300_vertex_shader_fragment params; + struct r300_vertex_program *progs; +}; + +#define PFS_MAX_ALU_INST 64 +#define PFS_MAX_TEX_INST 64 +#define PFS_MAX_TEX_INDIRECT 4 +#define PFS_NUM_TEMP_REGS 32 +#define PFS_NUM_CONST_REGS 16 + +struct r300_pfs_compile_state; + + +/** + * Stores state that influences the compilation of a fragment program. + */ +struct r300_fragment_program_external_state { + struct { + /** + * If the sampler is used as a shadow sampler, + * this field is: + * 0 - GL_LUMINANCE + * 1 - GL_INTENSITY + * 2 - GL_ALPHA + * depending on the depth texture mode. + */ + GLuint depth_texture_mode : 2; + + /** + * If the sampler is used as a shadow sampler, + * this field is (texture_compare_func - GL_NEVER). + * [e.g. if compare function is GL_LEQUAL, this field is 3] + * + * Otherwise, this field is 0. + */ + GLuint texture_compare_func : 3; + } unit[16]; +}; + + +struct r300_fragment_program_node { + int tex_offset; /**< first tex instruction */ + int tex_end; /**< last tex instruction, relative to tex_offset */ + int alu_offset; /**< first ALU instruction */ + int alu_end; /**< last ALU instruction, relative to alu_offset */ + int flags; +}; + +/** + * Stores an R300 fragment program in its compiled-to-hardware form. + */ +struct r300_fragment_program_code { + struct { + int length; /**< total # of texture instructions used */ + GLuint inst[PFS_MAX_TEX_INST]; + } tex; + + struct { + int length; /**< total # of ALU instructions used */ + struct { + GLuint inst0; + GLuint inst1; + GLuint inst2; + GLuint inst3; + } inst[PFS_MAX_ALU_INST]; + } alu; + + struct r300_fragment_program_node node[4]; + int cur_node; + int first_node_has_tex; + + /** + * Remember which program register a given hardware constant + * belongs to. + */ + struct prog_src_register constant[PFS_NUM_CONST_REGS]; + int const_nr; + + int max_temp_idx; +}; + +/** + * Store everything about a fragment program that is needed + * to render with that program. + */ +struct r300_fragment_program { + struct gl_fragment_program mesa_program; + + GLboolean translated; + GLboolean error; + + struct r300_fragment_program_external_state state; + struct r300_fragment_program_code code; + + GLboolean WritesDepth; + GLuint optimization; +}; + +struct r500_pfs_compile_state; + +struct r500_fragment_program_external_state { + struct { + /** + * If the sampler is used as a shadow sampler, + * this field is: + * 0 - GL_LUMINANCE + * 1 - GL_INTENSITY + * 2 - GL_ALPHA + * depending on the depth texture mode. + */ + GLuint depth_texture_mode : 2; + + /** + * If the sampler is used as a shadow sampler, + * this field is (texture_compare_func - GL_NEVER). + * [e.g. if compare function is GL_LEQUAL, this field is 3] + * + * Otherwise, this field is 0. + */ + GLuint texture_compare_func : 3; + } unit[16]; +}; + +struct r500_fragment_program_code { + struct { + GLuint inst0; + GLuint inst1; + GLuint inst2; + GLuint inst3; + GLuint inst4; + GLuint inst5; + } inst[512]; + + int inst_offset; + int inst_end; + + /** + * Remember which program register a given hardware constant + * belongs to. + */ + struct prog_src_register constant[PFS_NUM_CONST_REGS]; + int const_nr; + + int max_temp_idx; +}; + +struct r500_fragment_program { + struct gl_fragment_program mesa_program; + + GLcontext *ctx; + GLboolean translated; + GLboolean error; + + struct r500_fragment_program_external_state state; + struct r500_fragment_program_code code; + + GLboolean writes_depth; + + GLuint optimization; +}; + +#define R300_MAX_AOS_ARRAYS 16 + +#define REG_COORDS 0 +#define REG_COLOR0 1 +#define REG_TEX0 2 + +struct r300_state { + struct r300_texture_state texture; + int sw_tcl_inputs[VERT_ATTRIB_MAX]; + struct r300_vertex_shader_state vertex_shader; + + + DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. + They are the same as tnl->render_inputs for fixed pipeline */ + +}; + +#define R300_FALLBACK_NONE 0 +#define R300_FALLBACK_TCL 1 +#define R300_FALLBACK_RAST 2 + +/* r300_swtcl.c + */ +struct r300_swtcl_info { + /* + * Offset of the 4UB color data within a hardware (swtcl) vertex. + */ + GLuint coloroffset; + + /** + * Offset of the 3UB specular color data within a hardware (swtcl) vertex. + */ + GLuint specoffset; + + struct vertex_attribute{ + GLuint attr; + GLubyte format; + GLubyte dst_loc; + GLuint swizzle; + GLubyte write_mask; + } vert_attrs[VERT_ATTRIB_MAX]; + + GLubyte vertex_attr_count; +}; + + +/** + * \brief R300 context structure. + */ +struct r300_context { + struct radeon_context radeon; /* parent class, must be first */ + + struct r300_hw_state hw; + + struct r300_state state; + struct gl_vertex_program *curr_vp; + struct r300_vertex_program *selected_vp; + + /* Vertex buffers + */ + GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; + GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; + + GLboolean disable_lowimpact_fallback; + + struct r300_swtcl_info swtcl; + GLboolean vap_flush_needed; +}; + +#define R300_CONTEXT(ctx) ((r300ContextPtr)(ctx->DriverCtx)) + +extern void r300DestroyContext(__DRIcontextPrivate * driContextPriv); +extern GLboolean r300CreateContext(const __GLcontextModes * glVisual, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate); + +extern void r300SelectVertexShader(r300ContextPtr r300); +extern void r300InitShaderFuncs(struct dd_function_table *functions); +extern int r300VertexProgUpdateParams(GLcontext * ctx, + struct r300_vertex_program_cont *vp, + float *dst); + +#define RADEON_D_CAPTURE 0 +#define RADEON_D_PLAYBACK 1 +#define RADEON_D_PLAYBACK_RAW 2 +#define RADEON_D_T 3 + +#define r300PackFloat32 radeonPackFloat32 +#define r300PackFloat24 radeonPackFloat24 + +#endif /* __R300_CONTEXT_H__ */ diff --git a/src/mesa/drivers/dri/r600/r600_emit.c b/src/mesa/drivers/dri/r600/r600_emit.c new file mode 100644 index 00000000000..4d16ad81060 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_emit.c @@ -0,0 +1,364 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Keith Whitwell <[email protected]> + */ + +#include "main/glheader.h" +#include "main/mtypes.h" +#include "main/colormac.h" +#include "main/imports.h" +#include "main/macros.h" +#include "main/image.h" + +#include "swrast_setup/swrast_setup.h" +#include "math/m_translate.h" +#include "tnl/tnl.h" +#include "tnl/t_context.h" + +#include "r600_context.h" +#include "r600_state.h" +#include "r600_emit.h" +#include "r600_ioctl.h" + + +#if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \ + SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \ + SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \ + SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \ + SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \ + SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE +#error Cannot change these! +#endif + +#define DEBUG_ALL DEBUG_VERTS + +#define DW_SIZE(x) ((inputs[tab[(x)]] << R300_DST_VEC_LOC_SHIFT) | \ + (attribptr[tab[(x)]]->size - 1) << R300_DATA_TYPE_0_SHIFT) + +GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, + int *inputs, GLint * tab, GLuint nr) +{ + GLuint i, dw; + + /* type, inputs, stop bit, size */ + for (i = 0; i < nr; i += 2) { + /* make sure input is valid, would lockup the gpu */ + assert(inputs[tab[i]] != -1); + dw = (R300_SIGNED | DW_SIZE(i)); + if (i + 1 == nr) { + dw |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT; + } else { + assert(inputs[tab[i + 1]] != -1); + dw |= (R300_SIGNED | + DW_SIZE(i + 1)) << R300_DATA_TYPE_1_SHIFT; + if (i + 2 == nr) { + dw |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT; + } + } + dst[i >> 1] = dw; + } + + return (nr + 1) >> 1; +} + +static GLuint r300VAPInputRoute1Swizzle(int swizzle[4]) +{ + return (swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | + (swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | + (swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | + (swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT); +} + +GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr) +{ + GLuint i, dw; + + for (i = 0; i < nr; i += 2) { + dw = (r300VAPInputRoute1Swizzle(swizzle[i]) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | + R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE0_SHIFT; + if (i + 1 < nr) { + dw |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | + R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT; + } + dst[i >> 1] = dw; + } + + return (nr + 1) >> 1; +} + +GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead) +{ + /* No idea what this value means. I have seen other values written to + * this register... */ + return 0x5555; +} + +GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLuint i, vic_1 = 0; + + if (InputsRead & (1 << VERT_ATTRIB_POS)) + vic_1 |= R300_INPUT_CNTL_POS; + + if (InputsRead & (1 << VERT_ATTRIB_NORMAL)) + vic_1 |= R300_INPUT_CNTL_NORMAL; + + if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) + vic_1 |= R300_INPUT_CNTL_COLOR; + + rmesa->state.texture.tc_count = 0; + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) { + rmesa->state.texture.tc_count++; + vic_1 |= R300_INPUT_CNTL_TC0 << i; + } + + return vic_1; +} + +GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten) +{ + GLuint ret = 0; + + if (OutputsWritten & (1 << VERT_RESULT_HPOS)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + + if (OutputsWritten & (1 << VERT_RESULT_COL0)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT; + + if (OutputsWritten & (1 << VERT_RESULT_COL1)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT; + + if (OutputsWritten & (1 << VERT_RESULT_BFC0) + || OutputsWritten & (1 << VERT_RESULT_BFC1)) + ret |= + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT | + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT | + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; + + if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + + return ret; +} + +GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten) +{ + GLuint i, ret = 0, first_free_texcoord = 0; + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) { + ret |= (4 << (3 * i)); + ++first_free_texcoord; + } + } + + if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { + if (first_free_texcoord > 8) { + fprintf(stderr, "\tout of free texcoords to write fog coord\n"); + _mesa_exit(-1); + } + ret |= 4 << (3 * first_free_texcoord); + } + + return ret; +} + +/* Emit vertex data to GART memory + * Route inputs to the vertex processor + * This function should never return R300_FALLBACK_TCL when using software tcl. + */ +int r300EmitArrays(GLcontext * ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + GLuint nr; + GLuint count = vb->Count; + GLuint i; + GLuint InputsRead = 0, OutputsWritten = 0; + int *inputs = NULL; + int vir_inputs[VERT_ATTRIB_MAX]; + GLint tab[VERT_ATTRIB_MAX]; + int swizzle[VERT_ATTRIB_MAX][4]; + struct r300_vertex_program *prog = + (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); + + if (hw_tcl_on) { + inputs = prog->inputs; + InputsRead = prog->key.InputsRead; + OutputsWritten = prog->key.OutputsWritten; + } else { + inputs = rmesa->state.sw_tcl_inputs; + + DECLARE_RENDERINPUTS(render_inputs_bitset); + RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset); + + vb->AttribPtr[VERT_ATTRIB_POS] = vb->ClipPtr; + + assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)); + assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0); + + if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) { + InputsRead |= 1 << VERT_ATTRIB_POS; + OutputsWritten |= 1 << VERT_RESULT_HPOS; + } + + if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)) { + InputsRead |= 1 << VERT_ATTRIB_COLOR0; + OutputsWritten |= 1 << VERT_RESULT_COL0; + } + + if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR1)) { + InputsRead |= 1 << VERT_ATTRIB_COLOR1; + OutputsWritten |= 1 << VERT_RESULT_COL1; + } + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_TEX(i))) { + InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); + OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); + } + } + + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) { + inputs[i] = nr++; + } else { + inputs[i] = -1; + } + } + + /* Fixed, apply to vir0 only */ + memcpy(vir_inputs, inputs, VERT_ATTRIB_MAX * sizeof(int)); + inputs = vir_inputs; + if (InputsRead & VERT_ATTRIB_POS) + inputs[VERT_ATTRIB_POS] = 0; + if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) + inputs[VERT_ATTRIB_COLOR0] = 2; + if (InputsRead & (1 << VERT_ATTRIB_COLOR1)) + inputs[VERT_ATTRIB_COLOR1] = 3; + for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) + if (InputsRead & (1 << i)) + inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); + + RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset); + } + + assert(InputsRead); + assert(OutputsWritten); + + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) { + tab[nr++] = i; + } + } + + if (nr > R300_MAX_AOS_ARRAYS) { + return R300_FALLBACK_TCL; + } + + for (i = 0; i < nr; i++) { + int ci; + + swizzle[i][0] = SWIZZLE_ZERO; + swizzle[i][1] = SWIZZLE_ZERO; + swizzle[i][2] = SWIZZLE_ZERO; + swizzle[i][3] = SWIZZLE_ONE; + + for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) { + swizzle[i][ci] = ci; + } + rcommon_emit_vector(ctx, &rmesa->radeon.tcl.aos[i], + vb->AttribPtr[tab[i]]->data, + vb->AttribPtr[tab[i]]->size, + vb->AttribPtr[tab[i]]->stride, count); + } + + /* Setup INPUT_ROUTE. */ + if (rmesa->radeon.radeonScreen->kernel_mm) { + R300_STATECHANGE(rmesa, vir[0]); + rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF; + rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF; + rmesa->hw.vir[0].cmd[0] |= + (r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], + vb->AttribPtr, inputs, tab, nr) & 0x3FFF) << 16; + R300_STATECHANGE(rmesa, vir[1]); + rmesa->hw.vir[1].cmd[0] |= + (r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, + nr) & 0x3FFF) << 16; + } else { + R300_STATECHANGE(rmesa, vir[0]); + ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = + r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], + vb->AttribPtr, inputs, tab, nr); + R300_STATECHANGE(rmesa, vir[1]); + ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = + r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, + nr); + } + + /* Setup INPUT_CNTL. */ + R300_STATECHANGE(rmesa, vic); + rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); + rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); + + /* Setup OUTPUT_VTX_FMT. */ + R300_STATECHANGE(rmesa, vof); + rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = + r300VAPOutputCntl0(ctx, OutputsWritten); + rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = + r300VAPOutputCntl1(ctx, OutputsWritten); + + rmesa->radeon.tcl.aos_count = nr; + + return R300_FALLBACK_NONE; +} + +void r300EmitCacheFlush(r300ContextPtr rmesa) +{ + BATCH_LOCALS(&rmesa->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT, + R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | + R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + END_BATCH(); + COMMIT_BATCH(); +} diff --git a/src/mesa/drivers/dri/r600/r600_emit.h b/src/mesa/drivers/dri/r600/r600_emit.h new file mode 100644 index 00000000000..14c17c0c6d5 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_emit.h @@ -0,0 +1,234 @@ +/* + * Copyright (C) 2005 Vladimir Dergachev. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Vladimir Dergachev <[email protected]> + * Nicolai Haehnle <[email protected]> + * Aapo Tahkola <[email protected]> + * Ben Skeggs <[email protected]> + * Jerome Glisse <[email protected]> + */ + +/* This files defines functions for accessing R300 hardware. + */ +#ifndef __R600_EMIT_H__ +#define __R600_EMIT_H__ + +#include "main/glheader.h" +#include "r600_context.h" +#include "r600_cmdbuf.h" +#include "radeon_reg.h" + +static INLINE uint32_t cmdpacket0(struct radeon_screen *rscrn, + int reg, int count) +{ + if (!rscrn->kernel_mm) { + drm_r300_cmd_header_t cmd; + + cmd.u = 0; + cmd.packet0.cmd_type = R300_CMD_PACKET0; + cmd.packet0.count = count; + cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8; + cmd.packet0.reglo = ((unsigned int)reg & 0x00FF); + + return cmd.u; + } + if (count) { + return CP_PACKET0(reg, count - 1); + } + return CP_PACKET2; +} + +static INLINE uint32_t cmdvpu(struct radeon_screen *rscrn, int addr, int count) +{ + drm_r300_cmd_header_t cmd; + + cmd.u = 0; + cmd.vpu.cmd_type = R300_CMD_VPU; + cmd.vpu.count = count; + cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8; + cmd.vpu.adrlo = ((unsigned int)addr & 0x00FF); + + return cmd.u; +} + +static INLINE uint32_t cmdr500fp(struct radeon_screen *rscrn, + int addr, int count, int type, int clamp) +{ + drm_r300_cmd_header_t cmd; + + cmd.u = 0; + cmd.r500fp.cmd_type = R300_CMD_R500FP; + cmd.r500fp.count = count; + cmd.r500fp.adrhi_flags = ((unsigned int)addr & 0x100) >> 8; + cmd.r500fp.adrhi_flags |= type ? R500FP_CONSTANT_TYPE : 0; + cmd.r500fp.adrhi_flags |= clamp ? R500FP_CONSTANT_CLAMP : 0; + cmd.r500fp.adrlo = ((unsigned int)addr & 0x00FF); + + return cmd.u; +} + +static INLINE uint32_t cmdpacket3(struct radeon_screen *rscrn, int packet) +{ + drm_r300_cmd_header_t cmd; + + cmd.u = 0; + cmd.packet3.cmd_type = R300_CMD_PACKET3; + cmd.packet3.packet = packet; + + return cmd.u; +} + +static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn, + unsigned short count) +{ + drm_r300_cmd_header_t cmd; + + cmd.u = 0; + + cmd.delay.cmd_type = R300_CMD_CP_DELAY; + cmd.delay.count = count; + + return cmd.u; +} + +static INLINE uint32_t cmdwait(struct radeon_screen *rscrn, + unsigned char flags) +{ + drm_r300_cmd_header_t cmd; + + cmd.u = 0; + cmd.wait.cmd_type = R300_CMD_WAIT; + cmd.wait.flags = flags; + + return cmd.u; +} + +static INLINE uint32_t cmdpacify(struct radeon_screen *rscrn) +{ + drm_r300_cmd_header_t cmd; + + cmd.u = 0; + cmd.header.cmd_type = R300_CMD_END3D; + + return cmd.u; +} + +/** + * Write the header of a packet3 to the command buffer. + * Outputs 2 dwords and expects (num_extra+1) additional dwords afterwards. + */ +#define OUT_BATCH_PACKET3(packet, num_extra) do {\ + if (!b_l_rmesa->radeonScreen->kernel_mm) { \ + OUT_BATCH(cmdpacket3(b_l_rmesa->radeonScreen,\ + R300_CMD_PACKET3_RAW)); \ + } else b_l_rmesa->cmdbuf.cs->section_cdw++;\ + OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ + } while(0) + +/** + * Must be sent to switch to 2d commands + */ +void static INLINE end_3d(radeonContextPtr radeon) +{ + BATCH_LOCALS(radeon); + + if (!radeon->radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(1); + OUT_BATCH(cmdpacify(radeon->radeonScreen)); + END_BATCH(); + } +} + +void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count) +{ + BATCH_LOCALS(&rmesa->radeon); + + if (!rmesa->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(1); + OUT_BATCH(cmdcpdelay(rmesa->radeon.radeonScreen, count)); + END_BATCH(); + } +} + +void static INLINE cp_wait(radeonContextPtr radeon, unsigned char flags) +{ + BATCH_LOCALS(radeon); + uint32_t wait_until; + + if (!radeon->radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(1); + OUT_BATCH(cmdwait(radeon->radeonScreen, flags)); + END_BATCH(); + } else { + switch(flags) { + case R300_WAIT_2D: + wait_until = (1 << 14); + break; + case R300_WAIT_3D: + wait_until = (1 << 15); + break; + case R300_NEW_WAIT_2D_3D: + wait_until = (1 << 14) | (1 << 15); + break; + case R300_NEW_WAIT_2D_2D_CLEAN: + wait_until = (1 << 14) | (1 << 16) | (1 << 18); + break; + case R300_NEW_WAIT_3D_3D_CLEAN: + wait_until = (1 << 15) | (1 << 17) | (1 << 18); + break; + case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN: + wait_until = (1 << 14) | (1 << 16) | (1 << 18); + wait_until |= (1 << 15) | (1 << 17) | (1 << 18); + break; + default: + return; + } + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); + OUT_BATCH(wait_until); + END_BATCH(); + } +} + +extern int r300EmitArrays(GLcontext * ctx); + +extern int r300PrimitiveType(r300ContextPtr rmesa, int prim); +extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim); + +extern void r300EmitCacheFlush(r300ContextPtr rmesa); + +extern GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, + int *inputs, GLint * tab, GLuint nr); +extern GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr); +extern GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead); +extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead); +extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten); +extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten); + +#endif diff --git a/src/mesa/drivers/dri/r600/r600_fragprog.c b/src/mesa/drivers/dri/r600/r600_fragprog.c new file mode 100644 index 00000000000..4d2dddc32a0 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_fragprog.c @@ -0,0 +1,699 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * Fragment program compiler. Perform transformations on the intermediate + * representation until the program is in a form where we can translate + * it more or less directly into machine-readable form. + * + * \author Ben Skeggs <[email protected]> + * \author Jerome Glisse <[email protected]> + */ + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" +#include "shader/prog_instruction.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + +#include "r600_context.h" +#include "r600_fragprog.h" +#include "r600_fragprog_swizzle.h" +#include "r600_state.h" + +#include "radeon_nqssadce.h" +#include "radeon_program_alu.h" + + +static void reset_srcreg(struct prog_src_register* reg) +{ + _mesa_bzero(reg, sizeof(*reg)); + reg->Swizzle = SWIZZLE_NOOP; +} + +static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) +{ + gl_state_index fail_value_tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0 + }; + struct prog_src_register reg = { 0, }; + + fail_value_tokens[2] = tmu; + reg.File = PROGRAM_STATE_VAR; + reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens); + reg.Swizzle = SWIZZLE_WWWW; + return reg; +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following way: + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + * + * \todo If/when r5xx uses the radeon_program architecture, this can probably + * be reused. + */ +static GLboolean transform_TEX( + struct radeon_transform_context *t, + struct prog_instruction* orig_inst, void* data) +{ + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + struct prog_instruction inst = *orig_inst; + struct prog_instruction* tgt; + GLboolean destredirect = GL_FALSE; + + if (inst.Opcode != OPCODE_TEX && + inst.Opcode != OPCODE_TXB && + inst.Opcode != OPCODE_TXP && + inst.Opcode != OPCODE_KIL) + return GL_FALSE; + + if (inst.Opcode != OPCODE_KIL && + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; + + if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { + tgt = radeonAppendInstructions(t->Program, 1); + + tgt->Opcode = OPCODE_MOV; + tgt->DstReg = inst.DstReg; + if (comparefunc == GL_ALWAYS) { + tgt->SrcReg[0].File = PROGRAM_BUILTIN; + tgt->SrcReg[0].Swizzle = SWIZZLE_1111; + } else { + tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit); + } + return GL_TRUE; + } + + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = radeonFindFreeTemporary(t); + inst.DstReg.WriteMask = WRITEMASK_XYZW; + } + + + /* Hardware uses [0..1]x[0..1] range for rectangle textures + * instead of [0..Width]x[0..Height]. + * Add a scaling instruction. + */ + if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) { + gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, + 0 + }; + + int tempreg = radeonFindFreeTemporary(t); + int factor_index; + + tokens[2] = inst.TexSrcUnit; + factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens); + + tgt = radeonAppendInstructions(t->Program, 1); + + tgt->Opcode = OPCODE_MUL; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tempreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + tgt->SrcReg[1].File = PROGRAM_STATE_VAR; + tgt->SrcReg[1].Index = factor_index; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tempreg; + } + + if (inst.Opcode != OPCODE_KIL) { + if (inst.DstReg.File != PROGRAM_TEMPORARY || + inst.DstReg.WriteMask != WRITEMASK_XYZW) { + int tempreg = radeonFindFreeTemporary(t); + + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = tempreg; + inst.DstReg.WriteMask = WRITEMASK_XYZW; + destredirect = GL_TRUE; + } + } + + if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { + int tmpreg = radeonFindFreeTemporary(t); + tgt = radeonAppendInstructions(t->Program, 1); + tgt->Opcode = OPCODE_MOV; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tmpreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tmpreg; + } + + tgt = radeonAppendInstructions(t->Program, 1); + _mesa_copy_instructions(tgt, &inst, 1); + + if (inst.Opcode != OPCODE_KIL && + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; + GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; + int rcptemp = radeonFindFreeTemporary(t); + int pass, fail; + + tgt = radeonAppendInstructions(t->Program, 3); + + tgt[0].Opcode = OPCODE_RCP; + tgt[0].DstReg.File = PROGRAM_TEMPORARY; + tgt[0].DstReg.Index = rcptemp; + tgt[0].DstReg.WriteMask = WRITEMASK_W; + tgt[0].SrcReg[0] = inst.SrcReg[0]; + tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; + + tgt[1].Opcode = OPCODE_MAD; + tgt[1].DstReg = inst.DstReg; + tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask; + tgt[1].SrcReg[0] = inst.SrcReg[0]; + tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ; + tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[1].Index = rcptemp; + tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW; + tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[2].Index = inst.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW; + else + tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW; + + tgt[2].Opcode = OPCODE_CMP; + tgt[2].DstReg = orig_inst->DstReg; + tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY; + tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index; + + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN; + tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111; + tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit); + } else if (destredirect) { + tgt = radeonAppendInstructions(t->Program, 1); + + tgt->Opcode = OPCODE_MOV; + tgt->DstReg = orig_inst->DstReg; + tgt->SrcReg[0].File = PROGRAM_TEMPORARY; + tgt->SrcReg[0].Index = inst.DstReg.Index; + } + + return GL_TRUE; +} + + +static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp) +{ + struct gl_fragment_program *mp = &fp->mesa_program; + + /* Ask Mesa nicely to fill in ParameterValues for us */ + if (mp->Base.Parameters) + _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters); +} + + +/** + * Transform the program to support fragment.position. + * + * Introduce a small fragment at the start of the program that will be + * the only code that directly reads the FRAG_ATTRIB_WPOS input. + * All other code pieces that reference that input will be rewritten + * to read from a newly allocated temporary. + * + * \todo if/when r5xx supports the radeon_program architecture, this is a + * likely candidate for code sharing. + */ +static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) +{ + GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead; + + if (!(InputsRead & FRAG_BIT_WPOS)) + return; + + static gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 + }; + struct prog_instruction *fpi; + GLuint window_index; + int i = 0; + GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); + + _mesa_insert_instructions(compiler->program, 0, 3); + fpi = compiler->program->Instructions; + + /* perspective divide */ + fpi[i].Opcode = OPCODE_RCP; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_W; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; + i++; + + fpi[i].Opcode = OPCODE_MUL; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[1].Index = tempregi; + fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; + i++; + + /* viewport transformation */ + window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); + + fpi[i].Opcode = OPCODE_MAD; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[0].Index = tempregi; + fpi[i].SrcReg[0].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[1].Index = window_index; + fpi[i].SrcReg[1].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[2].Index = window_index; + fpi[i].SrcReg[2].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + i++; + + for (; i < compiler->program->NumInstructions; ++i) { + int reg; + for (reg = 0; reg < 3; reg++) { + if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && + fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { + fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[reg].Index = tempregi; + } + } + } +} + + +static void nqssadce_init(struct nqssadce_state* s) +{ + s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; + s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; +} + + +static GLuint build_dtm(GLuint depthmode) +{ + switch(depthmode) { + default: + case GL_LUMINANCE: return 0; + case GL_INTENSITY: return 1; + case GL_ALPHA: return 2; + } +} + +static GLuint build_func(GLuint comparefunc) +{ + return comparefunc - GL_NEVER; +} + + +/** + * Collect all external state that is relevant for compiling the given + * fragment program. + */ +static void build_state( + r300ContextPtr r300, + struct r300_fragment_program *fp, + struct r300_fragment_program_external_state *state) +{ + int unit; + + _mesa_bzero(state, sizeof(*state)); + + for(unit = 0; unit < 16; ++unit) { + if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) { + struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; + + state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); + state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); + } + } +} + + +void r300TranslateFragmentShader(r300ContextPtr r300, + struct r300_fragment_program *fp) +{ + struct r300_fragment_program_external_state state; + + build_state(r300, fp, &state); + if (_mesa_memcmp(&fp->state, &state, sizeof(state))) { + /* TODO: cache compiled programs */ + fp->translated = GL_FALSE; + _mesa_memcpy(&fp->state, &state, sizeof(state)); + } + + if (!fp->translated) { + struct r300_fragment_program_compiler compiler; + + compiler.r300 = r300; + compiler.fp = fp; + compiler.code = &fp->code; + compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base); + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Fragment Program: Initial program:\n"); + _mesa_print_program(compiler.program); + } + + insert_WPOS_trailer(&compiler); + + struct radeon_program_transformation transformations[] = { + { &transform_TEX, &compiler }, + { &radeonTransformALU, 0 }, + { &radeonTransformTrigSimple, 0 } + }; + radeonLocalTransform( + r300->radeon.glCtx, + compiler.program, + 3, transformations); + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Fragment Program: After native rewrite:\n"); + _mesa_print_program(compiler.program); + } + + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r300FPIsNativeSwizzle, + .BuildSwizzle = &r300FPBuildSwizzle, + .RewriteDepthOut = GL_TRUE + }; + radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce); + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Compiler: after NqSSA-DCE:\n"); + _mesa_print_program(compiler.program); + } + + if (!r300FragmentProgramEmit(&compiler)) + fp->error = GL_TRUE; + + /* Subtle: Rescue any parameters that have been added during transformations */ + _mesa_free_parameter_list(fp->mesa_program.Base.Parameters); + fp->mesa_program.Base.Parameters = compiler.program->Parameters; + compiler.program->Parameters = 0; + + _mesa_reference_program(r300->radeon.glCtx, &compiler.program, NULL); + + if (!fp->error) + fp->translated = GL_TRUE; + if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) + r300FragmentProgramDump(fp, &fp->code); + r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); + } + + update_params(r300, fp); +} + +/* just some random things... */ +void r300FragmentProgramDump( + struct r300_fragment_program *fp, + struct r300_fragment_program_code *code) +{ + int n, i, j; + static int pc = 0; + + fprintf(stderr, "pc=%d*************************************\n", pc++); + + fprintf(stderr, "Hardware program\n"); + fprintf(stderr, "----------------\n"); + + for (n = 0; n < (code->cur_node + 1); n++) { + fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " + "alu_end: %d, tex_end: %d, flags: %08x\n", n, + code->node[n].alu_offset, + code->node[n].tex_offset, + code->node[n].alu_end, code->node[n].tex_end, + code->node[n].flags); + + if (n > 0 || code->first_node_has_tex) { + fprintf(stderr, " TEX:\n"); + for (i = code->node[n].tex_offset; + i <= code->node[n].tex_offset + code->node[n].tex_end; + ++i) { + const char *instr; + + switch ((code->tex. + inst[i] >> R300_TEX_INST_SHIFT) & + 15) { + case R300_TEX_OP_LD: + instr = "TEX"; + break; + case R300_TEX_OP_KIL: + instr = "KIL"; + break; + case R300_TEX_OP_TXP: + instr = "TXP"; + break; + case R300_TEX_OP_TXB: + instr = "TXB"; + break; + default: + instr = "UNKNOWN"; + } + + fprintf(stderr, + " %s t%i, %c%i, texture[%i] (%08x)\n", + instr, + (code->tex. + inst[i] >> R300_DST_ADDR_SHIFT) & 31, + 't', + (code->tex. + inst[i] >> R300_SRC_ADDR_SHIFT) & 31, + (code->tex. + inst[i] & R300_TEX_ID_MASK) >> + R300_TEX_ID_SHIFT, + code->tex.inst[i]); + } + } + + for (i = code->node[n].alu_offset; + i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) { + char srcc[3][10], dstc[20]; + char srca[3][10], dsta[20]; + char argc[3][20]; + char arga[3][20]; + char flags[5], tmp[10]; + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].inst1 >> (j * 6); + int rega = code->alu.inst[i].inst3 >> (j * 6); + + sprintf(srcc[j], "%c%i", + (regc & 32) ? 'c' : 't', regc & 31); + sprintf(srca[j], "%c%i", + (rega & 32) ? 'c' : 't', rega & 31); + } + + dstc[0] = 0; + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_REG_X) ? "x" : "", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_REG_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(dstc, "t%i.%s ", + (code->alu.inst[i]. + inst1 >> R300_ALU_DSTC_SHIFT) & 31, + flags); + } + sprintf(flags, "%s%s%s", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", + (code->alu.inst[i]. + inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); + if (flags[0] != 0) { + sprintf(tmp, "o%i.%s", + (code->alu.inst[i]. + inst1 >> R300_ALU_DSTC_SHIFT) & 31, + flags); + strcat(dstc, tmp); + } + + dsta[0] = 0; + if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) { + sprintf(dsta, "t%i.w ", + (code->alu.inst[i]. + inst3 >> R300_ALU_DSTA_SHIFT) & 31); + } + if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) { + sprintf(tmp, "o%i.w ", + (code->alu.inst[i]. + inst3 >> R300_ALU_DSTA_SHIFT) & 31); + strcat(dsta, tmp); + } + if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) { + strcat(dsta, "Z"); + } + + fprintf(stderr, + "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" + " w: %3s %3s %3s -> %-20s (%08x)\n", i, + srcc[0], srcc[1], srcc[2], dstc, + code->alu.inst[i].inst1, srca[0], srca[1], + srca[2], dsta, code->alu.inst[i].inst3); + + for (j = 0; j < 3; ++j) { + int regc = code->alu.inst[i].inst0 >> (j * 7); + int rega = code->alu.inst[i].inst2 >> (j * 7); + int d; + char buf[20]; + + d = regc & 31; + if (d < 12) { + switch (d % 4) { + case R300_ALU_ARGC_SRC0C_XYZ: + sprintf(buf, "%s.xyz", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_XXX: + sprintf(buf, "%s.xxx", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_YYY: + sprintf(buf, "%s.yyy", + srcc[d / 4]); + break; + case R300_ALU_ARGC_SRC0C_ZZZ: + sprintf(buf, "%s.zzz", + srcc[d / 4]); + break; + } + } else if (d < 15) { + sprintf(buf, "%s.www", srca[d - 12]); + } else if (d == 20) { + sprintf(buf, "0.0"); + } else if (d == 21) { + sprintf(buf, "1.0"); + } else if (d == 22) { + sprintf(buf, "0.5"); + } else if (d >= 23 && d < 32) { + d -= 23; + switch (d / 3) { + case 0: + sprintf(buf, "%s.yzx", + srcc[d % 3]); + break; + case 1: + sprintf(buf, "%s.zxy", + srcc[d % 3]); + break; + case 2: + sprintf(buf, "%s.Wzy", + srcc[d % 3]); + break; + } + } else { + sprintf(buf, "%i", d); + } + + sprintf(argc[j], "%s%s%s%s", + (regc & 32) ? "-" : "", + (regc & 64) ? "|" : "", + buf, (regc & 64) ? "|" : ""); + + d = rega & 31; + if (d < 9) { + sprintf(buf, "%s.%c", srcc[d / 3], + 'x' + (char)(d % 3)); + } else if (d < 12) { + sprintf(buf, "%s.w", srca[d - 9]); + } else if (d == 16) { + sprintf(buf, "0.0"); + } else if (d == 17) { + sprintf(buf, "1.0"); + } else if (d == 18) { + sprintf(buf, "0.5"); + } else { + sprintf(buf, "%i", d); + } + + sprintf(arga[j], "%s%s%s%s", + (rega & 32) ? "-" : "", + (rega & 64) ? "|" : "", + buf, (rega & 64) ? "|" : ""); + } + + fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" + " w: %8s %8s %8s op: %08x\n", + argc[0], argc[1], argc[2], + code->alu.inst[i].inst0, arga[0], arga[1], + arga[2], code->alu.inst[i].inst2); + } + } +} diff --git a/src/mesa/drivers/dri/r600/r600_fragprog.h b/src/mesa/drivers/dri/r600/r600_fragprog.h new file mode 100644 index 00000000000..38139850a31 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_fragprog.h @@ -0,0 +1,132 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs <[email protected]> + * Jerome Glisse <[email protected]> + */ +#ifndef __R600_FRAGPROG_H_ +#define __R600_FRAGPROG_H_ + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" +#include "shader/program.h" +#include "shader/prog_instruction.h" + +#include "r600_context.h" +#include "radeon_program.h" + +#define DRI_CONF_FP_OPTIMIZATION_SPEED 0 +#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 + +#if 1 + +/** + * Fragment program helper macros + */ + +/* Produce unshifted source selectors */ +#define FP_TMP(idx) (idx) +#define FP_CONST(idx) ((idx) | (1 << 5)) + +/* Produce source/dest selector dword */ +#define FP_SELC_MASK_NO 0 +#define FP_SELC_MASK_X 1 +#define FP_SELC_MASK_Y 2 +#define FP_SELC_MASK_XY 3 +#define FP_SELC_MASK_Z 4 +#define FP_SELC_MASK_XZ 5 +#define FP_SELC_MASK_YZ 6 +#define FP_SELC_MASK_XYZ 7 + +#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \ + (((destidx) << R300_ALU_DSTC_SHIFT) | \ + (FP_SELC_MASK_##regmask << 23) | \ + (FP_SELC_MASK_##outmask << 26) | \ + ((src0) << R300_ALU_SRC0C_SHIFT) | \ + ((src1) << R300_ALU_SRC1C_SHIFT) | \ + ((src2) << R300_ALU_SRC2C_SHIFT)) + +#define FP_SELA_MASK_NO 0 +#define FP_SELA_MASK_W 1 + +#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \ + (((destidx) << R300_ALU_DSTA_SHIFT) | \ + (FP_SELA_MASK_##regmask << 23) | \ + (FP_SELA_MASK_##outmask << 24) | \ + ((src0) << R300_ALU_SRC0A_SHIFT) | \ + ((src1) << R300_ALU_SRC1A_SHIFT) | \ + ((src2) << R300_ALU_SRC2A_SHIFT)) + +/* Produce unshifted argument selectors */ +#define FP_ARGC(source) R300_ALU_ARGC_##source +#define FP_ARGA(source) R300_ALU_ARGA_##source +#define FP_ABS(arg) ((arg) | (1 << 6)) +#define FP_NEG(arg) ((arg) ^ (1 << 5)) + +/* Produce instruction dword */ +#define FP_INSTRC(opcode,arg0,arg1,arg2) \ + (R300_ALU_OUTC_##opcode | \ + ((arg0) << R300_ALU_ARG0C_SHIFT) | \ + ((arg1) << R300_ALU_ARG1C_SHIFT) | \ + ((arg2) << R300_ALU_ARG2C_SHIFT)) + +#define FP_INSTRA(opcode,arg0,arg1,arg2) \ + (R300_ALU_OUTA_##opcode | \ + ((arg0) << R300_ALU_ARG0A_SHIFT) | \ + ((arg1) << R300_ALU_ARG1A_SHIFT) | \ + ((arg2) << R300_ALU_ARG2A_SHIFT)) + +#endif + +struct r300_fragment_program; + +extern void r300TranslateFragmentShader(r300ContextPtr r300, + struct r300_fragment_program *fp); + + +/** + * Used internally by the r300 fragment program code to store compile-time + * only data. + */ +struct r300_fragment_program_compiler { + r300ContextPtr r300; + struct r300_fragment_program *fp; + struct r300_fragment_program_code *code; + struct gl_program *program; +}; + +extern GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler); + + +extern void r300FragmentProgramDump( + struct r300_fragment_program *fp, + struct r300_fragment_program_code *code); + +#endif diff --git a/src/mesa/drivers/dri/r600/r600_fragprog_emit.c b/src/mesa/drivers/dri/r600/r600_fragprog_emit.c new file mode 100644 index 00000000000..f03559b6cb3 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_fragprog_emit.c @@ -0,0 +1,344 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * Emit the r300_fragment_program_code that can be understood by the hardware. + * Input is a pre-transformed radeon_program. + * + * \author Ben Skeggs <[email protected]> + * + * \author Jerome Glisse <[email protected]> + * + * \todo FogOption + */ + +#include "r600_fragprog.h" + +#include "radeon_program_pair.h" +#include "r600_fragprog_swizzle.h" +#include "r600_reg.h" + + +#define PROG_CODE \ + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ + struct r300_fragment_program_code *code = c->code + +#define error(fmt, args...) do { \ + fprintf(stderr, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + + +static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwindex) +{ + PROG_CODE; + + for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) { + if (code->constant[*hwindex].File == file && + code->constant[*hwindex].Index == index) + break; + } + + if (*hwindex >= code->const_nr) { + if (*hwindex >= PFS_NUM_CONST_REGS) { + error("Out of hw constants!\n"); + return GL_FALSE; + } + + code->const_nr++; + code->constant[*hwindex].File = file; + code->constant[*hwindex].Index = index; + } + + return GL_TRUE; +} + + +/** + * Mark a temporary register as used. + */ +static void use_temporary(struct r300_fragment_program_code *code, GLuint index) +{ + if (index > code->max_temp_idx) + code->max_temp_idx = index; +} + + +static GLuint translate_rgb_opcode(GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R300_ALU_OUTC_CMP; + case OPCODE_DP3: return R300_ALU_OUTC_DP3; + case OPCODE_DP4: return R300_ALU_OUTC_DP4; + case OPCODE_FRC: return R300_ALU_OUTC_FRC; + default: + error("translate_rgb_opcode(%i): Unknown opcode", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R300_ALU_OUTC_MAD; + case OPCODE_MAX: return R300_ALU_OUTC_MAX; + case OPCODE_MIN: return R300_ALU_OUTC_MIN; + case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; + } +} + +static GLuint translate_alpha_opcode(GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R300_ALU_OUTA_CMP; + case OPCODE_DP3: return R300_ALU_OUTA_DP4; + case OPCODE_DP4: return R300_ALU_OUTA_DP4; + case OPCODE_EX2: return R300_ALU_OUTA_EX2; + case OPCODE_FRC: return R300_ALU_OUTA_FRC; + case OPCODE_LG2: return R300_ALU_OUTA_LG2; + default: + error("translate_rgb_opcode(%i): Unknown opcode", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R300_ALU_OUTA_MAD; + case OPCODE_MAX: return R300_ALU_OUTA_MAX; + case OPCODE_MIN: return R300_ALU_OUTA_MIN; + case OPCODE_RCP: return R300_ALU_OUTA_RCP; + case OPCODE_RSQ: return R300_ALU_OUTA_RSQ; + } +} + +/** + * Emit one paired ALU instruction. + */ +static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) +{ + PROG_CODE; + + if (code->alu.length >= PFS_MAX_ALU_INST) { + error("Too many ALU instructions"); + return GL_FALSE; + } + + int ip = code->alu.length++; + int j; + code->node[code->cur_node].alu_end++; + + code->alu.inst[ip].inst0 = translate_rgb_opcode(inst->RGB.Opcode); + code->alu.inst[ip].inst2 = translate_alpha_opcode(inst->Alpha.Opcode); + + for(j = 0; j < 3; ++j) { + GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5); + if (!inst->RGB.Src[j].Constant) + use_temporary(code, inst->RGB.Src[j].Index); + code->alu.inst[ip].inst1 |= src << (6*j); + + src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5); + if (!inst->Alpha.Src[j].Constant) + use_temporary(code, inst->Alpha.Src[j].Index); + code->alu.inst[ip].inst3 |= src << (6*j); + + GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); + arg |= inst->RGB.Arg[j].Abs << 6; + arg |= inst->RGB.Arg[j].Negate << 5; + code->alu.inst[ip].inst0 |= arg << (7*j); + + arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); + arg |= inst->Alpha.Arg[j].Abs << 6; + arg |= inst->Alpha.Arg[j].Negate << 5; + code->alu.inst[ip].inst2 |= arg << (7*j); + } + + if (inst->RGB.Saturate) + code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP; + if (inst->Alpha.Saturate) + code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP; + + if (inst->RGB.WriteMask) { + use_temporary(code, inst->RGB.DestIndex); + code->alu.inst[ip].inst1 |= + (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) | + (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); + } + if (inst->RGB.OutputWriteMask) { + code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT); + code->node[code->cur_node].flags |= R300_RGBA_OUT; + } + + if (inst->Alpha.WriteMask) { + use_temporary(code, inst->Alpha.DestIndex); + code->alu.inst[ip].inst3 |= + (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) | + R300_ALU_DSTA_REG; + } + if (inst->Alpha.OutputWriteMask) { + code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT; + code->node[code->cur_node].flags |= R300_RGBA_OUT; + } + if (inst->Alpha.DepthWriteMask) { + code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH; + code->node[code->cur_node].flags |= R300_W_OUT; + c->fp->WritesDepth = GL_TRUE; + } + + return GL_TRUE; +} + + +/** + * Finish the current node without advancing to the next one. + */ +static GLboolean finish_node(struct r300_fragment_program_compiler *c) +{ + struct r300_fragment_program_code *code = c->code; + struct r300_fragment_program_node *node = &code->node[code->cur_node]; + + if (node->alu_end < 0) { + /* Generate a single NOP for this node */ + struct radeon_pair_instruction inst; + _mesa_bzero(&inst, sizeof(inst)); + if (!emit_alu(c, &inst)) + return GL_FALSE; + } + + if (node->tex_end < 0) { + if (code->cur_node == 0) { + node->tex_end = 0; + } else { + error("Node %i has no TEX instructions", code->cur_node); + return GL_FALSE; + } + } else { + if (code->cur_node == 0) + code->first_node_has_tex = 1; + } + + return GL_TRUE; +} + + +/** + * Begin a block of texture instructions. + * Create the necessary indirection. + */ +static GLboolean begin_tex(void* data) +{ + PROG_CODE; + + if (code->cur_node == 0) { + if (code->node[0].alu_end < 0 && + code->node[0].tex_end < 0) + return GL_TRUE; + } + + if (code->cur_node == 3) { + error("Too many texture indirections"); + return GL_FALSE; + } + + if (!finish_node(c)) + return GL_FALSE; + + struct r300_fragment_program_node *node = &code->node[++code->cur_node]; + node->alu_offset = code->alu.length; + node->alu_end = -1; + node->tex_offset = code->tex.length; + node->tex_end = -1; + return GL_TRUE; +} + + +static GLboolean emit_tex(void* data, struct prog_instruction* inst) +{ + PROG_CODE; + + if (code->tex.length >= PFS_MAX_TEX_INST) { + error("Too many TEX instructions"); + return GL_FALSE; + } + + GLuint unit = inst->TexSrcUnit; + GLuint dest = inst->DstReg.Index; + GLuint opcode; + + switch(inst->Opcode) { + case OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; + case OPCODE_TEX: opcode = R300_TEX_OP_LD; break; + case OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; + case OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; + default: + error("Unknown texture opcode %i", inst->Opcode); + return GL_FALSE; + } + + if (inst->Opcode == OPCODE_KIL) { + unit = 0; + dest = 0; + } else { + use_temporary(code, dest); + } + + use_temporary(code, inst->SrcReg[0].Index); + + code->node[code->cur_node].tex_end++; + code->tex.inst[code->tex.length++] = + (inst->SrcReg[0].Index << R300_SRC_ADDR_SHIFT) | + (dest << R300_DST_ADDR_SHIFT) | + (unit << R300_TEX_ID_SHIFT) | + (opcode << R300_TEX_INST_SHIFT); + return GL_TRUE; +} + + +static const struct radeon_pair_handler pair_handler = { + .EmitConst = &emit_const, + .EmitPaired = &emit_alu, + .EmitTex = &emit_tex, + .BeginTexBlock = &begin_tex, + .MaxHwTemps = PFS_NUM_TEMP_REGS +}; + +/** + * Final compilation step: Turn the intermediate radeon_program into + * machine-readable instructions. + */ +GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler) +{ + struct r300_fragment_program_code *code = compiler->code; + + _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); + code->node[0].alu_end = -1; + code->node[0].tex_end = -1; + + if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler)) + return GL_FALSE; + + if (!finish_node(compiler)) + return GL_FALSE; + + return GL_TRUE; +} + diff --git a/src/mesa/drivers/dri/r600/r600_fragprog_swizzle.c b/src/mesa/drivers/dri/r600/r600_fragprog_swizzle.c new file mode 100644 index 00000000000..16c5fc8ec7a --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_fragprog_swizzle.c @@ -0,0 +1,227 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * Utilities to deal with the somewhat odd restriction on R300 fragment + * program swizzles. + */ + +#include "r600_fragprog_swizzle.h" + +#include "r600_reg.h" +#include "radeon_nqssadce.h" + +#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO)) + +struct swizzle_data { + GLuint hash; /**< swizzle value this matches */ + GLuint base; /**< base value for hw swizzle */ + GLuint stride; /**< difference in base between arg0/1/2 */ +}; + +static const struct swizzle_data native_swizzles[] = { + {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4}, + {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4}, + {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4}, + {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4}, + {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1}, + {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1}, + {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1}, + {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1}, + {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0}, + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0} +}; + +static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); + + +/** + * Find a native RGB swizzle that matches the given swizzle. + * Returns 0 if none found. + */ +static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle) +{ + int i, comp; + + for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data* sd = &native_swizzles[i]; + for(comp = 0; comp < 3; ++comp) { + GLuint swz = GET_SWZ(swizzle, comp); + if (swz == SWIZZLE_NIL) + continue; + if (swz != GET_SWZ(sd->hash, comp)) + break; + } + if (comp == 3) + return sd; + } + + return 0; +} + + +/** + * Check whether the given instruction supports the swizzle and negate + * combinations in the given source register. + */ +GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) +{ + if (reg.Abs) + reg.NegateBase = 0; + + if (opcode == OPCODE_KIL || + opcode == OPCODE_TEX || + opcode == OPCODE_TXB || + opcode == OPCODE_TXP) { + int j; + + if (reg.Abs || reg.NegateBase != (15*reg.NegateAbs)) + return GL_FALSE; + + for(j = 0; j < 4; ++j) { + GLuint swz = GET_SWZ(reg.Swizzle, j); + if (swz == SWIZZLE_NIL) + continue; + if (swz != j) + return GL_FALSE; + } + + return GL_TRUE; + } + + GLuint relevant = 0; + int j; + + for(j = 0; j < 3; ++j) + if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL) + relevant |= 1 << j; + + if ((reg.NegateBase & relevant) && (reg.NegateBase & relevant) != relevant) + return GL_FALSE; + + if (!lookup_native_swizzle(reg.Swizzle)) + return GL_FALSE; + + return GL_TRUE; +} + + +/** + * Generate MOV dst, src using only native swizzles. + */ +void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) +{ + if (src.Abs) + src.NegateBase = 0; + + while(dst.WriteMask) { + const struct swizzle_data *best_swizzle = 0; + GLuint best_matchcount = 0; + GLuint best_matchmask = 0; + GLboolean rgbnegate; + int i, comp; + + for(i = 0; i < num_native_swizzles; ++i) { + const struct swizzle_data *sd = &native_swizzles[i]; + GLuint matchcount = 0; + GLuint matchmask = 0; + for(comp = 0; comp < 3; ++comp) { + if (!GET_BIT(dst.WriteMask, comp)) + continue; + GLuint swz = GET_SWZ(src.Swizzle, comp); + if (swz == SWIZZLE_NIL) + continue; + if (swz == GET_SWZ(sd->hash, comp)) { + matchcount++; + matchmask |= 1 << comp; + } + } + if (matchcount > best_matchcount) { + best_swizzle = sd; + best_matchcount = matchcount; + best_matchmask = matchmask; + if (matchmask == (dst.WriteMask & WRITEMASK_XYZ)) + break; + } + } + + if ((src.NegateBase & best_matchmask) != 0) { + best_matchmask &= src.NegateBase; + rgbnegate = !src.NegateAbs; + } else { + rgbnegate = src.NegateAbs; + } + + struct prog_instruction *inst; + + _mesa_insert_instructions(s->Program, s->IP, 1); + inst = s->Program->Instructions + s->IP++; + inst->Opcode = OPCODE_MOV; + inst->DstReg = dst; + inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W); + inst->SrcReg[0] = src; + /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */ + + dst.WriteMask &= ~inst->DstReg.WriteMask; + } +} + + +/** + * Translate an RGB (XYZ) swizzle into the hardware code for the given + * instruction source. + */ +GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle) +{ + const struct swizzle_data* sd = lookup_native_swizzle(swizzle); + + if (!sd) { + _mesa_printf("Not a native swizzle: %08x\n", swizzle); + return 0; + } + + return sd->base + src*sd->stride; +} + + +/** + * Translate an Alpha (W) swizzle into the hardware code for the given + * instruction source. + */ +GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle) +{ + if (swizzle < 3) + return swizzle + 3*src; + + switch(swizzle) { + case SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; + case SWIZZLE_ONE: return R300_ALU_ARGA_ONE; + case SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; + default: return R300_ALU_ARGA_ONE; + } +} diff --git a/src/mesa/drivers/dri/r600/r600_fragprog_swizzle.h b/src/mesa/drivers/dri/r600/r600_fragprog_swizzle.h new file mode 100644 index 00000000000..4a0f8cce4f1 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_fragprog_swizzle.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __R600_FRAGPROG_SWIZZLE_H_ +#define __R600_FRAGPROG_SWIZZLE_H_ + +#include "main/glheader.h" +#include "shader/prog_instruction.h" + +struct nqssadce_state; + +GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); +void r300FPBuildSwizzle(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); + +GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle); +GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle); + +#endif /* __R300_FRAGPROG_SWIZZLE_H_ */ diff --git a/src/mesa/drivers/dri/r600/r600_ioctl.c b/src/mesa/drivers/dri/r600/r600_ioctl.c new file mode 100644 index 00000000000..c75354ae45c --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_ioctl.c @@ -0,0 +1,667 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. +Copyright (C) 2004 Nicolai Haehnle. +All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Keith Whitwell <[email protected]> + * + * \author Nicolai Haehnle <[email protected]> + */ + +#include <sched.h> +#include <errno.h> + +#include "main/glheader.h" +#include "main/imports.h" +#include "main/macros.h" +#include "main/context.h" +#include "swrast/swrast.h" + +#include "radeon_common.h" +#include "radeon_lock.h" +#include "r600_context.h" +#include "r600_ioctl.h" +#include "r600_cmdbuf.h" +#include "r600_state.h" +#include "r600_vertprog.h" +#include "radeon_reg.h" +#include "r600_emit.h" +#include "r600_fragprog.h" +#include "r600_context.h" + +#include "vblank.h" + +#define R200_3D_DRAW_IMMD_2 0xC0003500 + +#define CLEARBUFFER_COLOR 0x1 +#define CLEARBUFFER_DEPTH 0x2 +#define CLEARBUFFER_STENCIL 0x4 + +static void r300EmitClearState(GLcontext * ctx); + +static void r300UserClear(GLcontext *ctx, GLuint mask) +{ + radeon_clear_tris(ctx, mask); +} + +static void r300ClearBuffer(r300ContextPtr r300, int flags, + struct radeon_renderbuffer *rrb, + struct radeon_renderbuffer *rrbd) +{ + BATCH_LOCALS(&r300->radeon); + GLcontext *ctx = r300->radeon.glCtx; + __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; + GLuint cbpitch = 0; + r300ContextPtr rmesa = r300; + + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n", + __FUNCTION__, rrb, dPriv->x, dPriv->y, + dPriv->w, dPriv->h); + + if (rrb) { + cbpitch = (rrb->pitch / rrb->cpp); + if (rrb->cpp == 4) + cbpitch |= R300_COLOR_FORMAT_ARGB8888; + else + cbpitch |= R300_COLOR_FORMAT_RGB565; + + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){ + cbpitch |= R300_COLOR_TILE_ENABLE; + } + } + + /* TODO in bufmgr */ + cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); + end_3d(&rmesa->radeon); + + if (flags & CLEARBUFFER_COLOR) { + assert(rrb != 0); + BEGIN_BATCH_NO_AUTOSTATE(6); + OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); + OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_REGVAL(R300_RB3D_COLORPITCH0, cbpitch); + END_BATCH(); + } +#if 1 + if (flags & (CLEARBUFFER_DEPTH | CLEARBUFFER_STENCIL)) { + assert(rrbd != 0); + cbpitch = (rrbd->pitch / rrbd->cpp); + if (rrbd->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){ + cbpitch |= R300_DEPTHMACROTILE_ENABLE; + } + if (rrbd->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){ + cbpitch |= R300_DEPTHMICROTILE_TILED; + } + BEGIN_BATCH_NO_AUTOSTATE(6); + OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1); + OUT_BATCH_RELOC(0, rrbd->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, cbpitch); + END_BATCH(); + } +#endif + BEGIN_BATCH_NO_AUTOSTATE(6); + OUT_BATCH_REGSEQ(RB3D_COLOR_CHANNEL_MASK, 1); + if (flags & CLEARBUFFER_COLOR) { + OUT_BATCH((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) | + (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) | + (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) | + (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0)); + } else { + OUT_BATCH(0); + } + + + { + uint32_t t1, t2; + + t1 = 0x0; + t2 = 0x0; + + if (flags & CLEARBUFFER_DEPTH) { + t1 |= R300_Z_ENABLE | R300_Z_WRITE_ENABLE; + t2 |= + (R300_ZS_ALWAYS << R300_Z_FUNC_SHIFT); + } + + if (flags & CLEARBUFFER_STENCIL) { + t1 |= R300_STENCIL_ENABLE; + t2 |= + (R300_ZS_ALWAYS << + R300_S_FRONT_FUNC_SHIFT) | + (R300_ZS_REPLACE << + R300_S_FRONT_SFAIL_OP_SHIFT) | + (R300_ZS_REPLACE << + R300_S_FRONT_ZPASS_OP_SHIFT) | + (R300_ZS_REPLACE << + R300_S_FRONT_ZFAIL_OP_SHIFT); + } + + OUT_BATCH_REGSEQ(R300_ZB_CNTL, 3); + OUT_BATCH(t1); + OUT_BATCH(t2); + OUT_BATCH(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << + R300_STENCILWRITEMASK_SHIFT) | + (ctx->Stencil.Clear & R300_STENCILREF_MASK)); + END_BATCH(); + } + + if (!rmesa->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(9); + OUT_BATCH(cmdpacket3(r300->radeon.radeonScreen, R300_CMD_PACKET3_CLEAR)); + OUT_BATCH_FLOAT32(dPriv->w / 2.0); + OUT_BATCH_FLOAT32(dPriv->h / 2.0); + OUT_BATCH_FLOAT32(ctx->Depth.Clear); + OUT_BATCH_FLOAT32(1.0); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]); + END_BATCH(); + } else { + OUT_BATCH(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8)); + OUT_BATCH(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | + (1 << R300_PRIM_NUM_VERTICES_SHIFT)); + OUT_BATCH_FLOAT32(dPriv->w / 2.0); + OUT_BATCH_FLOAT32(dPriv->h / 2.0); + OUT_BATCH_FLOAT32(ctx->Depth.Clear); + OUT_BATCH_FLOAT32(1.0); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]); + } + + r300EmitCacheFlush(rmesa); + cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); + + R300_STATECHANGE(r300, cb); + R300_STATECHANGE(r300, cmk); + R300_STATECHANGE(r300, zs); +} + +static void r300EmitClearState(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; + int i; + int has_tcl = 1; + int is_r500 = 0; + GLuint vap_cntl; + + if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + has_tcl = 0; + + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + is_r500 = 1; + + /* State atom dirty tracking is a little subtle here. + * + * On the one hand, we need to make sure base state is emitted + * here if we start with an empty batch buffer, otherwise clear + * works incorrectly with multiple processes. Therefore, the first + * BEGIN_BATCH cannot be a BEGIN_BATCH_NO_AUTOSTATE. + * + * On the other hand, implicit state emission clears the state atom + * dirty bits, so we have to call R300_STATECHANGE later than the + * first BEGIN_BATCH. + * + * The final trickiness is that, because we change state, we need + * to ensure that any stored swtcl primitives are flushed properly + * before we start changing state. See the R300_NEWPRIM in r300Clear + * for this. + */ + BEGIN_BATCH(31); + OUT_BATCH_REGSEQ(R300_VAP_PROG_STREAM_CNTL_0, 1); + if (!has_tcl) + OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | + ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); + else + OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | + ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); + + OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0); + OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0, + ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | + (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | + (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) + << R300_SWIZZLE0_SHIFT) | + (((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | + (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | + (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | + ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) + << R300_SWIZZLE1_SHIFT))); + + /* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */ + OUT_BATCH_REGSEQ(R300_VAP_VTX_STATE_CNTL, 2); + OUT_BATCH((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT)); + OUT_BATCH(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0); + + /* comes from fglrx startup of clear */ + OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2); + OUT_BATCH(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA | + R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | + R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | + R300_VPORT_Z_OFFSET_ENA); + OUT_BATCH(0x8); + + OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa); + + OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); + OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT | + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT); + OUT_BATCH(0); /* no textures */ + + OUT_BATCH_REGVAL(R300_TX_ENABLE, 0); + + OUT_BATCH_REGSEQ(R300_SE_VPORT_XSCALE, 6); + OUT_BATCH_FLOAT32(1.0); + OUT_BATCH_FLOAT32(dPriv->x); + OUT_BATCH_FLOAT32(1.0); + OUT_BATCH_FLOAT32(dPriv->y); + OUT_BATCH_FLOAT32(1.0); + OUT_BATCH_FLOAT32(0.0); + + OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0); + + OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2); + OUT_BATCH(0x0); + OUT_BATCH(0x0); + END_BATCH(); + + R300_STATECHANGE(r300, vir[0]); + R300_STATECHANGE(r300, fogs); + R300_STATECHANGE(r300, vir[1]); + R300_STATECHANGE(r300, vic); + R300_STATECHANGE(r300, vte); + R300_STATECHANGE(r300, vof); + R300_STATECHANGE(r300, txe); + R300_STATECHANGE(r300, vpt); + R300_STATECHANGE(r300, at); + R300_STATECHANGE(r300, bld); + R300_STATECHANGE(r300, ps); + + if (has_tcl) { + R300_STATECHANGE(r300, vap_clip_cntl); + + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE); + END_BATCH(); + } + + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGVAL(R300_GA_POINT_SIZE, + ((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) | + ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT)); + END_BATCH(); + + if (!is_r500) { + R300_STATECHANGE(r300, ri); + R300_STATECHANGE(r300, rc); + R300_STATECHANGE(r300, rr); + + BEGIN_BATCH(14); + OUT_BATCH_REGSEQ(R300_RS_IP_0, 8); + for (i = 0; i < 8; ++i) + OUT_BATCH(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); + + OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); + OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); + OUT_BATCH(0x0); + + OUT_BATCH_REGVAL(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE); + END_BATCH(); + } else { + R300_STATECHANGE(r300, ri); + R300_STATECHANGE(r300, rc); + R300_STATECHANGE(r300, rr); + + BEGIN_BATCH(14); + OUT_BATCH_REGSEQ(R500_RS_IP_0, 8); + for (i = 0; i < 8; ++i) { + OUT_BATCH((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)); + } + + OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); + OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); + OUT_BATCH(0x0); + + OUT_BATCH_REGVAL(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE); + END_BATCH(); + } + + if (!is_r500) { + R300_STATECHANGE(r300, fp); + R300_STATECHANGE(r300, fpi[0]); + R300_STATECHANGE(r300, fpi[1]); + R300_STATECHANGE(r300, fpi[2]); + R300_STATECHANGE(r300, fpi[3]); + + BEGIN_BATCH(17); + OUT_BATCH_REGSEQ(R300_US_CONFIG, 3); + OUT_BATCH(0x0); + OUT_BATCH(0x0); + OUT_BATCH(0x0); + OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4); + OUT_BATCH(0x0); + OUT_BATCH(0x0); + OUT_BATCH(0x0); + OUT_BATCH(R300_RGBA_OUT); + + OUT_BATCH_REGVAL(R300_US_ALU_RGB_INST_0, + FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); + OUT_BATCH_REGVAL(R300_US_ALU_RGB_ADDR_0, + FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); + OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_INST_0, + FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); + OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_ADDR_0, + FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); + END_BATCH(); + } else { + struct radeon_state_atom r500fp; + uint32_t _cmd[10]; + + R300_STATECHANGE(r300, fp); + R300_STATECHANGE(r300, r500fp); + + BEGIN_BATCH(7); + OUT_BATCH_REGSEQ(R500_US_CONFIG, 2); + OUT_BATCH(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); + OUT_BATCH(0x0); + OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3); + OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1)); + OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1)); + OUT_BATCH(R500_US_CODE_OFFSET_ADDR(0)); + END_BATCH(); + + r500fp.check = check_r500fp; + r500fp.cmd = _cmd; + r500fp.cmd[0] = cmdr500fp(r300->radeon.radeonScreen, 0, 1, 0, 0); + r500fp.cmd[1] = R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | + R500_INST_LAST | + R500_INST_RGB_OMASK_R | + R500_INST_RGB_OMASK_G | + R500_INST_RGB_OMASK_B | + R500_INST_ALPHA_OMASK | + R500_INST_RGB_CLAMP | + R500_INST_ALPHA_CLAMP; + r500fp.cmd[2] = R500_RGB_ADDR0(0) | + R500_RGB_ADDR1(0) | + R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | + R500_RGB_ADDR2_CONST; + r500fp.cmd[3] = R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | + R500_ALPHA_ADDR2_CONST; + r500fp.cmd[4] = R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | + R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_B_SWIZ_B_G | + R500_ALU_RGB_G_SWIZ_B_B; + r500fp.cmd[5] = R500_ALPHA_OP_CMP | + R500_ALPHA_SWIZ_A_A | + R500_ALPHA_SWIZ_B_A; + r500fp.cmd[6] = R500_ALU_RGBA_OP_CMP | + R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | + R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0; + + r500fp.cmd[7] = 0; + emit_r500fp(ctx, &r500fp); + } + + BEGIN_BATCH(2); + OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); + END_BATCH(); + + if (has_tcl) { + vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (12 << R300_VF_MAX_VTX_NUM_SHIFT)); + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + vap_cntl |= R500_TCL_STATE_OPTIMIZATION; + } else { + vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (5 << R300_VF_MAX_VTX_NUM_SHIFT)); + } + + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) + vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) + vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)) + vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT); + else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || + (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580)) + vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT); + else + vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT); + + R300_STATECHANGE(r300, vap_cntl); + + BEGIN_BATCH(2); + OUT_BATCH_REGVAL(R300_VAP_CNTL, vap_cntl); + END_BATCH(); + + if (has_tcl) { + struct radeon_state_atom vpu; + uint32_t _cmd[10]; + R300_STATECHANGE(r300, pvs); + R300_STATECHANGE(r300, vpi); + + BEGIN_BATCH(4); + OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3); + OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) | + (0 << R300_PVS_XYZW_VALID_INST_SHIFT) | + (1 << R300_PVS_LAST_INST_SHIFT)); + OUT_BATCH((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | + (0 << R300_PVS_MAX_CONST_ADDR_SHIFT)); + OUT_BATCH(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT); + END_BATCH(); + + vpu.check = check_vpu; + vpu.cmd = _cmd; + vpu.cmd[0] = cmdvpu(r300->radeon.radeonScreen, 0, 2); + + vpu.cmd[1] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, + 0, 0xf, PVS_DST_REG_OUT); + vpu.cmd[2] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, + PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, + PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + vpu.cmd[3] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + vpu.cmd[4] = 0x0; + + vpu.cmd[5] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, + PVS_DST_REG_OUT); + vpu.cmd[6] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, + PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, + PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, + + VSF_FLAG_NONE); + vpu.cmd[7] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + vpu.cmd[8] = 0x0; + + r300->vap_flush_needed = GL_TRUE; + emit_vpu(ctx, &vpu); + } +} + +static void r300KernelClear(GLcontext *ctx, GLuint flags) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; + struct radeon_framebuffer *rfb = dPriv->driverPrivate; + struct radeon_renderbuffer *rrb; + struct radeon_renderbuffer *rrbd; + int bits = 0; + + /* Make sure it fits there. */ + rcommonEnsureCmdBufSpace(&r300->radeon, 421 * 3, __FUNCTION__); + if (flags || bits) + r300EmitClearState(ctx); + rrbd = radeon_get_renderbuffer(&rfb->base, BUFFER_DEPTH); + if (rrbd && (flags & BUFFER_BIT_DEPTH)) + bits |= CLEARBUFFER_DEPTH; + + if (rrbd && (flags & BUFFER_BIT_STENCIL)) + bits |= CLEARBUFFER_STENCIL; + + if (flags & BUFFER_BIT_COLOR0) { + rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_COLOR0); + r300ClearBuffer(r300, CLEARBUFFER_COLOR, rrb, NULL); + bits = 0; + } + + if (flags & BUFFER_BIT_FRONT_LEFT) { + rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_FRONT_LEFT); + r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd); + bits = 0; + } + + if (flags & BUFFER_BIT_BACK_LEFT) { + rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_BACK_LEFT); + r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd); + bits = 0; + } + + if (bits) + r300ClearBuffer(r300, bits, NULL, rrbd); + + COMMIT_BATCH(); +} + +/** + * Buffer clear + */ +static void r300Clear(GLcontext * ctx, GLbitfield mask) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; + const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask); + GLbitfield swrast_mask = 0, tri_mask = 0; + int i; + struct gl_framebuffer *fb = ctx->DrawBuffer; + + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "r300Clear\n"); + + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + LOCK_HARDWARE(&r300->radeon); + UNLOCK_HARDWARE(&r300->radeon); + if (dPriv->numClipRects == 0) + return; + } + + /* Flush swtcl vertices if necessary, because we will change hardware + * state during clear. See also the state-related comment in + * r300EmitClearState. + */ + R300_NEWPRIM(r300); + + if (colorMask == ~0) + tri_mask |= (mask & BUFFER_BITS_COLOR); + + + /* HW stencil */ + if (mask & BUFFER_BIT_STENCIL) { + tri_mask |= BUFFER_BIT_STENCIL; + } + + /* HW depth */ + if (mask & BUFFER_BIT_DEPTH) { + tri_mask |= BUFFER_BIT_DEPTH; + } + + /* If we're doing a tri pass for depth/stencil, include a likely color + * buffer with it. + */ + + for (i = 0; i < BUFFER_COUNT; i++) { + GLuint bufBit = 1 << i; + if ((tri_mask) & bufBit) { + if (!fb->Attachment[i].Renderbuffer->ClassID) { + tri_mask &= ~bufBit; + swrast_mask |= bufBit; + } + } + } + + /* SW fallback clearing */ + swrast_mask = mask & ~tri_mask; + + if (tri_mask) { + if (r300->radeon.radeonScreen->kernel_mm) + r300UserClear(ctx, tri_mask); + else + r300KernelClear(ctx, tri_mask); + } + if (swrast_mask) { + if (RADEON_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "%s: swrast clear, mask: %x\n", + __FUNCTION__, swrast_mask); + _swrast_Clear(ctx, swrast_mask); + } +} + + +void r300InitIoctlFuncs(struct dd_function_table *functions) +{ + functions->Clear = r300Clear; + functions->Finish = radeonFinish; + functions->Flush = radeonFlush; +} diff --git a/src/mesa/drivers/dri/r600/r600_ioctl.h b/src/mesa/drivers/dri/r600/r600_ioctl.h new file mode 100644 index 00000000000..63beab92a40 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_ioctl.h @@ -0,0 +1,44 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <[email protected]> + * Nicolai Haehnle <[email protected]> + */ + +#ifndef __R600_IOCTL_H__ +#define __R600_IOCTL_H__ + +#include "r600_context.h" +#include "radeon_drm.h" + +extern void r300InitIoctlFuncs(struct dd_function_table *functions); + +#endif /* __R600_IOCTL_H__ */ diff --git a/src/mesa/drivers/dri/r600/r600_reg.h b/src/mesa/drivers/dri/r600/r600_reg.h new file mode 100644 index 00000000000..ed552d09bbc --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_reg.h @@ -0,0 +1,3265 @@ +/************************************************************************** + +Copyright (C) 2004-2005 Nicolai Haehnle et al. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* *INDENT-OFF* */ + +#ifndef _R300_REG_H +#define _R300_REG_H + +#define R300_MC_INIT_MISC_LAT_TIMER 0x180 +# define R300_MC_MISC__MC_CPR_INIT_LAT_SHIFT 0 +# define R300_MC_MISC__MC_VF_INIT_LAT_SHIFT 4 +# define R300_MC_MISC__MC_DISP0R_INIT_LAT_SHIFT 8 +# define R300_MC_MISC__MC_DISP1R_INIT_LAT_SHIFT 12 +# define R300_MC_MISC__MC_FIXED_INIT_LAT_SHIFT 16 +# define R300_MC_MISC__MC_E2R_INIT_LAT_SHIFT 20 +# define R300_MC_MISC__MC_SAME_PAGE_PRIO_SHIFT 24 +# define R300_MC_MISC__MC_GLOBW_INIT_LAT_SHIFT 28 + + +#define R300_MC_INIT_GFX_LAT_TIMER 0x154 +# define R300_MC_MISC__MC_G3D0R_INIT_LAT_SHIFT 0 +# define R300_MC_MISC__MC_G3D1R_INIT_LAT_SHIFT 4 +# define R300_MC_MISC__MC_G3D2R_INIT_LAT_SHIFT 8 +# define R300_MC_MISC__MC_G3D3R_INIT_LAT_SHIFT 12 +# define R300_MC_MISC__MC_TX0R_INIT_LAT_SHIFT 16 +# define R300_MC_MISC__MC_TX1R_INIT_LAT_SHIFT 20 +# define R300_MC_MISC__MC_GLOBR_INIT_LAT_SHIFT 24 +# define R300_MC_MISC__MC_GLOBW_FULL_LAT_SHIFT 28 + +/* + * This file contains registers and constants for the R300. They have been + * found mostly by examining command buffers captured using glxtest, as well + * as by extrapolating some known registers and constants from the R200. + * I am fairly certain that they are correct unless stated otherwise + * in comments. + */ + +#define R300_SE_VPORT_XSCALE 0x1D98 +#define R300_SE_VPORT_XOFFSET 0x1D9C +#define R300_SE_VPORT_YSCALE 0x1DA0 +#define R300_SE_VPORT_YOFFSET 0x1DA4 +#define R300_SE_VPORT_ZSCALE 0x1DA8 +#define R300_SE_VPORT_ZOFFSET 0x1DAC + +#define R300_VAP_PORT_IDX0 0x2040 +/* + * Vertex Array Processing (VAP) Control + */ +#define R300_VAP_CNTL 0x2080 +# define R300_PVS_NUM_SLOTS_SHIFT 0 +# define R300_PVS_NUM_CNTLRS_SHIFT 4 +# define R300_PVS_NUM_FPUS_SHIFT 8 +# define R300_VF_MAX_VTX_NUM_SHIFT 18 +# define R300_GL_CLIP_SPACE_DEF (0 << 22) +# define R300_DX_CLIP_SPACE_DEF (1 << 22) +# define R500_TCL_STATE_OPTIMIZATION (1 << 23) + +/* This register is written directly and also starts data section + * in many 3d CP_PACKET3's + */ +#define R300_VAP_VF_CNTL 0x2084 +# define R300_VAP_VF_CNTL__PRIM_TYPE__SHIFT 0 +# define R300_VAP_VF_CNTL__PRIM_NONE (0<<0) +# define R300_VAP_VF_CNTL__PRIM_POINTS (1<<0) +# define R300_VAP_VF_CNTL__PRIM_LINES (2<<0) +# define R300_VAP_VF_CNTL__PRIM_LINE_STRIP (3<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLES (4<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN (5<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP (6<<0) +# define R300_VAP_VF_CNTL__PRIM_LINE_LOOP (12<<0) +# define R300_VAP_VF_CNTL__PRIM_QUADS (13<<0) +# define R300_VAP_VF_CNTL__PRIM_QUAD_STRIP (14<<0) +# define R300_VAP_VF_CNTL__PRIM_POLYGON (15<<0) + +# define R300_VAP_VF_CNTL__PRIM_WALK__SHIFT 4 + /* State based - direct writes to registers trigger vertex + generation */ +# define R300_VAP_VF_CNTL__PRIM_WALK_STATE_BASED (0<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_INDICES (1<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST (2<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED (3<<4) + + /* I don't think I saw these three used.. */ +# define R300_VAP_VF_CNTL__COLOR_ORDER__SHIFT 6 +# define R300_VAP_VF_CNTL__TCL_OUTPUT_CTL_ENA__SHIFT 9 +# define R300_VAP_VF_CNTL__PROG_STREAM_ENA__SHIFT 10 + + /* index size - when not set the indices are assumed to be 16 bit */ +# define R300_VAP_VF_CNTL__INDEX_SIZE_32bit (1<<11) + /* number of vertices */ +# define R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT 16 + +#define R500_VAP_INDEX_OFFSET 0x208c + +#define R300_VAP_OUTPUT_VTX_FMT_0 0x2090 +# define R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT (1<<0) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT (1<<1) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4) +# define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16) + +#define R300_VAP_OUTPUT_VTX_FMT_1 0x2094 + /* each of the following is 3 bits wide, specifies number + of components */ +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 +# define R300_VAP_OUTPUT_VTX_FMT_1__NOT_PRESENT 0 +# define R300_VAP_OUTPUT_VTX_FMT_1__1_COMPONENT 1 +# define R300_VAP_OUTPUT_VTX_FMT_1__2_COMPONENTS 2 +# define R300_VAP_OUTPUT_VTX_FMT_1__3_COMPONENTS 3 +# define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS 4 + +#define R300_SE_VTE_CNTL 0x20b0 +# define R300_VPORT_X_SCALE_ENA (1 << 0) +# define R300_VPORT_X_OFFSET_ENA (1 << 1) +# define R300_VPORT_Y_SCALE_ENA (1 << 2) +# define R300_VPORT_Y_OFFSET_ENA (1 << 3) +# define R300_VPORT_Z_SCALE_ENA (1 << 4) +# define R300_VPORT_Z_OFFSET_ENA (1 << 5) +# define R300_VTX_XY_FMT (1 << 8) +# define R300_VTX_Z_FMT (1 << 9) +# define R300_VTX_W0_FMT (1 << 10) +# define R300_SERIAL_PROC_ENA (1 << 11) + +/* BEGIN: Vertex data assembly - lots of uncertainties */ + +/* gap */ + +/* Maximum Vertex Indx Clamp */ +#define R300_VAP_VF_MAX_VTX_INDX 0x2134 +/* Minimum Vertex Indx Clamp */ +#define R300_VAP_VF_MIN_VTX_INDX 0x2138 + +/** Vertex assembler/processor control status */ +#define R300_VAP_CNTL_STATUS 0x2140 +/* No swap at all (default) */ +# define R300_VC_NO_SWAP (0 << 0) +/* 16-bit swap: 0xAABBCCDD becomes 0xBBAADDCC */ +# define R300_VC_16BIT_SWAP (1 << 0) +/* 32-bit swap: 0xAABBCCDD becomes 0xDDCCBBAA */ +# define R300_VC_32BIT_SWAP (2 << 0) +/* Half-dword swap: 0xAABBCCDD becomes 0xCCDDAABB */ +# define R300_VC_HALF_DWORD_SWAP (3 << 0) +/* The TCL engine will not be used (as it is logically or even physically removed) */ +# define R300_VAP_TCL_BYPASS (1 << 8) +/* Read only flag if TCL engine is busy. */ +# define R300_VAP_PVS_BUSY (1 << 11) +/* TODO: gap for MAX_MPS */ +/* Read only flag if the vertex store is busy. */ +# define R300_VAP_VS_BUSY (1 << 24) +/* Read only flag if the reciprocal engine is busy. */ +# define R300_VAP_RCP_BUSY (1 << 25) +/* Read only flag if the viewport transform engine is busy. */ +# define R300_VAP_VTE_BUSY (1 << 26) +/* Read only flag if the memory interface unit is busy. */ +# define R300_VAP_MUI_BUSY (1 << 27) +/* Read only flag if the vertex cache is busy. */ +# define R300_VAP_VC_BUSY (1 << 28) +/* Read only flag if the vertex fetcher is busy. */ +# define R300_VAP_VF_BUSY (1 << 29) +/* Read only flag if the register pipeline is busy. */ +# define R300_VAP_REGPIPE_BUSY (1 << 30) +/* Read only flag if the VAP engine is busy. */ +# define R300_VAP_VAP_BUSY (1 << 31) + +/* gap */ + +/* Where do we get our vertex data? + * + * Vertex data either comes either from immediate mode registers or from + * vertex arrays. + * There appears to be no mixed mode (though we can force the pitch of + * vertex arrays to 0, effectively reusing the same element over and over + * again). + * + * Immediate mode is controlled by the INPUT_CNTL registers. I am not sure + * if these registers influence vertex array processing. + * + * Vertex arrays are controlled via the 3D_LOAD_VBPNTR packet3. + * + * In both cases, vertex attributes are then passed through INPUT_ROUTE. + * + * Beginning with INPUT_ROUTE_0_0 is a list of WORDs that route vertex data + * into the vertex processor's input registers. + * The first word routes the first input, the second word the second, etc. + * The corresponding input is routed into the register with the given index. + * The list is ended by a word with INPUT_ROUTE_END set. + * + * Always set COMPONENTS_4 in immediate mode. + */ + +#define R300_VAP_PROG_STREAM_CNTL_0 0x2150 +# define R300_DATA_TYPE_0_SHIFT 0 +# define R300_DATA_TYPE_FLOAT_1 0 +# define R300_DATA_TYPE_FLOAT_2 1 +# define R300_DATA_TYPE_FLOAT_3 2 +# define R300_DATA_TYPE_FLOAT_4 3 +# define R300_DATA_TYPE_BYTE 4 +# define R300_DATA_TYPE_D3DCOLOR 5 +# define R300_DATA_TYPE_SHORT_2 6 +# define R300_DATA_TYPE_SHORT_4 7 +# define R300_DATA_TYPE_VECTOR_3_TTT 8 +# define R300_DATA_TYPE_VECTOR_3_EET 9 +# define R300_SKIP_DWORDS_SHIFT 4 +# define R300_DST_VEC_LOC_SHIFT 8 +# define R300_LAST_VEC (1 << 13) +# define R300_SIGNED (1 << 14) +# define R300_NORMALIZE (1 << 15) +# define R300_DATA_TYPE_1_SHIFT 16 +#define R300_VAP_PROG_STREAM_CNTL_1 0x2154 +#define R300_VAP_PROG_STREAM_CNTL_2 0x2158 +#define R300_VAP_PROG_STREAM_CNTL_3 0x215C +#define R300_VAP_PROG_STREAM_CNTL_4 0x2160 +#define R300_VAP_PROG_STREAM_CNTL_5 0x2164 +#define R300_VAP_PROG_STREAM_CNTL_6 0x2168 +#define R300_VAP_PROG_STREAM_CNTL_7 0x216C +/* gap */ + +/* Notes: + * - always set up to produce at least two attributes: + * if vertex program uses only position, fglrx will set normal, too + * - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal. + */ +#define R300_VAP_VTX_STATE_CNTL 0x2180 +# define R300_COLOR_0_ASSEMBLY_SHIFT 0 +# define R300_SEL_COLOR 0 +# define R300_SEL_USER_COLOR_0 1 +# define R300_SEL_USER_COLOR_1 2 +# define R300_COLOR_1_ASSEMBLY_SHIFT 2 +# define R300_COLOR_2_ASSEMBLY_SHIFT 4 +# define R300_COLOR_3_ASSEMBLY_SHIFT 6 +# define R300_COLOR_4_ASSEMBLY_SHIFT 8 +# define R300_COLOR_5_ASSEMBLY_SHIFT 10 +# define R300_COLOR_6_ASSEMBLY_SHIFT 12 +# define R300_COLOR_7_ASSEMBLY_SHIFT 14 +# define R300_UPDATE_USER_COLOR_0_ENA (1 << 16) + +/* + * Each bit in this field applies to the corresponding vector in the VSM + * memory (i.e. Bit 0 applies to VECTOR_0 (POSITION), etc.). If the bit + * is set, then the corresponding 4-Dword Vector is output into the Vertex Stream. + */ +#define R300_VAP_VSM_VTX_ASSM 0x2184 +# define R300_INPUT_CNTL_POS 0x00000001 +# define R300_INPUT_CNTL_NORMAL 0x00000002 +# define R300_INPUT_CNTL_COLOR 0x00000004 +# define R300_INPUT_CNTL_TC0 0x00000400 +# define R300_INPUT_CNTL_TC1 0x00000800 +# define R300_INPUT_CNTL_TC2 0x00001000 /* GUESS */ +# define R300_INPUT_CNTL_TC3 0x00002000 /* GUESS */ +# define R300_INPUT_CNTL_TC4 0x00004000 /* GUESS */ +# define R300_INPUT_CNTL_TC5 0x00008000 /* GUESS */ +# define R300_INPUT_CNTL_TC6 0x00010000 /* GUESS */ +# define R300_INPUT_CNTL_TC7 0x00020000 /* GUESS */ + +/* Programmable Stream Control Signed Normalize Control */ +#define R300_VAP_PSC_SGN_NORM_CNTL 0x21dc +# define SGN_NORM_ZERO 0 +# define SGN_NORM_ZERO_CLAMP_MINUS_ONE 1 +# define SGN_NORM_NO_ZERO 2 + +/* gap */ + +/* Words parallel to INPUT_ROUTE_0; All words that are active in INPUT_ROUTE_0 + * are set to a swizzling bit pattern, other words are 0. + * + * In immediate mode, the pattern is always set to xyzw. In vertex array + * mode, the swizzling pattern is e.g. used to set zw components in texture + * coordinates with only tweo components. + */ +#define R300_VAP_PROG_STREAM_CNTL_EXT_0 0x21e0 +# define R300_SWIZZLE0_SHIFT 0 +# define R300_SWIZZLE_SELECT_X_SHIFT 0 +# define R300_SWIZZLE_SELECT_Y_SHIFT 3 +# define R300_SWIZZLE_SELECT_Z_SHIFT 6 +# define R300_SWIZZLE_SELECT_W_SHIFT 9 + +# define R300_SWIZZLE_SELECT_X 0 +# define R300_SWIZZLE_SELECT_Y 1 +# define R300_SWIZZLE_SELECT_Z 2 +# define R300_SWIZZLE_SELECT_W 3 +# define R300_SWIZZLE_SELECT_FP_ZERO 4 +# define R300_SWIZZLE_SELECT_FP_ONE 5 +/* alternate forms for r300_emit.c */ +# define R300_INPUT_ROUTE_SELECT_X 0 +# define R300_INPUT_ROUTE_SELECT_Y 1 +# define R300_INPUT_ROUTE_SELECT_Z 2 +# define R300_INPUT_ROUTE_SELECT_W 3 +# define R300_INPUT_ROUTE_SELECT_ZERO 4 +# define R300_INPUT_ROUTE_SELECT_ONE 5 + +# define R300_WRITE_ENA_SHIFT 12 +# define R300_WRITE_ENA_X 1 +# define R300_WRITE_ENA_Y 2 +# define R300_WRITE_ENA_Z 4 +# define R300_WRITE_ENA_W 8 +# define R300_SWIZZLE1_SHIFT 16 +#define R300_VAP_PROG_STREAM_CNTL_EXT_1 0x21e4 +#define R300_VAP_PROG_STREAM_CNTL_EXT_2 0x21e8 +#define R300_VAP_PROG_STREAM_CNTL_EXT_3 0x21ec +#define R300_VAP_PROG_STREAM_CNTL_EXT_4 0x21f0 +#define R300_VAP_PROG_STREAM_CNTL_EXT_5 0x21f4 +#define R300_VAP_PROG_STREAM_CNTL_EXT_6 0x21f8 +#define R300_VAP_PROG_STREAM_CNTL_EXT_7 0x21fc + +/* END: Vertex data assembly */ + +/* gap */ + +/* BEGIN: Upload vertex program and data */ + +/* + * The programmable vertex shader unit has a memory bank of unknown size + * that can be written to in 16 byte units by writing the address into + * UPLOAD_ADDRESS, followed by data in UPLOAD_DATA (multiples of 4 DWORDs). + * + * Pointers into the memory bank are always in multiples of 16 bytes. + * + * The memory bank is divided into areas with fixed meaning. + * + * Starting at address UPLOAD_PROGRAM: Vertex program instructions. + * Native limits reported by drivers from ATI suggest size 256 (i.e. 4KB), + * whereas the difference between known addresses suggests size 512. + * + * Starting at address UPLOAD_PARAMETERS: Vertex program parameters. + * Native reported limits and the VPI layout suggest size 256, whereas + * difference between known addresses suggests size 512. + * + * At address UPLOAD_POINTSIZE is a vector (0, 0, ps, 0), where ps is the + * floating point pointsize. The exact purpose of this state is uncertain, + * as there is also the R300_RE_POINTSIZE register. + * + * Multiple vertex programs and parameter sets can be loaded at once, + * which could explain the size discrepancy. + */ +#define R300_VAP_PVS_VECTOR_INDX_REG 0x2200 +# define R300_PVS_CODE_START 0 +# define R300_MAX_PVS_CODE_LINES 256 +# define R500_MAX_PVS_CODE_LINES 1024 +# define R300_PVS_CONST_START 512 +# define R500_PVS_CONST_START 1024 +# define R300_MAX_PVS_CONST_VECS 256 +# define R500_MAX_PVS_CONST_VECS 1024 +# define R300_PVS_UCP_START 1024 +# define R500_PVS_UCP_START 1536 +# define R300_POINT_VPORT_SCALE_OFFSET 1030 +# define R500_POINT_VPORT_SCALE_OFFSET 1542 +# define R300_POINT_GEN_TEX_OFFSET 1031 +# define R500_POINT_GEN_TEX_OFFSET 1543 + +/* + * These are obsolete defines form r300_context.h, but they might give some + * clues when investigating the addresses further... + */ +#if 0 +#define VSF_DEST_PROGRAM 0x0 +#define VSF_DEST_MATRIX0 0x200 +#define VSF_DEST_MATRIX1 0x204 +#define VSF_DEST_MATRIX2 0x208 +#define VSF_DEST_VECTOR0 0x20c +#define VSF_DEST_VECTOR1 0x20d +#define VSF_DEST_UNKNOWN1 0x400 +#define VSF_DEST_UNKNOWN2 0x406 +#endif + +/* gap */ + +#define R300_VAP_PVS_UPLOAD_DATA 0x2208 + +/* END: Upload vertex program and data */ + +/* gap */ + +/* I do not know the purpose of this register. However, I do know that + * it is set to 221C_CLEAR for clear operations and to 221C_NORMAL + * for normal rendering. + * + * 2007-11-05: This register is the user clip plane control register, but there + * also seems to be a rendering mode control; the NORMAL/CLEAR defines. + * + * See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view + */ +#define R300_VAP_CLIP_CNTL 0x221C +# define R300_VAP_UCP_ENABLE_0 (1 << 0) +# define R300_VAP_UCP_ENABLE_1 (1 << 1) +# define R300_VAP_UCP_ENABLE_2 (1 << 2) +# define R300_VAP_UCP_ENABLE_3 (1 << 3) +# define R300_VAP_UCP_ENABLE_4 (1 << 4) +# define R300_VAP_UCP_ENABLE_5 (1 << 5) +# define R300_PS_UCP_MODE_DIST_COP (0 << 14) +# define R300_PS_UCP_MODE_RADIUS_COP (1 << 14) +# define R300_PS_UCP_MODE_RADIUS_COP_CLIP (2 << 14) +# define R300_PS_UCP_MODE_CLIP_AS_TRIFAN (3 << 14) +# define R300_CLIP_DISABLE (1 << 16) +# define R300_UCP_CULL_ONLY_ENABLE (1 << 17) +# define R300_BOUNDARY_EDGE_FLAG_ENABLE (1 << 18) +# define R500_COLOR2_IS_TEXTURE (1 << 20) +# define R500_COLOR3_IS_TEXTURE (1 << 21) + +/* These seem to be per-pixel and per-vertex X and Y clipping planes. The first + * plane is per-pixel and the second plane is per-vertex. + * + * This was determined by experimentation alone but I believe it is correct. + * + * These registers are called X_QUAD0_1_FL to X_QUAD0_4_FL by glxtest. + */ +#define R300_VAP_GB_VERT_CLIP_ADJ 0x2220 +#define R300_VAP_GB_VERT_DISC_ADJ 0x2224 +#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228 +#define R300_VAP_GB_HORZ_DISC_ADJ 0x222c + +/* gap */ + +/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between + * rendering commands and overwriting vertex program parameters. + * Therefore, I suspect writing zero to 0x2284 synchronizes the engine and + * avoids bugs caused by still running shaders reading bad data from memory. + */ +#define R300_VAP_PVS_STATE_FLUSH_REG 0x2284 + +/* This register is used to define the number of core clocks to wait for a + * vertex to be received by the VAP input controller (while the primitive + * path is backed up) before forcing any accumulated vertices to be submitted + * to the vertex processing path. + */ +#define VAP_PVS_VTX_TIMEOUT_REG 0x2288 +# define R300_2288_R300 0x00750000 /* -- nh */ +# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */ + +/* gap */ + +/* Addresses are relative to the vertex program instruction area of the + * memory bank. PROGRAM_END points to the last instruction of the active + * program + * + * The meaning of the two UNKNOWN fields is obviously not known. However, + * experiments so far have shown that both *must* point to an instruction + * inside the vertex program, otherwise the GPU locks up. + * + * fglrx usually sets CNTL_3_UNKNOWN to the end of the program and + * R300_PVS_CNTL_1_POS_END_SHIFT points to instruction where last write to + * position takes place. + * + * Most likely this is used to ignore rest of the program in cases + * where group of verts arent visible. For some reason this "section" + * is sometimes accepted other instruction that have no relationship with + * position calculations. + */ +#define R300_VAP_PVS_CODE_CNTL_0 0x22D0 +# define R300_PVS_FIRST_INST_SHIFT 0 +# define R300_PVS_XYZW_VALID_INST_SHIFT 10 +# define R300_PVS_LAST_INST_SHIFT 20 +/* Addresses are relative the the vertex program parameters area. */ +#define R300_VAP_PVS_CONST_CNTL 0x22D4 +# define R300_PVS_CONST_BASE_OFFSET_SHIFT 0 +# define R300_PVS_MAX_CONST_ADDR_SHIFT 16 +#define R300_VAP_PVS_CODE_CNTL_1 0x22D8 +# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0 +#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC + +/* The entire range from 0x2300 to 0x2AC inclusive seems to be used for + * immediate vertices + */ +#define R300_VAP_VTX_COLOR_R 0x2464 +#define R300_VAP_VTX_COLOR_G 0x2468 +#define R300_VAP_VTX_COLOR_B 0x246C +#define R300_VAP_VTX_POS_0_X_1 0x2490 /* used for glVertex2*() */ +#define R300_VAP_VTX_POS_0_Y_1 0x2494 +#define R300_VAP_VTX_COLOR_PKD 0x249C /* RGBA */ +#define R300_VAP_VTX_POS_0_X_2 0x24A0 /* used for glVertex3*() */ +#define R300_VAP_VTX_POS_0_Y_2 0x24A4 +#define R300_VAP_VTX_POS_0_Z_2 0x24A8 +/* write 0 to indicate end of packet? */ +#define R300_VAP_VTX_END_OF_PKT 0x24AC + +/* gap */ + +/* These are values from r300_reg/r300_reg.h - they are known to be correct + * and are here so we can use one register file instead of several + * - Vladimir + */ +#define R300_GB_VAP_RASTER_VTX_FMT_0 0x4000 +# define R300_GB_VAP_RASTER_VTX_FMT_0__POS_PRESENT (1<<0) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_0_PRESENT (1<<1) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_1_PRESENT (1<<2) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_2_PRESENT (1<<3) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_3_PRESENT (1<<4) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_SPACE (0xf<<5) +# define R300_GB_VAP_RASTER_VTX_FMT_0__PT_SIZE_PRESENT (0x1<<16) + +#define R300_GB_VAP_RASTER_VTX_FMT_1 0x4004 + /* each of the following is 3 bits wide, specifies number + of components */ +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 + +/* UNK30 seems to enables point to quad transformation on textures + * (or something closely related to that). + * This bit is rather fatal at the time being due to lackings at pixel + * shader side + * Specifies top of Raster pipe specific enable controls. + */ +#define R300_GB_ENABLE 0x4008 +# define R300_GB_POINT_STUFF_DISABLE (0 << 0) +# define R300_GB_POINT_STUFF_ENABLE (1 << 0) /* Specifies if points will have stuffed texture coordinates. */ +# define R300_GB_LINE_STUFF_DISABLE (0 << 1) +# define R300_GB_LINE_STUFF_ENABLE (1 << 1) /* Specifies if lines will have stuffed texture coordinates. */ +# define R300_GB_TRIANGLE_STUFF_DISABLE (0 << 2) +# define R300_GB_TRIANGLE_STUFF_ENABLE (1 << 2) /* Specifies if triangles will have stuffed texture coordinates. */ +# define R300_GB_STENCIL_AUTO_DISABLE (0 << 4) +# define R300_GB_STENCIL_AUTO_ENABLE (1 << 4) /* Enable stencil auto inc/dec based on triangle cw/ccw, force into dzy low bit. */ +# define R300_GB_STENCIL_AUTO_FORCE (2 << 4) /* Force 0 into dzy low bit. */ + + /* each of the following is 2 bits wide */ +#define R300_GB_TEX_REPLICATE 0 /* Replicate VAP source texture coordinates (S,T,[R,Q]). */ +#define R300_GB_TEX_ST 1 /* Stuff with source texture coordinates (S,T). */ +#define R300_GB_TEX_STR 2 /* Stuff with source texture coordinates (S,T,R). */ +# define R300_GB_TEX0_SOURCE_SHIFT 16 +# define R300_GB_TEX1_SOURCE_SHIFT 18 +# define R300_GB_TEX2_SOURCE_SHIFT 20 +# define R300_GB_TEX3_SOURCE_SHIFT 22 +# define R300_GB_TEX4_SOURCE_SHIFT 24 +# define R300_GB_TEX5_SOURCE_SHIFT 26 +# define R300_GB_TEX6_SOURCE_SHIFT 28 +# define R300_GB_TEX7_SOURCE_SHIFT 30 + +/* MSPOS - positions for multisample antialiasing (?) */ +#define R300_GB_MSPOS0 0x4010 + /* shifts - each of the fields is 4 bits */ +# define R300_GB_MSPOS0__MS_X0_SHIFT 0 +# define R300_GB_MSPOS0__MS_Y0_SHIFT 4 +# define R300_GB_MSPOS0__MS_X1_SHIFT 8 +# define R300_GB_MSPOS0__MS_Y1_SHIFT 12 +# define R300_GB_MSPOS0__MS_X2_SHIFT 16 +# define R300_GB_MSPOS0__MS_Y2_SHIFT 20 +# define R300_GB_MSPOS0__MSBD0_Y 24 +# define R300_GB_MSPOS0__MSBD0_X 28 + +#define R300_GB_MSPOS1 0x4014 +# define R300_GB_MSPOS1__MS_X3_SHIFT 0 +# define R300_GB_MSPOS1__MS_Y3_SHIFT 4 +# define R300_GB_MSPOS1__MS_X4_SHIFT 8 +# define R300_GB_MSPOS1__MS_Y4_SHIFT 12 +# define R300_GB_MSPOS1__MS_X5_SHIFT 16 +# define R300_GB_MSPOS1__MS_Y5_SHIFT 20 +# define R300_GB_MSPOS1__MSBD1 24 + +/* Specifies the graphics pipeline configuration for rasterization. */ +#define R300_GB_TILE_CONFIG 0x4018 +# define R300_GB_TILE_DISABLE (0 << 0) +# define R300_GB_TILE_ENABLE (1 << 0) +# define R300_GB_TILE_PIPE_COUNT_RV300 (0 << 1) /* RV350 (1 pipe, 1 ctx) */ +# define R300_GB_TILE_PIPE_COUNT_R300 (3 << 1) /* R300 (2 pipes, 1 ctx) */ +# define R300_GB_TILE_PIPE_COUNT_R420_3P (6 << 1) /* R420-3P (3 pipes, 1 ctx) */ +# define R300_GB_TILE_PIPE_COUNT_R420 (7 << 1) /* R420 (4 pipes, 1 ctx) */ +# define R300_GB_TILE_SIZE_8 (0 << 4) +# define R300_GB_TILE_SIZE_16 (1 << 4) +# define R300_GB_TILE_SIZE_32 (2 << 4) +# define R300_GB_SUPER_SIZE_1 (0 << 6) +# define R300_GB_SUPER_SIZE_2 (1 << 6) +# define R300_GB_SUPER_SIZE_4 (2 << 6) +# define R300_GB_SUPER_SIZE_8 (3 << 6) +# define R300_GB_SUPER_SIZE_16 (4 << 6) +# define R300_GB_SUPER_SIZE_32 (5 << 6) +# define R300_GB_SUPER_SIZE_64 (6 << 6) +# define R300_GB_SUPER_SIZE_128 (7 << 6) +# define R300_GB_SUPER_X_SHIFT 9 /* 3 bits wide */ +# define R300_GB_SUPER_Y_SHIFT 12 /* 3 bits wide */ +# define R300_GB_SUPER_TILE_A (0 << 15) +# define R300_GB_SUPER_TILE_B (1 << 15) +# define R300_GB_SUBPIXEL_1_12 (0 << 16) +# define R300_GB_SUBPIXEL_1_16 (1 << 16) +# define GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17) +# define GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17) +# define GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17) +# define GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17) +# define GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19) +# define GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19) +# define GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20) +# define GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20) +# define GB_TILE_CONFIG_ALT_OFFSET (0 << 21) +# define GB_TILE_CONFIG_SUBPRECISION (0 << 22) +# define GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23) +# define GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23) +# define GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24) +# define GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24) + +/* Specifies the sizes of the various FIFO`s in the sc/rs/us. This register must be the first one written */ +#define R300_GB_FIFO_SIZE 0x4024 + /* each of the following is 2 bits wide */ +#define R300_GB_FIFO_SIZE_32 0 +#define R300_GB_FIFO_SIZE_64 1 +#define R300_GB_FIFO_SIZE_128 2 +#define R300_GB_FIFO_SIZE_256 3 +# define R300_SC_IFIFO_SIZE_SHIFT 0 +# define R300_SC_TZFIFO_SIZE_SHIFT 2 +# define R300_SC_BFIFO_SIZE_SHIFT 4 + +# define R300_US_OFIFO_SIZE_SHIFT 12 +# define R300_US_WFIFO_SIZE_SHIFT 14 + /* the following use the same constants as above, but meaning is + is times 2 (i.e. instead of 32 words it means 64 */ +# define R300_RS_TFIFO_SIZE_SHIFT 6 +# define R300_RS_CFIFO_SIZE_SHIFT 8 +# define R300_US_RAM_SIZE_SHIFT 10 + /* watermarks, 3 bits wide */ +# define R300_RS_HIGHWATER_COL_SHIFT 16 +# define R300_RS_HIGHWATER_TEX_SHIFT 19 +# define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */ +# define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24 + +#define GB_Z_PEQ_CONFIG 0x4028 +# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0) +# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0) + +/* Specifies various polygon specific selects (fog, depth, perspective). */ +#define R300_GB_SELECT 0x401c +# define R300_GB_FOG_SELECT_C0A (0 << 0) +# define R300_GB_FOG_SELECT_C1A (1 << 0) +# define R300_GB_FOG_SELECT_C2A (2 << 0) +# define R300_GB_FOG_SELECT_C3A (3 << 0) +# define R300_GB_FOG_SELECT_1_1_W (4 << 0) +# define R300_GB_FOG_SELECT_Z (5 << 0) +# define R300_GB_DEPTH_SELECT_Z (0 << 3) +# define R300_GB_DEPTH_SELECT_1_1_W (1 << 3) +# define R300_GB_W_SELECT_1_W (0 << 4) +# define R300_GB_W_SELECT_1 (1 << 4) +# define R300_GB_FOG_STUFF_DISABLE (0 << 5) +# define R300_GB_FOG_STUFF_ENABLE (1 << 5) +# define R300_GB_FOG_STUFF_TEX_SHIFT 6 +# define R300_GB_FOG_STUFF_TEX_MASK 0x000003c0 +# define R300_GB_FOG_STUFF_COMP_SHIFT 10 +# define R300_GB_FOG_STUFF_COMP_MASK 0x00000c00 + +/* Specifies the graphics pipeline configuration for antialiasing. */ +#define GB_AA_CONFIG 0x4020 +# define GB_AA_CONFIG_AA_DISABLE (0 << 0) +# define GB_AA_CONFIG_AA_ENABLE (1 << 0) +# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1) +# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1) +# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1) +# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1) + +/* Selects which of 4 pipes are active. */ +#define GB_PIPE_SELECT 0x402c +# define GB_PIPE_SELECT_PIPE0_ID_SHIFT 0 +# define GB_PIPE_SELECT_PIPE1_ID_SHIFT 2 +# define GB_PIPE_SELECT_PIPE2_ID_SHIFT 4 +# define GB_PIPE_SELECT_PIPE3_ID_SHIFT 6 +# define GB_PIPE_SELECT_PIPE_MASK_SHIFT 8 +# define GB_PIPE_SELECT_MAX_PIPE 12 +# define GB_PIPE_SELECT_BAD_PIPES 14 +# define GB_PIPE_SELECT_CONFIG_PIPES 18 + + +/* Specifies the sizes of the various FIFO`s in the sc/rs. */ +#define GB_FIFO_SIZE1 0x4070 +/* High water mark for SC input fifo */ +# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0 +# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f +/* High water mark for SC input fifo (B) */ +# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6 +# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0 +/* High water mark for RS colors' fifo */ +# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12 +# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000 +/* High water mark for RS textures' fifo */ +# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18 +# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000 + +/* This table specifies the source location and format for up to 16 texture + * addresses (i[0]:i[15]) and four colors (c[0]:c[3]) + */ +#define R500_RS_IP_0 0x4074 +#define R500_RS_IP_1 0x4078 +#define R500_RS_IP_2 0x407C +#define R500_RS_IP_3 0x4080 +#define R500_RS_IP_4 0x4084 +#define R500_RS_IP_5 0x4088 +#define R500_RS_IP_6 0x408C +#define R500_RS_IP_7 0x4090 +#define R500_RS_IP_8 0x4094 +#define R500_RS_IP_9 0x4098 +#define R500_RS_IP_10 0x409C +#define R500_RS_IP_11 0x40A0 +#define R500_RS_IP_12 0x40A4 +#define R500_RS_IP_13 0x40A8 +#define R500_RS_IP_14 0x40AC +#define R500_RS_IP_15 0x40B0 +#define R500_RS_IP_PTR_K0 62 +#define R500_RS_IP_PTR_K1 63 +#define R500_RS_IP_TEX_PTR_S_SHIFT 0 +#define R500_RS_IP_TEX_PTR_T_SHIFT 6 +#define R500_RS_IP_TEX_PTR_R_SHIFT 12 +#define R500_RS_IP_TEX_PTR_Q_SHIFT 18 +#define R500_RS_IP_COL_PTR_SHIFT 24 +#define R500_RS_IP_COL_FMT_SHIFT 27 +# define R500_RS_COL_PTR(x) ((x) << 24) +# define R500_RS_COL_FMT(x) ((x) << 27) +/* gap */ +#define R500_RS_IP_OFFSET_DIS (0 << 31) +#define R500_RS_IP_OFFSET_EN (1 << 31) + +/* gap */ + +/* Zero to flush caches. */ +#define R300_TX_INVALTAGS 0x4100 +#define R300_TX_FLUSH 0x0 + +/* The upper enable bits are guessed, based on fglrx reported limits. */ +#define R300_TX_ENABLE 0x4104 +# define R300_TX_ENABLE_0 (1 << 0) +# define R300_TX_ENABLE_1 (1 << 1) +# define R300_TX_ENABLE_2 (1 << 2) +# define R300_TX_ENABLE_3 (1 << 3) +# define R300_TX_ENABLE_4 (1 << 4) +# define R300_TX_ENABLE_5 (1 << 5) +# define R300_TX_ENABLE_6 (1 << 6) +# define R300_TX_ENABLE_7 (1 << 7) +# define R300_TX_ENABLE_8 (1 << 8) +# define R300_TX_ENABLE_9 (1 << 9) +# define R300_TX_ENABLE_10 (1 << 10) +# define R300_TX_ENABLE_11 (1 << 11) +# define R300_TX_ENABLE_12 (1 << 12) +# define R300_TX_ENABLE_13 (1 << 13) +# define R300_TX_ENABLE_14 (1 << 14) +# define R300_TX_ENABLE_15 (1 << 15) + +#define R500_TX_FILTER_4 0x4110 +# define R500_TX_WEIGHT_1_SHIFT (0) +# define R500_TX_WEIGHT_0_SHIFT (11) +# define R500_TX_WEIGHT_PAIR (1<<22) +# define R500_TX_PHASE_SHIFT (23) +# define R500_TX_DIRECTION_HORIZONTAL (0<<27) +# define R500_TX_DIRECTION_VERITCAL (1<<27) + +/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ +#define R300_GA_POINT_S0 0x4200 + +/* T Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ +#define R300_GA_POINT_T0 0x4204 + +/* S Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */ +#define R300_GA_POINT_S1 0x4208 + +/* T Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */ +#define R300_GA_POINT_T1 0x420c + +/* Specifies amount to shift integer position of vertex (screen space) before + * converting to float for triangle stipple. + */ +#define R300_GA_TRIANGLE_STIPPLE 0x4214 +# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_SHIFT 0 +# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_MASK 0x0000000f +# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT 16 +# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_MASK 0x000f0000 + +/* The pointsize is given in multiples of 6. The pointsize can be enormous: + * Clear() renders a single point that fills the entire framebuffer. + * 1/2 Height of point; fixed (16.0), subpixel format (1/12 or 1/16, even if in + * 8b precision). + */ +#define R300_GA_POINT_SIZE 0x421C +# define R300_POINTSIZE_Y_SHIFT 0 +# define R300_POINTSIZE_Y_MASK 0x0000ffff +# define R300_POINTSIZE_X_SHIFT 16 +# define R300_POINTSIZE_X_MASK 0xffff0000 +# define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6) + +/* Blue fill color */ +#define R500_GA_FILL_R 0x4220 + +/* Blue fill color */ +#define R500_GA_FILL_G 0x4224 + +/* Blue fill color */ +#define R500_GA_FILL_B 0x4228 + +/* Alpha fill color */ +#define R500_GA_FILL_A 0x422c + + +/* Specifies maximum and minimum point & sprite sizes for per vertex size + * specification. The lower part (15:0) is MIN and (31:16) is max. + */ +#define R300_GA_POINT_MINMAX 0x4230 +# define R300_GA_POINT_MINMAX_MIN_SHIFT 0 +# define R300_GA_POINT_MINMAX_MIN_MASK (0xFFFF << 0) +# define R300_GA_POINT_MINMAX_MAX_SHIFT 16 +# define R300_GA_POINT_MINMAX_MAX_MASK (0xFFFF << 16) + +/* 1/2 width of line, in subpixels (1/12 or 1/16 only, even in 8b + * subprecision); (16.0) fixed format. + * + * The line width is given in multiples of 6. + * In default mode lines are classified as vertical lines. + * HO: horizontal + * VE: vertical or horizontal + * HO & VE: no classification + */ +#define R300_GA_LINE_CNTL 0x4234 +# define R300_GA_LINE_CNTL_WIDTH_SHIFT 0 +# define R300_GA_LINE_CNTL_WIDTH_MASK 0x0000ffff +# define R300_GA_LINE_CNTL_END_TYPE_HOR (0 << 16) +# define R300_GA_LINE_CNTL_END_TYPE_VER (1 << 16) +# define R300_GA_LINE_CNTL_END_TYPE_SQR (2 << 16) /* horizontal or vertical depending upon slope */ +# define R300_GA_LINE_CNTL_END_TYPE_COMP (3 << 16) /* Computed (perpendicular to slope) */ +# define R500_GA_LINE_CNTL_SORT_NO (0 << 18) +# define R500_GA_LINE_CNTL_SORT_MINX_MINY (1 << 18) +/** TODO: looks wrong */ +# define R300_LINESIZE_MAX (R300_GA_LINE_CNTL_WIDTH_MASK / 6) +/** TODO: looks wrong */ +# define R300_LINE_CNT_HO (1 << 16) +/** TODO: looks wrong */ +# define R300_LINE_CNT_VE (1 << 17) + +/* Line Stipple configuration information. */ +#define R300_GA_LINE_STIPPLE_CONFIG 0x4238 +# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_NO (0 << 0) +# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE (1 << 0) +# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_PACKET (2 << 0) +# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_SHIFT 2 +# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK 0xfffffffc + +/* Used to load US instructions and constants */ +#define R500_GA_US_VECTOR_INDEX 0x4250 +# define R500_GA_US_VECTOR_INDEX_SHIFT 0 +# define R500_GA_US_VECTOR_INDEX_MASK 0x000000ff +# define R500_GA_US_VECTOR_INDEX_TYPE_INSTR (0 << 16) +# define R500_GA_US_VECTOR_INDEX_TYPE_CONST (1 << 16) +# define R500_GA_US_VECTOR_INDEX_CLAMP_NO (0 << 17) +# define R500_GA_US_VECTOR_INDEX_CLAMP_CONST (1 << 17) + +/* Data register for loading US instructions and constants */ +#define R500_GA_US_VECTOR_DATA 0x4254 + +/* Specifies color properties and mappings of textures. */ +#define R500_GA_COLOR_CONTROL_PS3 0x4258 +# define R500_TEX0_SHADING_PS3_SOLID (0 << 0) +# define R500_TEX0_SHADING_PS3_FLAT (1 << 0) +# define R500_TEX0_SHADING_PS3_GOURAUD (2 << 0) +# define R500_TEX1_SHADING_PS3_SOLID (0 << 2) +# define R500_TEX1_SHADING_PS3_FLAT (1 << 2) +# define R500_TEX1_SHADING_PS3_GOURAUD (2 << 2) +# define R500_TEX2_SHADING_PS3_SOLID (0 << 4) +# define R500_TEX2_SHADING_PS3_FLAT (1 << 4) +# define R500_TEX2_SHADING_PS3_GOURAUD (2 << 4) +# define R500_TEX3_SHADING_PS3_SOLID (0 << 6) +# define R500_TEX3_SHADING_PS3_FLAT (1 << 6) +# define R500_TEX3_SHADING_PS3_GOURAUD (2 << 6) +# define R500_TEX4_SHADING_PS3_SOLID (0 << 8) +# define R500_TEX4_SHADING_PS3_FLAT (1 << 8) +# define R500_TEX4_SHADING_PS3_GOURAUD (2 << 8) +# define R500_TEX5_SHADING_PS3_SOLID (0 << 10) +# define R500_TEX5_SHADING_PS3_FLAT (1 << 10) +# define R500_TEX5_SHADING_PS3_GOURAUD (2 << 10) +# define R500_TEX6_SHADING_PS3_SOLID (0 << 12) +# define R500_TEX6_SHADING_PS3_FLAT (1 << 12) +# define R500_TEX6_SHADING_PS3_GOURAUD (2 << 12) +# define R500_TEX7_SHADING_PS3_SOLID (0 << 14) +# define R500_TEX7_SHADING_PS3_FLAT (1 << 14) +# define R500_TEX7_SHADING_PS3_GOURAUD (2 << 14) +# define R500_TEX8_SHADING_PS3_SOLID (0 << 16) +# define R500_TEX8_SHADING_PS3_FLAT (1 << 16) +# define R500_TEX8_SHADING_PS3_GOURAUD (2 << 16) +# define R500_TEX9_SHADING_PS3_SOLID (0 << 18) +# define R500_TEX9_SHADING_PS3_FLAT (1 << 18) +# define R500_TEX9_SHADING_PS3_GOURAUD (2 << 18) +# define R500_TEX10_SHADING_PS3_SOLID (0 << 20) +# define R500_TEX10_SHADING_PS3_FLAT (1 << 20) +# define R500_TEX10_SHADING_PS3_GOURAUD (2 << 20) +# define R500_COLOR0_TEX_OVERRIDE_NO (0 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_0 (1 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_1 (2 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_2 (3 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_3 (4 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_4 (5 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_5 (6 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_6 (7 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_7 (8 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_8_C2 (9 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_9_C3 (10 << 22) +# define R500_COLOR1_TEX_OVERRIDE_NO (0 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_0 (1 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_1 (2 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_2 (3 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_3 (4 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_4 (5 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_5 (6 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_6 (7 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_7 (8 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_8_C2 (9 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_9_C3 (10 << 26) + +/* Returns idle status of various G3D block, captured when GA_IDLE written or + * when hard or soft reset asserted. + */ +#define R500_GA_IDLE 0x425c +# define R500_GA_IDLE_PIPE3_Z_IDLE (0 << 0) +# define R500_GA_IDLE_PIPE2_Z_IDLE (0 << 1) +# define R500_GA_IDLE_PIPE3_CD_IDLE (0 << 2) +# define R500_GA_IDLE_PIPE2_CD_IDLE (0 << 3) +# define R500_GA_IDLE_PIPE3_FG_IDLE (0 << 4) +# define R500_GA_IDLE_PIPE2_FG_IDLE (0 << 5) +# define R500_GA_IDLE_PIPE3_US_IDLE (0 << 6) +# define R500_GA_IDLE_PIPE2_US_IDLE (0 << 7) +# define R500_GA_IDLE_PIPE3_SC_IDLE (0 << 8) +# define R500_GA_IDLE_PIPE2_SC_IDLE (0 << 9) +# define R500_GA_IDLE_PIPE3_RS_IDLE (0 << 10) +# define R500_GA_IDLE_PIPE2_RS_IDLE (0 << 11) +# define R500_GA_IDLE_PIPE1_Z_IDLE (0 << 12) +# define R500_GA_IDLE_PIPE0_Z_IDLE (0 << 13) +# define R500_GA_IDLE_PIPE1_CD_IDLE (0 << 14) +# define R500_GA_IDLE_PIPE0_CD_IDLE (0 << 15) +# define R500_GA_IDLE_PIPE1_FG_IDLE (0 << 16) +# define R500_GA_IDLE_PIPE0_FG_IDLE (0 << 17) +# define R500_GA_IDLE_PIPE1_US_IDLE (0 << 18) +# define R500_GA_IDLE_PIPE0_US_IDLE (0 << 19) +# define R500_GA_IDLE_PIPE1_SC_IDLE (0 << 20) +# define R500_GA_IDLE_PIPE0_SC_IDLE (0 << 21) +# define R500_GA_IDLE_PIPE1_RS_IDLE (0 << 22) +# define R500_GA_IDLE_PIPE0_RS_IDLE (0 << 23) +# define R500_GA_IDLE_SU_IDLE (0 << 24) +# define R500_GA_IDLE_GA_IDLE (0 << 25) +# define R500_GA_IDLE_GA_UNIT2_IDLE (0 << 26) + +/* Current value of stipple accumulator. */ +#define R300_GA_LINE_STIPPLE_VALUE 0x4260 + +/* S Texture Coordinate Value for Vertex 0 of Line (stuff textures -- i.e. AA) */ +#define R300_GA_LINE_S0 0x4264 +/* S Texture Coordinate Value for Vertex 1 of Lines (V2 of parallelogram -- stuff textures -- i.e. AA) */ +#define R300_GA_LINE_S1 0x4268 + +/* GA Input fifo high water marks */ +#define R500_GA_FIFO_CNTL 0x4270 +# define R500_GA_FIFO_CNTL_VERTEX_FIFO_MASK 0x00000007 +# define R500_GA_FIFO_CNTL_VERTEX_FIFO_SHIFT 0 +# define R500_GA_FIFO_CNTL_VERTEX_INDEX_MASK 0x00000038 +# define R500_GA_FIFO_CNTL_VERTEX_INDEX_SHIFT 3 +# define R500_GA_FIFO_CNTL_VERTEX_REG_MASK 0x00003fc0 +# define R500_GA_FIFO_CNTL_VERTEX_REG_SHIFT 6 + +/* GA enhance/tweaks */ +#define R300_GA_ENHANCE 0x4274 +# define R300_GA_ENHANCE_DEADLOCK_CNTL_NO_EFFECT (0 << 0) +# define R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL (1 << 0) /* Prevents TCL interface from deadlocking on GA side. */ +# define R300_GA_ENHANCE_FASTSYNC_CNTL_NO_EFFECT (0 << 1) +# define R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE (1 << 1) /* Enables high-performance register/primitive switching. */ +# define R500_GA_ENHANCE_REG_READWRITE_NO_EFFECT (0 << 2) /* R520+ only */ +# define R500_GA_ENHANCE_REG_READWRITE_ENABLE (1 << 2) /* R520+ only, Enables GA support of simultaneous register reads and writes. */ +# define R500_GA_ENHANCE_REG_NOSTALL_NO_EFFECT (0 << 3) +# define R500_GA_ENHANCE_REG_NOSTALL_ENABLE (1 << 3) /* Enables GA support of no-stall reads for register read back. */ + +#define R300_GA_COLOR_CONTROL 0x4278 +# define R300_GA_COLOR_CONTROL_RGB0_SHADING_SOLID (0 << 0) +# define R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT (1 << 0) +# define R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD (2 << 0) +# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_SOLID (0 << 2) +# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT (1 << 2) +# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD (2 << 2) +# define R300_GA_COLOR_CONTROL_RGB1_SHADING_SOLID (0 << 4) +# define R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT (1 << 4) +# define R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD (2 << 4) +# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_SOLID (0 << 6) +# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_FLAT (1 << 6) +# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD (2 << 6) +# define R300_GA_COLOR_CONTROL_RGB2_SHADING_SOLID (0 << 8) +# define R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT (1 << 8) +# define R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD (2 << 8) +# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_SOLID (0 << 10) +# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT (1 << 10) +# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD (2 << 10) +# define R300_GA_COLOR_CONTROL_RGB3_SHADING_SOLID (0 << 12) +# define R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT (1 << 12) +# define R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD (2 << 12) +# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_SOLID (0 << 14) +# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_FLAT (1 << 14) +# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD (2 << 14) +# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST (0 << 16) +# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND (1 << 16) +# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_THIRD (2 << 16) +# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST (3 << 16) + +/** TODO: might be candidate for removal */ +# define R300_RE_SHADE_MODEL_SMOOTH ( \ + R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) +/** TODO: might be candidate for removal, the GOURAUD stuff also looks buggy to me */ +# define R300_RE_SHADE_MODEL_FLAT ( \ + R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT | \ + R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT | \ + R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) + +/* Specifies red & green components of fill color -- S312 format -- Backwards comp. */ +#define R300_GA_SOLID_RG 0x427c +# define GA_SOLID_RG_COLOR_GREEN_SHIFT 0 +# define GA_SOLID_RG_COLOR_GREEN_MASK 0x0000ffff +# define GA_SOLID_RG_COLOR_RED_SHIFT 16 +# define GA_SOLID_RG_COLOR_RED_MASK 0xffff0000 +/* Specifies blue & alpha components of fill color -- S312 format -- Backwards comp. */ +#define R300_GA_SOLID_BA 0x4280 +# define GA_SOLID_BA_COLOR_ALPHA_SHIFT 0 +# define GA_SOLID_BA_COLOR_ALPHA_MASK 0x0000ffff +# define GA_SOLID_BA_COLOR_BLUE_SHIFT 16 +# define GA_SOLID_BA_COLOR_BLUE_MASK 0xffff0000 + +/* Polygon Mode + * Dangerous + */ +#define R300_GA_POLY_MODE 0x4288 +# define R300_GA_POLY_MODE_DISABLE (0 << 0) +# define R300_GA_POLY_MODE_DUAL (1 << 0) /* send 2 sets of 3 polys with specified poly type */ +/* reserved */ +# define R300_GA_POLY_MODE_FRONT_PTYPE_POINT (0 << 4) +# define R300_GA_POLY_MODE_FRONT_PTYPE_LINE (1 << 4) +# define R300_GA_POLY_MODE_FRONT_PTYPE_TRI (2 << 4) +/* reserved */ +# define R300_GA_POLY_MODE_BACK_PTYPE_POINT (0 << 7) +# define R300_GA_POLY_MODE_BACK_PTYPE_LINE (1 << 7) +# define R300_GA_POLY_MODE_BACK_PTYPE_TRI (2 << 7) +/* reserved */ + +/* Specifies the rouding mode for geometry & color SPFP to FP conversions. */ +#define R300_GA_ROUND_MODE 0x428c +# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_TRUNC (0 << 0) +# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST (1 << 0) +# define R300_GA_ROUND_MODE_COLOR_ROUND_TRUNC (0 << 2) +# define R300_GA_ROUND_MODE_COLOR_ROUND_NEAREST (1 << 2) +# define R300_GA_ROUND_MODE_RGB_CLAMP_RGB (0 << 4) +# define R300_GA_ROUND_MODE_RGB_CLAMP_FP20 (1 << 4) +# define R300_GA_ROUND_MODE_ALPHA_CLAMP_RGB (0 << 5) +# define R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20 (1 << 5) +# define R500_GA_ROUND_MODE_GEOMETRY_MASK_SHIFT 6 +# define R500_GA_ROUND_MODE_GEOMETRY_MASK_MASK 0x000003c0 + +/* Specifies x & y offsets for vertex data after conversion to FP. + * Offsets are in S15 format (subpixels -- 1/12 or 1/16, even in 8b + * subprecision). + */ +#define R300_GA_OFFSET 0x4290 +# define R300_GA_OFFSET_X_OFFSET_SHIFT 0 +# define R300_GA_OFFSET_X_OFFSET_MASK 0x0000ffff +# define R300_GA_OFFSET_Y_OFFSET_SHIFT 16 +# define R300_GA_OFFSET_Y_OFFSET_MASK 0xffff0000 + +/* Specifies the scale to apply to fog. */ +#define R300_GA_FOG_SCALE 0x4294 +/* Specifies the offset to apply to fog. */ +#define R300_GA_FOG_OFFSET 0x4298 +/* Specifies number of cycles to assert reset, and also causes RB3D soft reset to assert. */ +#define R300_GA_SOFT_RESET 0x429c + +/* Not sure why there are duplicate of factor and constant values. + * My best guess so far is that there are seperate zbiases for test and write. + * Ordering might be wrong. + * Some of the tests indicate that fgl has a fallback implementation of zbias + * via pixel shaders. + */ +#define R300_SU_TEX_WRAP 0x42A0 +#define R300_SU_POLY_OFFSET_FRONT_SCALE 0x42A4 +#define R300_SU_POLY_OFFSET_FRONT_OFFSET 0x42A8 +#define R300_SU_POLY_OFFSET_BACK_SCALE 0x42AC +#define R300_SU_POLY_OFFSET_BACK_OFFSET 0x42B0 + +/* This register needs to be set to (1<<1) for RV350 to correctly + * perform depth test (see --vb-triangles in r300_demo) + * Don't know about other chips. - Vladimir + * This is set to 3 when GL_POLYGON_OFFSET_FILL is on. + * My guess is that there are two bits for each zbias primitive + * (FILL, LINE, POINT). + * One to enable depth test and one for depth write. + * Yet this doesnt explain why depth writes work ... + */ +#define R300_SU_POLY_OFFSET_ENABLE 0x42B4 +# define R300_FRONT_ENABLE (1 << 0) +# define R300_BACK_ENABLE (1 << 1) +# define R300_PARA_ENABLE (1 << 2) + +#define R300_SU_CULL_MODE 0x42B8 +# define R300_CULL_FRONT (1 << 0) +# define R300_CULL_BACK (1 << 1) +# define R300_FRONT_FACE_CCW (0 << 2) +# define R300_FRONT_FACE_CW (1 << 2) + +/* SU Depth Scale value */ +#define R300_SU_DEPTH_SCALE 0x42c0 +/* SU Depth Offset value */ +#define R300_SU_DEPTH_OFFSET 0x42c4 + + +/* BEGIN: Rasterization / Interpolators - many guesses */ + +/* + * TC_CNT is the number of incoming texture coordinate sets (i.e. it depends + * on the vertex program, *not* the fragment program) + */ +#define R300_RS_COUNT 0x4300 +# define R300_IT_COUNT_SHIFT 0 +# define R300_IT_COUNT_MASK 0x0000007f +# define R300_IC_COUNT_SHIFT 7 +# define R300_IC_COUNT_MASK 0x00000780 +# define R300_W_ADDR_SHIFT 12 +# define R300_W_ADDR_MASK 0x0003f000 +# define R300_HIRES_DIS (0 << 18) +# define R300_HIRES_EN (1 << 18) + +#define R300_RS_INST_COUNT 0x4304 +# define R300_RS_INST_COUNT_SHIFT 0 +# define R300_RS_INST_COUNT_MASK 0x0000000f +# define R300_RS_TX_OFFSET_SHIFT 5 +# define R300_RS_TX_OFFSET_MASK 0x000000e0 + +/* gap */ + +/* Only used for texture coordinates. + * Use the source field to route texture coordinate input from the + * vertex program to the desired interpolator. Note that the source + * field is relative to the outputs the vertex program *actually* + * writes. If a vertex program only writes texcoord[1], this will + * be source index 0. + * Set INTERP_USED on all interpolators that produce data used by + * the fragment program. INTERP_USED looks like a swizzling mask, + * but I haven't seen it used that way. + * + * Note: The _UNKNOWN constants are always set in their respective + * register. I don't know if this is necessary. + */ +#define R300_RS_IP_0 0x4310 +#define R300_RS_IP_1 0x4314 +#define R300_RS_IP_2 0x4318 +#define R300_RS_IP_3 0x431C +# define R300_RS_INTERP_SRC_SHIFT 2 /* TODO: check for removal */ +# define R300_RS_INTERP_SRC_MASK (7 << 2) /* TODO: check for removal */ +# define R300_RS_TEX_PTR(x) ((x) << 0) +# define R300_RS_COL_PTR(x) ((x) << 6) +# define R300_RS_COL_FMT(x) ((x) << 9) +# define R300_RS_COL_FMT_RGBA 0 +# define R300_RS_COL_FMT_RGB0 1 +# define R300_RS_COL_FMT_RGB1 2 +# define R300_RS_COL_FMT_000A 4 +# define R300_RS_COL_FMT_0000 5 +# define R300_RS_COL_FMT_0001 6 +# define R300_RS_COL_FMT_111A 8 +# define R300_RS_COL_FMT_1110 9 +# define R300_RS_COL_FMT_1111 10 +# define R300_RS_SEL_S(x) ((x) << 13) +# define R300_RS_SEL_T(x) ((x) << 16) +# define R300_RS_SEL_R(x) ((x) << 19) +# define R300_RS_SEL_Q(x) ((x) << 22) +# define R300_RS_SEL_C0 0 +# define R300_RS_SEL_C1 1 +# define R300_RS_SEL_C2 2 +# define R300_RS_SEL_C3 3 +# define R300_RS_SEL_K0 4 +# define R300_RS_SEL_K1 5 + + +/* */ +#define R500_RS_INST_0 0x4320 +#define R500_RS_INST_1 0x4324 +#define R500_RS_INST_2 0x4328 +#define R500_RS_INST_3 0x432c +#define R500_RS_INST_4 0x4330 +#define R500_RS_INST_5 0x4334 +#define R500_RS_INST_6 0x4338 +#define R500_RS_INST_7 0x433c +#define R500_RS_INST_8 0x4340 +#define R500_RS_INST_9 0x4344 +#define R500_RS_INST_10 0x4348 +#define R500_RS_INST_11 0x434c +#define R500_RS_INST_12 0x4350 +#define R500_RS_INST_13 0x4354 +#define R500_RS_INST_14 0x4358 +#define R500_RS_INST_15 0x435c +#define R500_RS_INST_TEX_ID_SHIFT 0 +#define R500_RS_INST_TEX_CN_WRITE (1 << 4) +#define R500_RS_INST_TEX_ADDR_SHIFT 5 +#define R500_RS_INST_COL_ID_SHIFT 12 +#define R500_RS_INST_COL_CN_NO_WRITE (0 << 16) +#define R500_RS_INST_COL_CN_WRITE (1 << 16) +#define R500_RS_INST_COL_CN_WRITE_FBUFFER (2 << 16) +#define R500_RS_INST_COL_CN_WRITE_BACKFACE (3 << 16) +#define R500_RS_INST_COL_ADDR_SHIFT 18 +#define R500_RS_INST_TEX_ADJ (1 << 25) +#define R500_RS_INST_W_CN (1 << 26) +#define R500_RS_INST_TEX_ID(x) ((x) << R500_RS_INST_TEX_ID_SHIFT) +#define R500_RS_INST_TEX_ADDR(x) ((x) << R500_RS_INST_TEX_ADDR_SHIFT) +#define R500_RS_INST_COL_ID(x) ((x) << R500_RS_INST_COL_ID_SHIFT) +#define R500_RS_INST_COL_ADDR(x) ((x) << R500_RS_INST_COL_ADDR_SHIFT) + +/* These DWORDs control how vertex data is routed into fragment program + * registers, after interpolators. + */ +#define R300_RS_INST_0 0x4330 +#define R300_RS_INST_1 0x4334 +#define R300_RS_INST_2 0x4338 +#define R300_RS_INST_3 0x433C /* GUESS */ +#define R300_RS_INST_4 0x4340 /* GUESS */ +#define R300_RS_INST_5 0x4344 /* GUESS */ +#define R300_RS_INST_6 0x4348 /* GUESS */ +#define R300_RS_INST_7 0x434C /* GUESS */ +# define R300_RS_INST_TEX_ID(x) ((x) << 0) +# define R300_RS_INST_TEX_CN_WRITE (1 << 3) +# define R300_RS_INST_TEX_ADDR_SHIFT 6 +# define R300_RS_INST_TEX_ADDR(x) ((x) << R300_RS_INST_TEX_ADDR_SHIFT) +# define R300_RS_INST_COL_ID(x) ((x) << 11) +# define R300_RS_INST_COL_CN_WRITE (1 << 14) +# define R300_RS_INST_COL_ADDR_SHIFT 17 +# define R300_RS_INST_COL_ADDR(x) ((x) << R300_RS_INST_COL_ADDR_SHIFT) +# define R300_RS_INST_TEX_ADJ (1 << 22) +# define R300_RS_COL_BIAS_UNUSED_SHIFT 23 + +/* END: Rasterization / Interpolators - many guesses */ + +/* Hierarchical Z Enable */ +#define R300_SC_HYPERZ 0x43a4 +# define R300_SC_HYPERZ_DISABLE (0 << 0) +# define R300_SC_HYPERZ_ENABLE (1 << 0) +# define R300_SC_HYPERZ_MIN (0 << 1) +# define R300_SC_HYPERZ_MAX (1 << 1) +# define R300_SC_HYPERZ_ADJ_256 (0 << 2) +# define R300_SC_HYPERZ_ADJ_128 (1 << 2) +# define R300_SC_HYPERZ_ADJ_64 (2 << 2) +# define R300_SC_HYPERZ_ADJ_32 (3 << 2) +# define R300_SC_HYPERZ_ADJ_16 (4 << 2) +# define R300_SC_HYPERZ_ADJ_8 (5 << 2) +# define R300_SC_HYPERZ_ADJ_4 (6 << 2) +# define R300_SC_HYPERZ_ADJ_2 (7 << 2) +# define R300_SC_HYPERZ_HZ_Z0MIN_NO (0 << 5) +# define R300_SC_HYPERZ_HZ_Z0MIN (1 << 5) +# define R300_SC_HYPERZ_HZ_Z0MAX_NO (0 << 6) +# define R300_SC_HYPERZ_HZ_Z0MAX (1 << 6) + +#define R300_SC_EDGERULE 0x43a8 + +/* BEGIN: Scissors and cliprects */ + +/* There are four clipping rectangles. Their corner coordinates are inclusive. + * Every pixel is assigned a number from 0 and 15 by setting bits 0-3 depending + * on whether the pixel is inside cliprects 0-3, respectively. For example, + * if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned + * the number 3 (binary 0011). + * Iff the bit corresponding to the pixel's number in RE_CLIPRECT_CNTL is set, + * the pixel is rasterized. + * + * In addition to this, there is a scissors rectangle. Only pixels inside the + * scissors rectangle are drawn. (coordinates are inclusive) + * + * For some reason, the top-left corner of the framebuffer is at (1440, 1440) + * for the purpose of clipping and scissors. + */ +#define R300_SC_CLIPRECT_TL_0 0x43B0 +#define R300_SC_CLIPRECT_BR_0 0x43B4 +#define R300_SC_CLIPRECT_TL_1 0x43B8 +#define R300_SC_CLIPRECT_BR_1 0x43BC +#define R300_SC_CLIPRECT_TL_2 0x43C0 +#define R300_SC_CLIPRECT_BR_2 0x43C4 +#define R300_SC_CLIPRECT_TL_3 0x43C8 +#define R300_SC_CLIPRECT_BR_3 0x43CC +# define R300_CLIPRECT_OFFSET 1440 +# define R300_CLIPRECT_MASK 0x1FFF +# define R300_CLIPRECT_X_SHIFT 0 +# define R300_CLIPRECT_X_MASK (0x1FFF << 0) +# define R300_CLIPRECT_Y_SHIFT 13 +# define R300_CLIPRECT_Y_MASK (0x1FFF << 13) +#define R300_SC_CLIP_RULE 0x43D0 +# define R300_CLIP_OUT (1 << 0) +# define R300_CLIP_0 (1 << 1) +# define R300_CLIP_1 (1 << 2) +# define R300_CLIP_10 (1 << 3) +# define R300_CLIP_2 (1 << 4) +# define R300_CLIP_20 (1 << 5) +# define R300_CLIP_21 (1 << 6) +# define R300_CLIP_210 (1 << 7) +# define R300_CLIP_3 (1 << 8) +# define R300_CLIP_30 (1 << 9) +# define R300_CLIP_31 (1 << 10) +# define R300_CLIP_310 (1 << 11) +# define R300_CLIP_32 (1 << 12) +# define R300_CLIP_320 (1 << 13) +# define R300_CLIP_321 (1 << 14) +# define R300_CLIP_3210 (1 << 15) + +/* gap */ + +#define R300_SC_SCISSORS_TL 0x43E0 +#define R300_SC_SCISSORS_BR 0x43E4 +# define R300_SCISSORS_OFFSET 1440 +# define R300_SCISSORS_X_SHIFT 0 +# define R300_SCISSORS_X_MASK (0x1FFF << 0) +# define R300_SCISSORS_Y_SHIFT 13 +# define R300_SCISSORS_Y_MASK (0x1FFF << 13) + +/* Screen door sample mask */ +#define R300_SC_SCREENDOOR 0x43e8 + +/* END: Scissors and cliprects */ + +/* BEGIN: Texture specification */ + +/* + * The texture specification dwords are grouped by meaning and not by texture + * unit. This means that e.g. the offset for texture image unit N is found in + * register TX_OFFSET_0 + (4*N) + */ +#define R300_TX_FILTER0_0 0x4400 +#define R300_TX_FILTER0_1 0x4404 +#define R300_TX_FILTER0_2 0x4408 +#define R300_TX_FILTER0_3 0x440c +#define R300_TX_FILTER0_4 0x4410 +#define R300_TX_FILTER0_5 0x4414 +#define R300_TX_FILTER0_6 0x4418 +#define R300_TX_FILTER0_7 0x441c +#define R300_TX_FILTER0_8 0x4420 +#define R300_TX_FILTER0_9 0x4424 +#define R300_TX_FILTER0_10 0x4428 +#define R300_TX_FILTER0_11 0x442c +#define R300_TX_FILTER0_12 0x4430 +#define R300_TX_FILTER0_13 0x4434 +#define R300_TX_FILTER0_14 0x4438 +#define R300_TX_FILTER0_15 0x443c +# define R300_TX_REPEAT 0 +# define R300_TX_MIRRORED 1 +# define R300_TX_CLAMP_TO_EDGE 2 +# define R300_TX_MIRROR_ONCE_TO_EDGE 3 +# define R300_TX_CLAMP 4 +# define R300_TX_MIRROR_ONCE 5 +# define R300_TX_CLAMP_TO_BORDER 6 +# define R300_TX_MIRROR_ONCE_TO_BORDER 7 +# define R300_TX_WRAP_S_SHIFT 0 +# define R300_TX_WRAP_S_MASK (7 << 0) +# define R300_TX_WRAP_T_SHIFT 3 +# define R300_TX_WRAP_T_MASK (7 << 3) +# define R300_TX_WRAP_R_SHIFT 6 +# define R300_TX_WRAP_R_MASK (7 << 6) +# define R300_TX_MAG_FILTER_4 (0 << 9) +# define R300_TX_MAG_FILTER_NEAREST (1 << 9) +# define R300_TX_MAG_FILTER_LINEAR (2 << 9) +# define R300_TX_MAG_FILTER_ANISO (3 << 9) +# define R300_TX_MAG_FILTER_MASK (3 << 9) +# define R300_TX_MIN_FILTER_NEAREST (1 << 11) +# define R300_TX_MIN_FILTER_LINEAR (2 << 11) +# define R300_TX_MIN_FILTER_ANISO (3 << 11) +# define R300_TX_MIN_FILTER_MASK (3 << 11) +# define R300_TX_MIN_FILTER_MIP_NONE (0 << 13) +# define R300_TX_MIN_FILTER_MIP_NEAREST (1 << 13) +# define R300_TX_MIN_FILTER_MIP_LINEAR (2 << 13) +# define R300_TX_MIN_FILTER_MIP_MASK (3 << 13) +# define R300_TX_MAX_ANISO_1_TO_1 (0 << 21) +# define R300_TX_MAX_ANISO_2_TO_1 (1 << 21) +# define R300_TX_MAX_ANISO_4_TO_1 (2 << 21) +# define R300_TX_MAX_ANISO_8_TO_1 (3 << 21) +# define R300_TX_MAX_ANISO_16_TO_1 (4 << 21) +# define R300_TX_MAX_ANISO_MASK (7 << 21) + +#define R300_TX_FILTER1_0 0x4440 +# define R300_CHROMA_KEY_MODE_DISABLE 0 +# define R300_CHROMA_KEY_FORCE 1 +# define R300_CHROMA_KEY_BLEND 2 +# define R300_MC_ROUND_NORMAL (0<<2) +# define R300_MC_ROUND_MPEG4 (1<<2) +# define R300_LOD_BIAS_SHIFT 3 +# define R300_LOD_BIAS_MASK 0x1ff8 +# define R300_EDGE_ANISO_EDGE_DIAG (0<<13) +# define R300_EDGE_ANISO_EDGE_ONLY (1<<13) +# define R300_MC_COORD_TRUNCATE_DISABLE (0<<14) +# define R300_MC_COORD_TRUNCATE_MPEG (1<<14) +# define R300_TX_TRI_PERF_0_8 (0<<15) +# define R300_TX_TRI_PERF_1_8 (1<<15) +# define R300_TX_TRI_PERF_1_4 (2<<15) +# define R300_TX_TRI_PERF_3_8 (3<<15) +# define R300_ANISO_THRESHOLD_MASK (7<<17) + +# define R500_MACRO_SWITCH (1<<22) +# define R500_BORDER_FIX (1<<31) + +#define R300_TX_SIZE_0 0x4480 +# define R300_TX_WIDTHMASK_SHIFT 0 +# define R300_TX_WIDTHMASK_MASK (2047 << 0) +# define R300_TX_HEIGHTMASK_SHIFT 11 +# define R300_TX_HEIGHTMASK_MASK (2047 << 11) +# define R300_TX_DEPTHMASK_SHIFT 22 +# define R300_TX_DEPTHMASK_MASK (0xf << 22) +# define R300_TX_MAX_MIP_LEVEL_SHIFT 26 +# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 26) +# define R300_TX_SIZE_PROJECTED (1<<30) +# define R300_TX_SIZE_TXPITCH_EN (1<<31) +#define R300_TX_FORMAT_0 0x44C0 + /* The interpretation of the format word by Wladimir van der Laan */ + /* The X, Y, Z and W refer to the layout of the components. + They are given meanings as R, G, B and Alpha by the swizzle + specification */ +# define R300_TX_FORMAT_X8 0x0 +# define R500_TX_FORMAT_X1 0x0 // bit set in format 2 +# define R300_TX_FORMAT_X16 0x1 +# define R500_TX_FORMAT_X1_REV 0x0 // bit set in format 2 +# define R300_TX_FORMAT_Y4X4 0x2 +# define R300_TX_FORMAT_Y8X8 0x3 +# define R300_TX_FORMAT_Y16X16 0x4 +# define R300_TX_FORMAT_Z3Y3X2 0x5 +# define R300_TX_FORMAT_Z5Y6X5 0x6 +# define R300_TX_FORMAT_Z6Y5X5 0x7 +# define R300_TX_FORMAT_Z11Y11X10 0x8 +# define R300_TX_FORMAT_Z10Y11X11 0x9 +# define R300_TX_FORMAT_W4Z4Y4X4 0xA +# define R300_TX_FORMAT_W1Z5Y5X5 0xB +# define R300_TX_FORMAT_W8Z8Y8X8 0xC +# define R300_TX_FORMAT_W2Z10Y10X10 0xD +# define R300_TX_FORMAT_W16Z16Y16X16 0xE +# define R300_TX_FORMAT_DXT1 0xF +# define R300_TX_FORMAT_DXT3 0x10 +# define R300_TX_FORMAT_DXT5 0x11 +# define R300_TX_FORMAT_D3DMFT_CxV8U8 0x12 /* no swizzle */ +# define R300_TX_FORMAT_A8R8G8B8 0x13 /* no swizzle */ +# define R300_TX_FORMAT_B8G8_B8G8 0x14 /* no swizzle */ +# define R300_TX_FORMAT_G8R8_G8B8 0x15 /* no swizzle */ + + /* These two values are wrong, but they're the only values that + * produce any even vaguely correct results. Can r300 only do 16-bit + * depth textures? + */ +# define R300_TX_FORMAT_X24_Y8 0x1e +# define R300_TX_FORMAT_X32 0x1e + + /* 0x16 - some 16 bit green format.. ?? */ +# define R300_TX_FORMAT_3D (1 << 25) +# define R300_TX_FORMAT_CUBIC_MAP (2 << 25) + + /* gap */ + /* Floating point formats */ + /* Note - hardware supports both 16 and 32 bit floating point */ +# define R300_TX_FORMAT_FL_I16 0x18 +# define R300_TX_FORMAT_FL_I16A16 0x19 +# define R300_TX_FORMAT_FL_R16G16B16A16 0x1A +# define R300_TX_FORMAT_FL_I32 0x1B +# define R300_TX_FORMAT_FL_I32A32 0x1C +# define R300_TX_FORMAT_FL_R32G32B32A32 0x1D + /* alpha modes, convenience mostly */ + /* if you have alpha, pick constant appropriate to the + number of channels (1 for I8, 2 for I8A8, 4 for R8G8B8A8, etc */ +# define R300_TX_FORMAT_ALPHA_1CH 0x000 +# define R300_TX_FORMAT_ALPHA_2CH 0x200 +# define R300_TX_FORMAT_ALPHA_4CH 0x600 +# define R300_TX_FORMAT_ALPHA_NONE 0xA00 + /* Swizzling */ + /* constants */ +# define R300_TX_FORMAT_X 0 +# define R300_TX_FORMAT_Y 1 +# define R300_TX_FORMAT_Z 2 +# define R300_TX_FORMAT_W 3 +# define R300_TX_FORMAT_ZERO 4 +# define R300_TX_FORMAT_ONE 5 + /* 2.0*Z, everything above 1.0 is set to 0.0 */ +# define R300_TX_FORMAT_CUT_Z 6 + /* 2.0*W, everything above 1.0 is set to 0.0 */ +# define R300_TX_FORMAT_CUT_W 7 + +# define R300_TX_FORMAT_B_SHIFT 18 +# define R300_TX_FORMAT_G_SHIFT 15 +# define R300_TX_FORMAT_R_SHIFT 12 +# define R300_TX_FORMAT_A_SHIFT 9 + /* Convenience macro to take care of layout and swizzling */ +# define R300_EASY_TX_FORMAT(B, G, R, A, FMT) ( \ + ((R300_TX_FORMAT_##B)<<R300_TX_FORMAT_B_SHIFT) \ + | ((R300_TX_FORMAT_##G)<<R300_TX_FORMAT_G_SHIFT) \ + | ((R300_TX_FORMAT_##R)<<R300_TX_FORMAT_R_SHIFT) \ + | ((R300_TX_FORMAT_##A)<<R300_TX_FORMAT_A_SHIFT) \ + | (R300_TX_FORMAT_##FMT) \ + ) + /* These can be ORed with result of R300_EASY_TX_FORMAT() + We don't really know what they do. Take values from a + constant color ? */ +# define R300_TX_FORMAT_CONST_X (1<<5) +# define R300_TX_FORMAT_CONST_Y (2<<5) +# define R300_TX_FORMAT_CONST_Z (4<<5) +# define R300_TX_FORMAT_CONST_W (8<<5) + +# define R300_TX_FORMAT_YUV_MODE 0x00800000 + +#define R300_TX_FORMAT2_0 0x4500 /* obvious missing in gap */ +# define R300_TX_PITCHMASK_SHIFT 0 +# define R300_TX_PITCHMASK_MASK (2047 << 0) +# define R500_TXFORMAT_MSB (1 << 14) +# define R500_TXWIDTH_BIT11 (1 << 15) +# define R500_TXHEIGHT_BIT11 (1 << 16) +# define R500_POW2FIX2FLT (1 << 17) +# define R500_SEL_FILTER4_TC0 (0 << 18) +# define R500_SEL_FILTER4_TC1 (1 << 18) +# define R500_SEL_FILTER4_TC2 (2 << 18) +# define R500_SEL_FILTER4_TC3 (3 << 18) + +#define R300_TX_OFFSET_0 0x4540 +#define R300_TX_OFFSET_1 0x4544 +#define R300_TX_OFFSET_2 0x4548 +#define R300_TX_OFFSET_3 0x454C +#define R300_TX_OFFSET_4 0x4550 +#define R300_TX_OFFSET_5 0x4554 +#define R300_TX_OFFSET_6 0x4558 +#define R300_TX_OFFSET_7 0x455C + /* BEGIN: Guess from R200 */ +# define R300_TXO_ENDIAN_NO_SWAP (0 << 0) +# define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0) +# define R300_TXO_ENDIAN_WORD_SWAP (2 << 0) +# define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0) +# define R300_TXO_MACRO_TILE (1 << 2) +# define R300_TXO_MICRO_TILE_LINEAR (0 << 3) +# define R300_TXO_MICRO_TILE (1 << 3) +# define R300_TXO_MICRO_TILE_SQUARE (2 << 3) +# define R300_TXO_OFFSET_MASK 0xffffffe0 +# define R300_TXO_OFFSET_SHIFT 5 + /* END: Guess from R200 */ + +/* 32 bit chroma key */ +#define R300_TX_CHROMA_KEY_0 0x4580 +#define R300_TX_CHROMA_KEY_1 0x4584 +#define R300_TX_CHROMA_KEY_2 0x4588 +#define R300_TX_CHROMA_KEY_3 0x458c +#define R300_TX_CHROMA_KEY_4 0x4590 +#define R300_TX_CHROMA_KEY_5 0x4594 +#define R300_TX_CHROMA_KEY_6 0x4598 +#define R300_TX_CHROMA_KEY_7 0x459c +#define R300_TX_CHROMA_KEY_8 0x45a0 +#define R300_TX_CHROMA_KEY_9 0x45a4 +#define R300_TX_CHROMA_KEY_10 0x45a8 +#define R300_TX_CHROMA_KEY_11 0x45ac +#define R300_TX_CHROMA_KEY_12 0x45b0 +#define R300_TX_CHROMA_KEY_13 0x45b4 +#define R300_TX_CHROMA_KEY_14 0x45b8 +#define R300_TX_CHROMA_KEY_15 0x45bc +/* ff00ff00 == { 0, 1.0, 0, 1.0 } */ + +/* Border Color */ +#define R300_TX_BORDER_COLOR_0 0x45c0 +#define R300_TX_BORDER_COLOR_1 0x45c4 +#define R300_TX_BORDER_COLOR_2 0x45c8 +#define R300_TX_BORDER_COLOR_3 0x45cc +#define R300_TX_BORDER_COLOR_4 0x45d0 +#define R300_TX_BORDER_COLOR_5 0x45d4 +#define R300_TX_BORDER_COLOR_6 0x45d8 +#define R300_TX_BORDER_COLOR_7 0x45dc +#define R300_TX_BORDER_COLOR_8 0x45e0 +#define R300_TX_BORDER_COLOR_9 0x45e4 +#define R300_TX_BORDER_COLOR_10 0x45e8 +#define R300_TX_BORDER_COLOR_11 0x45ec +#define R300_TX_BORDER_COLOR_12 0x45f0 +#define R300_TX_BORDER_COLOR_13 0x45f4 +#define R300_TX_BORDER_COLOR_14 0x45f8 +#define R300_TX_BORDER_COLOR_15 0x45fc + + +/* END: Texture specification */ + +/* BEGIN: Fragment program instruction set */ + +/* Fragment programs are written directly into register space. + * There are separate instruction streams for texture instructions and ALU + * instructions. + * In order to synchronize these streams, the program is divided into up + * to 4 nodes. Each node begins with a number of TEX operations, followed + * by a number of ALU operations. + * The first node can have zero TEX ops, all subsequent nodes must have at + * least + * one TEX ops. + * All nodes must have at least one ALU op. + * + * The index of the last node is stored in PFS_CNTL_0: A value of 0 means + * 1 node, a value of 3 means 4 nodes. + * The total amount of instructions is defined in PFS_CNTL_2. The offsets are + * offsets into the respective instruction streams, while *_END points to the + * last instruction relative to this offset. + */ +#define R300_US_CONFIG 0x4600 +# define R300_PFS_CNTL_LAST_NODES_SHIFT 0 +# define R300_PFS_CNTL_LAST_NODES_MASK (3 << 0) +# define R300_PFS_CNTL_FIRST_NODE_HAS_TEX (1 << 3) +#define R300_US_PIXSIZE 0x4604 +/* There is an unshifted value here which has so far always been equal to the + * index of the highest used temporary register. + */ +#define R300_US_CODE_OFFSET 0x4608 +# define R300_PFS_CNTL_ALU_OFFSET_SHIFT 0 +# define R300_PFS_CNTL_ALU_OFFSET_MASK (63 << 0) +# define R300_PFS_CNTL_ALU_END_SHIFT 6 +# define R300_PFS_CNTL_ALU_END_MASK (63 << 6) +# define R300_PFS_CNTL_TEX_OFFSET_SHIFT 13 +# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 13) +# define R300_PFS_CNTL_TEX_END_SHIFT 18 +# define R300_PFS_CNTL_TEX_END_MASK (31 << 18) + +/* gap */ + +/* Nodes are stored backwards. The last active node is always stored in + * PFS_NODE_3. + * Example: In a 2-node program, NODE_0 and NODE_1 are set to 0. The + * first node is stored in NODE_2, the second node is stored in NODE_3. + * + * Offsets are relative to the master offset from PFS_CNTL_2. + */ +#define R300_US_CODE_ADDR_0 0x4610 +#define R300_US_CODE_ADDR_1 0x4614 +#define R300_US_CODE_ADDR_2 0x4618 +#define R300_US_CODE_ADDR_3 0x461C +# define R300_ALU_START_SHIFT 0 +# define R300_ALU_START_MASK (63 << 0) +# define R300_ALU_SIZE_SHIFT 6 +# define R300_ALU_SIZE_MASK (63 << 6) +# define R300_TEX_START_SHIFT 12 +# define R300_TEX_START_MASK (31 << 12) +# define R300_TEX_SIZE_SHIFT 17 +# define R300_TEX_SIZE_MASK (31 << 17) +# define R300_RGBA_OUT (1 << 22) +# define R300_W_OUT (1 << 23) + +/* TEX + * As far as I can tell, texture instructions cannot write into output + * registers directly. A subsequent ALU instruction is always necessary, + * even if it's just MAD o0, r0, 1, 0 + */ +#define R300_US_TEX_INST_0 0x4620 +# define R300_SRC_ADDR_SHIFT 0 +# define R300_SRC_ADDR_MASK (31 << 0) +# define R300_DST_ADDR_SHIFT 6 +# define R300_DST_ADDR_MASK (31 << 6) +# define R300_TEX_ID_SHIFT 11 +# define R300_TEX_ID_MASK (15 << 11) +# define R300_TEX_INST_SHIFT 15 +# define R300_TEX_OP_NOP 0 +# define R300_TEX_OP_LD 1 +# define R300_TEX_OP_KIL 2 +# define R300_TEX_OP_TXP 3 +# define R300_TEX_OP_TXB 4 +# define R300_TEX_INST_MASK (7 << 15) + +/* Output format from the unfied shader */ +#define R300_US_OUT_FMT 0x46A4 +# define R300_US_OUT_FMT_C4_8 (0 << 0) +# define R300_US_OUT_FMT_C4_10 (1 << 0) +# define R300_US_OUT_FMT_C4_10_GAMMA (2 << 0) +# define R300_US_OUT_FMT_C_16 (3 << 0) +# define R300_US_OUT_FMT_C2_16 (4 << 0) +# define R300_US_OUT_FMT_C4_16 (5 << 0) +# define R300_US_OUT_FMT_C_16_MPEG (6 << 0) +# define R300_US_OUT_FMT_C2_16_MPEG (7 << 0) +# define R300_US_OUT_FMT_C2_4 (8 << 0) +# define R300_US_OUT_FMT_C_3_3_2 (9 << 0) +# define R300_US_OUT_FMT_C_6_5_6 (10 << 0) +# define R300_US_OUT_FMT_C_11_11_10 (11 << 0) +# define R300_US_OUT_FMT_C_10_11_11 (12 << 0) +# define R300_US_OUT_FMT_C_2_10_10_10 (13 << 0) +/* reserved */ +# define R300_US_OUT_FMT_UNUSED (15 << 0) +# define R300_US_OUT_FMT_C_16_FP (16 << 0) +# define R300_US_OUT_FMT_C2_16_FP (17 << 0) +# define R300_US_OUT_FMT_C4_16_FP (18 << 0) +# define R300_US_OUT_FMT_C_32_FP (19 << 0) +# define R300_US_OUT_FMT_C2_32_FP (20 << 0) +# define R300_US_OUT_FMT_C4_32_FP (20 << 0) + +/* ALU + * The ALU instructions register blocks are enumerated according to the order + * in which fglrx. I assume there is space for 64 instructions, since + * each block has space for a maximum of 64 DWORDs, and this matches reported + * native limits. + * + * The basic functional block seems to be one MAD for each color and alpha, + * and an adder that adds all components after the MUL. + * - ADD, MUL, MAD etc.: use MAD with appropriate neutral operands + * - DP4: Use OUTC_DP4, OUTA_DP4 + * - DP3: Use OUTC_DP3, OUTA_DP4, appropriate alpha operands + * - DPH: Use OUTC_DP4, OUTA_DP4, appropriate alpha operands + * - CMPH: If ARG2 > 0.5, return ARG0, else return ARG1 + * - CMP: If ARG2 < 0, return ARG1, else return ARG0 + * - FLR: use FRC+MAD + * - XPD: use MAD+MAD + * - SGE, SLT: use MAD+CMP + * - RSQ: use ABS modifier for argument + * - Use OUTC_REPL_ALPHA to write results of an alpha-only operation + * (e.g. RCP) into color register + * - apparently, there's no quick DST operation + * - fglrx set FPI2_UNKNOWN_31 on a "MAD fragment.color, tmp0, tmp1, tmp2" + * - fglrx set FPI2_UNKNOWN_31 on a "MAX r2, r1, c0" + * - fglrx once set FPI0_UNKNOWN_31 on a "FRC r1, r1" + * + * Operand selection + * First stage selects three sources from the available registers and + * constant parameters. This is defined in INSTR1 (color) and INSTR3 (alpha). + * fglrx sorts the three source fields: Registers before constants, + * lower indices before higher indices; I do not know whether this is + * necessary. + * + * fglrx fills unused sources with "read constant 0" + * According to specs, you cannot select more than two different constants. + * + * Second stage selects the operands from the sources. This is defined in + * INSTR0 (color) and INSTR2 (alpha). You can also select the special constants + * zero and one. + * Swizzling and negation happens in this stage, as well. + * + * Important: Color and alpha seem to be mostly separate, i.e. their sources + * selection appears to be fully independent (the register storage is probably + * physically split into a color and an alpha section). + * However (because of the apparent physical split), there is some interaction + * WRT swizzling. If, for example, you want to load an R component into an + * Alpha operand, this R component is taken from a *color* source, not from + * an alpha source. The corresponding register doesn't even have to appear in + * the alpha sources list. (I hope this all makes sense to you) + * + * Destination selection + * The destination register index is in FPI1 (color) and FPI3 (alpha) + * together with enable bits. + * There are separate enable bits for writing into temporary registers + * (DSTC_REG_* /DSTA_REG) and and program output registers (DSTC_OUTPUT_* + * /DSTA_OUTPUT). You can write to both at once, or not write at all (the + * same index must be used for both). + * + * Note: There is a special form for LRP + * - Argument order is the same as in ARB_fragment_program. + * - Operation is MAD + * - ARG1 is set to ARGC_SRC1C_LRP/ARGC_SRC1A_LRP + * - Set FPI0/FPI2_SPECIAL_LRP + * Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD + */ +#define R300_US_ALU_RGB_ADDR_0 0x46C0 +# define R300_ALU_SRC0C_SHIFT 0 +# define R300_ALU_SRC0C_MASK (31 << 0) +# define R300_ALU_SRC0C_CONST (1 << 5) +# define R300_ALU_SRC1C_SHIFT 6 +# define R300_ALU_SRC1C_MASK (31 << 6) +# define R300_ALU_SRC1C_CONST (1 << 11) +# define R300_ALU_SRC2C_SHIFT 12 +# define R300_ALU_SRC2C_MASK (31 << 12) +# define R300_ALU_SRC2C_CONST (1 << 17) +# define R300_ALU_SRC_MASK 0x0003ffff +# define R300_ALU_DSTC_SHIFT 18 +# define R300_ALU_DSTC_MASK (31 << 18) +# define R300_ALU_DSTC_REG_MASK_SHIFT 23 +# define R300_ALU_DSTC_REG_X (1 << 23) +# define R300_ALU_DSTC_REG_Y (1 << 24) +# define R300_ALU_DSTC_REG_Z (1 << 25) +# define R300_ALU_DSTC_OUTPUT_MASK_SHIFT 26 +# define R300_ALU_DSTC_OUTPUT_X (1 << 26) +# define R300_ALU_DSTC_OUTPUT_Y (1 << 27) +# define R300_ALU_DSTC_OUTPUT_Z (1 << 28) + +#define R300_US_ALU_ALPHA_ADDR_0 0x47C0 +# define R300_ALU_SRC0A_SHIFT 0 +# define R300_ALU_SRC0A_MASK (31 << 0) +# define R300_ALU_SRC0A_CONST (1 << 5) +# define R300_ALU_SRC1A_SHIFT 6 +# define R300_ALU_SRC1A_MASK (31 << 6) +# define R300_ALU_SRC1A_CONST (1 << 11) +# define R300_ALU_SRC2A_SHIFT 12 +# define R300_ALU_SRC2A_MASK (31 << 12) +# define R300_ALU_SRC2A_CONST (1 << 17) +# define R300_ALU_SRC_MASK 0x0003ffff +# define R300_ALU_DSTA_SHIFT 18 +# define R300_ALU_DSTA_MASK (31 << 18) +# define R300_ALU_DSTA_REG (1 << 23) +# define R300_ALU_DSTA_OUTPUT (1 << 24) +# define R300_ALU_DSTA_DEPTH (1 << 27) + +#define R300_US_ALU_RGB_INST_0 0x48C0 +# define R300_ALU_ARGC_SRC0C_XYZ 0 +# define R300_ALU_ARGC_SRC0C_XXX 1 +# define R300_ALU_ARGC_SRC0C_YYY 2 +# define R300_ALU_ARGC_SRC0C_ZZZ 3 +# define R300_ALU_ARGC_SRC1C_XYZ 4 +# define R300_ALU_ARGC_SRC1C_XXX 5 +# define R300_ALU_ARGC_SRC1C_YYY 6 +# define R300_ALU_ARGC_SRC1C_ZZZ 7 +# define R300_ALU_ARGC_SRC2C_XYZ 8 +# define R300_ALU_ARGC_SRC2C_XXX 9 +# define R300_ALU_ARGC_SRC2C_YYY 10 +# define R300_ALU_ARGC_SRC2C_ZZZ 11 +# define R300_ALU_ARGC_SRC0A 12 +# define R300_ALU_ARGC_SRC1A 13 +# define R300_ALU_ARGC_SRC2A 14 +# define R300_ALU_ARGC_SRCP_XYZ 15 +# define R300_ALU_ARGC_SRCP_XXX 16 +# define R300_ALU_ARGC_SRCP_YYY 17 +# define R300_ALU_ARGC_SRCP_ZZZ 18 +# define R300_ALU_ARGC_SRCP_WWW 19 +# define R300_ALU_ARGC_ZERO 20 +# define R300_ALU_ARGC_ONE 21 +# define R300_ALU_ARGC_HALF 22 +# define R300_ALU_ARGC_SRC0C_YZX 23 +# define R300_ALU_ARGC_SRC1C_YZX 24 +# define R300_ALU_ARGC_SRC2C_YZX 25 +# define R300_ALU_ARGC_SRC0C_ZXY 26 +# define R300_ALU_ARGC_SRC1C_ZXY 27 +# define R300_ALU_ARGC_SRC2C_ZXY 28 +# define R300_ALU_ARGC_SRC0CA_WZY 29 +# define R300_ALU_ARGC_SRC1CA_WZY 30 +# define R300_ALU_ARGC_SRC2CA_WZY 31 + +# define R300_ALU_ARG0C_SHIFT 0 +# define R300_ALU_ARG0C_MASK (31 << 0) +# define R300_ALU_ARG0C_NOP (0 << 5) +# define R300_ALU_ARG0C_NEG (1 << 5) +# define R300_ALU_ARG0C_ABS (2 << 5) +# define R300_ALU_ARG0C_NAB (3 << 5) +# define R300_ALU_ARG1C_SHIFT 7 +# define R300_ALU_ARG1C_MASK (31 << 7) +# define R300_ALU_ARG1C_NOP (0 << 12) +# define R300_ALU_ARG1C_NEG (1 << 12) +# define R300_ALU_ARG1C_ABS (2 << 12) +# define R300_ALU_ARG1C_NAB (3 << 12) +# define R300_ALU_ARG2C_SHIFT 14 +# define R300_ALU_ARG2C_MASK (31 << 14) +# define R300_ALU_ARG2C_NOP (0 << 19) +# define R300_ALU_ARG2C_NEG (1 << 19) +# define R300_ALU_ARG2C_ABS (2 << 19) +# define R300_ALU_ARG2C_NAB (3 << 19) +# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21) +# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21) +# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21) +# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21) + +# define R300_ALU_OUTC_MAD (0 << 23) +# define R300_ALU_OUTC_DP3 (1 << 23) +# define R300_ALU_OUTC_DP4 (2 << 23) +# define R300_ALU_OUTC_D2A (3 << 23) +# define R300_ALU_OUTC_MIN (4 << 23) +# define R300_ALU_OUTC_MAX (5 << 23) +# define R300_ALU_OUTC_CMPH (7 << 23) +# define R300_ALU_OUTC_CMP (8 << 23) +# define R300_ALU_OUTC_FRC (9 << 23) +# define R300_ALU_OUTC_REPL_ALPHA (10 << 23) + +# define R300_ALU_OUTC_MOD_NOP (0 << 27) +# define R300_ALU_OUTC_MOD_MUL2 (1 << 27) +# define R300_ALU_OUTC_MOD_MUL4 (2 << 27) +# define R300_ALU_OUTC_MOD_MUL8 (3 << 27) +# define R300_ALU_OUTC_MOD_DIV2 (4 << 27) +# define R300_ALU_OUTC_MOD_DIV4 (5 << 27) +# define R300_ALU_OUTC_MOD_DIV8 (6 << 27) + +# define R300_ALU_OUTC_CLAMP (1 << 30) +# define R300_ALU_INSERT_NOP (1 << 31) + +#define R300_US_ALU_ALPHA_INST_0 0x49C0 +# define R300_ALU_ARGA_SRC0C_X 0 +# define R300_ALU_ARGA_SRC0C_Y 1 +# define R300_ALU_ARGA_SRC0C_Z 2 +# define R300_ALU_ARGA_SRC1C_X 3 +# define R300_ALU_ARGA_SRC1C_Y 4 +# define R300_ALU_ARGA_SRC1C_Z 5 +# define R300_ALU_ARGA_SRC2C_X 6 +# define R300_ALU_ARGA_SRC2C_Y 7 +# define R300_ALU_ARGA_SRC2C_Z 8 +# define R300_ALU_ARGA_SRC0A 9 +# define R300_ALU_ARGA_SRC1A 10 +# define R300_ALU_ARGA_SRC2A 11 +# define R300_ALU_ARGA_SRCP_X 12 +# define R300_ALU_ARGA_SRCP_Y 13 +# define R300_ALU_ARGA_SRCP_Z 14 +# define R300_ALU_ARGA_SRCP_W 15 + +# define R300_ALU_ARGA_ZERO 16 +# define R300_ALU_ARGA_ONE 17 +# define R300_ALU_ARGA_HALF 18 +# define R300_ALU_ARG0A_SHIFT 0 +# define R300_ALU_ARG0A_MASK (31 << 0) +# define R300_ALU_ARG0A_NOP (0 << 5) +# define R300_ALU_ARG0A_NEG (1 << 5) +# define R300_ALU_ARG0A_ABS (2 << 5) +# define R300_ALU_ARG0A_NAB (3 << 5) +# define R300_ALU_ARG1A_SHIFT 7 +# define R300_ALU_ARG1A_MASK (31 << 7) +# define R300_ALU_ARG1A_NOP (0 << 12) +# define R300_ALU_ARG1A_NEG (1 << 12) +# define R300_ALU_ARG1A_ABS (2 << 12) +# define R300_ALU_ARG1A_NAB (3 << 12) +# define R300_ALU_ARG2A_SHIFT 14 +# define R300_ALU_ARG2A_MASK (31 << 14) +# define R300_ALU_ARG2A_NOP (0 << 19) +# define R300_ALU_ARG2A_NEG (1 << 19) +# define R300_ALU_ARG2A_ABS (2 << 19) +# define R300_ALU_ARG2A_NAB (3 << 19) +# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21) +# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21) +# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21) +# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21) + +# define R300_ALU_OUTA_MAD (0 << 23) +# define R300_ALU_OUTA_DP4 (1 << 23) +# define R300_ALU_OUTA_MIN (2 << 23) +# define R300_ALU_OUTA_MAX (3 << 23) +# define R300_ALU_OUTA_CND (5 << 23) +# define R300_ALU_OUTA_CMP (6 << 23) +# define R300_ALU_OUTA_FRC (7 << 23) +# define R300_ALU_OUTA_EX2 (8 << 23) +# define R300_ALU_OUTA_LG2 (9 << 23) +# define R300_ALU_OUTA_RCP (10 << 23) +# define R300_ALU_OUTA_RSQ (11 << 23) + +# define R300_ALU_OUTA_MOD_NOP (0 << 27) +# define R300_ALU_OUTA_MOD_MUL2 (1 << 27) +# define R300_ALU_OUTA_MOD_MUL4 (2 << 27) +# define R300_ALU_OUTA_MOD_MUL8 (3 << 27) +# define R300_ALU_OUTA_MOD_DIV2 (4 << 27) +# define R300_ALU_OUTA_MOD_DIV4 (5 << 27) +# define R300_ALU_OUTA_MOD_DIV8 (6 << 27) + +# define R300_ALU_OUTA_CLAMP (1 << 30) +/* END: Fragment program instruction set */ + +/* Fog: Fog Blending Enable */ +#define R300_FG_FOG_BLEND 0x4bc0 +# define R300_FG_FOG_BLEND_DISABLE (0 << 0) +# define R300_FG_FOG_BLEND_ENABLE (1 << 0) +# define R300_FG_FOG_BLEND_FN_LINEAR (0 << 1) +# define R300_FG_FOG_BLEND_FN_EXP (1 << 1) +# define R300_FG_FOG_BLEND_FN_EXP2 (2 << 1) +# define R300_FG_FOG_BLEND_FN_CONSTANT (3 << 1) +# define R300_FG_FOG_BLEND_FN_MASK (3 << 1) + +/* Fog: Red Component of Fog Color */ +#define R300_FG_FOG_COLOR_R 0x4bc8 +/* Fog: Green Component of Fog Color */ +#define R300_FG_FOG_COLOR_G 0x4bcc +/* Fog: Blue Component of Fog Color */ +#define R300_FG_FOG_COLOR_B 0x4bd0 +# define R300_FG_FOG_COLOR_MASK 0x000003ff + +/* Fog: Constant Factor for Fog Blending */ +#define R300_FG_FOG_FACTOR 0x4bc4 +# define FG_FOG_FACTOR_MASK 0x000003ff + +/* Fog: Alpha function */ +#define R300_FG_ALPHA_FUNC 0x4bd4 +# define R300_FG_ALPHA_FUNC_VAL_MASK 0x000000ff +# define R300_FG_ALPHA_FUNC_NEVER (0 << 8) +# define R300_FG_ALPHA_FUNC_LESS (1 << 8) +# define R300_FG_ALPHA_FUNC_EQUAL (2 << 8) +# define R300_FG_ALPHA_FUNC_LE (3 << 8) +# define R300_FG_ALPHA_FUNC_GREATER (4 << 8) +# define R300_FG_ALPHA_FUNC_NOTEQUAL (5 << 8) +# define R300_FG_ALPHA_FUNC_GE (6 << 8) +# define R300_FG_ALPHA_FUNC_ALWAYS (7 << 8) +# define R300_ALPHA_TEST_OP_MASK (7 << 8) +# define R300_FG_ALPHA_FUNC_DISABLE (0 << 11) +# define R300_FG_ALPHA_FUNC_ENABLE (1 << 11) + +# define R500_FG_ALPHA_FUNC_10BIT (0 << 12) +# define R500_FG_ALPHA_FUNC_8BIT (1 << 12) + +# define R300_FG_ALPHA_FUNC_MASK_DISABLE (0 << 16) +# define R300_FG_ALPHA_FUNC_MASK_ENABLE (1 << 16) +# define R300_FG_ALPHA_FUNC_CFG_2_OF_4 (0 << 17) +# define R300_FG_ALPHA_FUNC_CFG_3_OF_6 (1 << 17) + +# define R300_FG_ALPHA_FUNC_DITH_DISABLE (0 << 20) +# define R300_FG_ALPHA_FUNC_DITH_ENABLE (1 << 20) + +# define R500_FG_ALPHA_FUNC_OFFSET_DISABLE (0 << 24) +# define R500_FG_ALPHA_FUNC_OFFSET_ENABLE (1 << 24) /* Not supported in R520 */ +# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_DISABLE (0 << 25) +# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_ENABLE (1 << 25) + +# define R500_FG_ALPHA_FUNC_FP16_DISABLE (0 << 28) +# define R500_FG_ALPHA_FUNC_FP16_ENABLE (1 << 28) + + +/* Fog: Where does the depth come from? */ +#define R300_FG_DEPTH_SRC 0x4bd8 +# define R300_FG_DEPTH_SRC_SCAN (0 << 0) +# define R300_FG_DEPTH_SRC_SHADER (1 << 0) + +/* Fog: Alpha Compare Value */ +#define R500_FG_ALPHA_VALUE 0x4be0 +# define R500_FG_ALPHA_VALUE_MASK 0x0000ffff + +/* gap */ + +/* Fragment program parameters in 7.16 floating point */ +#define R300_PFS_PARAM_0_X 0x4C00 +#define R300_PFS_PARAM_0_Y 0x4C04 +#define R300_PFS_PARAM_0_Z 0x4C08 +#define R300_PFS_PARAM_0_W 0x4C0C +/* last consts */ +#define R300_PFS_PARAM_31_X 0x4DF0 +#define R300_PFS_PARAM_31_Y 0x4DF4 +#define R300_PFS_PARAM_31_Z 0x4DF8 +#define R300_PFS_PARAM_31_W 0x4DFC + +/* Unpipelined. */ +#define R300_RB3D_CCTL 0x4e00 +# define R300_RB3D_CCTL_NUM_MULTIWRITES_1_BUFFER (0 << 5) +# define R300_RB3D_CCTL_NUM_MULTIWRITES_2_BUFFERS (1 << 5) +# define R300_RB3D_CCTL_NUM_MULTIWRITES_3_BUFFERS (2 << 5) +# define R300_RB3D_CCTL_NUM_MULTIWRITES_4_BUFFERS (3 << 5) +# define R300_RB3D_CCTL_CLRCMP_FLIPE_DISABLE (0 << 7) +# define R300_RB3D_CCTL_CLRCMP_FLIPE_ENABLE (1 << 7) +# define R300_RB3D_CCTL_AA_COMPRESSION_DISABLE (0 << 9) +# define R300_RB3D_CCTL_AA_COMPRESSION_ENABLE (1 << 9) +# define R300_RB3D_CCTL_CMASK_DISABLE (0 << 10) +# define R300_RB3D_CCTL_CMASK_ENABLE (1 << 10) +/* reserved */ +# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_DISABLE (0 << 12) +# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_ENABLE (1 << 12) +# define R300_RB3D_CCTL_WRITE_COMPRESSION_ENABLE (0 << 13) +# define R300_RB3D_CCTL_WRITE_COMPRESSION_DISABLE (1 << 13) +# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_DISABLE (0 << 14) +# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE (1 << 14) + + +/* Notes: + * - AFAIK fglrx always sets BLEND_UNKNOWN when blending is used in + * the application + * - AFAIK fglrx always sets BLEND_NO_SEPARATE when CBLEND and ABLEND + * are set to the same + * function (both registers are always set up completely in any case) + * - Most blend flags are simply copied from R200 and not tested yet + */ +#define R300_RB3D_CBLEND 0x4E04 +#define R300_RB3D_ABLEND 0x4E08 +/* the following only appear in CBLEND */ +# define R300_ALPHA_BLEND_ENABLE (1 << 0) +# define R300_SEPARATE_ALPHA_ENABLE (1 << 1) +# define R300_READ_ENABLE (1 << 2) +# define R300_DISCARD_SRC_PIXELS_DIS (0 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0 (1 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_0 (2 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0 (3 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1 (4 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1 (5 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1 (6 << 3) + +/* the following are shared between CBLEND and ABLEND */ +# define R300_FCN_MASK (3 << 12) +# define R300_COMB_FCN_ADD_CLAMP (0 << 12) +# define R300_COMB_FCN_ADD_NOCLAMP (1 << 12) +# define R300_COMB_FCN_SUB_CLAMP (2 << 12) +# define R300_COMB_FCN_SUB_NOCLAMP (3 << 12) +# define R300_COMB_FCN_MIN (4 << 12) +# define R300_COMB_FCN_MAX (5 << 12) +# define R300_COMB_FCN_RSUB_CLAMP (6 << 12) +# define R300_COMB_FCN_RSUB_NOCLAMP (7 << 12) +# define R300_BLEND_GL_ZERO (32) +# define R300_BLEND_GL_ONE (33) +# define R300_BLEND_GL_SRC_COLOR (34) +# define R300_BLEND_GL_ONE_MINUS_SRC_COLOR (35) +# define R300_BLEND_GL_DST_COLOR (36) +# define R300_BLEND_GL_ONE_MINUS_DST_COLOR (37) +# define R300_BLEND_GL_SRC_ALPHA (38) +# define R300_BLEND_GL_ONE_MINUS_SRC_ALPHA (39) +# define R300_BLEND_GL_DST_ALPHA (40) +# define R300_BLEND_GL_ONE_MINUS_DST_ALPHA (41) +# define R300_BLEND_GL_SRC_ALPHA_SATURATE (42) +# define R300_BLEND_GL_CONST_COLOR (43) +# define R300_BLEND_GL_ONE_MINUS_CONST_COLOR (44) +# define R300_BLEND_GL_CONST_ALPHA (45) +# define R300_BLEND_GL_ONE_MINUS_CONST_ALPHA (46) +# define R300_BLEND_MASK (63) +# define R300_SRC_BLEND_SHIFT (16) +# define R300_DST_BLEND_SHIFT (24) + +/* Constant color used by the blender. Pipelined through the blender. + * Note: For R520, this field is ignored, use RB3D_CONSTANT_COLOR_GB__BLUE, + * RB3D_CONSTANT_COLOR_GB__GREEN, etc. instead. + */ +#define R300_RB3D_BLEND_COLOR 0x4E10 + + +/* 3D Color Channel Mask. If all the channels used in the current color format + * are disabled, then the cb will discard all the incoming quads. Pipelined + * through the blender. + */ +#define RB3D_COLOR_CHANNEL_MASK 0x4E0C +# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 (1 << 0) +# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 (1 << 1) +# define RB3D_COLOR_CHANNEL_MASK_RED_MASK0 (1 << 2) +# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 (1 << 3) +# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK1 (1 << 4) +# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK1 (1 << 5) +# define RB3D_COLOR_CHANNEL_MASK_RED_MASK1 (1 << 6) +# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK1 (1 << 7) +# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK2 (1 << 8) +# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK2 (1 << 9) +# define RB3D_COLOR_CHANNEL_MASK_RED_MASK2 (1 << 10) +# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK2 (1 << 11) +# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK3 (1 << 12) +# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK3 (1 << 13) +# define RB3D_COLOR_CHANNEL_MASK_RED_MASK3 (1 << 14) +# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK3 (1 << 15) + +/* Clear color that is used when the color mask is set to 00. Unpipelined. + * Program this register with a 32-bit value in ARGB8888 or ARGB2101010 + * formats, ignoring the fields. + */ +#define RB3D_COLOR_CLEAR_VALUE 0x4e14 + +/* gap */ + +/* Color Compare Color. Stalls the 2d/3d datapath until it is idle. */ +#define RB3D_CLRCMP_CLR 0x4e20 + +/* Color Compare Mask. Stalls the 2d/3d datapath until it is idle. */ +#define RB3D_CLRCMP_MSK 0x4e24 + +/* Color Buffer Address Offset of multibuffer 0. Unpipelined. */ +#define R300_RB3D_COLOROFFSET0 0x4E28 +# define R300_COLOROFFSET_MASK 0xFFFFFFE0 +/* Color Buffer Address Offset of multibuffer 1. Unpipelined. */ +#define R300_RB3D_COLOROFFSET1 0x4E2C +/* Color Buffer Address Offset of multibuffer 2. Unpipelined. */ +#define R300_RB3D_COLOROFFSET2 0x4E30 +/* Color Buffer Address Offset of multibuffer 3. Unpipelined. */ +#define R300_RB3D_COLOROFFSET3 0x4E34 + +/* Color buffer format and tiling control for all the multibuffers and the + * pitch of multibuffer 0 to 3. Unpipelined. The cache must be empty before any + * of the registers are changed. + * + * Bit 16: Larger tiles + * Bit 17: 4x2 tiles + * Bit 18: Extremely weird tile like, but some pixels duplicated? + */ +#define R300_RB3D_COLORPITCH0 0x4E38 +# define R300_COLORPITCH_MASK 0x00003FFE +# define R300_COLOR_TILE_DISABLE (0 << 16) +# define R300_COLOR_TILE_ENABLE (1 << 16) +# define R300_COLOR_MICROTILE_DISABLE (0 << 17) +# define R300_COLOR_MICROTILE_ENABLE (1 << 17) +# define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */ +# define R300_COLOR_ENDIAN_NO_SWAP (0 << 19) +# define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19) +# define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19) +# define R300_COLOR_ENDIAN_HALF_DWORD_SWAP (3 << 19) +# define R500_COLOR_FORMAT_ARGB10101010 (0 << 21) +# define R500_COLOR_FORMAT_UV1010 (1 << 21) +# define R500_COLOR_FORMAT_CI8 (2 << 21) /* 2D only */ +# define R300_COLOR_FORMAT_ARGB1555 (3 << 21) +# define R300_COLOR_FORMAT_RGB565 (4 << 21) +# define R500_COLOR_FORMAT_ARGB2101010 (5 << 21) +# define R300_COLOR_FORMAT_ARGB8888 (6 << 21) +# define R300_COLOR_FORMAT_ARGB32323232 (7 << 21) +/* reserved */ +# define R300_COLOR_FORMAT_I8 (9 << 21) +# define R300_COLOR_FORMAT_ARGB16161616 (10 << 21) +# define R300_COLOR_FORMAT_VYUY (11 << 21) +# define R300_COLOR_FORMAT_YVYU (12 << 21) +# define R300_COLOR_FORMAT_UV88 (13 << 21) +# define R500_COLOR_FORMAT_I10 (14 << 21) +# define R300_COLOR_FORMAT_ARGB4444 (15 << 21) +#define R300_RB3D_COLORPITCH1 0x4E3C +#define R300_RB3D_COLORPITCH2 0x4E40 +#define R300_RB3D_COLORPITCH3 0x4E44 + +/* gap */ + +/* Destination Color Buffer Cache Control/Status. If the cb is in e2 mode, then + * a flush or free will not occur upon a write to this register, but a sync + * will be immediately sent if one is requested. If both DC_FLUSH and DC_FREE + * are zero but DC_FINISH is one, then a sync will be sent immediately -- the + * cb will not wait for all the previous operations to complete before sending + * the sync. Unpipelined except when DC_FINISH and DC_FREE are both set to + * zero. + * + * Set to 0A before 3D operations, set to 02 afterwards. + */ +#define R300_RB3D_DSTCACHE_CTLSTAT 0x4e4c +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT (0 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT_1 (1 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D (2 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D_1 (3 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT (0 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT_1 (1 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS (2 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS_1 (3 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_NO_SIGNAL (0 << 4) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL (1 << 4) + +#define R300_RB3D_DITHER_CTL 0x4E50 +# define R300_RB3D_DITHER_CTL_DITHER_MODE_TRUNCATE (0 << 0) +# define R300_RB3D_DITHER_CTL_DITHER_MODE_ROUND (1 << 0) +# define R300_RB3D_DITHER_CTL_DITHER_MODE_LUT (2 << 0) +/* reserved */ +# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_TRUNCATE (0 << 2) +# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_ROUND (1 << 2) +# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT (2 << 2) +/* reserved */ + +/* Resolve buffer destination address. The cache must be empty before changing + * this register if the cb is in resolve mode. Unpipelined + */ +#define R300_RB3D_AARESOLVE_OFFSET 0x4e80 +# define R300_RB3D_AARESOLVE_OFFSET_SHIFT 5 +# define R300_RB3D_AARESOLVE_OFFSET_MASK 0xffffffe0 /* At least according to the calculations of Christoph Brill */ + +/* Resolve Buffer Pitch and Tiling Control. The cache must be empty before + * changing this register if the cb is in resolve mode. Unpipelined + */ +#define R300_RB3D_AARESOLVE_PITCH 0x4e84 +# define R300_RB3D_AARESOLVE_PITCH_SHIFT 1 +# define R300_RB3D_AARESOLVE_PITCH_MASK 0x00003ffe /* At least according to the calculations of Christoph Brill */ + +/* Resolve Buffer Control. Unpipelined */ +#define R300_RB3D_AARESOLVE_CTL 0x4e88 +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_NORMAL (0 << 0) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE (1 << 0) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_10 (0 << 1) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_22 (1 << 1) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_SAMPLE0 (0 << 2) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE (1 << 2) + + +/* Discard src pixels less than or equal to threshold. */ +#define R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD 0x4ea0 +/* Discard src pixels greater than or equal to threshold. */ +#define R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD 0x4ea4 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_SHIFT 0 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_MASK 0x000000ff +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_SHIFT 8 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_MASK 0x0000ff00 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_SHIFT 16 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_MASK 0x00ff0000 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_SHIFT 24 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_MASK 0xff000000 + +/* 3D ROP Control. Stalls the 2d/3d datapath until it is idle. */ +#define R300_RB3D_ROPCNTL 0x4e18 +# define R300_RB3D_ROPCNTL_ROP_ENABLE 0x00000004 +# define R300_RB3D_ROPCNTL_ROP_MASK (15 << 8) +# define R300_RB3D_ROPCNTL_ROP_SHIFT 8 + +/* Color Compare Flip. Stalls the 2d/3d datapath until it is idle. */ +#define R300_RB3D_CLRCMP_FLIPE 0x4e1c + +/* Sets the fifo sizes */ +#define R500_RB3D_FIFO_SIZE 0x4ef4 +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_FULL (0 << 0) +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_HALF (1 << 0) +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_QUATER (2 << 0) +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_EIGTHS (3 << 0) + +/* Constant color used by the blender. Pipelined through the blender. */ +#define R500_RB3D_CONSTANT_COLOR_AR 0x4ef8 +# define R500_RB3D_CONSTANT_COLOR_AR_RED_MASK 0x0000ffff +# define R500_RB3D_CONSTANT_COLOR_AR_RED_SHIFT 0 +# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_MASK 0xffff0000 +# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_SHIFT 16 + +/* Constant color used by the blender. Pipelined through the blender. */ +#define R500_RB3D_CONSTANT_COLOR_GB 0x4efc +# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_MASK 0x0000ffff +# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_SHIFT 0 +# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_MASK 0xffff0000 +# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_SHIFT 16 + +/* gap */ +/* There seems to be no "write only" setting, so use Z-test = ALWAYS + * for this. + * Bit (1<<8) is the "test" bit. so plain write is 6 - vd + */ +#define R300_ZB_CNTL 0x4F00 +# define R300_STENCIL_ENABLE (1 << 0) +# define R300_Z_ENABLE (1 << 1) +# define R300_Z_WRITE_ENABLE (1 << 2) +# define R300_Z_SIGNED_COMPARE (1 << 3) +# define R300_STENCIL_FRONT_BACK (1 << 4) + +#define R300_ZB_ZSTENCILCNTL 0x4f04 + /* functions */ +# define R300_ZS_NEVER 0 +# define R300_ZS_LESS 1 +# define R300_ZS_LEQUAL 2 +# define R300_ZS_EQUAL 3 +# define R300_ZS_GEQUAL 4 +# define R300_ZS_GREATER 5 +# define R300_ZS_NOTEQUAL 6 +# define R300_ZS_ALWAYS 7 +# define R300_ZS_MASK 7 + /* operations */ +# define R300_ZS_KEEP 0 +# define R300_ZS_ZERO 1 +# define R300_ZS_REPLACE 2 +# define R300_ZS_INCR 3 +# define R300_ZS_DECR 4 +# define R300_ZS_INVERT 5 +# define R300_ZS_INCR_WRAP 6 +# define R300_ZS_DECR_WRAP 7 +# define R300_Z_FUNC_SHIFT 0 + /* front and back refer to operations done for front + and back faces, i.e. separate stencil function support */ +# define R300_S_FRONT_FUNC_SHIFT 3 +# define R300_S_FRONT_SFAIL_OP_SHIFT 6 +# define R300_S_FRONT_ZPASS_OP_SHIFT 9 +# define R300_S_FRONT_ZFAIL_OP_SHIFT 12 +# define R300_S_BACK_FUNC_SHIFT 15 +# define R300_S_BACK_SFAIL_OP_SHIFT 18 +# define R300_S_BACK_ZPASS_OP_SHIFT 21 +# define R300_S_BACK_ZFAIL_OP_SHIFT 24 + +#define R300_ZB_STENCILREFMASK 0x4f08 +# define R300_STENCILREF_SHIFT 0 +# define R300_STENCILREF_MASK 0x000000ff +# define R300_STENCILMASK_SHIFT 8 +# define R300_STENCILMASK_MASK 0x0000ff00 +# define R300_STENCILWRITEMASK_SHIFT 16 +# define R300_STENCILWRITEMASK_MASK 0x00ff0000 + +/* gap */ + +#define R300_ZB_FORMAT 0x4f10 +# define R300_DEPTHFORMAT_16BIT_INT_Z (0 << 0) +# define R300_DEPTHFORMAT_16BIT_13E3 (1 << 0) +# define R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL (2 << 0) +/* reserved up to (15 << 0) */ +# define R300_INVERT_13E3_LEADING_ONES (0 << 4) +# define R300_INVERT_13E3_LEADING_ZEROS (1 << 4) + +#define R300_ZB_ZTOP 0x4F14 +# define R300_ZTOP_DISABLE (0 << 0) +# define R300_ZTOP_ENABLE (1 << 0) + +/* gap */ + +#define R300_ZB_ZCACHE_CTLSTAT 0x4f18 +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_NO_EFFECT (0 << 0) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE (1 << 0) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_NO_EFFECT (0 << 1) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE (1 << 1) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_IDLE (0 << 31) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_BUSY (1 << 31) + +#define R300_ZB_BW_CNTL 0x4f1c +# define R300_HIZ_DISABLE (0 << 0) +# define R300_HIZ_ENABLE (1 << 0) +# define R300_HIZ_MIN (0 << 1) +# define R300_HIZ_MAX (1 << 1) +# define R300_FAST_FILL_DISABLE (0 << 2) +# define R300_FAST_FILL_ENABLE (1 << 2) +# define R300_RD_COMP_DISABLE (0 << 3) +# define R300_RD_COMP_ENABLE (1 << 3) +# define R300_WR_COMP_DISABLE (0 << 4) +# define R300_WR_COMP_ENABLE (1 << 4) +# define R300_ZB_CB_CLEAR_RMW (0 << 5) +# define R300_ZB_CB_CLEAR_CACHE_LINEAR (1 << 5) +# define R300_FORCE_COMPRESSED_STENCIL_VALUE_DISABLE (0 << 6) +# define R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE (1 << 6) + +# define R500_ZEQUAL_OPTIMIZE_ENABLE (0 << 7) +# define R500_ZEQUAL_OPTIMIZE_DISABLE (1 << 7) +# define R500_SEQUAL_OPTIMIZE_ENABLE (0 << 8) +# define R500_SEQUAL_OPTIMIZE_DISABLE (1 << 8) + +# define R500_BMASK_ENABLE (0 << 10) +# define R500_BMASK_DISABLE (1 << 10) +# define R500_HIZ_EQUAL_REJECT_DISABLE (0 << 11) +# define R500_HIZ_EQUAL_REJECT_ENABLE (1 << 11) +# define R500_HIZ_FP_EXP_BITS_DISABLE (0 << 12) +# define R500_HIZ_FP_EXP_BITS_1 (1 << 12) +# define R500_HIZ_FP_EXP_BITS_2 (2 << 12) +# define R500_HIZ_FP_EXP_BITS_3 (3 << 12) +# define R500_HIZ_FP_EXP_BITS_4 (4 << 12) +# define R500_HIZ_FP_EXP_BITS_5 (5 << 12) +# define R500_HIZ_FP_INVERT_LEADING_ONES (0 << 15) +# define R500_HIZ_FP_INVERT_LEADING_ZEROS (1 << 15) +# define R500_TILE_OVERWRITE_RECOMPRESSION_ENABLE (0 << 16) +# define R500_TILE_OVERWRITE_RECOMPRESSION_DISABLE (1 << 16) +# define R500_CONTIGUOUS_6XAA_SAMPLES_ENABLE (0 << 17) +# define R500_CONTIGUOUS_6XAA_SAMPLES_DISABLE (1 << 17) +# define R500_PEQ_PACKING_DISABLE (0 << 18) +# define R500_PEQ_PACKING_ENABLE (1 << 18) +# define R500_COVERED_PTR_MASKING_DISABLE (0 << 18) +# define R500_COVERED_PTR_MASKING_ENABLE (1 << 18) + + +/* gap */ + +/* Z Buffer Address Offset. + * Bits 31 to 5 are used for aligned Z buffer address offset for macro tiles. + */ +#define R300_ZB_DEPTHOFFSET 0x4f20 + +/* Z Buffer Pitch and Endian Control */ +#define R300_ZB_DEPTHPITCH 0x4f24 +# define R300_DEPTHPITCH_MASK 0x00003FFC +# define R300_DEPTHMACROTILE_DISABLE (0 << 16) +# define R300_DEPTHMACROTILE_ENABLE (1 << 16) +# define R300_DEPTHMICROTILE_LINEAR (0 << 17) +# define R300_DEPTHMICROTILE_TILED (1 << 17) +# define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17) +# define R300_DEPTHENDIAN_NO_SWAP (0 << 18) +# define R300_DEPTHENDIAN_WORD_SWAP (1 << 18) +# define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18) +# define R300_DEPTHENDIAN_HALF_DWORD_SWAP (3 << 18) + +/* Z Buffer Clear Value */ +#define R300_ZB_DEPTHCLEARVALUE 0x4f28 + +/* Hierarchical Z Memory Offset */ +#define R300_ZB_HIZ_OFFSET 0x4f44 + +/* Hierarchical Z Write Index */ +#define R300_ZB_HIZ_WRINDEX 0x4f48 + +/* Hierarchical Z Data */ +#define R300_ZB_HIZ_DWORD 0x4f4c + +/* Hierarchical Z Read Index */ +#define R300_ZB_HIZ_RDINDEX 0x4f50 + +/* Hierarchical Z Pitch */ +#define R300_ZB_HIZ_PITCH 0x4f54 + +/* Z Buffer Z Pass Counter Data */ +#define R300_ZB_ZPASS_DATA 0x4f58 + +/* Z Buffer Z Pass Counter Address */ +#define R300_ZB_ZPASS_ADDR 0x4f5c + +/* Depth buffer X and Y coordinate offset */ +#define R300_ZB_DEPTHXY_OFFSET 0x4f60 +# define R300_DEPTHX_OFFSET_SHIFT 1 +# define R300_DEPTHX_OFFSET_MASK 0x000007FE +# define R300_DEPTHY_OFFSET_SHIFT 17 +# define R300_DEPTHY_OFFSET_MASK 0x07FE0000 + +/* Sets the fifo sizes */ +#define R500_ZB_FIFO_SIZE 0x4fd0 +# define R500_OP_FIFO_SIZE_FULL (0 << 0) +# define R500_OP_FIFO_SIZE_HALF (1 << 0) +# define R500_OP_FIFO_SIZE_QUATER (2 << 0) +# define R500_OP_FIFO_SIZE_EIGTHS (4 << 0) + +/* Stencil Reference Value and Mask for backfacing quads */ +/* R300_ZB_STENCILREFMASK handles front face */ +#define R500_ZB_STENCILREFMASK_BF 0x4fd4 +# define R500_STENCILREF_SHIFT 0 +# define R500_STENCILREF_MASK 0x000000ff +# define R500_STENCILMASK_SHIFT 8 +# define R500_STENCILMASK_MASK 0x0000ff00 +# define R500_STENCILWRITEMASK_SHIFT 16 +# define R500_STENCILWRITEMASK_MASK 0x00ff0000 + +/** + * \defgroup R3XX_R5XX_PROGRAMMABLE_VERTEX_SHADER_DESCRIPTION R3XX-R5XX PROGRAMMABLE VERTEX SHADER DESCRIPTION + * + * The PVS_DST_MATH_INST is used to identify whether the instruction is a Vector + * Engine instruction or a Math Engine instruction. + */ + +/*\{*/ + +enum { + /* R3XX */ + VECTOR_NO_OP = 0, + VE_DOT_PRODUCT = 1, + VE_MULTIPLY = 2, + VE_ADD = 3, + VE_MULTIPLY_ADD = 4, + VE_DISTANCE_VECTOR = 5, + VE_FRACTION = 6, + VE_MAXIMUM = 7, + VE_MINIMUM = 8, + VE_SET_GREATER_THAN_EQUAL = 9, + VE_SET_LESS_THAN = 10, + VE_MULTIPLYX2_ADD = 11, + VE_MULTIPLY_CLAMP = 12, + VE_FLT2FIX_DX = 13, + VE_FLT2FIX_DX_RND = 14, + /* R5XX */ + VE_PRED_SET_EQ_PUSH = 15, + VE_PRED_SET_GT_PUSH = 16, + VE_PRED_SET_GTE_PUSH = 17, + VE_PRED_SET_NEQ_PUSH = 18, + VE_COND_WRITE_EQ = 19, + VE_COND_WRITE_GT = 20, + VE_COND_WRITE_GTE = 21, + VE_COND_WRITE_NEQ = 22, + VE_COND_MUX_EQ = 23, + VE_COND_MUX_GT = 24, + VE_COND_MUX_GTE = 25, + VE_SET_GREATER_THAN = 26, + VE_SET_EQUAL = 27, + VE_SET_NOT_EQUAL = 28, +}; + +enum { + /* R3XX */ + MATH_NO_OP = 0, + ME_EXP_BASE2_DX = 1, + ME_LOG_BASE2_DX = 2, + ME_EXP_BASEE_FF = 3, + ME_LIGHT_COEFF_DX = 4, + ME_POWER_FUNC_FF = 5, + ME_RECIP_DX = 6, + ME_RECIP_FF = 7, + ME_RECIP_SQRT_DX = 8, + ME_RECIP_SQRT_FF = 9, + ME_MULTIPLY = 10, + ME_EXP_BASE2_FULL_DX = 11, + ME_LOG_BASE2_FULL_DX = 12, + ME_POWER_FUNC_FF_CLAMP_B = 13, + ME_POWER_FUNC_FF_CLAMP_B1 = 14, + ME_POWER_FUNC_FF_CLAMP_01 = 15, + ME_SIN = 16, + ME_COS = 17, + /* R5XX */ + ME_LOG_BASE2_IEEE = 18, + ME_RECIP_IEEE = 19, + ME_RECIP_SQRT_IEEE = 20, + ME_PRED_SET_EQ = 21, + ME_PRED_SET_GT = 22, + ME_PRED_SET_GTE = 23, + ME_PRED_SET_NEQ = 24, + ME_PRED_SET_CLR = 25, + ME_PRED_SET_INV = 26, + ME_PRED_SET_POP = 27, + ME_PRED_SET_RESTORE = 28, +}; + +enum { + /* R3XX */ + PVS_MACRO_OP_2CLK_MADD = 0, + PVS_MACRO_OP_2CLK_M2X_ADD = 1, +}; + +enum { + PVS_SRC_REG_TEMPORARY = 0, /* Intermediate Storage */ + PVS_SRC_REG_INPUT = 1, /* Input Vertex Storage */ + PVS_SRC_REG_CONSTANT = 2, /* Constant State Storage */ + PVS_SRC_REG_ALT_TEMPORARY = 3, /* Alternate Intermediate Storage */ +}; + +enum { + PVS_DST_REG_TEMPORARY = 0, /* Intermediate Storage */ + PVS_DST_REG_A0 = 1, /* Address Register Storage */ + PVS_DST_REG_OUT = 2, /* Output Memory. Used for all outputs */ + PVS_DST_REG_OUT_REPL_X = 3, /* Output Memory & Replicate X to all channels */ + PVS_DST_REG_ALT_TEMPORARY = 4, /* Alternate Intermediate Storage */ + PVS_DST_REG_INPUT = 5, /* Output Memory & Replicate X to all channels */ +}; + +enum { + PVS_SRC_SELECT_X = 0, /* Select X Component */ + PVS_SRC_SELECT_Y = 1, /* Select Y Component */ + PVS_SRC_SELECT_Z = 2, /* Select Z Component */ + PVS_SRC_SELECT_W = 3, /* Select W Component */ + PVS_SRC_SELECT_FORCE_0 = 4, /* Force Component to 0.0 */ + PVS_SRC_SELECT_FORCE_1 = 5, /* Force Component to 1.0 */ +}; + +/* PVS Opcode & Destination Operand Description */ + +enum { + PVS_DST_OPCODE_MASK = 0x3f, + PVS_DST_OPCODE_SHIFT = 0, + PVS_DST_MATH_INST_MASK = 0x1, + PVS_DST_MATH_INST_SHIFT = 6, + PVS_DST_MACRO_INST_MASK = 0x1, + PVS_DST_MACRO_INST_SHIFT = 7, + PVS_DST_REG_TYPE_MASK = 0xf, + PVS_DST_REG_TYPE_SHIFT = 8, + PVS_DST_ADDR_MODE_1_MASK = 0x1, + PVS_DST_ADDR_MODE_1_SHIFT = 12, + PVS_DST_OFFSET_MASK = 0x7f, + PVS_DST_OFFSET_SHIFT = 13, + PVS_DST_WE_X_MASK = 0x1, + PVS_DST_WE_X_SHIFT = 20, + PVS_DST_WE_Y_MASK = 0x1, + PVS_DST_WE_Y_SHIFT = 21, + PVS_DST_WE_Z_MASK = 0x1, + PVS_DST_WE_Z_SHIFT = 22, + PVS_DST_WE_W_MASK = 0x1, + PVS_DST_WE_W_SHIFT = 23, + PVS_DST_VE_SAT_MASK = 0x1, + PVS_DST_VE_SAT_SHIFT = 24, + PVS_DST_ME_SAT_MASK = 0x1, + PVS_DST_ME_SAT_SHIFT = 25, + PVS_DST_PRED_ENABLE_MASK = 0x1, + PVS_DST_PRED_ENABLE_SHIFT = 26, + PVS_DST_PRED_SENSE_MASK = 0x1, + PVS_DST_PRED_SENSE_SHIFT = 27, + PVS_DST_DUAL_MATH_OP_MASK = 0x3, + PVS_DST_DUAL_MATH_OP_SHIFT = 27, + PVS_DST_ADDR_SEL_MASK = 0x3, + PVS_DST_ADDR_SEL_SHIFT = 29, + PVS_DST_ADDR_MODE_0_MASK = 0x1, + PVS_DST_ADDR_MODE_0_SHIFT = 31, +}; + +/* PVS Source Operand Description */ + +enum { + PVS_SRC_REG_TYPE_MASK = 0x3, + PVS_SRC_REG_TYPE_SHIFT = 0, + SPARE_0_MASK = 0x1, + SPARE_0_SHIFT = 2, + PVS_SRC_ABS_XYZW_MASK = 0x1, + PVS_SRC_ABS_XYZW_SHIFT = 3, + PVS_SRC_ADDR_MODE_0_MASK = 0x1, + PVS_SRC_ADDR_MODE_0_SHIFT = 4, + PVS_SRC_OFFSET_MASK = 0xff, + PVS_SRC_OFFSET_SHIFT = 5, + PVS_SRC_SWIZZLE_X_MASK = 0x7, + PVS_SRC_SWIZZLE_X_SHIFT = 13, + PVS_SRC_SWIZZLE_Y_MASK = 0x7, + PVS_SRC_SWIZZLE_Y_SHIFT = 16, + PVS_SRC_SWIZZLE_Z_MASK = 0x7, + PVS_SRC_SWIZZLE_Z_SHIFT = 19, + PVS_SRC_SWIZZLE_W_MASK = 0x7, + PVS_SRC_SWIZZLE_W_SHIFT = 22, + PVS_SRC_MODIFIER_X_MASK = 0x1, + PVS_SRC_MODIFIER_X_SHIFT = 25, + PVS_SRC_MODIFIER_Y_MASK = 0x1, + PVS_SRC_MODIFIER_Y_SHIFT = 26, + PVS_SRC_MODIFIER_Z_MASK = 0x1, + PVS_SRC_MODIFIER_Z_SHIFT = 27, + PVS_SRC_MODIFIER_W_MASK = 0x1, + PVS_SRC_MODIFIER_W_SHIFT = 28, + PVS_SRC_ADDR_SEL_MASK = 0x3, + PVS_SRC_ADDR_SEL_SHIFT = 29, + PVS_SRC_ADDR_MODE_1_MASK = 0x0, + PVS_SRC_ADDR_MODE_1_SHIFT = 32, +}; + +/*\}*/ + +/* BEGIN: Packet 3 commands */ + +/* A primitive emission dword. */ +#define R300_PRIM_TYPE_NONE (0 << 0) +#define R300_PRIM_TYPE_POINT (1 << 0) +#define R300_PRIM_TYPE_LINE (2 << 0) +#define R300_PRIM_TYPE_LINE_STRIP (3 << 0) +#define R300_PRIM_TYPE_TRI_LIST (4 << 0) +#define R300_PRIM_TYPE_TRI_FAN (5 << 0) +#define R300_PRIM_TYPE_TRI_STRIP (6 << 0) +#define R300_PRIM_TYPE_TRI_TYPE2 (7 << 0) +#define R300_PRIM_TYPE_RECT_LIST (8 << 0) +#define R300_PRIM_TYPE_3VRT_POINT_LIST (9 << 0) +#define R300_PRIM_TYPE_3VRT_LINE_LIST (10 << 0) + /* GUESS (based on r200) */ +#define R300_PRIM_TYPE_POINT_SPRITES (11 << 0) +#define R300_PRIM_TYPE_LINE_LOOP (12 << 0) +#define R300_PRIM_TYPE_QUADS (13 << 0) +#define R300_PRIM_TYPE_QUAD_STRIP (14 << 0) +#define R300_PRIM_TYPE_POLYGON (15 << 0) +#define R300_PRIM_TYPE_MASK 0xF +#define R300_PRIM_WALK_IND (1 << 4) +#define R300_PRIM_WALK_LIST (2 << 4) +#define R300_PRIM_WALK_RING (3 << 4) +#define R300_PRIM_WALK_MASK (3 << 4) + /* GUESS (based on r200) */ +#define R300_PRIM_COLOR_ORDER_BGRA (0 << 6) +#define R300_PRIM_COLOR_ORDER_RGBA (1 << 6) +#define R300_PRIM_NUM_VERTICES_SHIFT 16 +#define R300_PRIM_NUM_VERTICES_MASK 0xffff + + + +/* + * The R500 unified shader (US) registers come in banks of 512 each, one + * for each instruction slot in the shader. You can't touch them directly. + * R500_US_VECTOR_INDEX() sets the base instruction to modify; successive + * writes to R500_GA_US_VECTOR_DATA autoincrement the index after the + * instruction is fully specified. + */ +#define R500_US_ALU_ALPHA_INST_0 0xa800 +# define R500_ALPHA_OP_MAD 0 +# define R500_ALPHA_OP_DP 1 +# define R500_ALPHA_OP_MIN 2 +# define R500_ALPHA_OP_MAX 3 +/* #define R500_ALPHA_OP_RESERVED 4 */ +# define R500_ALPHA_OP_CND 5 +# define R500_ALPHA_OP_CMP 6 +# define R500_ALPHA_OP_FRC 7 +# define R500_ALPHA_OP_EX2 8 +# define R500_ALPHA_OP_LN2 9 +# define R500_ALPHA_OP_RCP 10 +# define R500_ALPHA_OP_RSQ 11 +# define R500_ALPHA_OP_SIN 12 +# define R500_ALPHA_OP_COS 13 +# define R500_ALPHA_OP_MDH 14 +# define R500_ALPHA_OP_MDV 15 +# define R500_ALPHA_ADDRD(x) ((x) << 4) +# define R500_ALPHA_ADDRD_REL (1 << 11) +# define R500_ALPHA_SEL_A_SHIFT 12 +# define R500_ALPHA_SEL_A_SRC0 (0 << 12) +# define R500_ALPHA_SEL_A_SRC1 (1 << 12) +# define R500_ALPHA_SEL_A_SRC2 (2 << 12) +# define R500_ALPHA_SEL_A_SRCP (3 << 12) +# define R500_ALPHA_SWIZ_A_R (0 << 14) +# define R500_ALPHA_SWIZ_A_G (1 << 14) +# define R500_ALPHA_SWIZ_A_B (2 << 14) +# define R500_ALPHA_SWIZ_A_A (3 << 14) +# define R500_ALPHA_SWIZ_A_0 (4 << 14) +# define R500_ALPHA_SWIZ_A_HALF (5 << 14) +# define R500_ALPHA_SWIZ_A_1 (6 << 14) +/* #define R500_ALPHA_SWIZ_A_UNUSED (7 << 14) */ +# define R500_ALPHA_MOD_A_NOP (0 << 17) +# define R500_ALPHA_MOD_A_NEG (1 << 17) +# define R500_ALPHA_MOD_A_ABS (2 << 17) +# define R500_ALPHA_MOD_A_NAB (3 << 17) +# define R500_ALPHA_SEL_B_SHIFT 19 +# define R500_ALPHA_SEL_B_SRC0 (0 << 19) +# define R500_ALPHA_SEL_B_SRC1 (1 << 19) +# define R500_ALPHA_SEL_B_SRC2 (2 << 19) +# define R500_ALPHA_SEL_B_SRCP (3 << 19) +# define R500_ALPHA_SWIZ_B_R (0 << 21) +# define R500_ALPHA_SWIZ_B_G (1 << 21) +# define R500_ALPHA_SWIZ_B_B (2 << 21) +# define R500_ALPHA_SWIZ_B_A (3 << 21) +# define R500_ALPHA_SWIZ_B_0 (4 << 21) +# define R500_ALPHA_SWIZ_B_HALF (5 << 21) +# define R500_ALPHA_SWIZ_B_1 (6 << 21) +/* #define R500_ALPHA_SWIZ_B_UNUSED (7 << 21) */ +# define R500_ALPHA_MOD_B_NOP (0 << 24) +# define R500_ALPHA_MOD_B_NEG (1 << 24) +# define R500_ALPHA_MOD_B_ABS (2 << 24) +# define R500_ALPHA_MOD_B_NAB (3 << 24) +# define R500_ALPHA_OMOD_IDENTITY (0 << 26) +# define R500_ALPHA_OMOD_MUL_2 (1 << 26) +# define R500_ALPHA_OMOD_MUL_4 (2 << 26) +# define R500_ALPHA_OMOD_MUL_8 (3 << 26) +# define R500_ALPHA_OMOD_DIV_2 (4 << 26) +# define R500_ALPHA_OMOD_DIV_4 (5 << 26) +# define R500_ALPHA_OMOD_DIV_8 (6 << 26) +# define R500_ALPHA_OMOD_DISABLE (7 << 26) +# define R500_ALPHA_TARGET(x) ((x) << 29) +# define R500_ALPHA_W_OMASK (1 << 31) +#define R500_US_ALU_ALPHA_ADDR_0 0x9800 +# define R500_ALPHA_ADDR0(x) ((x) << 0) +# define R500_ALPHA_ADDR0_CONST (1 << 8) +# define R500_ALPHA_ADDR0_REL (1 << 9) +# define R500_ALPHA_ADDR1(x) ((x) << 10) +# define R500_ALPHA_ADDR1_CONST (1 << 18) +# define R500_ALPHA_ADDR1_REL (1 << 19) +# define R500_ALPHA_ADDR2(x) ((x) << 20) +# define R500_ALPHA_ADDR2_CONST (1 << 28) +# define R500_ALPHA_ADDR2_REL (1 << 29) +# define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0 << 30) +# define R500_ALPHA_SRCP_OP_A1_MINUS_A0 (1 << 30) +# define R500_ALPHA_SRCP_OP_A1_PLUS_A0 (2 << 30) +# define R500_ALPHA_SRCP_OP_1_MINUS_A0 (3 << 30) +#define R500_US_ALU_RGBA_INST_0 0xb000 +# define R500_ALU_RGBA_OP_MAD (0 << 0) +# define R500_ALU_RGBA_OP_DP3 (1 << 0) +# define R500_ALU_RGBA_OP_DP4 (2 << 0) +# define R500_ALU_RGBA_OP_D2A (3 << 0) +# define R500_ALU_RGBA_OP_MIN (4 << 0) +# define R500_ALU_RGBA_OP_MAX (5 << 0) +/* #define R500_ALU_RGBA_OP_RESERVED (6 << 0) */ +# define R500_ALU_RGBA_OP_CND (7 << 0) +# define R500_ALU_RGBA_OP_CMP (8 << 0) +# define R500_ALU_RGBA_OP_FRC (9 << 0) +# define R500_ALU_RGBA_OP_SOP (10 << 0) +# define R500_ALU_RGBA_OP_MDH (11 << 0) +# define R500_ALU_RGBA_OP_MDV (12 << 0) +# define R500_ALU_RGBA_ADDRD(x) ((x) << 4) +# define R500_ALU_RGBA_ADDRD_REL (1 << 11) +# define R500_ALU_RGBA_SEL_C_SHIFT 12 +# define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12) +# define R500_ALU_RGBA_SEL_C_SRC1 (1 << 12) +# define R500_ALU_RGBA_SEL_C_SRC2 (2 << 12) +# define R500_ALU_RGBA_SEL_C_SRCP (3 << 12) +# define R500_ALU_RGBA_R_SWIZ_R (0 << 14) +# define R500_ALU_RGBA_R_SWIZ_G (1 << 14) +# define R500_ALU_RGBA_R_SWIZ_B (2 << 14) +# define R500_ALU_RGBA_R_SWIZ_A (3 << 14) +# define R500_ALU_RGBA_R_SWIZ_0 (4 << 14) +# define R500_ALU_RGBA_R_SWIZ_HALF (5 << 14) +# define R500_ALU_RGBA_R_SWIZ_1 (6 << 14) +/* #define R500_ALU_RGBA_R_SWIZ_UNUSED (7 << 14) */ +# define R500_ALU_RGBA_G_SWIZ_R (0 << 17) +# define R500_ALU_RGBA_G_SWIZ_G (1 << 17) +# define R500_ALU_RGBA_G_SWIZ_B (2 << 17) +# define R500_ALU_RGBA_G_SWIZ_A (3 << 17) +# define R500_ALU_RGBA_G_SWIZ_0 (4 << 17) +# define R500_ALU_RGBA_G_SWIZ_HALF (5 << 17) +# define R500_ALU_RGBA_G_SWIZ_1 (6 << 17) +/* #define R500_ALU_RGBA_G_SWIZ_UNUSED (7 << 17) */ +# define R500_ALU_RGBA_B_SWIZ_R (0 << 20) +# define R500_ALU_RGBA_B_SWIZ_G (1 << 20) +# define R500_ALU_RGBA_B_SWIZ_B (2 << 20) +# define R500_ALU_RGBA_B_SWIZ_A (3 << 20) +# define R500_ALU_RGBA_B_SWIZ_0 (4 << 20) +# define R500_ALU_RGBA_B_SWIZ_HALF (5 << 20) +# define R500_ALU_RGBA_B_SWIZ_1 (6 << 20) +/* #define R500_ALU_RGBA_B_SWIZ_UNUSED (7 << 20) */ +# define R500_ALU_RGBA_MOD_C_NOP (0 << 23) +# define R500_ALU_RGBA_MOD_C_NEG (1 << 23) +# define R500_ALU_RGBA_MOD_C_ABS (2 << 23) +# define R500_ALU_RGBA_MOD_C_NAB (3 << 23) +# define R500_ALU_RGBA_ALPHA_SEL_C_SHIFT 25 +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC0 (0 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC1 (1 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC2 (2 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRCP (3 << 25) +# define R500_ALU_RGBA_A_SWIZ_R (0 << 27) +# define R500_ALU_RGBA_A_SWIZ_G (1 << 27) +# define R500_ALU_RGBA_A_SWIZ_B (2 << 27) +# define R500_ALU_RGBA_A_SWIZ_A (3 << 27) +# define R500_ALU_RGBA_A_SWIZ_0 (4 << 27) +# define R500_ALU_RGBA_A_SWIZ_HALF (5 << 27) +# define R500_ALU_RGBA_A_SWIZ_1 (6 << 27) +/* #define R500_ALU_RGBA_A_SWIZ_UNUSED (7 << 27) */ +# define R500_ALU_RGBA_ALPHA_MOD_C_NOP (0 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_NEG (1 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_ABS (2 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_NAB (3 << 30) +#define R500_US_ALU_RGB_INST_0 0xa000 +# define R500_ALU_RGB_SEL_A_SHIFT 0 +# define R500_ALU_RGB_SEL_A_SRC0 (0 << 0) +# define R500_ALU_RGB_SEL_A_SRC1 (1 << 0) +# define R500_ALU_RGB_SEL_A_SRC2 (2 << 0) +# define R500_ALU_RGB_SEL_A_SRCP (3 << 0) +# define R500_ALU_RGB_R_SWIZ_A_R (0 << 2) +# define R500_ALU_RGB_R_SWIZ_A_G (1 << 2) +# define R500_ALU_RGB_R_SWIZ_A_B (2 << 2) +# define R500_ALU_RGB_R_SWIZ_A_A (3 << 2) +# define R500_ALU_RGB_R_SWIZ_A_0 (4 << 2) +# define R500_ALU_RGB_R_SWIZ_A_HALF (5 << 2) +# define R500_ALU_RGB_R_SWIZ_A_1 (6 << 2) +/* #define R500_ALU_RGB_R_SWIZ_A_UNUSED (7 << 2) */ +# define R500_ALU_RGB_G_SWIZ_A_R (0 << 5) +# define R500_ALU_RGB_G_SWIZ_A_G (1 << 5) +# define R500_ALU_RGB_G_SWIZ_A_B (2 << 5) +# define R500_ALU_RGB_G_SWIZ_A_A (3 << 5) +# define R500_ALU_RGB_G_SWIZ_A_0 (4 << 5) +# define R500_ALU_RGB_G_SWIZ_A_HALF (5 << 5) +# define R500_ALU_RGB_G_SWIZ_A_1 (6 << 5) +/* #define R500_ALU_RGB_G_SWIZ_A_UNUSED (7 << 5) */ +# define R500_ALU_RGB_B_SWIZ_A_R (0 << 8) +# define R500_ALU_RGB_B_SWIZ_A_G (1 << 8) +# define R500_ALU_RGB_B_SWIZ_A_B (2 << 8) +# define R500_ALU_RGB_B_SWIZ_A_A (3 << 8) +# define R500_ALU_RGB_B_SWIZ_A_0 (4 << 8) +# define R500_ALU_RGB_B_SWIZ_A_HALF (5 << 8) +# define R500_ALU_RGB_B_SWIZ_A_1 (6 << 8) +/* #define R500_ALU_RGB_B_SWIZ_A_UNUSED (7 << 8) */ +# define R500_ALU_RGB_MOD_A_NOP (0 << 11) +# define R500_ALU_RGB_MOD_A_NEG (1 << 11) +# define R500_ALU_RGB_MOD_A_ABS (2 << 11) +# define R500_ALU_RGB_MOD_A_NAB (3 << 11) +# define R500_ALU_RGB_SEL_B_SHIFT 13 +# define R500_ALU_RGB_SEL_B_SRC0 (0 << 13) +# define R500_ALU_RGB_SEL_B_SRC1 (1 << 13) +# define R500_ALU_RGB_SEL_B_SRC2 (2 << 13) +# define R500_ALU_RGB_SEL_B_SRCP (3 << 13) +# define R500_ALU_RGB_R_SWIZ_B_R (0 << 15) +# define R500_ALU_RGB_R_SWIZ_B_G (1 << 15) +# define R500_ALU_RGB_R_SWIZ_B_B (2 << 15) +# define R500_ALU_RGB_R_SWIZ_B_A (3 << 15) +# define R500_ALU_RGB_R_SWIZ_B_0 (4 << 15) +# define R500_ALU_RGB_R_SWIZ_B_HALF (5 << 15) +# define R500_ALU_RGB_R_SWIZ_B_1 (6 << 15) +/* #define R500_ALU_RGB_R_SWIZ_B_UNUSED (7 << 15) */ +# define R500_ALU_RGB_G_SWIZ_B_R (0 << 18) +# define R500_ALU_RGB_G_SWIZ_B_G (1 << 18) +# define R500_ALU_RGB_G_SWIZ_B_B (2 << 18) +# define R500_ALU_RGB_G_SWIZ_B_A (3 << 18) +# define R500_ALU_RGB_G_SWIZ_B_0 (4 << 18) +# define R500_ALU_RGB_G_SWIZ_B_HALF (5 << 18) +# define R500_ALU_RGB_G_SWIZ_B_1 (6 << 18) +/* #define R500_ALU_RGB_G_SWIZ_B_UNUSED (7 << 18) */ +# define R500_ALU_RGB_B_SWIZ_B_R (0 << 21) +# define R500_ALU_RGB_B_SWIZ_B_G (1 << 21) +# define R500_ALU_RGB_B_SWIZ_B_B (2 << 21) +# define R500_ALU_RGB_B_SWIZ_B_A (3 << 21) +# define R500_ALU_RGB_B_SWIZ_B_0 (4 << 21) +# define R500_ALU_RGB_B_SWIZ_B_HALF (5 << 21) +# define R500_ALU_RGB_B_SWIZ_B_1 (6 << 21) +/* #define R500_ALU_RGB_B_SWIZ_B_UNUSED (7 << 21) */ +# define R500_ALU_RGB_MOD_B_NOP (0 << 24) +# define R500_ALU_RGB_MOD_B_NEG (1 << 24) +# define R500_ALU_RGB_MOD_B_ABS (2 << 24) +# define R500_ALU_RGB_MOD_B_NAB (3 << 24) +# define R500_ALU_RGB_OMOD_IDENTITY (0 << 26) +# define R500_ALU_RGB_OMOD_MUL_2 (1 << 26) +# define R500_ALU_RGB_OMOD_MUL_4 (2 << 26) +# define R500_ALU_RGB_OMOD_MUL_8 (3 << 26) +# define R500_ALU_RGB_OMOD_DIV_2 (4 << 26) +# define R500_ALU_RGB_OMOD_DIV_4 (5 << 26) +# define R500_ALU_RGB_OMOD_DIV_8 (6 << 26) +# define R500_ALU_RGB_OMOD_DISABLE (7 << 26) +# define R500_ALU_RGB_TARGET(x) ((x) << 29) +# define R500_ALU_RGB_WMASK (1 << 31) +#define R500_US_ALU_RGB_ADDR_0 0x9000 +# define R500_RGB_ADDR0(x) ((x) << 0) +# define R500_RGB_ADDR0_CONST (1 << 8) +# define R500_RGB_ADDR0_REL (1 << 9) +# define R500_RGB_ADDR1(x) ((x) << 10) +# define R500_RGB_ADDR1_CONST (1 << 18) +# define R500_RGB_ADDR1_REL (1 << 19) +# define R500_RGB_ADDR2(x) ((x) << 20) +# define R500_RGB_ADDR2_CONST (1 << 28) +# define R500_RGB_ADDR2_REL (1 << 29) +# define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30) +# define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 (1 << 30) +# define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0 (2 << 30) +# define R500_RGB_SRCP_OP_1_MINUS_RGB0 (3 << 30) +#define R500_US_CMN_INST_0 0xb800 +# define R500_INST_TYPE_MASK (3 << 0) +# define R500_INST_TYPE_ALU (0 << 0) +# define R500_INST_TYPE_OUT (1 << 0) +# define R500_INST_TYPE_FC (2 << 0) +# define R500_INST_TYPE_TEX (3 << 0) +# define R500_INST_TEX_SEM_WAIT (1 << 2) +# define R500_INST_RGB_PRED_SEL_NONE (0 << 3) +# define R500_INST_RGB_PRED_SEL_RGBA (1 << 3) +# define R500_INST_RGB_PRED_SEL_RRRR (2 << 3) +# define R500_INST_RGB_PRED_SEL_GGGG (3 << 3) +# define R500_INST_RGB_PRED_SEL_BBBB (4 << 3) +# define R500_INST_RGB_PRED_SEL_AAAA (5 << 3) +# define R500_INST_RGB_PRED_INV (1 << 6) +# define R500_INST_WRITE_INACTIVE (1 << 7) +# define R500_INST_LAST (1 << 8) +# define R500_INST_NOP (1 << 9) +# define R500_INST_ALU_WAIT (1 << 10) +# define R500_INST_RGB_WMASK_R (1 << 11) +# define R500_INST_RGB_WMASK_G (1 << 12) +# define R500_INST_RGB_WMASK_B (1 << 13) +# define R500_INST_ALPHA_WMASK (1 << 14) +# define R500_INST_RGB_OMASK_R (1 << 15) +# define R500_INST_RGB_OMASK_G (1 << 16) +# define R500_INST_RGB_OMASK_B (1 << 17) +# define R500_INST_ALPHA_OMASK (1 << 18) +# define R500_INST_RGB_CLAMP (1 << 19) +# define R500_INST_ALPHA_CLAMP (1 << 20) +# define R500_INST_ALU_RESULT_SEL (1 << 21) +# define R500_INST_ALPHA_PRED_INV (1 << 22) +# define R500_INST_ALU_RESULT_OP_EQ (0 << 23) +# define R500_INST_ALU_RESULT_OP_LT (1 << 23) +# define R500_INST_ALU_RESULT_OP_GE (2 << 23) +# define R500_INST_ALU_RESULT_OP_NE (3 << 23) +# define R500_INST_ALPHA_PRED_SEL_NONE (0 << 25) +# define R500_INST_ALPHA_PRED_SEL_RGBA (1 << 25) +# define R500_INST_ALPHA_PRED_SEL_RRRR (2 << 25) +# define R500_INST_ALPHA_PRED_SEL_GGGG (3 << 25) +# define R500_INST_ALPHA_PRED_SEL_BBBB (4 << 25) +# define R500_INST_ALPHA_PRED_SEL_AAAA (5 << 25) +/* XXX next four are kind of guessed */ +# define R500_INST_STAT_WE_R (1 << 28) +# define R500_INST_STAT_WE_G (1 << 29) +# define R500_INST_STAT_WE_B (1 << 30) +# define R500_INST_STAT_WE_A (1 << 31) + +/* note that these are 8 bit lengths, despite the offsets, at least for R500 */ +#define R500_US_CODE_ADDR 0x4630 +# define R500_US_CODE_START_ADDR(x) ((x) << 0) +# define R500_US_CODE_END_ADDR(x) ((x) << 16) +#define R500_US_CODE_OFFSET 0x4638 +# define R500_US_CODE_OFFSET_ADDR(x) ((x) << 0) +#define R500_US_CODE_RANGE 0x4634 +# define R500_US_CODE_RANGE_ADDR(x) ((x) << 0) +# define R500_US_CODE_RANGE_SIZE(x) ((x) << 16) +#define R500_US_CONFIG 0x4600 +# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1) +#define R500_US_FC_ADDR_0 0xa000 +# define R500_FC_BOOL_ADDR(x) ((x) << 0) +# define R500_FC_INT_ADDR(x) ((x) << 8) +# define R500_FC_JUMP_ADDR(x) ((x) << 16) +# define R500_FC_JUMP_GLOBAL (1 << 31) +#define R500_US_FC_BOOL_CONST 0x4620 +# define R500_FC_KBOOL(x) (x) +#define R500_US_FC_CTRL 0x4624 +# define R500_FC_TEST_EN (1 << 30) +# define R500_FC_FULL_FC_EN (1 << 31) +#define R500_US_FC_INST_0 0x9800 +# define R500_FC_OP_JUMP (0 << 0) +# define R500_FC_OP_LOOP (1 << 0) +# define R500_FC_OP_ENDLOOP (2 << 0) +# define R500_FC_OP_REP (3 << 0) +# define R500_FC_OP_ENDREP (4 << 0) +# define R500_FC_OP_BREAKLOOP (5 << 0) +# define R500_FC_OP_BREAKREP (6 << 0) +# define R500_FC_OP_CONTINUE (7 << 0) +# define R500_FC_B_ELSE (1 << 4) +# define R500_FC_JUMP_ANY (1 << 5) +# define R500_FC_A_OP_NONE (0 << 6) +# define R500_FC_A_OP_POP (1 << 6) +# define R500_FC_A_OP_PUSH (2 << 6) +# define R500_FC_JUMP_FUNC(x) ((x) << 8) +# define R500_FC_B_POP_CNT(x) ((x) << 16) +# define R500_FC_B_OP0_NONE (0 << 24) +# define R500_FC_B_OP0_DECR (1 << 24) +# define R500_FC_B_OP0_INCR (2 << 24) +# define R500_FC_B_OP1_DECR (0 << 26) +# define R500_FC_B_OP1_NONE (1 << 26) +# define R500_FC_B_OP1_INCR (2 << 26) +# define R500_FC_IGNORE_UNCOVERED (1 << 28) +#define R500_US_FC_INT_CONST_0 0x4c00 +# define R500_FC_INT_CONST_KR(x) ((x) << 0) +# define R500_FC_INT_CONST_KG(x) ((x) << 8) +# define R500_FC_INT_CONST_KB(x) ((x) << 16) +/* _0 through _15 */ +#define R500_US_FORMAT0_0 0x4640 +# define R500_FORMAT_TXWIDTH(x) ((x) << 0) +# define R500_FORMAT_TXHEIGHT(x) ((x) << 11) +# define R500_FORMAT_TXDEPTH(x) ((x) << 22) +/* _0 through _3 */ +#define R500_US_OUT_FMT_0 0x46a4 +# define R500_OUT_FMT_C4_8 (0 << 0) +# define R500_OUT_FMT_C4_10 (1 << 0) +# define R500_OUT_FMT_C4_10_GAMMA (2 << 0) +# define R500_OUT_FMT_C_16 (3 << 0) +# define R500_OUT_FMT_C2_16 (4 << 0) +# define R500_OUT_FMT_C4_16 (5 << 0) +# define R500_OUT_FMT_C_16_MPEG (6 << 0) +# define R500_OUT_FMT_C2_16_MPEG (7 << 0) +# define R500_OUT_FMT_C2_4 (8 << 0) +# define R500_OUT_FMT_C_3_3_2 (9 << 0) +# define R500_OUT_FMT_C_6_5_6 (10 << 0) +# define R500_OUT_FMT_C_11_11_10 (11 << 0) +# define R500_OUT_FMT_C_10_11_11 (12 << 0) +# define R500_OUT_FMT_C_2_10_10_10 (13 << 0) +/* #define R500_OUT_FMT_RESERVED (14 << 0) */ +# define R500_OUT_FMT_UNUSED (15 << 0) +# define R500_OUT_FMT_C_16_FP (16 << 0) +# define R500_OUT_FMT_C2_16_FP (17 << 0) +# define R500_OUT_FMT_C4_16_FP (18 << 0) +# define R500_OUT_FMT_C_32_FP (19 << 0) +# define R500_OUT_FMT_C2_32_FP (20 << 0) +# define R500_OUT_FMT_C4_32_FP (21 << 0) +# define R500_C0_SEL_A (0 << 8) +# define R500_C0_SEL_R (1 << 8) +# define R500_C0_SEL_G (2 << 8) +# define R500_C0_SEL_B (3 << 8) +# define R500_C1_SEL_A (0 << 10) +# define R500_C1_SEL_R (1 << 10) +# define R500_C1_SEL_G (2 << 10) +# define R500_C1_SEL_B (3 << 10) +# define R500_C2_SEL_A (0 << 12) +# define R500_C2_SEL_R (1 << 12) +# define R500_C2_SEL_G (2 << 12) +# define R500_C2_SEL_B (3 << 12) +# define R500_C3_SEL_A (0 << 14) +# define R500_C3_SEL_R (1 << 14) +# define R500_C3_SEL_G (2 << 14) +# define R500_C3_SEL_B (3 << 14) +# define R500_OUT_SIGN(x) ((x) << 16) +# define R500_ROUND_ADJ (1 << 20) +#define R500_US_PIXSIZE 0x4604 +# define R500_PIX_SIZE(x) (x) +#define R500_US_TEX_ADDR_0 0x9800 +# define R500_TEX_SRC_ADDR(x) ((x) << 0) +# define R500_TEX_SRC_ADDR_REL (1 << 7) +# define R500_TEX_SRC_S_SWIZ_R (0 << 8) +# define R500_TEX_SRC_S_SWIZ_G (1 << 8) +# define R500_TEX_SRC_S_SWIZ_B (2 << 8) +# define R500_TEX_SRC_S_SWIZ_A (3 << 8) +# define R500_TEX_SRC_T_SWIZ_R (0 << 10) +# define R500_TEX_SRC_T_SWIZ_G (1 << 10) +# define R500_TEX_SRC_T_SWIZ_B (2 << 10) +# define R500_TEX_SRC_T_SWIZ_A (3 << 10) +# define R500_TEX_SRC_R_SWIZ_R (0 << 12) +# define R500_TEX_SRC_R_SWIZ_G (1 << 12) +# define R500_TEX_SRC_R_SWIZ_B (2 << 12) +# define R500_TEX_SRC_R_SWIZ_A (3 << 12) +# define R500_TEX_SRC_Q_SWIZ_R (0 << 14) +# define R500_TEX_SRC_Q_SWIZ_G (1 << 14) +# define R500_TEX_SRC_Q_SWIZ_B (2 << 14) +# define R500_TEX_SRC_Q_SWIZ_A (3 << 14) +# define R500_TEX_DST_ADDR(x) ((x) << 16) +# define R500_TEX_DST_ADDR_REL (1 << 23) +# define R500_TEX_DST_R_SWIZ_R (0 << 24) +# define R500_TEX_DST_R_SWIZ_G (1 << 24) +# define R500_TEX_DST_R_SWIZ_B (2 << 24) +# define R500_TEX_DST_R_SWIZ_A (3 << 24) +# define R500_TEX_DST_G_SWIZ_R (0 << 26) +# define R500_TEX_DST_G_SWIZ_G (1 << 26) +# define R500_TEX_DST_G_SWIZ_B (2 << 26) +# define R500_TEX_DST_G_SWIZ_A (3 << 26) +# define R500_TEX_DST_B_SWIZ_R (0 << 28) +# define R500_TEX_DST_B_SWIZ_G (1 << 28) +# define R500_TEX_DST_B_SWIZ_B (2 << 28) +# define R500_TEX_DST_B_SWIZ_A (3 << 28) +# define R500_TEX_DST_A_SWIZ_R (0 << 30) +# define R500_TEX_DST_A_SWIZ_G (1 << 30) +# define R500_TEX_DST_A_SWIZ_B (2 << 30) +# define R500_TEX_DST_A_SWIZ_A (3 << 30) +#define R500_US_TEX_ADDR_DXDY_0 0xa000 +# define R500_DX_ADDR(x) ((x) << 0) +# define R500_DX_ADDR_REL (1 << 7) +# define R500_DX_S_SWIZ_R (0 << 8) +# define R500_DX_S_SWIZ_G (1 << 8) +# define R500_DX_S_SWIZ_B (2 << 8) +# define R500_DX_S_SWIZ_A (3 << 8) +# define R500_DX_T_SWIZ_R (0 << 10) +# define R500_DX_T_SWIZ_G (1 << 10) +# define R500_DX_T_SWIZ_B (2 << 10) +# define R500_DX_T_SWIZ_A (3 << 10) +# define R500_DX_R_SWIZ_R (0 << 12) +# define R500_DX_R_SWIZ_G (1 << 12) +# define R500_DX_R_SWIZ_B (2 << 12) +# define R500_DX_R_SWIZ_A (3 << 12) +# define R500_DX_Q_SWIZ_R (0 << 14) +# define R500_DX_Q_SWIZ_G (1 << 14) +# define R500_DX_Q_SWIZ_B (2 << 14) +# define R500_DX_Q_SWIZ_A (3 << 14) +# define R500_DY_ADDR(x) ((x) << 16) +# define R500_DY_ADDR_REL (1 << 17) +# define R500_DY_S_SWIZ_R (0 << 24) +# define R500_DY_S_SWIZ_G (1 << 24) +# define R500_DY_S_SWIZ_B (2 << 24) +# define R500_DY_S_SWIZ_A (3 << 24) +# define R500_DY_T_SWIZ_R (0 << 26) +# define R500_DY_T_SWIZ_G (1 << 26) +# define R500_DY_T_SWIZ_B (2 << 26) +# define R500_DY_T_SWIZ_A (3 << 26) +# define R500_DY_R_SWIZ_R (0 << 28) +# define R500_DY_R_SWIZ_G (1 << 28) +# define R500_DY_R_SWIZ_B (2 << 28) +# define R500_DY_R_SWIZ_A (3 << 28) +# define R500_DY_Q_SWIZ_R (0 << 30) +# define R500_DY_Q_SWIZ_G (1 << 30) +# define R500_DY_Q_SWIZ_B (2 << 30) +# define R500_DY_Q_SWIZ_A (3 << 30) +#define R500_US_TEX_INST_0 0x9000 +# define R500_TEX_ID(x) ((x) << 16) +# define R500_TEX_INST_NOP (0 << 22) +# define R500_TEX_INST_LD (1 << 22) +# define R500_TEX_INST_TEXKILL (2 << 22) +# define R500_TEX_INST_PROJ (3 << 22) +# define R500_TEX_INST_LODBIAS (4 << 22) +# define R500_TEX_INST_LOD (5 << 22) +# define R500_TEX_INST_DXDY (6 << 22) +# define R500_TEX_SEM_ACQUIRE (1 << 25) +# define R500_TEX_IGNORE_UNCOVERED (1 << 26) +# define R500_TEX_UNSCALED (1 << 27) +#define R300_US_W_FMT 0x46b4 +# define R300_W_FMT_W0 (0 << 0) +# define R300_W_FMT_W24 (1 << 0) +# define R300_W_FMT_W24FP (2 << 0) +# define R300_W_SRC_US (0 << 2) +# define R300_W_SRC_RAS (1 << 2) + + +/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR. + * Two parameter dwords: + * 0. VAP_VTX_FMT: The first parameter is not written to hardware + * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword. + */ +#define R300_PACKET3_3D_DRAW_VBUF 0x00002800 + +/* Draw a primitive from immediate vertices in this packet + * Up to 16382 dwords: + * 0. VAP_VTX_FMT: The first parameter is not written to hardware + * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword. + * 2 to end: Up to 16380 dwords of vertex data. + */ +#define R300_PACKET3_3D_DRAW_IMMD 0x00002900 + +/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR and + * immediate vertices in this packet + * Up to 16382 dwords: + * 0. VAP_VTX_FMT: The first parameter is not written to hardware + * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword. + * 2 to end: Up to 16380 dwords of vertex data. + */ +#define R300_PACKET3_3D_DRAW_INDX 0x00002A00 + + +/* Specify the full set of vertex arrays as (address, stride). + * The first parameter is the number of vertex arrays specified. + * The rest of the command is a variable length list of blocks, where + * each block is three dwords long and specifies two arrays. + * The first dword of a block is split into two words, the lower significant + * word refers to the first array, the more significant word to the second + * array in the block. + * The low byte of each word contains the size of an array entry in dwords, + * the high byte contains the stride of the array. + * The second dword of a block contains the pointer to the first array, + * the third dword of a block contains the pointer to the second array. + * Note that if the total number of arrays is odd, the third dword of + * the last block is omitted. + */ +#define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00 + +#define R300_PACKET3_INDX_BUFFER 0x00003300 +# define R300_INDX_BUFFER_DST_SHIFT 0 +# define R300_INDX_BUFFER_SKIP_SHIFT 16 +# define R300_INDX_BUFFER_ONE_REG_WR (1<<31) + +/* Same as R300_PACKET3_3D_DRAW_VBUF but without VAP_VTX_FMT */ +#define R300_PACKET3_3D_DRAW_VBUF_2 0x00003400 +/* Same as R300_PACKET3_3D_DRAW_IMMD but without VAP_VTX_FMT */ +#define R300_PACKET3_3D_DRAW_IMMD_2 0x00003500 +/* Same as R300_PACKET3_3D_DRAW_INDX but without VAP_VTX_FMT */ +#define R300_PACKET3_3D_DRAW_INDX_2 0x00003600 + +/* Clears a portion of hierachical Z RAM + * 3 dword parameters + * 0. START + * 1. COUNT: 13:0 (max is 0x3FFF) + * 2. CLEAR_VALUE: Value to write into HIZ RAM. + */ +#define R300_PACKET3_3D_CLEAR_HIZ 0x00003700 + +/* Draws a set of primitives using vertex buffers pointed by the state data. + * At least 2 Parameters: + * 0. VAP_VF_CNTL: The first parameter is a standard primitive emission dword. + * 2 to end: Data or indices (see other 3D_DRAW_* packets for details) + */ +#define R300_PACKET3_3D_DRAW_128 0x00003900 + +/* END: Packet 3 commands */ + + +/* Color formats for 2d packets + */ +#define R300_CP_COLOR_FORMAT_CI8 2 +#define R300_CP_COLOR_FORMAT_ARGB1555 3 +#define R300_CP_COLOR_FORMAT_RGB565 4 +#define R300_CP_COLOR_FORMAT_ARGB8888 6 +#define R300_CP_COLOR_FORMAT_RGB332 7 +#define R300_CP_COLOR_FORMAT_RGB8 9 +#define R300_CP_COLOR_FORMAT_ARGB4444 15 + +/* + * CP type-3 packets + */ +#define R300_CP_CMD_BITBLT_MULTI 0xC0009B00 + +#endif /* _R300_REG_H */ + +/* *INDENT-ON* */ + +/* vim: set foldenable foldmarker=\\{,\\} foldmethod=marker : */ diff --git a/src/mesa/drivers/dri/r600/r600_render.c b/src/mesa/drivers/dri/r600/r600_render.c new file mode 100644 index 00000000000..3d4b3241c89 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_render.c @@ -0,0 +1,548 @@ +/************************************************************************** + +Copyright (C) 2004 Nicolai Haehnle. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \brief R300 Render (Vertex Buffer Implementation) + * + * The immediate implementation has been removed from CVS in favor of the vertex + * buffer implementation. + * + * The render functions are called by the pipeline manager to render a batch of + * primitives. They return TRUE to pass on to the next stage (i.e. software + * rasterization) or FALSE to indicate that the pipeline has finished after + * rendering something. + * + * When falling back to software TCL still attempt to use hardware + * rasterization. + * + * I am not sure that the cache related registers are setup correctly, but + * obviously this does work... Further investigation is needed. + * + * \author Nicolai Haehnle <[email protected]> + * + * \todo Add immediate implementation back? Perhaps this is useful if there are + * no bugs... + */ + +#include "main/glheader.h" +#include "main/state.h" +#include "main/imports.h" +#include "main/enums.h" +#include "main/macros.h" +#include "main/context.h" +#include "main/dd.h" +#include "main/simple_list.h" +#include "main/api_arrayelt.h" +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "vbo/vbo.h" +#include "tnl/tnl.h" +#include "tnl/t_vp_build.h" +#include "radeon_reg.h" +#include "radeon_macros.h" +#include "r600_context.h" +#include "r600_ioctl.h" +#include "r600_state.h" +#include "r600_reg.h" +#include "r600_tex.h" +#include "r600_emit.h" +#include "r600_fragprog.h" +extern int future_hw_tcl_on; + +/** + * \brief Convert a OpenGL primitive type into a R300 primitive type. + */ +int r300PrimitiveType(r300ContextPtr rmesa, int prim) +{ + switch (prim & PRIM_MODE_MASK) { + case GL_POINTS: + return R300_VAP_VF_CNTL__PRIM_POINTS; + break; + case GL_LINES: + return R300_VAP_VF_CNTL__PRIM_LINES; + break; + case GL_LINE_STRIP: + return R300_VAP_VF_CNTL__PRIM_LINE_STRIP; + break; + case GL_LINE_LOOP: + return R300_VAP_VF_CNTL__PRIM_LINE_LOOP; + break; + case GL_TRIANGLES: + return R300_VAP_VF_CNTL__PRIM_TRIANGLES; + break; + case GL_TRIANGLE_STRIP: + return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP; + break; + case GL_TRIANGLE_FAN: + return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN; + break; + case GL_QUADS: + return R300_VAP_VF_CNTL__PRIM_QUADS; + break; + case GL_QUAD_STRIP: + return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP; + break; + case GL_POLYGON: + return R300_VAP_VF_CNTL__PRIM_POLYGON; + break; + default: + assert(0); + return -1; + break; + } +} + +int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) +{ + int verts_off = 0; + + switch (prim & PRIM_MODE_MASK) { + case GL_POINTS: + verts_off = 0; + break; + case GL_LINES: + verts_off = num_verts % 2; + break; + case GL_LINE_STRIP: + if (num_verts < 2) + verts_off = num_verts; + break; + case GL_LINE_LOOP: + if (num_verts < 2) + verts_off = num_verts; + break; + case GL_TRIANGLES: + verts_off = num_verts % 3; + break; + case GL_TRIANGLE_STRIP: + if (num_verts < 3) + verts_off = num_verts; + break; + case GL_TRIANGLE_FAN: + if (num_verts < 3) + verts_off = num_verts; + break; + case GL_QUADS: + verts_off = num_verts % 4; + break; + case GL_QUAD_STRIP: + if (num_verts < 4) + verts_off = num_verts; + else + verts_off = num_verts % 2; + break; + case GL_POLYGON: + if (num_verts < 3) + verts_off = num_verts; + break; + default: + assert(0); + return -1; + break; + } + + return num_verts - verts_off; +} + +static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + void *out; + + radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo, + &rmesa->radeon.tcl.elt_dma_offset, n_elts * 4, 4); + radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1); + out = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset; + memcpy(out, elts, n_elts * 4); + radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo); +} + +static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type) +{ + BATCH_LOCALS(&rmesa->radeon); + + if (vertex_count > 0) { + BEGIN_BATCH(10); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + ((vertex_count + 0) << 16) | + type | + R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + + if (!rmesa->radeon.radeonScreen->kernel_mm) { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | + (R300_VAP_PORT_IDX0 >> 2)); + OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset, + rmesa->radeon.tcl.elt_dma_bo, + rmesa->radeon.tcl.elt_dma_offset, + RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_BATCH(vertex_count); + } else { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | + (R300_VAP_PORT_IDX0 >> 2)); + OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset); + OUT_BATCH(vertex_count); + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->radeon.tcl.elt_dma_bo, + RADEON_GEM_DOMAIN_GTT, 0, 0); + } + END_BATCH(); + } +} + +static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) +{ + BATCH_LOCALS(&rmesa->radeon); + uint32_t voffset; + int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; + int i; + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, + offset); + + + if (!rmesa->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH(sz+2+(nr * 2)); + OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1); + OUT_BATCH(nr); + + for (i = 0; i + 1 < nr; i += 2) { + OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) | + (rmesa->radeon.tcl.aos[i].stride << 8) | + (rmesa->radeon.tcl.aos[i + 1].components << 16) | + (rmesa->radeon.tcl.aos[i + 1].stride << 24)); + + voffset = rmesa->radeon.tcl.aos[i + 0].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; + OUT_BATCH_RELOC(voffset, + rmesa->radeon.tcl.aos[i].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + voffset = rmesa->radeon.tcl.aos[i + 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride; + OUT_BATCH_RELOC(voffset, + rmesa->radeon.tcl.aos[i+1].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + + if (nr & 1) { + OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) | + (rmesa->radeon.tcl.aos[nr - 1].stride << 8)); + voffset = rmesa->radeon.tcl.aos[nr - 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride; + OUT_BATCH_RELOC(voffset, + rmesa->radeon.tcl.aos[nr - 1].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + END_BATCH(); + } else { + + BEGIN_BATCH(sz+2+(nr * 2)); + OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1); + OUT_BATCH(nr); + + for (i = 0; i + 1 < nr; i += 2) { + OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) | + (rmesa->radeon.tcl.aos[i].stride << 8) | + (rmesa->radeon.tcl.aos[i + 1].components << 16) | + (rmesa->radeon.tcl.aos[i + 1].stride << 24)); + + voffset = rmesa->radeon.tcl.aos[i + 0].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; + OUT_BATCH(voffset); + voffset = rmesa->radeon.tcl.aos[i + 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride; + OUT_BATCH(voffset); + } + + if (nr & 1) { + OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) | + (rmesa->radeon.tcl.aos[nr - 1].stride << 8)); + voffset = rmesa->radeon.tcl.aos[nr - 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride; + OUT_BATCH(voffset); + } + for (i = 0; i + 1 < nr; i += 2) { + voffset = rmesa->radeon.tcl.aos[i + 0].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->radeon.tcl.aos[i+0].bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + voffset = rmesa->radeon.tcl.aos[i + 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride; + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->radeon.tcl.aos[i+1].bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + if (nr & 1) { + voffset = rmesa->radeon.tcl.aos[nr - 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride; + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->radeon.tcl.aos[nr-1].bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + END_BATCH(); + } + +} + +static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type) +{ + BATCH_LOCALS(&rmesa->radeon); + + BEGIN_BATCH(3); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); + END_BATCH(); +} + +static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, + int start, int end, int prim) +{ + int type, num_verts; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + + type = r300PrimitiveType(rmesa, prim); + num_verts = r300NumVerts(rmesa, end - start, prim); + + if (type < 0 || num_verts <= 0) + return; + + /* Make space for at least 64 dwords. + * This is supposed to ensure that we can get all rendering + * commands into a single command buffer. + */ + rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__); + + if (vb->Elts) { + if (num_verts > 65535) { + /* not implemented yet */ + WARN_ONCE("Too many elts\n"); + return; + } + /* Note: The following is incorrect, but it's the best I can do + * without a major refactoring of how DMA memory is handled. + * The problem: Ensuring that both vertex arrays *and* index + * arrays are at the right position, and then ensuring that + * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted + * at once. + * + * So why is the following incorrect? Well, it seems like + * allocating the index array might actually evict the vertex + * arrays. *sigh* + */ + r300EmitElts(ctx, vb->Elts, num_verts); + r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start); + r300FireEB(rmesa, num_verts, type); + } else { + r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start); + r300FireAOS(rmesa, num_verts, type); + } + COMMIT_BATCH(); +} + +static GLboolean r300RunRender(GLcontext * ctx, + struct tnl_pipeline_stage *stage) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + int i; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s\n", __FUNCTION__); + + r300UpdateShaders(rmesa); + if (r300EmitArrays(ctx)) + return GL_TRUE; + + r300UpdateShaderStates(rmesa); + + r300EmitCacheFlush(rmesa); + radeonEmitState(&rmesa->radeon); + + for (i = 0; i < vb->PrimitiveCount; i++) { + GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); + GLuint start = vb->Primitive[i].start; + GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; + r300RunRenderPrimitive(rmesa, ctx, start, end, prim); + } + + r300EmitCacheFlush(rmesa); + + radeonReleaseArrays(ctx, ~0); + + return GL_FALSE; +} + +#define FALLBACK_IF(expr) \ + do { \ + if (expr) { \ + if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \ + WARN_ONCE("Software fallback:%s\n", \ + #expr); \ + return R300_FALLBACK_RAST; \ + } \ + } while(0) + +static int r300Fallback(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + const unsigned back = ctx->Stencil._BackFace; + + FALLBACK_IF(r300->radeon.Fallback); + /* Do we need to use new-style shaders? + * Also is there a better way to do this? */ + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + struct r500_fragment_program *fp = (struct r500_fragment_program *) + (char *)ctx->FragmentProgram._Current; + if (fp) { + if (!fp->translated) { + r500TranslateFragmentShader(r300, fp); + FALLBACK_IF(!fp->translated); + } + } + } else { + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + if (fp) { + if (!fp->translated) { + r300TranslateFragmentShader(r300, fp); + FALLBACK_IF(!fp->translated); + } + } + } + + FALLBACK_IF(ctx->RenderMode != GL_RENDER); + + /* If GL_EXT_stencil_two_side is disabled, this fallback check can + * be removed. + */ + FALLBACK_IF(ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back] + || ctx->Stencil.ValueMask[0] != + ctx->Stencil.ValueMask[back] + || ctx->Stencil.WriteMask[0] != + ctx->Stencil.WriteMask[back]); + + if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite) + FALLBACK_IF(ctx->Point.PointSprite); + + if (!r300->disable_lowimpact_fallback) { + FALLBACK_IF(ctx->Polygon.StippleFlag); + FALLBACK_IF(ctx->Multisample._Enabled); + FALLBACK_IF(ctx->Line.StippleFlag); + FALLBACK_IF(ctx->Line.SmoothFlag); + FALLBACK_IF(ctx->Point.SmoothFlag); + } + + return R300_FALLBACK_NONE; +} + +static GLboolean r300RunNonTCLRender(GLcontext * ctx, + struct tnl_pipeline_stage *stage) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (r300Fallback(ctx) >= R300_FALLBACK_RAST) + return GL_TRUE; + + if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + return GL_TRUE; + + if (!r300ValidateBuffers(ctx)) + return GL_TRUE; + + return r300RunRender(ctx, stage); +} + +static GLboolean r300RunTCLRender(GLcontext * ctx, + struct tnl_pipeline_stage *stage) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_vertex_program *vp; + + hw_tcl_on = future_hw_tcl_on; + + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (hw_tcl_on == GL_FALSE) + return GL_TRUE; + + if (r300Fallback(ctx) >= R300_FALLBACK_TCL) { + hw_tcl_on = GL_FALSE; + return GL_TRUE; + } + + if (!r300ValidateBuffers(ctx)) + return GL_TRUE; + + r300UpdateShaders(rmesa); + + vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); + if (vp->native == GL_FALSE) { + hw_tcl_on = GL_FALSE; + return GL_TRUE; + } + + return r300RunRender(ctx, stage); +} + +const struct tnl_pipeline_stage _r300_render_stage = { + "r300 Hardware Rasterization", + NULL, + NULL, + NULL, + NULL, + r300RunNonTCLRender +}; + +const struct tnl_pipeline_stage _r300_tcl_stage = { + "r300 Hardware Transform, Clipping and Lighting", + NULL, + NULL, + NULL, + NULL, + r300RunTCLRender +}; diff --git a/src/mesa/drivers/dri/r600/r600_shader.c b/src/mesa/drivers/dri/r600/r600_shader.c new file mode 100644 index 00000000000..63d4b45b9f7 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_shader.c @@ -0,0 +1,93 @@ + +#include "main/glheader.h" + +#include "shader/program.h" +#include "tnl/tnl.h" +#include "r600_context.h" +#include "r600_fragprog.h" + +static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, + GLuint id) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_vertex_program_cont *vp; + struct r300_fragment_program *r300_fp; + struct r500_fragment_program *r500_fp; + + switch (target) { + case GL_VERTEX_STATE_PROGRAM_NV: + case GL_VERTEX_PROGRAM_ARB: + vp = CALLOC_STRUCT(r300_vertex_program_cont); + return _mesa_init_vertex_program(ctx, &vp->mesa_program, + target, id); + case GL_FRAGMENT_PROGRAM_ARB: + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + r500_fp = CALLOC_STRUCT(r500_fragment_program); + r500_fp->ctx = ctx; + return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program, + target, id); + } else { + r300_fp = CALLOC_STRUCT(r300_fragment_program); + return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program, + target, id); + } + + case GL_FRAGMENT_PROGRAM_NV: + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + r500_fp = CALLOC_STRUCT(r500_fragment_program); + return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program, + target, id); + } else { + r300_fp = CALLOC_STRUCT(r300_fragment_program); + return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program, + target, id); + } + default: + _mesa_problem(ctx, "Bad target in r300NewProgram"); + } + + return NULL; +} + +static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog) +{ + _mesa_delete_program(ctx, prog); +} + +static void +r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_vertex_program_cont *vp = (void *)prog; + struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)prog; + struct r500_fragment_program *r500_fp = (struct r500_fragment_program *)prog; + + switch (target) { + case GL_VERTEX_PROGRAM_ARB: + vp->progs = NULL; + break; + case GL_FRAGMENT_PROGRAM_ARB: + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + r500_fp->translated = GL_FALSE; + else + r300_fp->translated = GL_FALSE; + break; + } + + /* need this for tcl fallbacks */ + _tnl_program_string(ctx, target, prog); +} + +static GLboolean +r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) +{ + return GL_TRUE; +} + +void r300InitShaderFuncs(struct dd_function_table *functions) +{ + functions->NewProgram = r300NewProgram; + functions->DeleteProgram = r300DeleteProgram; + functions->ProgramStringNotify = r300ProgramStringNotify; + functions->IsProgramNative = r300IsProgramNative; +} diff --git a/src/mesa/drivers/dri/r600/r600_state.c b/src/mesa/drivers/dri/r600/r600_state.c new file mode 100644 index 00000000000..8cb9c7cb6f7 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_state.c @@ -0,0 +1,2600 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. +Copyright (C) 2004 Nicolai Haehnle. +All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Nicolai Haehnle <[email protected]> + */ + +#include "main/glheader.h" +#include "main/state.h" +#include "main/imports.h" +#include "main/enums.h" +#include "main/macros.h" +#include "main/context.h" +#include "main/dd.h" +#include "main/framebuffer.h" +#include "main/simple_list.h" +#include "main/api_arrayelt.h" +#include "main/texformat.h" + +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "shader/prog_parameter.h" +#include "shader/prog_statevars.h" +#include "vbo/vbo.h" +#include "tnl/tnl.h" + +#include "r600_context.h" +#include "r600_ioctl.h" +#include "r600_state.h" +#include "r600_reg.h" +#include "r600_emit.h" +#include "r600_fragprog.h" +#include "r600_tex.h" + +#include "drirenderbuffer.h" + +extern int future_hw_tcl_on; +extern void _tnl_UpdateFixedFunctionProgram(GLcontext * ctx); + +static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4]) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + R300_STATECHANGE(rmesa, blend_color); + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + GLuint r = IROUND(cf[0]*1023.0f); + GLuint g = IROUND(cf[1]*1023.0f); + GLuint b = IROUND(cf[2]*1023.0f); + GLuint a = IROUND(cf[3]*1023.0f); + + rmesa->hw.blend_color.cmd[1] = r | (a << 16); + rmesa->hw.blend_color.cmd[2] = b | (g << 16); + } else { + GLubyte color[4]; + CLAMPED_FLOAT_TO_UBYTE(color[0], cf[0]); + CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]); + CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]); + CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]); + + rmesa->hw.blend_color.cmd[1] = PACK_COLOR_8888(color[3], color[0], + color[1], color[2]); + } +} + +/** + * Calculate the hardware blend factor setting. This same function is used + * for source and destination of both alpha and RGB. + * + * \returns + * The hardware register value for the specified blend factor. This value + * will need to be shifted into the correct position for either source or + * destination factor. + * + * \todo + * Since the two cases where source and destination are handled differently + * are essentially error cases, they should never happen. Determine if these + * cases can be removed. + */ +static int blend_factor(GLenum factor, GLboolean is_src) +{ + switch (factor) { + case GL_ZERO: + return R300_BLEND_GL_ZERO; + break; + case GL_ONE: + return R300_BLEND_GL_ONE; + break; + case GL_DST_COLOR: + return R300_BLEND_GL_DST_COLOR; + break; + case GL_ONE_MINUS_DST_COLOR: + return R300_BLEND_GL_ONE_MINUS_DST_COLOR; + break; + case GL_SRC_COLOR: + return R300_BLEND_GL_SRC_COLOR; + break; + case GL_ONE_MINUS_SRC_COLOR: + return R300_BLEND_GL_ONE_MINUS_SRC_COLOR; + break; + case GL_SRC_ALPHA: + return R300_BLEND_GL_SRC_ALPHA; + break; + case GL_ONE_MINUS_SRC_ALPHA: + return R300_BLEND_GL_ONE_MINUS_SRC_ALPHA; + break; + case GL_DST_ALPHA: + return R300_BLEND_GL_DST_ALPHA; + break; + case GL_ONE_MINUS_DST_ALPHA: + return R300_BLEND_GL_ONE_MINUS_DST_ALPHA; + break; + case GL_SRC_ALPHA_SATURATE: + return (is_src) ? R300_BLEND_GL_SRC_ALPHA_SATURATE : + R300_BLEND_GL_ZERO; + break; + case GL_CONSTANT_COLOR: + return R300_BLEND_GL_CONST_COLOR; + break; + case GL_ONE_MINUS_CONSTANT_COLOR: + return R300_BLEND_GL_ONE_MINUS_CONST_COLOR; + break; + case GL_CONSTANT_ALPHA: + return R300_BLEND_GL_CONST_ALPHA; + break; + case GL_ONE_MINUS_CONSTANT_ALPHA: + return R300_BLEND_GL_ONE_MINUS_CONST_ALPHA; + break; + default: + fprintf(stderr, "unknown blend factor %x\n", factor); + return (is_src) ? R300_BLEND_GL_ONE : R300_BLEND_GL_ZERO; + break; + } +} + +/** + * Sets both the blend equation and the blend function. + * This is done in a single + * function because some blend equations (i.e., \c GL_MIN and \c GL_MAX) + * change the interpretation of the blend function. + * Also, make sure that blend function and blend equation are set to their + * default value if color blending is not enabled, since at least blend + * equations GL_MIN and GL_FUNC_REVERSE_SUBTRACT will cause wrong results + * otherwise for unknown reasons. + */ + +/* helper function */ +static void r300SetBlendCntl(r300ContextPtr r300, int func, int eqn, + int cbits, int funcA, int eqnA) +{ + GLuint new_ablend, new_cblend; + +#if 0 + fprintf(stderr, + "eqnA=%08x funcA=%08x eqn=%08x func=%08x cbits=%08x\n", + eqnA, funcA, eqn, func, cbits); +#endif + new_ablend = eqnA | funcA; + new_cblend = eqn | func; + + /* Some blend factor combinations don't seem to work when the + * BLEND_NO_SEPARATE bit is set. + * + * Especially problematic candidates are the ONE_MINUS_* flags, + * but I can't see a real pattern. + */ +#if 0 + if (new_ablend == new_cblend) { + new_cblend |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0; + } +#endif + new_cblend |= cbits; + + if ((new_ablend != r300->hw.bld.cmd[R300_BLD_ABLEND]) || + (new_cblend != r300->hw.bld.cmd[R300_BLD_CBLEND])) { + R300_STATECHANGE(r300, bld); + r300->hw.bld.cmd[R300_BLD_ABLEND] = new_ablend; + r300->hw.bld.cmd[R300_BLD_CBLEND] = new_cblend; + } +} + +static void r300SetBlendState(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT); + int eqn = R300_COMB_FCN_ADD_CLAMP; + int funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT); + int eqnA = R300_COMB_FCN_ADD_CLAMP; + + if (RGBA_LOGICOP_ENABLED(ctx) || !ctx->Color.BlendEnabled) { + r300SetBlendCntl(r300, func, eqn, 0, func, eqn); + return; + } + + func = + (blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE) << + R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.BlendDstRGB, + GL_FALSE) << + R300_DST_BLEND_SHIFT); + + switch (ctx->Color.BlendEquationRGB) { + case GL_FUNC_ADD: + eqn = R300_COMB_FCN_ADD_CLAMP; + break; + + case GL_FUNC_SUBTRACT: + eqn = R300_COMB_FCN_SUB_CLAMP; + break; + + case GL_FUNC_REVERSE_SUBTRACT: + eqn = R300_COMB_FCN_RSUB_CLAMP; + break; + + case GL_MIN: + eqn = R300_COMB_FCN_MIN; + func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT); + break; + + case GL_MAX: + eqn = R300_COMB_FCN_MAX; + func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT); + break; + + default: + fprintf(stderr, + "[%s:%u] Invalid RGB blend equation (0x%04x).\n", + __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB); + return; + } + + funcA = + (blend_factor(ctx->Color.BlendSrcA, GL_TRUE) << + R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.BlendDstA, + GL_FALSE) << + R300_DST_BLEND_SHIFT); + + switch (ctx->Color.BlendEquationA) { + case GL_FUNC_ADD: + eqnA = R300_COMB_FCN_ADD_CLAMP; + break; + + case GL_FUNC_SUBTRACT: + eqnA = R300_COMB_FCN_SUB_CLAMP; + break; + + case GL_FUNC_REVERSE_SUBTRACT: + eqnA = R300_COMB_FCN_RSUB_CLAMP; + break; + + case GL_MIN: + eqnA = R300_COMB_FCN_MIN; + funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT); + break; + + case GL_MAX: + eqnA = R300_COMB_FCN_MAX; + funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) | + (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT); + break; + + default: + fprintf(stderr, + "[%s:%u] Invalid A blend equation (0x%04x).\n", + __FUNCTION__, __LINE__, ctx->Color.BlendEquationA); + return; + } + + r300SetBlendCntl(r300, + func, eqn, + (R300_SEPARATE_ALPHA_ENABLE | + R300_READ_ENABLE | + R300_ALPHA_BLEND_ENABLE), funcA, eqnA); +} + +static void r300BlendEquationSeparate(GLcontext * ctx, + GLenum modeRGB, GLenum modeA) +{ + r300SetBlendState(ctx); +} + +static void r300BlendFuncSeparate(GLcontext * ctx, + GLenum sfactorRGB, GLenum dfactorRGB, + GLenum sfactorA, GLenum dfactorA) +{ + r300SetBlendState(ctx); +} + +/** + * Translate LogicOp enums into hardware representation. + * Both use a very logical bit-wise layout, but unfortunately the order + * of bits is reversed. + */ +static GLuint translate_logicop(GLenum logicop) +{ + GLuint bits = logicop - GL_CLEAR; + bits = ((bits & 1) << 3) | ((bits & 2) << 1) | ((bits & 4) >> 1) | ((bits & 8) >> 3); + return bits << R300_RB3D_ROPCNTL_ROP_SHIFT; +} + +/** + * Used internally to update the r300->hw hardware state to match the + * current OpenGL state. + */ +static void r300SetLogicOpState(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + R300_STATECHANGE(r300, rop); + if (RGBA_LOGICOP_ENABLED(ctx)) { + r300->hw.rop.cmd[1] = R300_RB3D_ROPCNTL_ROP_ENABLE | + translate_logicop(ctx->Color.LogicOp); + } else { + r300->hw.rop.cmd[1] = 0; + } +} + +/** + * Called by Mesa when an application program changes the LogicOp state + * via glLogicOp. + */ +static void r300LogicOpcode(GLcontext *ctx, GLenum logicop) +{ + if (RGBA_LOGICOP_ENABLED(ctx)) + r300SetLogicOpState(ctx); +} + +static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLint p; + GLint *ip; + + /* no VAP UCP on non-TCL chipsets */ + if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + return; + + p = (GLint) plane - (GLint) GL_CLIP_PLANE0; + ip = (GLint *)ctx->Transform._ClipUserPlane[p]; + + R300_STATECHANGE( rmesa, vpucp[p] ); + rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0]; + rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1]; + rmesa->hw.vpucp[p].cmd[R300_VPUCP_Z] = ip[2]; + rmesa->hw.vpucp[p].cmd[R300_VPUCP_W] = ip[3]; +} + +static void r300SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLuint p; + + /* no VAP UCP on non-TCL chipsets */ + if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + return; + + p = cap - GL_CLIP_PLANE0; + R300_STATECHANGE(r300, vap_clip_cntl); + if (state) { + r300->hw.vap_clip_cntl.cmd[1] |= (R300_VAP_UCP_ENABLE_0 << p); + r300ClipPlane(ctx, cap, NULL); + } else { + r300->hw.vap_clip_cntl.cmd[1] &= ~(R300_VAP_UCP_ENABLE_0 << p); + } +} + +/** + * Update our tracked culling state based on Mesa's state. + */ +static void r300UpdateCulling(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + uint32_t val = 0; + + if (ctx->Polygon.CullFlag) { + switch (ctx->Polygon.CullFaceMode) { + case GL_FRONT: + val = R300_CULL_FRONT; + break; + case GL_BACK: + val = R300_CULL_BACK; + break; + case GL_FRONT_AND_BACK: + val = R300_CULL_FRONT | R300_CULL_BACK; + break; + default: + break; + } + } + + switch (ctx->Polygon.FrontFace) { + case GL_CW: + val |= R300_FRONT_FACE_CW; + break; + case GL_CCW: + val |= R300_FRONT_FACE_CCW; + break; + default: + break; + } + + R300_STATECHANGE(r300, cul); + r300->hw.cul.cmd[R300_CUL_CULL] = val; +} + +static void r300SetPolygonOffsetState(GLcontext * ctx, GLboolean state) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + R300_STATECHANGE(r300, occlusion_cntl); + if (state) { + r300->hw.occlusion_cntl.cmd[1] |= (3 << 0); + } else { + r300->hw.occlusion_cntl.cmd[1] &= ~(3 << 0); + } +} + +static GLboolean current_fragment_program_writes_depth(GLcontext* ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + return (fp && fp->WritesDepth); + } else { + struct r500_fragment_program* fp = + (struct r500_fragment_program*)(char*) + ctx->FragmentProgram._Current; + return (fp && fp->writes_depth); + } +} + +static void r300SetEarlyZState(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLuint topZ = R300_ZTOP_ENABLE; + + if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS) + topZ = R300_ZTOP_DISABLE; + if (current_fragment_program_writes_depth(ctx)) + topZ = R300_ZTOP_DISABLE; + + if (topZ != r300->hw.zstencil_format.cmd[2]) { + /* Note: This completely reemits the stencil format. + * I have not tested whether this is strictly necessary, + * or if emitting a write to ZB_ZTOP is enough. + */ + R300_STATECHANGE(r300, zstencil_format); + r300->hw.zstencil_format.cmd[2] = topZ; + } +} + +static void r300SetAlphaState(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLubyte refByte; + uint32_t pp_misc = 0x0; + GLboolean really_enabled = ctx->Color.AlphaEnabled; + + CLAMPED_FLOAT_TO_UBYTE(refByte, ctx->Color.AlphaRef); + + switch (ctx->Color.AlphaFunc) { + case GL_NEVER: + pp_misc |= R300_FG_ALPHA_FUNC_NEVER; + break; + case GL_LESS: + pp_misc |= R300_FG_ALPHA_FUNC_LESS; + break; + case GL_EQUAL: + pp_misc |= R300_FG_ALPHA_FUNC_EQUAL; + break; + case GL_LEQUAL: + pp_misc |= R300_FG_ALPHA_FUNC_LE; + break; + case GL_GREATER: + pp_misc |= R300_FG_ALPHA_FUNC_GREATER; + break; + case GL_NOTEQUAL: + pp_misc |= R300_FG_ALPHA_FUNC_NOTEQUAL; + break; + case GL_GEQUAL: + pp_misc |= R300_FG_ALPHA_FUNC_GE; + break; + case GL_ALWAYS: + /*pp_misc |= FG_ALPHA_FUNC_ALWAYS; */ + really_enabled = GL_FALSE; + break; + } + + if (really_enabled) { + pp_misc |= R300_FG_ALPHA_FUNC_ENABLE; + pp_misc |= R500_FG_ALPHA_FUNC_8BIT; + pp_misc |= (refByte & R300_FG_ALPHA_FUNC_VAL_MASK); + } else { + pp_misc = 0x0; + } + + R300_STATECHANGE(r300, at); + r300->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc; + r300->hw.at.cmd[R300_AT_UNKNOWN] = 0; + + r300SetEarlyZState(ctx); +} + +static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) +{ + (void)func; + (void)ref; + r300SetAlphaState(ctx); +} + +static int translate_func(int func) +{ + switch (func) { + case GL_NEVER: + return R300_ZS_NEVER; + case GL_LESS: + return R300_ZS_LESS; + case GL_EQUAL: + return R300_ZS_EQUAL; + case GL_LEQUAL: + return R300_ZS_LEQUAL; + case GL_GREATER: + return R300_ZS_GREATER; + case GL_NOTEQUAL: + return R300_ZS_NOTEQUAL; + case GL_GEQUAL: + return R300_ZS_GEQUAL; + case GL_ALWAYS: + return R300_ZS_ALWAYS; + } + return 0; +} + +static void r300SetDepthState(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + R300_STATECHANGE(r300, zs); + r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_STENCIL_ENABLE|R300_STENCIL_FRONT_BACK; + r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT); + + if (ctx->Depth.Test) { + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_ENABLE; + if (ctx->Depth.Mask) + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_WRITE_ENABLE; + r300->hw.zs.cmd[R300_ZS_CNTL_1] |= + translate_func(ctx->Depth.Func) << R300_Z_FUNC_SHIFT; + } + + r300SetEarlyZState(ctx); +} + +static void r300SetStencilState(GLcontext * ctx, GLboolean state) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLboolean hw_stencil = GL_FALSE; + if (ctx->DrawBuffer) { + struct radeon_renderbuffer *rrbStencil + = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL); + hw_stencil = (rrbStencil && rrbStencil->bo); + } + + if (hw_stencil) { + R300_STATECHANGE(r300, zs); + if (state) { + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= + R300_STENCIL_ENABLE; + } else { + r300->hw.zs.cmd[R300_ZS_CNTL_0] &= + ~R300_STENCIL_ENABLE; + } + } else { +#if R200_MERGED + FALLBACK(&r300->radeon, RADEON_FALLBACK_STENCIL, state); +#endif + } +} + +static void r300UpdatePolygonMode(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + uint32_t hw_mode = R300_GA_POLY_MODE_DISABLE; + + /* Only do something if a polygon mode is wanted, default is GL_FILL */ + if (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL) { + GLenum f, b; + + /* Handle GL_CW (clock wise and GL_CCW (counter clock wise) + * correctly by selecting the correct front and back face + */ + if (ctx->Polygon.FrontFace == GL_CCW) { + f = ctx->Polygon.FrontMode; + b = ctx->Polygon.BackMode; + } else { + f = ctx->Polygon.BackMode; + b = ctx->Polygon.FrontMode; + } + + /* Enable polygon mode */ + hw_mode |= R300_GA_POLY_MODE_DUAL; + + switch (f) { + case GL_LINE: + hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_LINE; + break; + case GL_POINT: + hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_POINT; + break; + case GL_FILL: + hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_TRI; + break; + } + + switch (b) { + case GL_LINE: + hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_LINE; + break; + case GL_POINT: + hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_POINT; + break; + case GL_FILL: + hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_TRI; + break; + } + } + + if (r300->hw.polygon_mode.cmd[1] != hw_mode) { + R300_STATECHANGE(r300, polygon_mode); + r300->hw.polygon_mode.cmd[1] = hw_mode; + } + + r300->hw.polygon_mode.cmd[2] = 0x00000001; + r300->hw.polygon_mode.cmd[3] = 0x00000000; +} + +/** + * Change the culling mode. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r300CullFace(GLcontext * ctx, GLenum mode) +{ + (void)mode; + + r300UpdateCulling(ctx); +} + +/** + * Change the polygon orientation. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r300FrontFace(GLcontext * ctx, GLenum mode) +{ + (void)mode; + + r300UpdateCulling(ctx); + r300UpdatePolygonMode(ctx); +} + +/** + * Change the depth testing function. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r300DepthFunc(GLcontext * ctx, GLenum func) +{ + (void)func; + r300SetDepthState(ctx); +} + +/** + * Enable/Disable depth writing. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r300DepthMask(GLcontext * ctx, GLboolean mask) +{ + (void)mask; + r300SetDepthState(ctx); +} + +/** + * Handle glColorMask() + */ +static void r300ColorMask(GLcontext * ctx, + GLboolean r, GLboolean g, GLboolean b, GLboolean a) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int mask = (r ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) | + (g ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) | + (b ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) | + (a ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0); + + if (mask != r300->hw.cmk.cmd[R300_CMK_COLORMASK]) { + R300_STATECHANGE(r300, cmk); + r300->hw.cmk.cmd[R300_CMK_COLORMASK] = mask; + } +} + +/* ============================================================= + * Point state + */ +static void r300PointSize(GLcontext * ctx, GLfloat size) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + /* same size limits for AA, non-AA points */ + size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize); + + R300_STATECHANGE(r300, ps); + r300->hw.ps.cmd[R300_PS_POINTSIZE] = + ((int)(size * 6) << R300_POINTSIZE_X_SHIFT) | + ((int)(size * 6) << R300_POINTSIZE_Y_SHIFT); +} + +static void r300PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * param) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + switch (pname) { + case GL_POINT_SIZE_MIN: + R300_STATECHANGE(r300, ga_point_minmax); + r300->hw.ga_point_minmax.cmd[1] &= ~R300_GA_POINT_MINMAX_MIN_MASK; + r300->hw.ga_point_minmax.cmd[1] |= (GLuint)(ctx->Point.MinSize * 6.0); + break; + case GL_POINT_SIZE_MAX: + R300_STATECHANGE(r300, ga_point_minmax); + r300->hw.ga_point_minmax.cmd[1] &= ~R300_GA_POINT_MINMAX_MAX_MASK; + r300->hw.ga_point_minmax.cmd[1] |= (GLuint)(ctx->Point.MaxSize * 6.0) + << R300_GA_POINT_MINMAX_MAX_SHIFT; + break; + case GL_POINT_DISTANCE_ATTENUATION: + break; + case GL_POINT_FADE_THRESHOLD_SIZE: + break; + default: + break; + } +} + +/* ============================================================= + * Line state + */ +static void r300LineWidth(GLcontext * ctx, GLfloat widthf) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + widthf = CLAMP(widthf, + ctx->Const.MinPointSize, + ctx->Const.MaxPointSize); + R300_STATECHANGE(r300, lcntl); + r300->hw.lcntl.cmd[1] = + R300_LINE_CNT_HO | R300_LINE_CNT_VE | (int)(widthf * 6.0); +} + +static void r300PolygonMode(GLcontext * ctx, GLenum face, GLenum mode) +{ + (void)face; + (void)mode; + + r300UpdatePolygonMode(ctx); +} + +/* ============================================================= + * Stencil + */ + +static int translate_stencil_op(int op) +{ + switch (op) { + case GL_KEEP: + return R300_ZS_KEEP; + case GL_ZERO: + return R300_ZS_ZERO; + case GL_REPLACE: + return R300_ZS_REPLACE; + case GL_INCR: + return R300_ZS_INCR; + case GL_DECR: + return R300_ZS_DECR; + case GL_INCR_WRAP_EXT: + return R300_ZS_INCR_WRAP; + case GL_DECR_WRAP_EXT: + return R300_ZS_DECR_WRAP; + case GL_INVERT: + return R300_ZS_INVERT; + default: + WARN_ONCE("Do not know how to translate stencil op"); + return R300_ZS_KEEP; + } + return 0; +} + +static void r300ShadeModel(GLcontext * ctx, GLenum mode) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + R300_STATECHANGE(rmesa, shade); + rmesa->hw.shade.cmd[1] = 0x00000002; + switch (mode) { + case GL_FLAT: + rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_FLAT; + break; + case GL_SMOOTH: + rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_SMOOTH; + break; + default: + return; + } + rmesa->hw.shade.cmd[3] = 0x00000000; + rmesa->hw.shade.cmd[4] = 0x00000000; +} + +static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, + GLenum func, GLint ref, GLuint mask) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLuint refmask = + ((ctx->Stencil.Ref[0] & 0xff) << R300_STENCILREF_SHIFT) + | ((ctx->Stencil.ValueMask[0] & 0xff) << R300_STENCILMASK_SHIFT); + const unsigned back = ctx->Stencil._BackFace; + GLuint flag; + + R300_STATECHANGE(rmesa, zs); + rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_STENCIL_FRONT_BACK; + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= ~((R300_ZS_MASK << + R300_S_FRONT_FUNC_SHIFT) + | (R300_ZS_MASK << + R300_S_BACK_FUNC_SHIFT)); + + rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &= + ~((R300_STENCILREF_MASK << R300_STENCILREF_SHIFT) | + (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT)); + + flag = translate_func(ctx->Stencil.Function[0]); + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= + (flag << R300_S_FRONT_FUNC_SHIFT); + + flag = translate_func(ctx->Stencil.Function[back]); + + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= + (flag << R300_S_BACK_FUNC_SHIFT); + rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= refmask; +} + +static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + R300_STATECHANGE(rmesa, zs); + rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &= + ~(R300_STENCILREF_MASK << + R300_STENCILWRITEMASK_SHIFT); + rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= + (ctx->Stencil. + WriteMask[0] & R300_STENCILREF_MASK) << + R300_STENCILWRITEMASK_SHIFT; +} + +static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, + GLenum fail, GLenum zfail, GLenum zpass) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + const unsigned back = ctx->Stencil._BackFace; + + R300_STATECHANGE(rmesa, zs); + /* It is easier to mask what's left.. */ + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= + (R300_ZS_MASK << R300_Z_FUNC_SHIFT) | + (R300_ZS_MASK << R300_S_FRONT_FUNC_SHIFT) | + (R300_ZS_MASK << R300_S_BACK_FUNC_SHIFT); + + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= + (translate_stencil_op(ctx->Stencil.FailFunc[0]) << + R300_S_FRONT_SFAIL_OP_SHIFT) + | (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) << + R300_S_FRONT_ZFAIL_OP_SHIFT) + | (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) << + R300_S_FRONT_ZPASS_OP_SHIFT); + + rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |= + (translate_stencil_op(ctx->Stencil.FailFunc[back]) << + R300_S_BACK_SFAIL_OP_SHIFT) + | (translate_stencil_op(ctx->Stencil.ZFailFunc[back]) << + R300_S_BACK_ZFAIL_OP_SHIFT) + | (translate_stencil_op(ctx->Stencil.ZPassFunc[back]) << + R300_S_BACK_ZPASS_OP_SHIFT); +} + +/* ============================================================= + * Window position and viewport transformation + */ + +/* + * To correctly position primitives: + */ +#define SUBPIXEL_X 0.125 +#define SUBPIXEL_Y 0.125 + +static void r300UpdateWindow(GLcontext * ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; + GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; + GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0; + const GLfloat *v = ctx->Viewport._WindowMap.m; + const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF; + const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); + GLfloat y_scale, y_bias; + + if (render_to_fbo) { + y_scale = 1.0; + y_bias = 0; + } else { + y_scale = -1.0; + y_bias = yoffset; + } + + GLfloat sx = v[MAT_SX]; + GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X; + GLfloat sy = v[MAT_SY] * y_scale; + GLfloat ty = (v[MAT_TY] * y_scale) + y_bias + SUBPIXEL_Y; + GLfloat sz = v[MAT_SZ] * depthScale; + GLfloat tz = v[MAT_TZ] * depthScale; + + R300_STATECHANGE(rmesa, vpt); + + rmesa->hw.vpt.cmd[R300_VPT_XSCALE] = r300PackFloat32(sx); + rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] = r300PackFloat32(tx); + rmesa->hw.vpt.cmd[R300_VPT_YSCALE] = r300PackFloat32(sy); + rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] = r300PackFloat32(ty); + rmesa->hw.vpt.cmd[R300_VPT_ZSCALE] = r300PackFloat32(sz); + rmesa->hw.vpt.cmd[R300_VPT_ZOFFSET] = r300PackFloat32(tz); +} + +static void r300Viewport(GLcontext * ctx, GLint x, GLint y, + GLsizei width, GLsizei height) +{ + /* Don't pipeline viewport changes, conflict with window offset + * setting below. Could apply deltas to rescue pipelined viewport + * values, or keep the originals hanging around. + */ + r300UpdateWindow(ctx); + + radeon_viewport(ctx, x, y, width, height); +} + +static void r300DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) +{ + r300UpdateWindow(ctx); +} + +void r300UpdateViewportOffset(GLcontext * ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = ((radeonContextPtr) rmesa)->dri.drawable; + GLfloat xoffset = (GLfloat) dPriv->x; + GLfloat yoffset = (GLfloat) dPriv->y + dPriv->h; + const GLfloat *v = ctx->Viewport._WindowMap.m; + + GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X; + GLfloat ty = (-v[MAT_TY]) + yoffset + SUBPIXEL_Y; + + if (rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] != r300PackFloat32(tx) || + rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] != r300PackFloat32(ty)) { + /* Note: this should also modify whatever data the context reset + * code uses... + */ + R300_STATECHANGE(rmesa, vpt); + rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] = r300PackFloat32(tx); + rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] = r300PackFloat32(ty); + + } + + radeonUpdateScissor(ctx); +} + +static void +r300FetchStateParameter(GLcontext * ctx, + const gl_state_index state[STATE_LENGTH], + GLfloat * value) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + switch (state[0]) { + case STATE_INTERNAL: + switch (state[1]) { + case STATE_R300_WINDOW_DIMENSION: + value[0] = r300->radeon.dri.drawable->w * 0.5f; /* width*0.5 */ + value[1] = r300->radeon.dri.drawable->h * 0.5f; /* height*0.5 */ + value[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */ + value[3] = 1.0F; /* not used */ + break; + + case STATE_R300_TEXRECT_FACTOR:{ + struct gl_texture_object *t = + ctx->Texture.Unit[state[2]].CurrentTex[TEXTURE_RECT_INDEX]; + + if (t && t->Image[0][t->BaseLevel]) { + struct gl_texture_image *image = + t->Image[0][t->BaseLevel]; + value[0] = 1.0 / image->Width2; + value[1] = 1.0 / image->Height2; + } else { + value[0] = 1.0; + value[1] = 1.0; + } + value[2] = 1.0; + value[3] = 1.0; + break; + } + + default: + break; + } + break; + + default: + break; + } +} + +/** + * Update R300's own internal state parameters. + * For now just STATE_R300_WINDOW_DIMENSION + */ +void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) +{ + struct r300_fragment_program *fp; + struct gl_program_parameter_list *paramList; + GLuint i; + + if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM))) + return; + + fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current; + if (!fp) + return; + + paramList = fp->mesa_program.Base.Parameters; + + if (!paramList) + return; + + for (i = 0; i < paramList->NumParameters; i++) { + if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) { + r300FetchStateParameter(ctx, + paramList->Parameters[i]. + StateIndexes, + paramList->ParameterValues[i]); + } + } +} + +/* ============================================================= + * Polygon state + */ +static void r300PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLfloat constant = units; + + switch (ctx->Visual.depthBits) { + case 16: + constant *= 4.0; + break; + case 24: + constant *= 2.0; + break; + } + + factor *= 12.0; + +/* fprintf(stderr, "%s f:%f u:%f\n", __FUNCTION__, factor, constant); */ + + R300_STATECHANGE(rmesa, zbs); + rmesa->hw.zbs.cmd[R300_ZBS_T_FACTOR] = r300PackFloat32(factor); + rmesa->hw.zbs.cmd[R300_ZBS_T_CONSTANT] = r300PackFloat32(constant); + rmesa->hw.zbs.cmd[R300_ZBS_W_FACTOR] = r300PackFloat32(factor); + rmesa->hw.zbs.cmd[R300_ZBS_W_CONSTANT] = r300PackFloat32(constant); +} + +/* Routing and texture-related */ + +/* r300 doesnt handle GL_CLAMP and GL_MIRROR_CLAMP_EXT correctly when filter is NEAREST. + * Since texwrap produces same results for GL_CLAMP and GL_CLAMP_TO_EDGE we use them instead. + * We need to recalculate wrap modes whenever filter mode is changed because someone might do: + * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP); + * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + * Since r300 completely ignores R300_TX_CLAMP when either min or mag is nearest it cant handle + * combinations where only one of them is nearest. + */ +static unsigned long gen_fixed_filter(unsigned long f) +{ + unsigned long mag, min, needs_fixing = 0; + //return f; + + /* We ignore MIRROR bit so we dont have to do everything twice */ + if ((f & ((7 - 1) << R300_TX_WRAP_S_SHIFT)) == + (R300_TX_CLAMP << R300_TX_WRAP_S_SHIFT)) { + needs_fixing |= 1; + } + if ((f & ((7 - 1) << R300_TX_WRAP_T_SHIFT)) == + (R300_TX_CLAMP << R300_TX_WRAP_T_SHIFT)) { + needs_fixing |= 2; + } + if ((f & ((7 - 1) << R300_TX_WRAP_R_SHIFT)) == + (R300_TX_CLAMP << R300_TX_WRAP_R_SHIFT)) { + needs_fixing |= 4; + } + + if (!needs_fixing) + return f; + + mag = f & R300_TX_MAG_FILTER_MASK; + min = f & (R300_TX_MIN_FILTER_MASK|R300_TX_MIN_FILTER_MIP_MASK); + + /* TODO: Check for anisto filters too */ + if ((mag != R300_TX_MAG_FILTER_NEAREST) + && (min != R300_TX_MIN_FILTER_NEAREST)) + return f; + + /* r300 cant handle these modes hence we force nearest to linear */ + if ((mag == R300_TX_MAG_FILTER_NEAREST) + && (min != R300_TX_MIN_FILTER_NEAREST)) { + f &= ~R300_TX_MAG_FILTER_NEAREST; + f |= R300_TX_MAG_FILTER_LINEAR; + return f; + } + + if ((min == R300_TX_MIN_FILTER_NEAREST) + && (mag != R300_TX_MAG_FILTER_NEAREST)) { + f &= ~R300_TX_MIN_FILTER_NEAREST; + f |= R300_TX_MIN_FILTER_LINEAR; + return f; + } + + /* Both are nearest */ + if (needs_fixing & 1) { + f &= ~((7 - 1) << R300_TX_WRAP_S_SHIFT); + f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_S_SHIFT; + } + if (needs_fixing & 2) { + f &= ~((7 - 1) << R300_TX_WRAP_T_SHIFT); + f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_T_SHIFT; + } + if (needs_fixing & 4) { + f &= ~((7 - 1) << R300_TX_WRAP_R_SHIFT); + f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_R_SHIFT; + } + return f; +} + +static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + int i; + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + struct r300_fragment_program_code *code = &fp->code; + + R300_STATECHANGE(r300, fpt); + + for (i = 0; i < code->tex.length; i++) { + int unit; + int opcode; + unsigned long val; + + unit = code->tex.inst[i] >> R300_TEX_ID_SHIFT; + unit &= 15; + + val = code->tex.inst[i]; + val &= ~R300_TEX_ID_MASK; + + opcode = + (val & R300_TEX_INST_MASK) >> R300_TEX_INST_SHIFT; + if (opcode == R300_TEX_OP_KIL) { + r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } else { + if (tmu_mappings[unit] >= 0) { + val |= + tmu_mappings[unit] << + R300_TEX_ID_SHIFT; + r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } else { + // We get here when the corresponding texture image is incomplete + // (e.g. incomplete mipmaps etc.) + r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val; + } + } + } + + r300->hw.fpt.cmd[R300_FPT_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, + R300_US_TEX_INST_0, code->tex.length); +} + +static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) +{ + int i; + struct r500_fragment_program *fp = (struct r500_fragment_program *) + (char *)ctx->FragmentProgram._Current; + struct r500_fragment_program_code *code = &fp->code; + + /* find all the texture instructions and relocate the texture units */ + for (i = 0; i < code->inst_end + 1; i++) { + if ((code->inst[i].inst0 & 0x3) == R500_INST_TYPE_TEX) { + uint32_t val; + int unit, opcode, new_unit; + + val = code->inst[i].inst1; + + unit = (val >> 16) & 0xf; + + val &= ~(0xf << 16); + + opcode = val & (0x7 << 22); + if (opcode == R500_TEX_INST_TEXKILL) { + new_unit = 0; + } else { + if (tmu_mappings[unit] >= 0) { + new_unit = tmu_mappings[unit]; + } else { + new_unit = 0; + } + } + val |= R500_TEX_ID(new_unit); + code->inst[i].inst1 = val; + } + } +} + +static GLuint translate_lod_bias(GLfloat bias) +{ + GLint b = (int)(bias*32); + if (b >= (1 << 9)) + b = (1 << 9)-1; + else if (b < -(1 << 9)) + b = -(1 << 9); + return (((GLuint)b) << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK; +} + +static void r300SetupTextures(GLcontext * ctx) +{ + int i, mtu; + struct radeon_tex_obj *t; + r300ContextPtr r300 = R300_CONTEXT(ctx); + int hw_tmu = 0; + int last_hw_tmu = -1; /* -1 translates into no setup costs for fields */ + int tmu_mappings[R300_MAX_TEXTURE_UNITS] = { -1, }; + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + + R300_STATECHANGE(r300, txe); + R300_STATECHANGE(r300, tex.filter); + R300_STATECHANGE(r300, tex.filter_1); + R300_STATECHANGE(r300, tex.size); + R300_STATECHANGE(r300, tex.format); + R300_STATECHANGE(r300, tex.pitch); + R300_STATECHANGE(r300, tex.offset); + R300_STATECHANGE(r300, tex.chroma_key); + R300_STATECHANGE(r300, tex.border_color); + + r300->hw.txe.cmd[R300_TXE_ENABLE] = 0x0; + + mtu = r300->radeon.glCtx->Const.MaxTextureUnits; + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "mtu=%d\n", mtu); + + if (mtu > R300_MAX_TEXTURE_UNITS) { + fprintf(stderr, + "Aiiee ! mtu=%d is greater than R300_MAX_TEXTURE_UNITS=%d\n", + mtu, R300_MAX_TEXTURE_UNITS); + _mesa_exit(-1); + } + + /* We cannot let disabled tmu offsets pass DRM */ + for (i = 0; i < mtu; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) { + tmu_mappings[i] = hw_tmu; + + t = radeon_tex_obj(ctx->Texture.Unit[i]._Current); + if (!t) + continue; + + if ((t->pp_txformat & 0xffffff00) == 0xffffff00) { + WARN_ONCE + ("unknown texture format (entry %x) encountered. Help me !\n", + t->pp_txformat & 0xff); + } + + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, + "Activating texture unit %d\n", i); + + r300->hw.txe.cmd[R300_TXE_ENABLE] |= (1 << hw_tmu); + + r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 + + hw_tmu] = + gen_fixed_filter(t->pp_txfilter) | (hw_tmu << 28); + /* Note: There is a LOD bias per texture unit and a LOD bias + * per texture object. We add them here to get the correct behaviour. + * (The per-texture object LOD bias was introduced in OpenGL 1.4 + * and is not present in the EXT_texture_object extension). + */ + r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = + t->pp_txfilter_1 | + translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.LodBias); + r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] = + t->pp_txsize; + r300->hw.tex.format.cmd[R300_TEX_VALUE_0 + + hw_tmu] = t->pp_txformat; + r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] = + t->pp_txpitch; + r300->hw.textures[hw_tmu] = t; + + if (t->tile_bits & R300_TXO_MACRO_TILE) { + WARN_ONCE("macro tiling enabled!\n"); + } + + if (t->tile_bits & R300_TXO_MICRO_TILE) { + WARN_ONCE("micro tiling enabled!\n"); + } + + r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0 + + hw_tmu] = 0x0; + r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0 + + hw_tmu] = + t->pp_border_color; + + last_hw_tmu = hw_tmu; + + hw_tmu++; + } + } + + r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, last_hw_tmu + 1); + r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, last_hw_tmu + 1); + r300->hw.tex.size.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, last_hw_tmu + 1); + r300->hw.tex.format.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, last_hw_tmu + 1); + r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, last_hw_tmu + 1); + r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, last_hw_tmu + 1); + r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, last_hw_tmu + 1); + r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1); + + if (!fp) /* should only happenen once, just after context is created */ + return; + + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + if (fp->mesa_program.UsesKill && last_hw_tmu < 0) { + // The KILL operation requires the first texture unit + // to be enabled. + r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1; + r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0; + r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 1); + } + r300SetupFragmentShaderTextures(ctx, tmu_mappings); + } else + r500SetupFragmentShaderTextures(ctx, tmu_mappings); + + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n", + r300->hw.txe.cmd[R300_TXE_ENABLE], last_hw_tmu); +} + +union r300_outputs_written { + GLuint vp_outputs; /* hw_tcl_on */ + DECLARE_RENDERINPUTS(index_bitset); /* !hw_tcl_on */ +}; + +#define R300_OUTPUTS_WRITTEN_TEST(ow, vp_result, tnl_attrib) \ + ((hw_tcl_on) ? (ow).vp_outputs & (1 << (vp_result)) : \ + RENDERINPUTS_TEST( (ow.index_bitset), (tnl_attrib) )) + +static void r300SetupRSUnit(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + union r300_outputs_written OutputsWritten; + GLuint InputsRead; + int fp_reg, high_rr; + int col_ip, tex_ip; + int rs_tex_count = 0; + int i, count, col_fmt; + + if (hw_tcl_on) + OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; + else + RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset); + + if (ctx->FragmentProgram._Current) + InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; + else { + fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + return; /* This should only ever happen once.. */ + } + + R300_STATECHANGE(r300, ri); + R300_STATECHANGE(r300, rc); + R300_STATECHANGE(r300, rr); + + fp_reg = col_ip = tex_ip = col_fmt = 0; + + r300->hw.rc.cmd[1] = 0; + r300->hw.rc.cmd[2] = 0; + for (i=0; i<R300_RR_CMDSIZE-1; ++i) + r300->hw.rr.cmd[R300_RR_INST_0 + i] = 0; + + for (i=0; i<R300_RI_CMDSIZE-1; ++i) + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0; + + + if (InputsRead & FRAG_BIT_COL0) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; + if (count == 4) + col_fmt = R300_RS_COL_FMT_RGBA; + else if (count == 3) + col_fmt = R300_RS_COL_FMT_RGB1; + else + col_fmt = R300_RS_COL_FMT_0001; + + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(col_fmt); + r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_COL0; + ++col_ip; + ++fp_reg; + } else { + WARN_ONCE("fragprog wants col0, vp doesn't provide it\n"); + } + } + + if (InputsRead & FRAG_BIT_COL1) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; + if (count == 4) + col_fmt = R300_RS_COL_FMT_RGBA; + else if (count == 3) + col_fmt = R300_RS_COL_FMT_RGB1; + else + col_fmt = R300_RS_COL_FMT_0001; + + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(col_fmt); + r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_COL1; + ++col_ip; + ++fp_reg; + } else { + WARN_ONCE("fragprog wants col1, vp doesn't provide it\n"); + } + } + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (! ( InputsRead & FRAG_BIT_TEX(i) ) ) + continue; + + if (!R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { + WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); + continue; + } + + int swiz; + + /* with TCL we always seem to route 4 components */ + if (hw_tcl_on) + count = 4; + else + count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; + + switch(count) { + case 4: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3); break; + case 3: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(R300_RS_SEL_K1); break; + default: + case 1: + case 2: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); break; + }; + + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz | R300_RS_TEX_PTR(rs_tex_count); + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~(FRAG_BIT_TEX0 << i); + rs_tex_count += count; + ++tex_ip; + ++fp_reg; + } + + if (InputsRead & FRAG_BIT_FOGC) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_FOGC; + rs_tex_count += 4; + ++tex_ip; + ++fp_reg; + } else { + WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n"); + } + } + + if (InputsRead & FRAG_BIT_WPOS) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_WPOS; + rs_tex_count += 4; + ++tex_ip; + ++fp_reg; + } + InputsRead &= ~FRAG_BIT_WPOS; + + /* Setup default color if no color or tex was set */ + if (rs_tex_count == 0 && col_ip == 0) { + r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(0) | R300_RS_COL_FMT(R300_RS_COL_FMT_0001); + ++col_ip; + } + + high_rr = (col_ip > tex_ip) ? col_ip : tex_ip; + r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; + r300->hw.rc.cmd[2] |= high_rr - 1; + + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr); + + if (InputsRead) + WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); +} + +static void r500SetupRSUnit(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + union r300_outputs_written OutputsWritten; + GLuint InputsRead; + int fp_reg, high_rr; + int col_ip, tex_ip; + int rs_tex_count = 0; + int i, count, col_fmt; + + if (hw_tcl_on) + OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; + else + RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset); + + if (ctx->FragmentProgram._Current) + InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; + else { + fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + return; /* This should only ever happen once.. */ + } + + R300_STATECHANGE(r300, ri); + R300_STATECHANGE(r300, rc); + R300_STATECHANGE(r300, rr); + + fp_reg = col_ip = tex_ip = col_fmt = 0; + + r300->hw.rc.cmd[1] = 0; + r300->hw.rc.cmd[2] = 0; + for (i=0; i<R300_RR_CMDSIZE-1; ++i) + r300->hw.rr.cmd[R300_RR_INST_0 + i] = 0; + + for (i=0; i<R500_RI_CMDSIZE-1; ++i) + r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0; + + + if (InputsRead & FRAG_BIT_COL0) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; + if (count == 4) + col_fmt = R300_RS_COL_FMT_RGBA; + else if (count == 3) + col_fmt = R300_RS_COL_FMT_RGB1; + else + col_fmt = R300_RS_COL_FMT_0001; + + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(col_fmt); + r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_COL0; + ++col_ip; + ++fp_reg; + } else { + WARN_ONCE("fragprog wants col0, vp doesn't provide it\n"); + } + } + + if (InputsRead & FRAG_BIT_COL1) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { + count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; + if (count == 4) + col_fmt = R300_RS_COL_FMT_RGBA; + else if (count == 3) + col_fmt = R300_RS_COL_FMT_RGB1; + else + col_fmt = R300_RS_COL_FMT_0001; + + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(col_fmt); + r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_COL1; + ++col_ip; + ++fp_reg; + } else { + WARN_ONCE("fragprog wants col1, vp doesn't provide it\n"); + } + } + + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (! ( InputsRead & FRAG_BIT_TEX(i) ) ) + continue; + + if (!R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) { + WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i); + continue; + } + + int swiz = 0; + + /* with TCL we always seem to route 4 components */ + if (hw_tcl_on) + count = 4; + else + count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; + + if (count == 4) { + swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; + swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT; + swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT; + swiz |= (rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT; + } else if (count == 3) { + swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; + swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT; + swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT; + swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; + } else if (count == 2) { + swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; + swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT; + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; + swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; + } else if (count == 1) { + swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT; + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; + swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; + } else { + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT; + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT; + swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; + swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; + } + + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz; + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~(FRAG_BIT_TEX0 << i); + rs_tex_count += count; + ++tex_ip; + ++fp_reg; + } + + if (InputsRead & FRAG_BIT_FOGC) { + if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | + ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | + ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | + ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); + + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_FOGC; + rs_tex_count += 4; + ++tex_ip; + ++fp_reg; + } else { + WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n"); + } + } + + if (InputsRead & FRAG_BIT_WPOS) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | + ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | + ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | + ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); + + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_WPOS; + rs_tex_count += 4; + ++tex_ip; + ++fp_reg; + } + + /* Setup default color if no color or tex was set */ + if (rs_tex_count == 0 && col_ip == 0) { + r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(0) | R500_RS_COL_FMT(R300_RS_COL_FMT_0001); + ++col_ip; + } + + high_rr = (col_ip > tex_ip) ? col_ip : tex_ip; + r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; + r300->hw.rc.cmd[2] |= 0xC0 | (high_rr - 1); + + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, high_rr); + + if (InputsRead) + WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); +} + + + + +#define bump_vpu_count(ptr, new_count) do{\ + drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ + int _nc=(new_count)/4; \ + assert(_nc < 256); \ + if(_nc>_p->vpu.count)_p->vpu.count=_nc;\ + }while(0) + +static INLINE void r300SetupVertexProgramFragment(r300ContextPtr r300, int dest, struct r300_vertex_shader_fragment *vsf) +{ + int i; + + if (vsf->length == 0) + return; + + if (vsf->length & 0x3) { + fprintf(stderr, "VERTEX_SHADER_FRAGMENT must have length divisible by 4\n"); + _mesa_exit(-1); + } + + switch ((dest >> 8) & 0xf) { + case 0: + R300_STATECHANGE(r300, vpi); + for (i = 0; i < vsf->length; i++) + r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]); + bump_vpu_count(r300->hw.vpi.cmd, vsf->length + 4 * (dest & 0xff)); + break; + + case 2: + R300_STATECHANGE(r300, vpp); + for (i = 0; i < vsf->length; i++) + r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]); + bump_vpu_count(r300->hw.vpp.cmd, vsf->length + 4 * (dest & 0xff)); + break; + case 4: + R300_STATECHANGE(r300, vps); + for (i = 0; i < vsf->length; i++) + r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]); + bump_vpu_count(r300->hw.vps.cmd, vsf->length + 4 * (dest & 0xff)); + break; + default: + fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest); + _mesa_exit(-1); + } +} + +#define MIN3(a, b, c) ((a) < (b) ? MIN2(a, c) : MIN2(b, c)) + + +static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, + GLuint output_count, GLuint temp_count) +{ + int vtx_mem_size; + int pvs_num_slots; + int pvs_num_cntrls; + + /* Flush PVS engine before changing PVS_NUM_SLOTS, PVS_NUM_CNTRLS. + * See r500 docs 6.5.2 - done in emit */ + + /* avoid division by zero */ + if (input_count == 0) input_count = 1; + if (output_count == 0) output_count = 1; + if (temp_count == 0) temp_count = 1; + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + vtx_mem_size = 128; + else + vtx_mem_size = 72; + + pvs_num_slots = MIN3(10, vtx_mem_size/input_count, vtx_mem_size/output_count); + pvs_num_cntrls = MIN2(6, vtx_mem_size/temp_count); + + R300_STATECHANGE(rmesa, vap_cntl); + if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] = + (pvs_num_slots << R300_PVS_NUM_SLOTS_SHIFT) | + (pvs_num_cntrls << R300_PVS_NUM_CNTLRS_SHIFT) | + (12 << R300_VF_MAX_VTX_NUM_SHIFT); + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= R500_TCL_STATE_OPTIMIZATION; + } else + /* not sure about non-tcl */ + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + (5 << R300_PVS_NUM_CNTLRS_SHIFT) | + (5 << R300_VF_MAX_VTX_NUM_SHIFT)); + + if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (2 << R300_PVS_NUM_FPUS_SHIFT); + else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (5 << R300_PVS_NUM_FPUS_SHIFT); + else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (6 << R300_PVS_NUM_FPUS_SHIFT); + else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || + (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580)) + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (8 << R300_PVS_NUM_FPUS_SHIFT); + else + rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (4 << R300_PVS_NUM_FPUS_SHIFT); + +} + +static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) +{ + struct r300_vertex_shader_state *prog = &(rmesa->state.vertex_shader); + GLuint o_reg = 0; + GLuint i_reg = 0; + int i; + int inst_count = 0; + int param_count = 0; + int program_end = 0; + + for (i = VERT_ATTRIB_POS; i < VERT_ATTRIB_MAX; i++) { + if (rmesa->state.sw_tcl_inputs[i] != -1) { + prog->program.body.i[program_end + 0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, GL_FALSE, GL_FALSE, o_reg++, VSF_FLAG_ALL, PVS_DST_REG_OUT); + prog->program.body.i[program_end + 1] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + prog->program.body.i[program_end + 2] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + prog->program.body.i[program_end + 3] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + program_end += 4; + i_reg++; + } + } + + prog->program.length = program_end; + + r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, + &(prog->program)); + inst_count = (prog->program.length / 4) - 1; + + r300VapCntl(rmesa, i_reg, o_reg, 0); + + R300_STATECHANGE(rmesa, pvs); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = + (0 << R300_PVS_FIRST_INST_SHIFT) | + (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | + (inst_count << R300_PVS_LAST_INST_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = + (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | + (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = + (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); +} + +static int bit_count (int x) +{ + x = ((x & 0xaaaaaaaaU) >> 1) + (x & 0x55555555U); + x = ((x & 0xccccccccU) >> 2) + (x & 0x33333333U); + x = (x >> 16) + (x & 0xffff); + x = ((x & 0xf0f0) >> 4) + (x & 0x0f0f); + return (x >> 8) + (x & 0x00ff); +} + +static void r300SetupRealVertexProgram(r300ContextPtr rmesa) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + struct r300_vertex_program *prog = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); + int inst_count = 0; + int param_count = 0; + + /* FIXME: r300SetupVertexProgramFragment */ + R300_STATECHANGE(rmesa, vpp); + param_count = + r300VertexProgUpdateParams(ctx, + (struct r300_vertex_program_cont *) + ctx->VertexProgram._Current, + (float *)&rmesa->hw.vpp. + cmd[R300_VPP_PARAM_0]); + bump_vpu_count(rmesa->hw.vpp.cmd, param_count); + param_count /= 4; + + r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, &(prog->program)); + inst_count = (prog->program.length / 4) - 1; + + r300VapCntl(rmesa, bit_count(prog->key.InputsRead), + bit_count(prog->key.OutputsWritten), prog->num_temporaries); + + R300_STATECHANGE(rmesa, pvs); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = + (0 << R300_PVS_FIRST_INST_SHIFT) | + (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | + (inst_count << R300_PVS_LAST_INST_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = + (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | + (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = + (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); +} + + +static void r300SetupVertexProgram(r300ContextPtr rmesa) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + + /* Reset state, in case we don't use something */ + ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; + + /* Not sure why this doesnt work... + 0x400 area might have something to do with pixel shaders as it appears right after pfs programming. + 0x406 is set to { 0.0, 0.0, 1.0, 0.0 } most of the time but should change with smooth points and in other rare cases. */ + //setup_vertex_shader_fragment(rmesa, 0x406, &unk4); + if (hw_tcl_on && ((struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx))->translated) { + r300SetupRealVertexProgram(rmesa); + } else { + /* FIXME: This needs to be replaced by vertex shader generation code. */ + r300SetupDefaultVertexProgram(rmesa); + } + +} + +/** + * Enable/Disable states. + * + * \note Mesa already filters redundant calls to this function. + */ +static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(cap), + state ? "GL_TRUE" : "GL_FALSE"); + + switch (cap) { + case GL_TEXTURE_1D: + case GL_TEXTURE_2D: + case GL_TEXTURE_3D: + /* empty */ + break; + case GL_FOG: + /* empty */ + break; + case GL_ALPHA_TEST: + r300SetAlphaState(ctx); + break; + case GL_COLOR_LOGIC_OP: + r300SetLogicOpState(ctx); + /* fall-through, because logic op overrides blending */ + case GL_BLEND: + r300SetBlendState(ctx); + break; + case GL_CLIP_PLANE0: + case GL_CLIP_PLANE1: + case GL_CLIP_PLANE2: + case GL_CLIP_PLANE3: + case GL_CLIP_PLANE4: + case GL_CLIP_PLANE5: + r300SetClipPlaneState(ctx, cap, state); + break; + case GL_DEPTH_TEST: + r300SetDepthState(ctx); + break; + case GL_STENCIL_TEST: + r300SetStencilState(ctx, state); + break; + case GL_CULL_FACE: + r300UpdateCulling(ctx); + break; + case GL_POLYGON_OFFSET_POINT: + case GL_POLYGON_OFFSET_LINE: + case GL_POLYGON_OFFSET_FILL: + r300SetPolygonOffsetState(ctx, state); + break; + case GL_SCISSOR_TEST: + radeon_firevertices(&rmesa->radeon); + rmesa->radeon.state.scissor.enabled = state; + radeonUpdateScissor( ctx ); + break; + default: + break; + } +} + +/** + * Completely recalculates hardware state based on the Mesa state. + */ +static void r300ResetHwState(r300ContextPtr r300) +{ + GLcontext *ctx = r300->radeon.glCtx; + int has_tcl = 1; + + if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + has_tcl = 0; + + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "%s\n", __FUNCTION__); + + radeon_firevertices(&r300->radeon); + + r300ColorMask(ctx, + ctx->Color.ColorMask[RCOMP], + ctx->Color.ColorMask[GCOMP], + ctx->Color.ColorMask[BCOMP], ctx->Color.ColorMask[ACOMP]); + + r300Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test); + r300DepthMask(ctx, ctx->Depth.Mask); + r300DepthFunc(ctx, ctx->Depth.Func); + + /* stencil */ + r300Enable(ctx, GL_STENCIL_TEST, ctx->Stencil._Enabled); + r300StencilMaskSeparate(ctx, 0, ctx->Stencil.WriteMask[0]); + r300StencilFuncSeparate(ctx, 0, ctx->Stencil.Function[0], + ctx->Stencil.Ref[0], ctx->Stencil.ValueMask[0]); + r300StencilOpSeparate(ctx, 0, ctx->Stencil.FailFunc[0], + ctx->Stencil.ZFailFunc[0], + ctx->Stencil.ZPassFunc[0]); + + r300UpdateCulling(ctx); + + r300SetBlendState(ctx); + r300SetLogicOpState(ctx); + + r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef); + r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled); + + r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA + | R300_VPORT_X_OFFSET_ENA + | R300_VPORT_Y_SCALE_ENA + | R300_VPORT_Y_OFFSET_ENA + | R300_VPORT_Z_SCALE_ENA + | R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT; + r300->hw.vte.cmd[2] = 0x00000008; + + r300->hw.vap_vf_max_vtx_indx.cmd[1] = 0x00FFFFFF; + r300->hw.vap_vf_max_vtx_indx.cmd[2] = 0x00000000; + +#ifdef MESA_LITTLE_ENDIAN + r300->hw.vap_cntl_status.cmd[1] = R300_VC_NO_SWAP; +#else + r300->hw.vap_cntl_status.cmd[1] = R300_VC_32BIT_SWAP; +#endif + + /* disable VAP/TCL on non-TCL capable chips */ + if (!has_tcl) + r300->hw.vap_cntl_status.cmd[1] |= R300_VAP_TCL_BYPASS; + + r300->hw.vap_psc_sgn_norm_cntl.cmd[1] = 0xAAAAAAAA; + + /* XXX: Other families? */ + if (has_tcl) { + r300->hw.vap_clip_cntl.cmd[1] = R300_PS_UCP_MODE_DIST_COP; + + r300->hw.vap_clip.cmd[1] = r300PackFloat32(1.0); /* X */ + r300->hw.vap_clip.cmd[2] = r300PackFloat32(1.0); /* X */ + r300->hw.vap_clip.cmd[3] = r300PackFloat32(1.0); /* Y */ + r300->hw.vap_clip.cmd[4] = r300PackFloat32(1.0); /* Y */ + + switch (r300->radeon.radeonScreen->chip_family) { + case CHIP_FAMILY_R300: + r300->hw.vap_pvs_vtx_timeout_reg.cmd[1] = R300_2288_R300; + break; + default: + r300->hw.vap_pvs_vtx_timeout_reg.cmd[1] = R300_2288_RV350; + break; + } + } + + r300->hw.gb_enable.cmd[1] = R300_GB_POINT_STUFF_ENABLE + | R300_GB_LINE_STUFF_ENABLE + | R300_GB_TRIANGLE_STUFF_ENABLE; + + r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_0] = 0x66666666; + r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_1] = 0x06666666; + + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] = + R300_GB_TILE_ENABLE | R300_GB_TILE_SIZE_16 /*| R300_GB_SUBPIXEL_1_16*/; + switch (r300->radeon.radeonScreen->num_gb_pipes) { + case 1: + default: + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= + R300_GB_TILE_PIPE_COUNT_RV300; + break; + case 2: + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= + R300_GB_TILE_PIPE_COUNT_R300; + break; + case 3: + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= + R300_GB_TILE_PIPE_COUNT_R420_3P; + break; + case 4: + r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |= + R300_GB_TILE_PIPE_COUNT_R420; + break; + } + + /* XXX: Enable anti-aliasing? */ + r300->hw.gb_misc.cmd[R300_GB_MISC_AA_CONFIG] = GB_AA_CONFIG_AA_DISABLE; + r300->hw.gb_misc.cmd[R300_GB_MISC_SELECT] = 0; + + r300->hw.ga_point_s0.cmd[1] = r300PackFloat32(0.0); + r300->hw.ga_point_s0.cmd[2] = r300PackFloat32(0.0); + r300->hw.ga_point_s0.cmd[3] = r300PackFloat32(1.0); + r300->hw.ga_point_s0.cmd[4] = r300PackFloat32(1.0); + + r300->hw.ga_triangle_stipple.cmd[1] = 0x00050005; + + r300PointSize(ctx, 1.0); + + r300->hw.ga_point_minmax.cmd[1] = 0x18000006; + r300->hw.ga_point_minmax.cmd[2] = 0x00020006; + r300->hw.ga_point_minmax.cmd[3] = r300PackFloat32(1.0 / 192.0); + + r300LineWidth(ctx, 1.0); + + r300->hw.ga_line_stipple.cmd[1] = 0; + r300->hw.ga_line_stipple.cmd[2] = r300PackFloat32(0.0); + r300->hw.ga_line_stipple.cmd[3] = r300PackFloat32(1.0); + + r300ShadeModel(ctx, ctx->Light.ShadeModel); + + r300PolygonMode(ctx, GL_FRONT, ctx->Polygon.FrontMode); + r300PolygonMode(ctx, GL_BACK, ctx->Polygon.BackMode); + r300->hw.zbias_cntl.cmd[1] = 0x00000000; + + r300PolygonOffset(ctx, ctx->Polygon.OffsetFactor, + ctx->Polygon.OffsetUnits); + r300Enable(ctx, GL_POLYGON_OFFSET_POINT, ctx->Polygon.OffsetPoint); + r300Enable(ctx, GL_POLYGON_OFFSET_LINE, ctx->Polygon.OffsetLine); + r300Enable(ctx, GL_POLYGON_OFFSET_FILL, ctx->Polygon.OffsetFill); + + r300->hw.su_depth_scale.cmd[1] = 0x4B7FFFFF; + r300->hw.su_depth_scale.cmd[2] = 0x00000000; + + r300->hw.sc_hyperz.cmd[1] = 0x0000001C; + r300->hw.sc_hyperz.cmd[2] = 0x2DA49525; + + r300->hw.sc_screendoor.cmd[1] = 0x00FFFFFF; + + r300->hw.us_out_fmt.cmd[1] = R500_OUT_FMT_C4_8 | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[2] = R500_OUT_FMT_UNUSED | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[3] = R500_OUT_FMT_UNUSED | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[4] = R500_OUT_FMT_UNUSED | + R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A; + r300->hw.us_out_fmt.cmd[5] = R300_W_FMT_W0 | R300_W_SRC_US; + + /* disable fog unit */ + r300->hw.fogs.cmd[R300_FOGS_STATE] = 0; + r300->hw.fg_depth_src.cmd[1] = R300_FG_DEPTH_SRC_SCAN; + + r300->hw.rb3d_cctl.cmd[1] = 0; + + r300BlendColor(ctx, ctx->Color.BlendColor); + + r300->hw.rb3d_dither_ctl.cmd[1] = 0; + r300->hw.rb3d_dither_ctl.cmd[2] = 0; + r300->hw.rb3d_dither_ctl.cmd[3] = 0; + r300->hw.rb3d_dither_ctl.cmd[4] = 0; + r300->hw.rb3d_dither_ctl.cmd[5] = 0; + r300->hw.rb3d_dither_ctl.cmd[6] = 0; + r300->hw.rb3d_dither_ctl.cmd[7] = 0; + r300->hw.rb3d_dither_ctl.cmd[8] = 0; + r300->hw.rb3d_dither_ctl.cmd[9] = 0; + + r300->hw.rb3d_aaresolve_ctl.cmd[1] = 0; + + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000; + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff; + + r300->hw.zb_depthclearvalue.cmd[1] = 0; + + r300->hw.zstencil_format.cmd[2] = R300_ZTOP_DISABLE; + r300->hw.zstencil_format.cmd[3] = 0x00000003; + r300->hw.zstencil_format.cmd[4] = 0x00000000; + r300SetEarlyZState(ctx); + + r300->hw.unk4F30.cmd[1] = 0; + r300->hw.unk4F30.cmd[2] = 0; + + r300->hw.zb_hiz_offset.cmd[1] = 0; + + r300->hw.zb_hiz_pitch.cmd[1] = 0; + + r300VapCntl(r300, 0, 0, 0); + if (has_tcl) { + r300->hw.vps.cmd[R300_VPS_ZERO_0] = 0; + r300->hw.vps.cmd[R300_VPS_ZERO_1] = 0; + r300->hw.vps.cmd[R300_VPS_POINTSIZE] = r300PackFloat32(1.0); + r300->hw.vps.cmd[R300_VPS_ZERO_3] = 0; + } + + r300->radeon.hw.all_dirty = GL_TRUE; +} + +void r300UpdateShaders(r300ContextPtr rmesa) +{ + GLcontext *ctx; + struct r300_vertex_program *vp; + int i; + + ctx = rmesa->radeon.glCtx; + + if (rmesa->radeon.NewGLState && hw_tcl_on) { + rmesa->radeon.NewGLState = 0; + + for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { + rmesa->temp_attrib[i] = + TNL_CONTEXT(ctx)->vb.AttribPtr[i]; + TNL_CONTEXT(ctx)->vb.AttribPtr[i] = + &rmesa->dummy_attrib[i]; + } + + _tnl_UpdateFixedFunctionProgram(ctx); + + for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { + TNL_CONTEXT(ctx)->vb.AttribPtr[i] = + rmesa->temp_attrib[i]; + } + + r300SelectVertexShader(rmesa); + vp = (struct r300_vertex_program *) + CURRENT_VERTEX_SHADER(ctx); + /*if (vp->translated == GL_FALSE) + r300TranslateVertexShader(vp); */ + if (vp->translated == GL_FALSE) { + fprintf(stderr, "Failing back to sw-tcl\n"); + hw_tcl_on = future_hw_tcl_on = 0; + r300ResetHwState(rmesa); + + r300UpdateStateParameters(ctx, _NEW_PROGRAM); + return; + } + } + r300UpdateStateParameters(ctx, _NEW_PROGRAM); +} + +static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, + struct gl_program *program, struct prog_src_register srcreg) +{ + static const GLfloat dummy[4] = { 0, 0, 0, 0 }; + + switch(srcreg.File) { + case PROGRAM_LOCAL_PARAM: + return program->LocalParams[srcreg.Index]; + case PROGRAM_ENV_PARAM: + return ctx->FragmentProgram.Parameters[srcreg.Index]; + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + return program->Parameters->ParameterValues[srcreg.Index]; + default: + _mesa_problem(ctx, "get_fragmentprogram_constant: Unknown\n"); + return dummy; + } +} + + +static void r300SetupPixelShader(r300ContextPtr rmesa) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + struct r300_fragment_program_code *code; + int i, k; + + if (!fp) /* should only happenen once, just after context is created */ + return; + + r300TranslateFragmentShader(rmesa, fp); + if (!fp->translated) { + fprintf(stderr, "%s: No valid fragment shader, exiting\n", + __FUNCTION__); + return; + } + code = &fp->code; + + r300SetupTextures(ctx); + + R300_STATECHANGE(rmesa, fpi[0]); + R300_STATECHANGE(rmesa, fpi[1]); + R300_STATECHANGE(rmesa, fpi[2]); + R300_STATECHANGE(rmesa, fpi[3]); + rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, code->alu.length); + rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, code->alu.length); + rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, code->alu.length); + rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) { + rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0; + rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1; + rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst2; + rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst3; + } + + R300_STATECHANGE(rmesa, fp); + rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->cur_node | (code->first_node_has_tex << 3); + rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->max_temp_idx; + rmesa->hw.fp.cmd[R300_FP_CNTL2] = + (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | + ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) | + (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | + ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT); + /* I just want to say, the way these nodes are stored.. weird.. */ + for (i = 0, k = (4 - (code->cur_node + 1)); i < 4; i++, k++) { + if (i < (code->cur_node + 1)) { + rmesa->hw.fp.cmd[R300_FP_NODE0 + k] = + (code->node[i].alu_offset << R300_ALU_START_SHIFT) | + (code->node[i].alu_end << R300_ALU_SIZE_SHIFT) | + (code->node[i].tex_offset << R300_TEX_START_SHIFT) | + (code->node[i].tex_end << R300_TEX_SIZE_SHIFT) | + code->node[i].flags; + } else { + rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0; + } + } + + R300_STATECHANGE(rmesa, fpp); + rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4); + for (i = 0; i < code->const_nr; i++) { + const GLfloat *constant = get_fragmentprogram_constant(ctx, + &fp->mesa_program.Base, code->constant[i]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(constant[3]); + } +} + +#define bump_r500fp_count(ptr, new_count) do{\ + drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ + int _nc=(new_count)/6; \ + assert(_nc < 256); \ + if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\ +} while(0) + +#define bump_r500fp_const_count(ptr, new_count) do{\ + drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ + int _nc=(new_count)/4; \ + assert(_nc < 256); \ + if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\ +} while(0) + +static void r500SetupPixelShader(r300ContextPtr rmesa) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + struct r500_fragment_program *fp = (struct r500_fragment_program *) + (char *)ctx->FragmentProgram._Current; + int i; + struct r500_fragment_program_code *code; + + if (!fp) /* should only happenen once, just after context is created */ + return; + + ((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0; + + r500TranslateFragmentShader(rmesa, fp); + if (!fp->translated) { + fprintf(stderr, "%s: No valid fragment shader, exiting\n", + __FUNCTION__); + return; + } + code = &fp->code; + + r300SetupTextures(ctx); + + R300_STATECHANGE(rmesa, fp); + rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = code->max_temp_idx; + + rmesa->hw.fp.cmd[R500_FP_CODE_ADDR] = + R500_US_CODE_START_ADDR(code->inst_offset) | + R500_US_CODE_END_ADDR(code->inst_end); + rmesa->hw.fp.cmd[R500_FP_CODE_RANGE] = + R500_US_CODE_RANGE_ADDR(code->inst_offset) | + R500_US_CODE_RANGE_SIZE(code->inst_end); + rmesa->hw.fp.cmd[R500_FP_CODE_OFFSET] = + R500_US_CODE_OFFSET_ADDR(0); /* FIXME when we add flow control */ + + R300_STATECHANGE(rmesa, r500fp); + /* Emit our shader... */ + for (i = 0; i < code->inst_end+1; i++) { + rmesa->hw.r500fp.cmd[i*6+1] = code->inst[i].inst0; + rmesa->hw.r500fp.cmd[i*6+2] = code->inst[i].inst1; + rmesa->hw.r500fp.cmd[i*6+3] = code->inst[i].inst2; + rmesa->hw.r500fp.cmd[i*6+4] = code->inst[i].inst3; + rmesa->hw.r500fp.cmd[i*6+5] = code->inst[i].inst4; + rmesa->hw.r500fp.cmd[i*6+6] = code->inst[i].inst5; + } + + bump_r500fp_count(rmesa->hw.r500fp.cmd, (code->inst_end + 1) * 6); + + R300_STATECHANGE(rmesa, r500fp_const); + for (i = 0; i < code->const_nr; i++) { + const GLfloat *constant = get_fragmentprogram_constant(ctx, + &fp->mesa_program.Base, code->constant[i]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(constant[3]); + } + bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, code->const_nr * 4); + +} + +void r300UpdateShaderStates(r300ContextPtr rmesa) +{ + GLcontext *ctx; + ctx = rmesa->radeon.glCtx; + + r300SetEarlyZState(ctx); + + /* w_fmt value is set to get best performance + * see p.130 R5xx 3D acceleration guide v1.3 */ + GLuint w_fmt, fgdepthsrc; + if (current_fragment_program_writes_depth(ctx)) { + fgdepthsrc = R300_FG_DEPTH_SRC_SHADER; + w_fmt = R300_W_FMT_W24 | R300_W_SRC_US; + } else { + fgdepthsrc = R300_FG_DEPTH_SRC_SCAN; + w_fmt = R300_W_FMT_W0 | R300_W_SRC_US; + } + + if (w_fmt != rmesa->hw.us_out_fmt.cmd[5]) { + R300_STATECHANGE(rmesa, us_out_fmt); + rmesa->hw.us_out_fmt.cmd[5] = w_fmt; + } + + if (fgdepthsrc != rmesa->hw.fg_depth_src.cmd[1]) { + R300_STATECHANGE(rmesa, fg_depth_src); + rmesa->hw.fg_depth_src.cmd[1] = fgdepthsrc; + } + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + r500SetupPixelShader(rmesa); + else + r300SetupPixelShader(rmesa); + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + r500SetupRSUnit(ctx); + else + r300SetupRSUnit(ctx); + + if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + r300SetupVertexProgram(rmesa); + +} + +/** + * Called by Mesa after an internal state update. + */ +static void r300InvalidateState(GLcontext * ctx, GLuint new_state) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + _swrast_InvalidateState(ctx, new_state); + _swsetup_InvalidateState(ctx, new_state); + _vbo_InvalidateState(ctx, new_state); + _tnl_InvalidateState(ctx, new_state); + _ae_invalidate_state(ctx, new_state); + + if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { + _mesa_update_framebuffer(ctx); + /* this updates the DrawBuffer's Width/Height if it's a FBO */ + _mesa_update_draw_buffer_bounds(ctx); + + R300_STATECHANGE(r300, cb); + } + + r300UpdateStateParameters(ctx, new_state); + + r300->radeon.NewGLState |= new_state; +} + +/** + * Calculate initial hardware state and register state functions. + * Assumes that the command buffer and state atoms have been + * initialized already. + */ +void r300InitState(r300ContextPtr r300) +{ + memset(&(r300->state.texture), 0, sizeof(r300->state.texture)); + + r300ResetHwState(r300); +} + +static void r300RenderMode(GLcontext * ctx, GLenum mode) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + (void)rmesa; + (void)mode; +} + +void r300UpdateClipPlanes( GLcontext *ctx ) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLuint p; + + for (p = 0; p < ctx->Const.MaxClipPlanes; p++) { + if (ctx->Transform.ClipPlanesEnabled & (1 << p)) { + GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p]; + + R300_STATECHANGE( rmesa, vpucp[p] ); + rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0]; + rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1]; + rmesa->hw.vpucp[p].cmd[R300_VPUCP_Z] = ip[2]; + rmesa->hw.vpucp[p].cmd[R300_VPUCP_W] = ip[3]; + } + } +} + +/** + * Initialize driver's state callback functions + */ +void r300InitStateFuncs(struct dd_function_table *functions) +{ + + functions->UpdateState = r300InvalidateState; + functions->AlphaFunc = r300AlphaFunc; + functions->BlendColor = r300BlendColor; + functions->BlendEquationSeparate = r300BlendEquationSeparate; + functions->BlendFuncSeparate = r300BlendFuncSeparate; + functions->Enable = r300Enable; + functions->ColorMask = r300ColorMask; + functions->DepthFunc = r300DepthFunc; + functions->DepthMask = r300DepthMask; + functions->CullFace = r300CullFace; + functions->FrontFace = r300FrontFace; + functions->ShadeModel = r300ShadeModel; + functions->LogicOpcode = r300LogicOpcode; + + /* ARB_point_parameters */ + functions->PointParameterfv = r300PointParameter; + + /* Stencil related */ + functions->StencilFuncSeparate = r300StencilFuncSeparate; + functions->StencilMaskSeparate = r300StencilMaskSeparate; + functions->StencilOpSeparate = r300StencilOpSeparate; + + /* Viewport related */ + functions->Viewport = r300Viewport; + functions->DepthRange = r300DepthRange; + functions->PointSize = r300PointSize; + functions->LineWidth = r300LineWidth; + + functions->PolygonOffset = r300PolygonOffset; + functions->PolygonMode = r300PolygonMode; + + functions->RenderMode = r300RenderMode; + + functions->ClipPlane = r300ClipPlane; + functions->Scissor = radeonScissor; + + functions->DrawBuffer = radeonDrawBuffer; + functions->ReadBuffer = radeonReadBuffer; +} diff --git a/src/mesa/drivers/dri/r600/r600_state.h b/src/mesa/drivers/dri/r600/r600_state.h new file mode 100644 index 00000000000..bc33da97605 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_state.h @@ -0,0 +1,65 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Nicolai Haehnle <[email protected]> + */ + +#ifndef __R600_STATE_H__ +#define __R600_STATE_H__ + +#include "r600_context.h" + +#define R300_NEWPRIM( rmesa ) \ + do { \ + if ( rmesa->radeon.dma.flush ) \ + rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \ + } while (0) + +#define R300_STATECHANGE(r300, atom) \ + do { \ + R300_NEWPRIM(r300); \ + r300->hw.atom.dirty = GL_TRUE; \ + r300->radeon.hw.is_dirty = GL_TRUE; \ + } while(0) + +// r300_state.c +extern int future_hw_tcl_on; +void _tnl_UpdateFixedFunctionProgram (GLcontext * ctx); +void r300UpdateViewportOffset (GLcontext * ctx); +void r300UpdateDrawBuffer (GLcontext * ctx); +void r300UpdateStateParameters (GLcontext * ctx, GLuint new_state); +void r300UpdateShaders (r300ContextPtr rmesa); +void r300UpdateShaderStates (r300ContextPtr rmesa); +void r300InitState (r300ContextPtr r300); +void r300UpdateClipPlanes (GLcontext * ctx); +void r300InitStateFuncs (struct dd_function_table *functions); + +#endif /* __R300_STATE_H__ */ diff --git a/src/mesa/drivers/dri/r600/r600_swtcl.c b/src/mesa/drivers/dri/r600/r600_swtcl.c new file mode 100644 index 00000000000..9c8b62465fa --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_swtcl.c @@ -0,0 +1,722 @@ +/************************************************************************** + +Copyright (C) 2007 Dave Airlie + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Dave Airlie <[email protected]> + * Maciej Cencora <[email protected]> + */ + +#include "tnl/tnl.h" +#include "tnl/t_pipeline.h" + +#include "r600_swtcl.h" +#include "r600_emit.h" +#include "r600_tex.h" + +#define EMIT_ATTR( ATTR, STYLE ) \ +do { \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \ + rmesa->radeon.swtcl.vertex_attr_count++; \ +} while (0) + +#define EMIT_PAD( N ) \ +do { \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \ + rmesa->radeon.swtcl.vertex_attr_count++; \ +} while (0) + +#define ADD_ATTR(_attr, _format, _dst_loc, _swizzle, _write_mask) \ +do { \ + attrs[num_attrs].attr = (_attr); \ + attrs[num_attrs].format = (_format); \ + attrs[num_attrs].dst_loc = (_dst_loc); \ + attrs[num_attrs].swizzle = (_swizzle); \ + attrs[num_attrs].write_mask = (_write_mask); \ + ++num_attrs; \ +} while (0) + +static void r300SwtclVAPSetup(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten) +{ + r300ContextPtr rmesa = R300_CONTEXT( ctx ); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + struct vertex_attribute *attrs = rmesa->swtcl.vert_attrs; + int vte = 0; + int i, j, reg_count; + uint32_t *vir0 = &rmesa->hw.vir[0].cmd[1]; + uint32_t *vir1 = &rmesa->hw.vir[1].cmd[1]; + + for (i = 0; i < R300_VIR_CMDSIZE-1; ++i) + vir0[i] = vir1[i] = 0; + + for (i = 0, j = 0; i < rmesa->radeon.swtcl.vertex_attr_count; ++i) { + int tmp, data_format; + switch (attrs[i].format) { + case EMIT_1F: + data_format = R300_DATA_TYPE_FLOAT_1; + break; + case EMIT_2F: + data_format = R300_DATA_TYPE_FLOAT_2; + break; + case EMIT_3F: + data_format = R300_DATA_TYPE_FLOAT_3; + break; + case EMIT_4F: + data_format = R300_DATA_TYPE_FLOAT_4; + break; + case EMIT_4UB_4F_RGBA: + case EMIT_4UB_4F_ABGR: + data_format = R300_DATA_TYPE_BYTE | R300_NORMALIZE; + break; + default: + fprintf(stderr, "%s: Invalid data format type", __FUNCTION__); + _mesa_exit(-1); + break; + } + + tmp = data_format | (attrs[i].dst_loc << R300_DST_VEC_LOC_SHIFT); + if (i % 2 == 0) { + vir0[j] = tmp << R300_DATA_TYPE_0_SHIFT; + vir1[j] = attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT); + } else { + vir0[j] |= tmp << R300_DATA_TYPE_1_SHIFT; + vir1[j] |= (attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT; + ++j; + } + } + + reg_count = (rmesa->radeon.swtcl.vertex_attr_count + 1) >> 1; + if (rmesa->radeon.swtcl.vertex_attr_count % 2 != 0) { + vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT; + } else { + vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT; + } + + R300_STATECHANGE(rmesa, vir[0]); + R300_STATECHANGE(rmesa, vir[1]); + R300_STATECHANGE(rmesa, vof); + R300_STATECHANGE(rmesa, vte); + R300_STATECHANGE(rmesa, vic); + + if (rmesa->radeon.radeonScreen->kernel_mm) { + rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF; + rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF; + rmesa->hw.vir[0].cmd[0] |= (reg_count & 0x3FFF) << 16; + rmesa->hw.vir[1].cmd[0] |= (reg_count & 0x3FFF) << 16; + } else { + ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = reg_count; + ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = reg_count; + } + + rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); + rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); + rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); + rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten); + + vte = rmesa->hw.vte.cmd[1]; + vte &= ~(R300_VTX_XY_FMT | R300_VTX_Z_FMT | R300_VTX_W0_FMT); + /* Important: + */ + if ( VB->NdcPtr != NULL ) { + VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; + vte |= R300_VTX_XY_FMT | R300_VTX_Z_FMT; + } + else { + VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; + vte |= R300_VTX_W0_FMT; + } + + assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL ); + + rmesa->hw.vte.cmd[1] = vte; + rmesa->hw.vte.cmd[2] = rmesa->radeon.swtcl.vertex_size; +} + + +static void r300SetVertexFormat( GLcontext *ctx ) +{ + r300ContextPtr rmesa = R300_CONTEXT( ctx ); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + int fog_id = -1; + GLuint InputsRead = 0; + GLuint OutputsWritten = 0; + int num_attrs = 0; + struct vertex_attribute *attrs = rmesa->swtcl.vert_attrs; + + rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0; + rmesa->radeon.swtcl.vertex_attr_count = 0; + + if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POS)) { + InputsRead |= 1 << VERT_ATTRIB_POS; + OutputsWritten |= 1 << VERT_RESULT_HPOS; + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F ); + ADD_ATTR(VERT_ATTRIB_POS, EMIT_4F, SWTCL_OVM_POS, SWIZZLE_XYZW, MASK_XYZW); + rmesa->swtcl.coloroffset = 4; + } + + if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR0)) { + InputsRead |= 1 << VERT_ATTRIB_COLOR0; + OutputsWritten |= 1 << VERT_RESULT_COL0; +#if MESA_LITTLE_ENDIAN + EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA ); + ADD_ATTR(VERT_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW); +#else + EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR ); + ADD_ATTR(VERT_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW); +#endif + } + + if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR1 )) { + GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); + InputsRead |= 1 << VERT_ATTRIB_COLOR1; + OutputsWritten |= 1 << VERT_RESULT_COL1; +#if MESA_LITTLE_ENDIAN + EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_RGBA ); + ADD_ATTR(VERT_ATTRIB_COLOR1, EMIT_4UB_4F_RGBA, SWTCL_OVM_COLOR1, swiz, MASK_XYZW); +#else + EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_ABGR ); + ADD_ATTR(VERT_ATTRIB_COLOR1, EMIT_4UB_4F_ABGR, SWTCL_OVM_COLOR1, swiz, MASK_XYZW); +#endif + rmesa->swtcl.specoffset = rmesa->swtcl.coloroffset + 1; + } + + if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POINTSIZE )) { + GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO); + InputsRead |= 1 << VERT_ATTRIB_POINT_SIZE; + OutputsWritten |= 1 << VERT_RESULT_PSIZ; + EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F ); + ADD_ATTR(VERT_ATTRIB_POINT_SIZE, EMIT_1F, SWTCL_OVM_POINT_SIZE, swiz, MASK_X); + } + + if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_FOG)) { + /* find first free tex coord slot */ + if (RENDERINPUTS_TEST_RANGE(tnl->render_inputs_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { + int i; + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (!RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX(i) )) { + fog_id = i; + break; + } + } + } else { + fog_id = 0; + } + + if (fog_id == -1) { + fprintf(stderr, "\tout of free texcoords to do fog\n"); + _mesa_exit(-1); + } + + InputsRead |= 1 << VERT_ATTRIB_FOG; + OutputsWritten |= 1 << VERT_RESULT_FOGC; + GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO); + EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F ); + ADD_ATTR(VERT_ATTRIB_FOG, EMIT_1F, SWTCL_OVM_TEX(fog_id), swiz, MASK_X); + } + + if (RENDERINPUTS_TEST_RANGE(tnl->render_inputs_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { + int i; + GLuint swiz, mask, format; + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX(i) )) { + switch (VB->TexCoordPtr[i]->size) { + case 1: + case 2: + format = EMIT_2F; + swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO); + mask = MASK_X | MASK_Y; + break; + case 3: + format = EMIT_3F; + swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + mask = MASK_X | MASK_Y | MASK_Z; + break; + case 4: + format = EMIT_4F; + swiz = SWIZZLE_XYZW; + mask = MASK_XYZW; + break; + default: + continue; + } + InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); + OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); + EMIT_ATTR(_TNL_ATTRIB_TEX(i), format); + ADD_ATTR(VERT_ATTRIB_TEX0 + i, format, SWTCL_OVM_TEX(i), swiz, mask); + } + } + } + + /* RS can't put fragment position on the pixel stack, so stuff it in texcoord if needed */ + if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POS) && (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_WPOS)) { + int first_free_tex = -1; + if (fog_id >= 0) { + first_free_tex = fog_id+1; + } else { + if (RENDERINPUTS_TEST_RANGE(tnl->render_inputs_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { + int i; + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (!RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX(i) )) { + first_free_tex = i; + break; + } + } + } else { + first_free_tex = 0; + } + } + + if (first_free_tex == -1) { + fprintf(stderr, "\tout of free texcoords to write w pos\n"); + _mesa_exit(-1); + } + + InputsRead |= 1 << (VERT_ATTRIB_TEX0 + first_free_tex); + OutputsWritten |= 1 << (VERT_RESULT_TEX0 + first_free_tex); + EMIT_ATTR( _TNL_ATTRIB_TEX(first_free_tex), EMIT_4F ); + ADD_ATTR(VERT_ATTRIB_TEX0 + first_free_tex, EMIT_4F, SWTCL_OVM_TEX(first_free_tex), SWIZZLE_XYZW, MASK_XYZW); + } + + R300_NEWPRIM(rmesa); + r300SwtclVAPSetup(ctx, InputsRead, OutputsWritten); + + rmesa->radeon.swtcl.vertex_size = + _tnl_install_attrs( ctx, + rmesa->radeon.swtcl.vertex_attrs, + rmesa->radeon.swtcl.vertex_attr_count, + NULL, 0 ); + + rmesa->radeon.swtcl.vertex_size /= 4; + + RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, tnl->render_inputs_bitset); +} + + +static GLuint reduced_prim[] = { + GL_POINTS, + GL_LINES, + GL_LINES, + GL_LINES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, +}; + +static void r300RasterPrimitive( GLcontext *ctx, GLuint prim ); +static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); + +/*********************************************************************** + * Emit primitives as inline vertices * + ***********************************************************************/ + + +#define HAVE_POINTS 1 +#define HAVE_LINES 1 +#define HAVE_LINE_STRIPS 1 +#define HAVE_TRIANGLES 1 +#define HAVE_TRI_STRIPS 1 +#define HAVE_TRI_STRIP_1 0 +#define HAVE_TRI_FANS 1 +#define HAVE_QUADS 0 +#define HAVE_QUAD_STRIPS 0 +#define HAVE_POLYGONS 1 +#define HAVE_ELTS 1 + +#undef LOCAL_VARS +#undef ALLOC_VERTS +#define CTX_ARG r300ContextPtr rmesa +#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size +#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 ) +#define LOCAL_VARS \ + r300ContextPtr rmesa = R300_CONTEXT(ctx); \ + const char *r300verts = (char *)rmesa->radeon.swtcl.verts; +#define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int))) +#define VERTEX r300Vertex +#undef TAG +#define TAG(x) r300_##x +#include "tnl_dd/t_dd_triemit.h" + + + +/*********************************************************************** + * Macros for t_dd_tritmp.h to draw basic primitives * + ***********************************************************************/ + +#define QUAD( a, b, c, d ) r300_quad( rmesa, a, b, c, d ) +#define TRI( a, b, c ) r300_triangle( rmesa, a, b, c ) +#define LINE( a, b ) r300_line( rmesa, a, b ) +#define POINT( a ) r300_point( rmesa, a ) + +/*********************************************************************** + * Build render functions from dd templates * + ***********************************************************************/ + +#define R300_TWOSIDE_BIT 0x01 +#define R300_UNFILLED_BIT 0x02 +#define R300_MAX_TRIFUNC 0x04 + +static struct { + tnl_points_func points; + tnl_line_func line; + tnl_triangle_func triangle; + tnl_quad_func quad; +} rast_tab[R300_MAX_TRIFUNC]; + +#define DO_FALLBACK 0 +#define DO_UNFILLED (IND & R300_UNFILLED_BIT) +#define DO_TWOSIDE (IND & R300_TWOSIDE_BIT) +#define DO_FLAT 0 +#define DO_OFFSET 0 +#define DO_TRI 1 +#define DO_QUAD 1 +#define DO_LINE 1 +#define DO_POINTS 1 +#define DO_FULL_QUAD 1 + +#define HAVE_RGBA 1 +#define HAVE_SPEC 1 +#define HAVE_BACK_COLORS 0 +#define HAVE_HW_FLATSHADE 1 +#define TAB rast_tab + +#define DEPTH_SCALE 1.0 +#define UNFILLED_TRI unfilled_tri +#define UNFILLED_QUAD unfilled_quad +#define VERT_X(_v) _v->v.x +#define VERT_Y(_v) _v->v.y +#define VERT_Z(_v) _v->v.z +#define AREA_IS_CCW( a ) (a < 0) +#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int))) + +#define VERT_SET_RGBA( v, c ) \ +do { \ + r300_color_t *color = (r300_color_t *)&((v)->ui[coloroffset]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]); \ +} while (0) + +#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset] + +#define VERT_SET_SPEC( v0, c ) \ +do { \ + if (specoffset) { \ + UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.red, (c)[0]); \ + UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.green, (c)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.blue, (c)[2]); \ + } \ +} while (0) + +#define VERT_COPY_SPEC( v0, v1 ) \ +do { \ + if (specoffset) { \ + v0->v.specular.red = v1->v.specular.red; \ + v0->v.specular.green = v1->v.specular.green; \ + v0->v.specular.blue = v1->v.specular.blue; \ + } \ +} while (0) + +#define VERT_SAVE_RGBA( idx ) color[idx] = v[idx]->ui[coloroffset] +#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx] +#define VERT_SAVE_SPEC( idx ) if (specoffset) spec[idx] = v[idx]->ui[specoffset] +#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx] + +#undef LOCAL_VARS +#undef TAG +#undef INIT + +#define LOCAL_VARS(n) \ + r300ContextPtr rmesa = R300_CONTEXT(ctx); \ + GLuint color[n] = { 0, }, spec[n] = { 0, }; \ + GLuint coloroffset = rmesa->swtcl.coloroffset; \ + GLuint specoffset = rmesa->swtcl.specoffset; \ + (void) color; (void) spec; (void) coloroffset; (void) specoffset; + +/*********************************************************************** + * Helpers for rendering unfilled primitives * + ***********************************************************************/ + +#define RASTERIZE(x) r300RasterPrimitive( ctx, reduced_prim[x] ) +#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive +#undef TAG +#define TAG(x) x +#include "tnl_dd/t_dd_unfilled.h" +#undef IND + + +/*********************************************************************** + * Generate GL render functions * + ***********************************************************************/ + + +#define IND (0) +#define TAG(x) x +#include "tnl_dd/t_dd_tritmp.h" + +#define IND (R300_TWOSIDE_BIT) +#define TAG(x) x##_twoside +#include "tnl_dd/t_dd_tritmp.h" + +#define IND (R300_UNFILLED_BIT) +#define TAG(x) x##_unfilled +#include "tnl_dd/t_dd_tritmp.h" + +#define IND (R300_TWOSIDE_BIT|R300_UNFILLED_BIT) +#define TAG(x) x##_twoside_unfilled +#include "tnl_dd/t_dd_tritmp.h" + + + +static void init_rast_tab( void ) +{ + init(); + init_twoside(); + init_unfilled(); + init_twoside_unfilled(); +} + +/**********************************************************************/ +/* Render unclipped begin/end objects */ +/**********************************************************************/ + +#define RENDER_POINTS( start, count ) \ + for ( ; start < count ; start++) \ + r300_point( rmesa, VERT(start) ) +#define RENDER_LINE( v0, v1 ) \ + r300_line( rmesa, VERT(v0), VERT(v1) ) +#define RENDER_TRI( v0, v1, v2 ) \ + r300_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) ) +#define RENDER_QUAD( v0, v1, v2, v3 ) \ + r300_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) ) +#define INIT(x) do { \ + r300RenderPrimitive( ctx, x ); \ +} while (0) +#undef LOCAL_VARS +#define LOCAL_VARS \ + r300ContextPtr rmesa = R300_CONTEXT(ctx); \ + const GLuint vertsize = rmesa->radeon.swtcl.vertex_size; \ + const char *r300verts = (char *)rmesa->radeon.swtcl.verts; \ + const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \ + const GLboolean stipple = ctx->Line.StippleFlag; \ + (void) elt; (void) stipple; +#define RESET_STIPPLE //if ( stipple ) r200ResetLineStipple( ctx ); +#define RESET_OCCLUSION +#define PRESERVE_VB_DEFS +#define ELT(x) (x) +#define TAG(x) r300_##x##_verts +#include "tnl/t_vb_rendertmp.h" +#undef ELT +#undef TAG +#define TAG(x) r300_##x##_elts +#define ELT(x) elt[x] +#include "tnl/t_vb_rendertmp.h" + + + + +/**********************************************************************/ +/* Choose render functions */ +/**********************************************************************/ +static void r300ChooseRenderState( GLcontext *ctx ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLuint index = 0; + GLuint flags = ctx->_TriangleCaps; + + if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT; + if (flags & DD_TRI_UNFILLED) index |= R300_UNFILLED_BIT; + + if (index != rmesa->radeon.swtcl.RenderIndex) { + tnl->Driver.Render.Points = rast_tab[index].points; + tnl->Driver.Render.Line = rast_tab[index].line; + tnl->Driver.Render.ClippedLine = rast_tab[index].line; + tnl->Driver.Render.Triangle = rast_tab[index].triangle; + tnl->Driver.Render.Quad = rast_tab[index].quad; + + if (index == 0) { + tnl->Driver.Render.PrimTabVerts = r300_render_tab_verts; + tnl->Driver.Render.PrimTabElts = r300_render_tab_elts; + tnl->Driver.Render.ClippedPolygon = r300_fast_clipped_poly; + } else { + tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts; + tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts; + tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon; + } + + rmesa->radeon.swtcl.RenderIndex = index; + } +} + + +static void r300RenderStart(GLcontext *ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT( ctx ); + + r300ChooseRenderState(ctx); + r300SetVertexFormat(ctx); + + r300ValidateBuffers(ctx); + + r300UpdateShaders(rmesa); + r300UpdateShaderStates(rmesa); + + r300EmitCacheFlush(rmesa); + + /* investigate if we can put back flush optimisation if needed */ + if (rmesa->radeon.dma.flush != NULL) { + rmesa->radeon.dma.flush(ctx); + } +} + +static void r300RenderFinish(GLcontext *ctx) +{ +} + +static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim ) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + if (rmesa->radeon.swtcl.hw_primitive != hwprim) { + R300_NEWPRIM( rmesa ); + rmesa->radeon.swtcl.hw_primitive = hwprim; + } +} + +static void r300RenderPrimitive(GLcontext *ctx, GLenum prim) +{ + + r300ContextPtr rmesa = R300_CONTEXT(ctx); + rmesa->radeon.swtcl.render_primitive = prim; + + if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED)) + return; + + r300RasterPrimitive( ctx, reduced_prim[prim] ); +} + +static void r300ResetLineStipple(GLcontext *ctx) +{ +} + +void r300InitSwtcl(GLcontext *ctx) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + static int firsttime = 1; + + if (firsttime) { + init_rast_tab(); + firsttime = 0; + } + + tnl->Driver.Render.Start = r300RenderStart; + tnl->Driver.Render.Finish = r300RenderFinish; + tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive; + tnl->Driver.Render.ResetLineStipple = r300ResetLineStipple; + tnl->Driver.Render.BuildVertices = _tnl_build_vertices; + tnl->Driver.Render.CopyPV = _tnl_copy_pv; + tnl->Driver.Render.Interp = _tnl_interp; + + /* FIXME: what are these numbers? */ + _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, + 48 * sizeof(GLfloat) ); + + rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; + rmesa->radeon.swtcl.RenderIndex = ~0; + rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES; + rmesa->radeon.swtcl.hw_primitive = 0; + + _tnl_invalidate_vertex_state( ctx, ~0 ); + _tnl_invalidate_vertices( ctx, ~0 ); + + _tnl_need_projected_coords( ctx, GL_FALSE ); + r300ChooseRenderState(ctx); +} + +void r300DestroySwtcl(GLcontext *ctx) +{ +} + +static void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset) +{ + BATCH_LOCALS(&rmesa->radeon); + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n", + __FUNCTION__, vertex_size, offset); + + BEGIN_BATCH(7); + OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2); + OUT_BATCH(1); + OUT_BATCH(vertex_size | (vertex_size << 8)); + OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); +} + +static void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr) +{ + BATCH_LOCALS(&rmesa->radeon); + int type, num_verts; + + type = r300PrimitiveType(rmesa, primitive); + num_verts = r300NumVerts(rmesa, vertex_nr, primitive); + + BEGIN_BATCH(3); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type); + END_BATCH(); +} + +void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + rcommonEnsureCmdBufSpace(&rmesa->radeon, + rmesa->radeon.hw.max_state_size + (12*sizeof(int)), + __FUNCTION__); + radeonEmitState(&rmesa->radeon); + r300EmitVertexAOS(rmesa, + rmesa->radeon.swtcl.vertex_size, + rmesa->radeon.dma.current, + current_offset); + + r300EmitVbufPrim(rmesa, + rmesa->radeon.swtcl.hw_primitive, + rmesa->radeon.swtcl.numverts); + r300EmitCacheFlush(rmesa); + COMMIT_BATCH(); +} diff --git a/src/mesa/drivers/dri/r600/r600_swtcl.h b/src/mesa/drivers/dri/r600/r600_swtcl.h new file mode 100644 index 00000000000..d1739f8db87 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_swtcl.h @@ -0,0 +1,62 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +/* + * Authors: + * Keith Whitwell <[email protected]> - original r200 code + * Dave Airlie <[email protected]> + */ + +#ifndef __R600_SWTCL_H__ +#define __R600_SWTCL_H__ + +#include "main/mtypes.h" +#include "swrast/swrast.h" +#include "r600_context.h" + +#define MASK_XYZW (R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) +#define MASK_X R300_WRITE_ENA_X +#define MASK_Y R300_WRITE_ENA_Y +#define MASK_Z R300_WRITE_ENA_Z +#define MASK_W R300_WRITE_ENA_W + +/* + * Here are definitions of OVM locations of vertex attributes for non TCL hw + */ +#define SWTCL_OVM_POS 0 +#define SWTCL_OVM_COLOR0 2 +#define SWTCL_OVM_COLOR1 3 +#define SWTCL_OVM_TEX(n) ((n) + 6) +#define SWTCL_OVM_POINT_SIZE 15 + + +extern void r300InitSwtcl( GLcontext *ctx ); +extern void r300DestroySwtcl( GLcontext *ctx ); + +extern void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset); +#endif diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c new file mode 100644 index 00000000000..70bf6c4b948 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_tex.c @@ -0,0 +1,347 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +/** + * \file + * + * \author Keith Whitwell <[email protected]> + */ + +#include "main/glheader.h" +#include "main/imports.h" +#include "main/colormac.h" +#include "main/context.h" +#include "main/enums.h" +#include "main/image.h" +#include "main/mipmap.h" +#include "main/simple_list.h" +#include "main/texformat.h" +#include "main/texstore.h" +#include "main/teximage.h" +#include "main/texobj.h" + +#include "texmem.h" + +#include "r600_context.h" +#include "r600_state.h" +#include "r600_ioctl.h" +#include "radeon_mipmap_tree.h" +#include "r600_tex.h" + +#include "xmlpool.h" + + +static unsigned int translate_wrap_mode(GLenum wrapmode) +{ + switch(wrapmode) { + case GL_REPEAT: return R300_TX_REPEAT; + case GL_CLAMP: return R300_TX_CLAMP; + case GL_CLAMP_TO_EDGE: return R300_TX_CLAMP_TO_EDGE; + case GL_CLAMP_TO_BORDER: return R300_TX_CLAMP_TO_BORDER; + case GL_MIRRORED_REPEAT: return R300_TX_REPEAT | R300_TX_MIRRORED; + case GL_MIRROR_CLAMP_EXT: return R300_TX_CLAMP | R300_TX_MIRRORED; + case GL_MIRROR_CLAMP_TO_EDGE_EXT: return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; + case GL_MIRROR_CLAMP_TO_BORDER_EXT: return R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED; + default: + _mesa_problem(NULL, "bad wrap mode in %s", __FUNCTION__); + return 0; + } +} + + +/** + * Update the cached hardware registers based on the current texture wrap modes. + * + * \param t Texture object whose wrap modes are to be set + */ +static void r300UpdateTexWrap(radeonTexObjPtr t) +{ + struct gl_texture_object *tObj = &t->base; + + t->pp_txfilter &= + ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK); + + t->pp_txfilter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT; + + if (tObj->Target != GL_TEXTURE_1D) { + t->pp_txfilter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT; + + if (tObj->Target == GL_TEXTURE_3D) + t->pp_txfilter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT; + } +} + +static GLuint aniso_filter(GLfloat anisotropy) +{ + if (anisotropy >= 16.0) { + return R300_TX_MAX_ANISO_16_TO_1; + } else if (anisotropy >= 8.0) { + return R300_TX_MAX_ANISO_8_TO_1; + } else if (anisotropy >= 4.0) { + return R300_TX_MAX_ANISO_4_TO_1; + } else if (anisotropy >= 2.0) { + return R300_TX_MAX_ANISO_2_TO_1; + } else { + return R300_TX_MAX_ANISO_1_TO_1; + } +} + +/** + * Set the texture magnification and minification modes. + * + * \param t Texture whose filter modes are to be set + * \param minf Texture minification mode + * \param magf Texture magnification mode + * \param anisotropy Maximum anisotropy level + */ +static void r300SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy) +{ + /* Force revalidation to account for switches from/to mipmapping. */ + t->validated = GL_FALSE; + + t->pp_txfilter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK); + t->pp_txfilter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY; + + /* Note that EXT_texture_filter_anisotropic is extremely vague about + * how anisotropic filtering interacts with the "normal" filter modes. + * When anisotropic filtering is enabled, we override min and mag + * filter settings completely. This includes driconf's settings. + */ + if (anisotropy >= 2.0 && (minf != GL_NEAREST) && (magf != GL_NEAREST)) { + t->pp_txfilter |= R300_TX_MAG_FILTER_ANISO + | R300_TX_MIN_FILTER_ANISO + | R300_TX_MIN_FILTER_MIP_LINEAR + | aniso_filter(anisotropy); + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "Using maximum anisotropy of %f\n", anisotropy); + return; + } + + switch (minf) { + case GL_NEAREST: + t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST; + break; + case GL_LINEAR: + t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR; + break; + case GL_NEAREST_MIPMAP_NEAREST: + t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST; + break; + case GL_NEAREST_MIPMAP_LINEAR: + t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR; + break; + case GL_LINEAR_MIPMAP_NEAREST: + t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST; + break; + case GL_LINEAR_MIPMAP_LINEAR: + t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR; + break; + } + + /* Note we don't have 3D mipmaps so only use the mag filter setting + * to set the 3D texture filter mode. + */ + switch (magf) { + case GL_NEAREST: + t->pp_txfilter |= R300_TX_MAG_FILTER_NEAREST; + break; + case GL_LINEAR: + t->pp_txfilter |= R300_TX_MAG_FILTER_LINEAR; + break; + } +} + +static void r300SetTexBorderColor(radeonTexObjPtr t, GLubyte c[4]) +{ + t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]); +} + +/** + * Changes variables and flags for a state update, which will happen at the + * next UpdateTextureState + */ + +static void r300TexParameter(GLcontext * ctx, GLenum target, + struct gl_texture_object *texObj, + GLenum pname, const GLfloat * params) +{ + radeonTexObj* t = radeon_tex_obj(texObj); + + if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + fprintf(stderr, "%s( %s )\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(pname)); + } + + switch (pname) { + case GL_TEXTURE_MIN_FILTER: + case GL_TEXTURE_MAG_FILTER: + case GL_TEXTURE_MAX_ANISOTROPY_EXT: + r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy); + break; + + case GL_TEXTURE_WRAP_S: + case GL_TEXTURE_WRAP_T: + case GL_TEXTURE_WRAP_R: + r300UpdateTexWrap(t); + break; + + case GL_TEXTURE_BORDER_COLOR: + r300SetTexBorderColor(t, texObj->_BorderChan); + break; + + case GL_TEXTURE_BASE_LEVEL: + case GL_TEXTURE_MAX_LEVEL: + case GL_TEXTURE_MIN_LOD: + case GL_TEXTURE_MAX_LOD: + /* This isn't the most efficient solution but there doesn't appear to + * be a nice alternative. Since there's no LOD clamping, + * we just have to rely on loading the right subset of mipmap levels + * to simulate a clamped LOD. + */ + if (t->mt) { + radeon_miptree_unreference(t->mt); + t->mt = 0; + t->validated = GL_FALSE; + } + break; + + case GL_DEPTH_TEXTURE_MODE: + if (!texObj->Image[0][texObj->BaseLevel]) + return; + if (texObj->Image[0][texObj->BaseLevel]->TexFormat->BaseFormat + == GL_DEPTH_COMPONENT) { + r300SetDepthTexMode(texObj); + break; + } else { + /* If the texture isn't a depth texture, changing this + * state won't cause any changes to the hardware. + * Don't force a flush of texture state. + */ + return; + } + + default: + return; + } +} + +static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + radeonTexObj* t = radeon_tex_obj(texObj); + + if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, + (void *)texObj, + _mesa_lookup_enum_by_nr(texObj->Target)); + } + + if (rmesa) { + int i; + radeon_firevertices(&rmesa->radeon); + + for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i) + if (rmesa->hw.textures[i] == t) + rmesa->hw.textures[i] = 0; + } + + if (t->bo) { + radeon_bo_unref(t->bo); + t->bo = NULL; + } + + if (t->mt) { + radeon_miptree_unreference(t->mt); + t->mt = 0; + } + _mesa_delete_texture_object(ctx, texObj); +} + +/** + * Allocate a new texture object. + * Called via ctx->Driver.NewTextureObject. + * Note: this function will be called during context creation to + * allocate the default texture objects. + * Fixup MaxAnisotropy according to user preference. + */ +static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, + GLuint name, + GLenum target) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj); + + + if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, + t, _mesa_lookup_enum_by_nr(target)); + } + + _mesa_initialize_texture_object(&t->base, name, target); + t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy; + + /* Initialize hardware state */ + r300UpdateTexWrap(t); + r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy); + r300SetTexBorderColor(t, t->base._BorderChan); + + return &t->base; +} + +void r300InitTextureFuncs(struct dd_function_table *functions) +{ + /* Note: we only plug in the functions we implement in the driver + * since _mesa_init_driver_functions() was already called. + */ + functions->NewTextureImage = radeonNewTextureImage; + functions->FreeTexImageData = radeonFreeTexImageData; + functions->MapTexture = radeonMapTexture; + functions->UnmapTexture = radeonUnmapTexture; + + functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa; + functions->TexImage1D = radeonTexImage1D; + functions->TexImage2D = radeonTexImage2D; + functions->TexImage3D = radeonTexImage3D; + functions->TexSubImage1D = radeonTexSubImage1D; + functions->TexSubImage2D = radeonTexSubImage2D; + functions->TexSubImage3D = radeonTexSubImage3D; + functions->GetTexImage = radeonGetTexImage; + functions->GetCompressedTexImage = radeonGetCompressedTexImage; + functions->NewTextureObject = r300NewTextureObject; + functions->DeleteTexture = r300DeleteTexture; + functions->IsTextureResident = driIsTextureResident; + + functions->TexParameter = r300TexParameter; + + functions->CompressedTexImage2D = radeonCompressedTexImage2D; + functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; + + functions->GenerateMipmap = radeonGenerateMipmap; + + driInitTextureFormats(); +} diff --git a/src/mesa/drivers/dri/r600/r600_tex.h b/src/mesa/drivers/dri/r600/r600_tex.h new file mode 100644 index 00000000000..1d7d396f6d0 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_tex.h @@ -0,0 +1,54 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#ifndef __r600_TEX_H__ +#define __r600_TEX_H__ + +extern void r300SetDepthTexMode(struct gl_texture_object *tObj); + +extern void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target, + __DRIdrawable *dPriv); + +extern void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, + GLint format, __DRIdrawable *dPriv); + +extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname, + unsigned long long offset, GLint depth, + GLuint pitch); + +extern GLboolean r300ValidateBuffers(GLcontext * ctx); + +extern void r300InitTextureFuncs(struct dd_function_table *functions); + +#endif /* __r600_TEX_H__ */ diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c new file mode 100644 index 00000000000..3168d5a8b2a --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -0,0 +1,488 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/** + * \file + * + * \author Keith Whitwell <[email protected]> + * + * \todo Enable R300 texture tiling code? + */ + +#include "main/glheader.h" +#include "main/imports.h" +#include "main/context.h" +#include "main/macros.h" +#include "main/texformat.h" +#include "main/teximage.h" +#include "main/texobj.h" +#include "main/enums.h" + +#include "r600_context.h" +#include "r600_state.h" +#include "r600_ioctl.h" +#include "radeon_mipmap_tree.h" +#include "r600_tex.h" +#include "r600_reg.h" + +#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5 \ + || ((f) >= MESA_FORMAT_RGBA_FLOAT32 && \ + (f) <= MESA_FORMAT_INTENSITY_FLOAT16)) \ + && tx_table[f].flag ) + +#define _ASSIGN(entry, format) \ + [ MESA_FORMAT_ ## entry ] = { format, 0, 1} + +/* + * Note that the _REV formats are the same as the non-REV formats. This is + * because the REV and non-REV formats are identical as a byte string, but + * differ when accessed as 16-bit or 32-bit words depending on the endianness of + * the host. Since the textures are transferred to the R300 as a byte string + * (i.e. without any byte-swapping), the R300 sees the REV and non-REV formats + * identically. -- paulus + */ + +static const struct tx_table { + GLuint format, filter, flag; +} tx_table[] = { + /* *INDENT-OFF* */ +#ifdef MESA_LITTLE_ENDIAN + _ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)), + _ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)), + _ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)), + _ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)), +#else + _ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)), + _ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)), + _ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)), + _ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)), +#endif + _ASSIGN(RGB888, R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)), + _ASSIGN(RGB565, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), + _ASSIGN(RGB565_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)), + _ASSIGN(ARGB4444, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)), + _ASSIGN(ARGB4444_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)), + _ASSIGN(ARGB1555, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)), + _ASSIGN(ARGB1555_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)), + _ASSIGN(AL88, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)), + _ASSIGN(AL88_REV, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)), + _ASSIGN(RGB332, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z3Y3X2)), + _ASSIGN(A8, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)), + _ASSIGN(L8, R300_EASY_TX_FORMAT(X, X, X, ONE, X8)), + _ASSIGN(I8, R300_EASY_TX_FORMAT(X, X, X, X, X8)), + _ASSIGN(CI8, R300_EASY_TX_FORMAT(X, X, X, X, X8)), + _ASSIGN(YCBCR, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE), + _ASSIGN(YCBCR_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE), + _ASSIGN(RGB_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1)), + _ASSIGN(RGBA_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1)), + _ASSIGN(RGBA_DXT3, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3)), + _ASSIGN(RGBA_DXT5, R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5)), + _ASSIGN(RGBA_FLOAT32, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R32G32B32A32)), + _ASSIGN(RGBA_FLOAT16, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16)), + _ASSIGN(RGB_FLOAT32, 0xffffffff), + _ASSIGN(RGB_FLOAT16, 0xffffffff), + _ASSIGN(ALPHA_FLOAT32, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I32)), + _ASSIGN(ALPHA_FLOAT16, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I16)), + _ASSIGN(LUMINANCE_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I32)), + _ASSIGN(LUMINANCE_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I16)), + _ASSIGN(LUMINANCE_ALPHA_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I32A32)), + _ASSIGN(LUMINANCE_ALPHA_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I16A16)), + _ASSIGN(INTENSITY_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, X, FL_I32)), + _ASSIGN(INTENSITY_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, X, FL_I16)), + _ASSIGN(Z16, R300_EASY_TX_FORMAT(X, X, X, X, X16)), + _ASSIGN(Z24_S8, R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8)), + _ASSIGN(Z32, R300_EASY_TX_FORMAT(X, X, X, X, X32)), + /* *INDENT-ON* */ +}; + +#undef _ASSIGN + +void r300SetDepthTexMode(struct gl_texture_object *tObj) +{ + static const GLuint formats[3][3] = { + { + R300_EASY_TX_FORMAT(X, X, X, ONE, X16), + R300_EASY_TX_FORMAT(X, X, X, X, X16), + R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X16), + }, + { + R300_EASY_TX_FORMAT(X, X, X, ONE, X24_Y8), + R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8), + R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X24_Y8), + }, + { + R300_EASY_TX_FORMAT(X, X, X, ONE, X32), + R300_EASY_TX_FORMAT(X, X, X, X, X32), + R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X32), + }, + }; + const GLuint *format; + radeonTexObjPtr t; + + if (!tObj) + return; + + t = radeon_tex_obj(tObj); + + switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) { + case MESA_FORMAT_Z16: + format = formats[0]; + break; + case MESA_FORMAT_Z24_S8: + format = formats[1]; + break; + case MESA_FORMAT_Z32: + format = formats[2]; + break; + default: + /* Error...which should have already been caught by higher + * levels of Mesa. + */ + ASSERT(0); + return; + } + + switch (tObj->DepthMode) { + case GL_LUMINANCE: + t->pp_txformat = format[0]; + break; + case GL_INTENSITY: + t->pp_txformat = format[1]; + break; + case GL_ALPHA: + t->pp_txformat = format[2]; + break; + default: + /* Error...which should have already been caught by higher + * levels of Mesa. + */ + ASSERT(0); + return; + } +} + + +/** + * Compute the cached hardware register values for the given texture object. + * + * \param rmesa Context pointer + * \param t the r300 texture object + */ +static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t) +{ + const struct gl_texture_image *firstImage; + int firstlevel = t->mt ? t->mt->firstLevel : 0; + + firstImage = t->base.Image[0][firstlevel]; + + if (!t->image_override + && VALID_FORMAT(firstImage->TexFormat->MesaFormat)) { + if (firstImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) { + r300SetDepthTexMode(&t->base); + } else { + t->pp_txformat = tx_table[firstImage->TexFormat->MesaFormat].format; + } + + t->pp_txfilter |= tx_table[firstImage->TexFormat->MesaFormat].filter; + } else if (!t->image_override) { + _mesa_problem(NULL, "unexpected texture format in %s", + __FUNCTION__); + return; + } + + if (t->image_override && t->bo) + return; + + t->pp_txsize = (((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT) + | ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT) + | ((firstImage->DepthLog2) << R300_TX_DEPTHMASK_SHIFT) + | ((t->mt->lastLevel - t->mt->firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT)); + + t->tile_bits = 0; + + if (t->base.Target == GL_TEXTURE_CUBE_MAP) + t->pp_txformat |= R300_TX_FORMAT_CUBIC_MAP; + if (t->base.Target == GL_TEXTURE_3D) + t->pp_txformat |= R300_TX_FORMAT_3D; + + + if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) { + unsigned int align = (64 / t->mt->bpp) - 1; + t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN; + if (!t->image_override) + t->pp_txpitch = ((firstImage->Width + align) & ~align) - 1; + } + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + if (firstImage->Width > 2048) + t->pp_txpitch |= R500_TXWIDTH_BIT11; + if (firstImage->Height > 2048) + t->pp_txpitch |= R500_TXHEIGHT_BIT11; + } +} + +/** + * Ensure the given texture is ready for rendering. + * + * Mostly this means populating the texture object's mipmap tree. + */ +static GLboolean r300_validate_texture(GLcontext * ctx, struct gl_texture_object *texObj) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + radeonTexObj *t = radeon_tex_obj(texObj); + + if (!radeon_validate_texture_miptree(ctx, texObj)) + return GL_FALSE; + + /* Configure the hardware registers (more precisely, the cached version + * of the hardware registers). */ + setup_hardware_state(rmesa, t); + + t->validated = GL_TRUE; + return GL_TRUE; +} + +/** + * Ensure all enabled and complete textures are uploaded along with any buffers being used. + */ +GLboolean r300ValidateBuffers(GLcontext * ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct radeon_renderbuffer *rrb; + int i; + + radeon_validate_reset_bos(&rmesa->radeon); + + rrb = radeon_get_colorbuffer(&rmesa->radeon); + /* color buffer */ + if (rrb && rrb->bo) { + radeon_validate_bo(&rmesa->radeon, rrb->bo, + 0, RADEON_GEM_DOMAIN_VRAM); + } + + /* depth buffer */ + rrb = radeon_get_depthbuffer(&rmesa->radeon); + if (rrb && rrb->bo) { + radeon_validate_bo(&rmesa->radeon, rrb->bo, + 0, RADEON_GEM_DOMAIN_VRAM); + } + + for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) { + radeonTexObj *t; + + if (!ctx->Texture.Unit[i]._ReallyEnabled) + continue; + + if (!r300_validate_texture(ctx, ctx->Texture.Unit[i]._Current)) { + _mesa_warning(ctx, + "failed to validate texture for unit %d.\n", + i); + } + t = radeon_tex_obj(ctx->Texture.Unit[i]._Current); + if (t->image_override && t->bo) + radeon_validate_bo(&rmesa->radeon, t->bo, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); + + else if (t->mt->bo) + radeon_validate_bo(&rmesa->radeon, t->mt->bo, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); + } + if (rmesa->radeon.dma.current) + radeon_validate_bo(&rmesa->radeon, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0); + + return radeon_revalidate_bos(ctx); +} + +void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, + unsigned long long offset, GLint depth, GLuint pitch) +{ + r300ContextPtr rmesa = pDRICtx->driverPrivate; + struct gl_texture_object *tObj = + _mesa_lookup_texture(rmesa->radeon.glCtx, texname); + radeonTexObjPtr t = radeon_tex_obj(tObj); + uint32_t pitch_val; + + if (!tObj) + return; + + t->image_override = GL_TRUE; + + if (!offset) + return; + + t->bo = NULL; + t->override_offset = offset; + t->pp_txpitch &= (1 << 13) -1; + pitch_val = pitch; + + switch (depth) { + case 32: + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); + t->pp_txfilter |= tx_table[2].filter; + pitch_val /= 4; + break; + case 24: + default: + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); + t->pp_txfilter |= tx_table[4].filter; + pitch_val /= 4; + break; + case 16: + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); + t->pp_txfilter |= tx_table[5].filter; + pitch_val /= 2; + break; + } + pitch_val--; + + t->pp_txpitch |= pitch_val; +} + +void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, __DRIdrawable *dPriv) +{ + struct gl_texture_unit *texUnit; + struct gl_texture_object *texObj; + struct gl_texture_image *texImage; + struct radeon_renderbuffer *rb; + radeon_texture_image *rImage; + radeonContextPtr radeon; + r300ContextPtr rmesa; + struct radeon_framebuffer *rfb; + radeonTexObjPtr t; + uint32_t pitch_val; + uint32_t internalFormat, type, format; + + type = GL_BGRA; + format = GL_UNSIGNED_BYTE; + internalFormat = (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT ? 3 : 4); + + radeon = pDRICtx->driverPrivate; + rmesa = pDRICtx->driverPrivate; + + rfb = dPriv->driverPrivate; + texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; + texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target); + texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0); + + rImage = get_radeon_texture_image(texImage); + t = radeon_tex_obj(texObj); + if (t == NULL) { + return; + } + + radeon_update_renderbuffers(pDRICtx, dPriv); + /* back & depth buffer are useless free them right away */ + rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer; + if (rb && rb->bo) { + radeon_bo_unref(rb->bo); + rb->bo = NULL; + } + rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer; + if (rb && rb->bo) { + radeon_bo_unref(rb->bo); + rb->bo = NULL; + } + rb = rfb->color_rb[0]; + if (rb->bo == NULL) { + /* Failed to BO for the buffer */ + return; + } + + _mesa_lock_texture(radeon->glCtx, texObj); + if (t->bo) { + radeon_bo_unref(t->bo); + t->bo = NULL; + } + if (rImage->bo) { + radeon_bo_unref(rImage->bo); + rImage->bo = NULL; + } + if (t->mt) { + radeon_miptree_unreference(t->mt); + t->mt = NULL; + } + if (rImage->mt) { + radeon_miptree_unreference(rImage->mt); + rImage->mt = NULL; + } + fprintf(stderr,"settexbuf %dx%d@%d %d targ %x format %x\n", rb->width, rb->height, rb->cpp, rb->pitch, target, format); + _mesa_init_teximage_fields(radeon->glCtx, target, texImage, + rb->width, rb->height, 1, 0, rb->cpp); + texImage->RowStride = rb->pitch / rb->cpp; + texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx, + internalFormat, + type, format, 0); + rImage->bo = rb->bo; + radeon_bo_ref(rImage->bo); + t->bo = rb->bo; + radeon_bo_ref(t->bo); + t->tile_bits = 0; + t->image_override = GL_TRUE; + t->override_offset = 0; + t->pp_txpitch &= (1 << 13) -1; + pitch_val = rb->pitch; + switch (rb->cpp) { + case 4: + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); + t->pp_txfilter |= tx_table[2].filter; + pitch_val /= 4; + break; + case 3: + default: + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); + t->pp_txfilter |= tx_table[4].filter; + pitch_val /= 4; + break; + case 2: + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); + t->pp_txfilter |= tx_table[5].filter; + pitch_val /= 2; + break; + } + pitch_val--; + t->pp_txsize = ((rb->width - 1) << R300_TX_WIDTHMASK_SHIFT) | + ((rb->height - 1) << R300_TX_HEIGHTMASK_SHIFT); + t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN; + t->pp_txpitch |= pitch_val; + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + if (rb->width > 2048) + t->pp_txpitch |= R500_TXWIDTH_BIT11; + if (rb->height > 2048) + t->pp_txpitch |= R500_TXHEIGHT_BIT11; + } + t->validated = GL_TRUE; + _mesa_unlock_texture(radeon->glCtx, texObj); + return; +} + +void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) +{ + r300SetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv); +} diff --git a/src/mesa/drivers/dri/r600/r600_vertprog.c b/src/mesa/drivers/dri/r600/r600_vertprog.c new file mode 100644 index 00000000000..946d8beb467 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_vertprog.c @@ -0,0 +1,1479 @@ +/************************************************************************** + +Copyright (C) 2005 Aapo Tahkola <[email protected]> +Copyright (C) 2008 Oliver McFadden <[email protected]> + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* Radeon R5xx Acceleration, Revision 1.2 */ + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" +#include "shader/program.h" +#include "shader/prog_instruction.h" +#include "shader/prog_parameter.h" +#include "shader/prog_statevars.h" +#include "tnl/tnl.h" + +#include "r600_context.h" + +/* TODO: Get rid of t_src_class call */ +#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ + ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \ + t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \ + (t_src_class(a.File) == PVS_SRC_REG_INPUT && \ + t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \ + +/* + * Take an already-setup and valid source then swizzle it appropriately to + * obtain a constant ZERO or ONE source. + */ +#define __CONST(x, y) \ + (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_src_class(src[x].File), \ + VSF_FLAG_NONE) | (src[x].RelAddr << 4)) + +#define FREE_TEMPS() \ + do { \ + int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \ + if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \ + WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \ + vp->native = GL_FALSE; \ + } \ + u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ + } while (0) + +int r300VertexProgUpdateParams(GLcontext * ctx, + struct r300_vertex_program_cont *vp, float *dst) +{ + int pi; + struct gl_vertex_program *mesa_vp = &vp->mesa_program; + float *dst_o = dst; + struct gl_program_parameter_list *paramList; + + if (mesa_vp->IsNVProgram) { + _mesa_load_tracked_matrices(ctx); + + for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) { + *dst++ = ctx->VertexProgram.Parameters[pi][0]; + *dst++ = ctx->VertexProgram.Parameters[pi][1]; + *dst++ = ctx->VertexProgram.Parameters[pi][2]; + *dst++ = ctx->VertexProgram.Parameters[pi][3]; + } + return dst - dst_o; + } + + assert(mesa_vp->Base.Parameters); + _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters); + + if (mesa_vp->Base.Parameters->NumParameters * 4 > + VSF_MAX_FRAGMENT_LENGTH) { + fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__); + _mesa_exit(-1); + } + + paramList = mesa_vp->Base.Parameters; + for (pi = 0; pi < paramList->NumParameters; pi++) { + switch (paramList->Parameters[pi].Type) { + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); + case PROGRAM_CONSTANT: + *dst++ = paramList->ParameterValues[pi][0]; + *dst++ = paramList->ParameterValues[pi][1]; + *dst++ = paramList->ParameterValues[pi][2]; + *dst++ = paramList->ParameterValues[pi][3]; + break; + default: + _mesa_problem(NULL, "Bad param type in %s", + __FUNCTION__); + } + + } + + return dst - dst_o; +} + +static unsigned long t_dst_mask(GLuint mask) +{ + /* WRITEMASK_* is equivalent to VSF_FLAG_* */ + return mask & VSF_FLAG_ALL; +} + +static unsigned long t_dst_class(gl_register_file file) +{ + + switch (file) { + case PROGRAM_TEMPORARY: + return PVS_DST_REG_TEMPORARY; + case PROGRAM_OUTPUT: + return PVS_DST_REG_OUT; + case PROGRAM_ADDRESS: + return PVS_DST_REG_A0; + /* + case PROGRAM_INPUT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + _mesa_exit(-1); + return -1; + } +} + +static unsigned long t_dst_index(struct r300_vertex_program *vp, + struct prog_dst_register *dst) +{ + if (dst->File == PROGRAM_OUTPUT) + return vp->outputs[dst->Index]; + + return dst->Index; +} + +static unsigned long t_src_class(gl_register_file file) +{ + switch (file) { + case PROGRAM_TEMPORARY: + return PVS_SRC_REG_TEMPORARY; + case PROGRAM_INPUT: + return PVS_SRC_REG_INPUT; + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + case PROGRAM_STATE_VAR: + return PVS_SRC_REG_CONSTANT; + /* + case PROGRAM_OUTPUT: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + _mesa_exit(-1); + return -1; + } +} + +static INLINE unsigned long t_swizzle(GLubyte swizzle) +{ +/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + return swizzle; +} + +#if 0 +static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller) +{ + int i; + + if (vp == NULL) { + fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, + caller); + return; + } + + fprintf(stderr, "%s:<", caller); + for (i = 0; i < VERT_ATTRIB_MAX; i++) + fprintf(stderr, "%d ", vp->inputs[i]); + fprintf(stderr, ">\n"); + +} +#endif + +static unsigned long t_src_index(struct r300_vertex_program *vp, + struct prog_src_register *src) +{ + int i; + int max_reg = -1; + + if (src->File == PROGRAM_INPUT) { + if (vp->inputs[src->Index] != -1) + return vp->inputs[src->Index]; + + for (i = 0; i < VERT_ATTRIB_MAX; i++) + if (vp->inputs[i] > max_reg) + max_reg = vp->inputs[i]; + + vp->inputs[src->Index] = max_reg + 1; + + //vp_dump_inputs(vp, __FUNCTION__); + + return vp->inputs[src->Index]; + } else { + if (src->Index < 0) { + fprintf(stderr, + "negative offsets for indirect addressing do not work.\n"); + return 0; + } + return src->Index; + } +} + +/* these two functions should probably be merged... */ + +static unsigned long t_src(struct r300_vertex_program *vp, + struct prog_src_register *src) +{ + /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 1)), + t_swizzle(GET_SWZ(src->Swizzle, 2)), + t_swizzle(GET_SWZ(src->Swizzle, 3)), + t_src_class(src->File), + src->NegateBase) | (src->RelAddr << 4); +} + +static unsigned long t_src_scalar(struct r300_vertex_program *vp, + struct prog_src_register *src) +{ + /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_src_class(src->File), + src-> + NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src->RelAddr << 4); +} + +static GLboolean valid_dst(struct r300_vertex_program *vp, + struct prog_dst_register *dst) +{ + if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { + return GL_FALSE; + } else if (dst->File == PROGRAM_ADDRESS) { + assert(dst->Index == 0); + } + + return GL_TRUE; +} + +static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + + inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), + t_src_class(src[0].File), + (!src[0]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[3] = 0; + + return inst; +} + +static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} + + inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + SWIZZLE_ZERO, + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[2] = + PVS_SRC_OPERAND(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, + t_src_class(src[1].File), + src[1]. + NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + (src[1].RelAddr << 4); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} + inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + PVS_SRC_SELECT_FORCE_1, + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3], + int *u_temp_i) +{ + /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} + ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ + + inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, + GL_FALSE, + GL_FALSE, + *u_temp_i, + t_dst_mask(vpi->DstReg.WriteMask), + PVS_DST_REG_TEMPORARY); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + inst += 4; + + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = PVS_SRC_OPERAND(*u_temp_i, + PVS_SRC_SELECT_X, + PVS_SRC_SELECT_Y, + PVS_SRC_SELECT_Z, + PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY, + /* Not 100% sure about this */ + (!src[0]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE + /*VSF_FLAG_ALL */ ); + inst[3] = __CONST(0, SWIZZLE_ZERO); + (*u_temp_i)--; + + return inst; +} + +static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} + + inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + + inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + /* NOTE: Users swizzling might not work. */ + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + + return inst; +} + +static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, + GL_FALSE, + GL_TRUE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = t_src(vp, &src[2]); + + return inst; +} + +static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = t_src_scalar(vp, &src[1]); + + return inst; +} + +static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + +#if 0 + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[1].RelAddr << 4); + inst[3] = 0; +#else + inst[0] = + PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ONE); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[1].RelAddr << 4); +#endif + + return inst; +} + +static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3], + int *u_temp_i) +{ + /* mul r0, r1.yzxw, r2.zxyw + mad r0, -r2.yzxw, r1.zxyw, r0 + */ + + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + GL_FALSE, + GL_FALSE, + *u_temp_i, + t_dst_mask(vpi->DstReg.WriteMask), + PVS_DST_REG_TEMPORARY); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W + t_src_class(src[1].File), + src[1]. + NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[1].RelAddr << 4); + inst[3] = __CONST(1, SWIZZLE_ZERO); + inst += 4; + + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W + t_src_class(src[1].File), + (!src[1]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[1].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[3] = + PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, + PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, + PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE); + + (*u_temp_i)--; + + return inst; +} + +static void t_inputs_outputs(struct r300_vertex_program *vp) +{ + int i; + int cur_reg = 0; + + for (i = 0; i < VERT_ATTRIB_MAX; i++) + vp->inputs[i] = -1; + + for (i = 0; i < VERT_RESULT_MAX; i++) + vp->outputs[i] = -1; + + assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)); + + if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) { + vp->outputs[VERT_RESULT_HPOS] = cur_reg++; + } + + if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) { + vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; + } + + if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) { + vp->outputs[VERT_RESULT_COL0] = cur_reg++; + } + + if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) { + vp->outputs[VERT_RESULT_COL1] = + vp->outputs[VERT_RESULT_COL0] + 1; + cur_reg = vp->outputs[VERT_RESULT_COL1] + 1; + } + + if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) { + vp->outputs[VERT_RESULT_BFC0] = + vp->outputs[VERT_RESULT_COL0] + 2; + cur_reg = vp->outputs[VERT_RESULT_BFC0] + 2; + } + + if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { + vp->outputs[VERT_RESULT_BFC1] = + vp->outputs[VERT_RESULT_COL0] + 3; + cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1; + } + + for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { + if (vp->key.OutputsWritten & (1 << i)) { + vp->outputs[i] = cur_reg++; + } + } + + if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) { + vp->outputs[VERT_RESULT_FOGC] = cur_reg++; + } +} + +static void r300TranslateVertexShader(struct r300_vertex_program *vp, + struct prog_instruction *vpi) +{ + int i; + GLuint *inst; + unsigned long num_operands; + /* Initial value should be last tmp reg that hw supports. + Strangely enough r300 doesnt mind even though these would be out of range. + Smart enough to realize that it doesnt need it? */ + int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; + struct prog_src_register src[3]; + + vp->pos_end = 0; /* Not supported yet */ + vp->program.length = 0; + /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */ + vp->translated = GL_TRUE; + vp->native = GL_TRUE; + + t_inputs_outputs(vp); + + for (inst = vp->program.body.i; vpi->Opcode != OPCODE_END; + vpi++, inst += 4) { + + FREE_TEMPS(); + + if (!valid_dst(vp, &vpi->DstReg)) { + /* redirect result to unused temp */ + vpi->DstReg.File = PROGRAM_TEMPORARY; + vpi->DstReg.Index = u_temp_i; + } + + num_operands = _mesa_num_inst_src_regs(vpi->Opcode); + + /* copy the sources (src) from mesa into a local variable... is this needed? */ + for (i = 0; i < num_operands; i++) { + src[i] = vpi->SrcReg[i]; + } + + if (num_operands == 3) { /* TODO: scalars */ + if (CMP_SRCS(src[1], src[2]) + || CMP_SRCS(src[0], src[2])) { + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + u_temp_i, + VSF_FLAG_ALL, + PVS_DST_REG_TEMPORARY); + inst[1] = + PVS_SRC_OPERAND(t_src_index(vp, &src[2]), + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_Z, + SWIZZLE_W, + t_src_class(src[2].File), + VSF_FLAG_NONE) | (src[2]. + RelAddr << + 4); + inst[2] = __CONST(2, SWIZZLE_ZERO); + inst[3] = __CONST(2, SWIZZLE_ZERO); + inst += 4; + + src[2].File = PROGRAM_TEMPORARY; + src[2].Index = u_temp_i; + src[2].RelAddr = 0; + u_temp_i--; + } + } + + if (num_operands >= 2) { + if (CMP_SRCS(src[1], src[0])) { + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + u_temp_i, + VSF_FLAG_ALL, + PVS_DST_REG_TEMPORARY); + inst[1] = + PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_Z, + SWIZZLE_W, + t_src_class(src[0].File), + VSF_FLAG_NONE) | (src[0]. + RelAddr << + 4); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + inst += 4; + + src[0].File = PROGRAM_TEMPORARY; + src[0].Index = u_temp_i; + src[0].RelAddr = 0; + u_temp_i--; + } + } + + switch (vpi->Opcode) { + case OPCODE_ABS: + inst = r300TranslateOpcodeABS(vp, vpi, inst, src); + break; + case OPCODE_ADD: + inst = r300TranslateOpcodeADD(vp, vpi, inst, src); + break; + case OPCODE_ARL: + inst = r300TranslateOpcodeARL(vp, vpi, inst, src); + break; + case OPCODE_DP3: + inst = r300TranslateOpcodeDP3(vp, vpi, inst, src); + break; + case OPCODE_DP4: + inst = r300TranslateOpcodeDP4(vp, vpi, inst, src); + break; + case OPCODE_DPH: + inst = r300TranslateOpcodeDPH(vp, vpi, inst, src); + break; + case OPCODE_DST: + inst = r300TranslateOpcodeDST(vp, vpi, inst, src); + break; + case OPCODE_EX2: + inst = r300TranslateOpcodeEX2(vp, vpi, inst, src); + break; + case OPCODE_EXP: + inst = r300TranslateOpcodeEXP(vp, vpi, inst, src); + break; + case OPCODE_FLR: + inst = r300TranslateOpcodeFLR(vp, vpi, inst, src, /* FIXME */ + &u_temp_i); + break; + case OPCODE_FRC: + inst = r300TranslateOpcodeFRC(vp, vpi, inst, src); + break; + case OPCODE_LG2: + inst = r300TranslateOpcodeLG2(vp, vpi, inst, src); + break; + case OPCODE_LIT: + inst = r300TranslateOpcodeLIT(vp, vpi, inst, src); + break; + case OPCODE_LOG: + inst = r300TranslateOpcodeLOG(vp, vpi, inst, src); + break; + case OPCODE_MAD: + inst = r300TranslateOpcodeMAD(vp, vpi, inst, src); + break; + case OPCODE_MAX: + inst = r300TranslateOpcodeMAX(vp, vpi, inst, src); + break; + case OPCODE_MIN: + inst = r300TranslateOpcodeMIN(vp, vpi, inst, src); + break; + case OPCODE_MOV: + inst = r300TranslateOpcodeMOV(vp, vpi, inst, src); + break; + case OPCODE_MUL: + inst = r300TranslateOpcodeMUL(vp, vpi, inst, src); + break; + case OPCODE_POW: + inst = r300TranslateOpcodePOW(vp, vpi, inst, src); + break; + case OPCODE_RCP: + inst = r300TranslateOpcodeRCP(vp, vpi, inst, src); + break; + case OPCODE_RSQ: + inst = r300TranslateOpcodeRSQ(vp, vpi, inst, src); + break; + case OPCODE_SGE: + inst = r300TranslateOpcodeSGE(vp, vpi, inst, src); + break; + case OPCODE_SLT: + inst = r300TranslateOpcodeSLT(vp, vpi, inst, src); + break; + case OPCODE_SUB: + inst = r300TranslateOpcodeSUB(vp, vpi, inst, src); + break; + case OPCODE_SWZ: + inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src); + break; + case OPCODE_XPD: + inst = r300TranslateOpcodeXPD(vp, vpi, inst, src, /* FIXME */ + &u_temp_i); + break; + default: + assert(0); + break; + } + } + + /* Some outputs may be artificially added, to match the inputs + of the fragment program. Blank the outputs here. */ + for (i = 0; i < VERT_RESULT_MAX; i++) { + if (vp->key.OutputsAdded & (1 << i)) { + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + vp->outputs[i], + VSF_FLAG_ALL, + PVS_DST_REG_OUT); + inst[1] = __CONST(0, SWIZZLE_ZERO); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + inst += 4; + } + } + + vp->program.length = (inst - vp->program.body.i); + if (vp->program.length >= VSF_MAX_FRAGMENT_LENGTH) { + vp->program.length = 0; + vp->native = GL_FALSE; + } +#if 0 + fprintf(stderr, "hw program:\n"); + for (i = 0; i < vp->program.length; i++) + fprintf(stderr, "%08x\n", vp->program.body.d[i]); +#endif +} + +/* DP4 version seems to trigger some hw peculiarity */ +//#define PREFER_DP4 + +static void position_invariant(struct gl_program *prog) +{ + struct prog_instruction *vpi; + struct gl_program_parameter_list *paramList; + int i; + + gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 }; + + /* tokens[4] = matrix modifier */ +#ifdef PREFER_DP4 + tokens[4] = 0; /* not transposed or inverted */ +#else + tokens[4] = STATE_MATRIX_TRANSPOSE; +#endif + paramList = prog->Parameters; + + vpi = _mesa_alloc_instructions(prog->NumInstructions + 4); + _mesa_init_instructions(vpi, prog->NumInstructions + 4); + + for (i = 0; i < 4; i++) { + GLint idx; + tokens[2] = tokens[3] = i; /* matrix row[i]..row[i] */ + idx = _mesa_add_state_reference(paramList, tokens); +#ifdef PREFER_DP4 + vpi[i].Opcode = OPCODE_DP4; + vpi[i].StringPos = 0; + vpi[i].Data = 0; + + vpi[i].DstReg.File = PROGRAM_OUTPUT; + vpi[i].DstReg.Index = VERT_RESULT_HPOS; + vpi[i].DstReg.WriteMask = 1 << i; + vpi[i].DstReg.CondMask = COND_TR; + + vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; + vpi[i].SrcReg[0].Index = idx; + vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + vpi[i].SrcReg[1].File = PROGRAM_INPUT; + vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; + vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW; +#else + if (i == 0) + vpi[i].Opcode = OPCODE_MUL; + else + vpi[i].Opcode = OPCODE_MAD; + + vpi[i].Data = 0; + + if (i == 3) + vpi[i].DstReg.File = PROGRAM_OUTPUT; + else + vpi[i].DstReg.File = PROGRAM_TEMPORARY; + vpi[i].DstReg.Index = 0; + vpi[i].DstReg.WriteMask = 0xf; + vpi[i].DstReg.CondMask = COND_TR; + + vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; + vpi[i].SrcReg[0].Index = idx; + vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + vpi[i].SrcReg[1].File = PROGRAM_INPUT; + vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; + vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i); + + if (i > 0) { + vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY; + vpi[i].SrcReg[2].Index = 0; + vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW; + } +#endif + } + + _mesa_copy_instructions(&vpi[i], prog->Instructions, + prog->NumInstructions); + + free(prog->Instructions); + + prog->Instructions = vpi; + + prog->NumInstructions += 4; + vpi = &prog->Instructions[prog->NumInstructions - 1]; + + assert(vpi->Opcode == OPCODE_END); +} + +static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog, + GLuint temp_index) +{ + struct prog_instruction *vpi; + struct prog_instruction *vpi_insert; + int i = 0; + + vpi = _mesa_alloc_instructions(prog->NumInstructions + 2); + _mesa_init_instructions(vpi, prog->NumInstructions + 2); + /* all but END */ + _mesa_copy_instructions(vpi, prog->Instructions, + prog->NumInstructions - 1); + /* END */ + _mesa_copy_instructions(&vpi[prog->NumInstructions + 1], + &prog->Instructions[prog->NumInstructions - 1], + 1); + vpi_insert = &vpi[prog->NumInstructions - 1]; + + vpi_insert[i].Opcode = OPCODE_MOV; + + vpi_insert[i].DstReg.File = PROGRAM_OUTPUT; + vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS; + vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW; + vpi_insert[i].DstReg.CondMask = COND_TR; + + vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; + vpi_insert[i].SrcReg[0].Index = temp_index; + vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + i++; + + vpi_insert[i].Opcode = OPCODE_MOV; + + vpi_insert[i].DstReg.File = PROGRAM_OUTPUT; + vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx; + vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW; + vpi_insert[i].DstReg.CondMask = COND_TR; + + vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY; + vpi_insert[i].SrcReg[0].Index = temp_index; + vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + i++; + + free(prog->Instructions); + + prog->Instructions = vpi; + + prog->NumInstructions += i; + vpi = &prog->Instructions[prog->NumInstructions - 1]; + + assert(vpi->Opcode == OPCODE_END); +} + +static void pos_as_texcoord(struct r300_vertex_program *vp, + struct gl_program *prog) +{ + struct prog_instruction *vpi; + GLuint tempregi = prog->NumTemporaries; + /* should do something else if no temps left... */ + prog->NumTemporaries++; + + for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { + if (vpi->DstReg.File == PROGRAM_OUTPUT + && vpi->DstReg.Index == VERT_RESULT_HPOS) { + vpi->DstReg.File = PROGRAM_TEMPORARY; + vpi->DstReg.Index = tempregi; + } + } + insert_wpos(vp, prog, tempregi); +} + +static struct r300_vertex_program *build_program(struct r300_vertex_program_key + *wanted_key, struct gl_vertex_program + *mesa_vp, GLint wpos_idx) +{ + struct r300_vertex_program *vp; + + vp = _mesa_calloc(sizeof(*vp)); + _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); + vp->wpos_idx = wpos_idx; + + if (mesa_vp->IsPositionInvariant) { + position_invariant(&mesa_vp->Base); + } + + if (wpos_idx > -1) { + pos_as_texcoord(vp, &mesa_vp->Base); + } + + assert(mesa_vp->Base.NumInstructions); + vp->num_temporaries = mesa_vp->Base.NumTemporaries; + r300TranslateVertexShader(vp, mesa_vp->Base.Instructions); + + return vp; +} + +static void add_outputs(struct r300_vertex_program_key *key, GLint vert) +{ + if (key->OutputsWritten & (1 << vert)) + return; + + key->OutputsWritten |= 1 << vert; + key->OutputsAdded |= 1 << vert; +} + +void r300SelectVertexShader(r300ContextPtr r300) +{ + GLcontext *ctx = ctx = r300->radeon.glCtx; + GLuint InputsRead; + struct r300_vertex_program_key wanted_key = { 0 }; + GLint i; + struct r300_vertex_program_cont *vpc; + struct r300_vertex_program *vp; + GLint wpos_idx; + + vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; + wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead; + wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten; + InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; + + wpos_idx = -1; + if (InputsRead & FRAG_BIT_WPOS) { + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (!(InputsRead & (FRAG_BIT_TEX0 << i))) + break; + + if (i == ctx->Const.MaxTextureUnits) { + fprintf(stderr, "\tno free texcoord found\n"); + _mesa_exit(-1); + } + + wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); + wpos_idx = i; + } + + add_outputs(&wanted_key, VERT_RESULT_HPOS); + + if (InputsRead & FRAG_BIT_COL0) { + add_outputs(&wanted_key, VERT_RESULT_COL0); + } + + if (InputsRead & FRAG_BIT_COL1) { + add_outputs(&wanted_key, VERT_RESULT_COL1); + } + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + add_outputs(&wanted_key, VERT_RESULT_TEX0 + i); + } + } + + if (vpc->mesa_program.IsPositionInvariant) { + /* we wan't position don't we ? */ + wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS); + } + + for (vp = vpc->progs; vp; vp = vp->next) + if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) + == 0) { + r300->selected_vp = vp; + return; + } + //_mesa_print_program(&vpc->mesa_program.Base); + + vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx); + vp->next = vpc->progs; + vpc->progs = vp; + r300->selected_vp = vp; +} diff --git a/src/mesa/drivers/dri/r600/r600_vertprog.h b/src/mesa/drivers/dri/r600/r600_vertprog.h new file mode 100644 index 00000000000..48c97face45 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_vertprog.h @@ -0,0 +1,35 @@ +#ifndef __R600_VERTPROG_H_ +#define __R600_VERTPROG_H_ + +#include "r600_reg.h" + +#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \ + (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \ + | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \ + | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \ + | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \ + | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT)) + +#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \ + (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \ + | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \ + | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \ + | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \ + | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \ + | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT)) + +#if 1 + +#define VSF_FLAG_X 1 +#define VSF_FLAG_Y 2 +#define VSF_FLAG_Z 4 +#define VSF_FLAG_W 8 +#define VSF_FLAG_XYZ (VSF_FLAG_X | VSF_FLAG_Y | VSF_FLAG_Z) +#define VSF_FLAG_ALL 0xf +#define VSF_FLAG_NONE 0 + +#endif + +#endif diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c new file mode 100644 index 00000000000..c8041b4a273 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -0,0 +1,719 @@ +/* + * Copyright 2008 Corbin Simpson <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "r700_fragprog.h" + +#include "radeon_nqssadce.h" +#include "radeon_program_alu.h" + + +static void reset_srcreg(struct prog_src_register* reg) +{ + _mesa_bzero(reg, sizeof(*reg)); + reg->Swizzle = SWIZZLE_NOOP; +} + +static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu) +{ + gl_state_index fail_value_tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0 + }; + struct prog_src_register reg = { 0, }; + + fail_value_tokens[2] = tmu; + reg.File = PROGRAM_STATE_VAR; + reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens); + reg.Swizzle = SWIZZLE_WWWW; + return reg; +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following way: + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + * + */ +static GLboolean transform_TEX( + struct radeon_transform_context *t, + struct prog_instruction* orig_inst, void* data) +{ + struct r500_fragment_program_compiler *compiler = + (struct r500_fragment_program_compiler*)data; + struct prog_instruction inst = *orig_inst; + struct prog_instruction* tgt; + GLboolean destredirect = GL_FALSE; + + if (inst.Opcode != OPCODE_TEX && + inst.Opcode != OPCODE_TXB && + inst.Opcode != OPCODE_TXP && + inst.Opcode != OPCODE_KIL) + return GL_FALSE; + + /* ARB_shadow & EXT_shadow_funcs */ + if (inst.Opcode != OPCODE_KIL && + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; + + if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { + tgt = radeonAppendInstructions(t->Program, 1); + + tgt->Opcode = OPCODE_MOV; + tgt->DstReg = inst.DstReg; + if (comparefunc == GL_ALWAYS) { + tgt->SrcReg[0].File = PROGRAM_BUILTIN; + tgt->SrcReg[0].Swizzle = SWIZZLE_1111; + } else { + tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit); + } + return GL_TRUE; + } + + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = radeonFindFreeTemporary(t); + inst.DstReg.WriteMask = WRITEMASK_XYZW; + } else if (inst.Opcode != OPCODE_KIL && inst.DstReg.File != PROGRAM_TEMPORARY) { + int tempreg = radeonFindFreeTemporary(t); + + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = tempreg; + inst.DstReg.WriteMask = WRITEMASK_XYZW; + destredirect = GL_TRUE; + } + + if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) { + int tmpreg = radeonFindFreeTemporary(t); + tgt = radeonAppendInstructions(t->Program, 1); + tgt->Opcode = OPCODE_MOV; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tmpreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tmpreg; + } + + tgt = radeonAppendInstructions(t->Program, 1); + _mesa_copy_instructions(tgt, &inst, 1); + + if (inst.Opcode != OPCODE_KIL && + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; + GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; + int rcptemp = radeonFindFreeTemporary(t); + int pass, fail; + + tgt = radeonAppendInstructions(t->Program, 3); + + tgt[0].Opcode = OPCODE_RCP; + tgt[0].DstReg.File = PROGRAM_TEMPORARY; + tgt[0].DstReg.Index = rcptemp; + tgt[0].DstReg.WriteMask = WRITEMASK_W; + tgt[0].SrcReg[0] = inst.SrcReg[0]; + tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; + + tgt[1].Opcode = OPCODE_MAD; + tgt[1].DstReg = inst.DstReg; + tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask; + tgt[1].SrcReg[0] = inst.SrcReg[0]; + tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ; + tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[1].Index = rcptemp; + tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW; + tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[2].Index = inst.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW; + else + tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW; + + tgt[2].Opcode = OPCODE_CMP; + tgt[2].DstReg = orig_inst->DstReg; + tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY; + tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index; + + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + pass = 1; + fail = 2; + } else { + pass = 2; + fail = 1; + } + + tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN; + tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111; + tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit); + } else if (destredirect) { + tgt = radeonAppendInstructions(t->Program, 1); + + tgt->Opcode = OPCODE_MOV; + tgt->DstReg = orig_inst->DstReg; + tgt->SrcReg[0].File = PROGRAM_TEMPORARY; + tgt->SrcReg[0].Index = inst.DstReg.Index; + } + + return GL_TRUE; +} + + +static void update_params(r300ContextPtr r300, struct r500_fragment_program *fp) +{ + struct gl_fragment_program *mp = &fp->mesa_program; + + /* Ask Mesa nicely to fill in ParameterValues for us */ + if (mp->Base.Parameters) + _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters); +} + + +/** + * Transform the program to support fragment.position. + * + * Introduce a small fragment at the start of the program that will be + * the only code that directly reads the FRAG_ATTRIB_WPOS input. + * All other code pieces that reference that input will be rewritten + * to read from a newly allocated temporary. + * + * \todo if/when r5xx supports the radeon_program architecture, this is a + * likely candidate for code sharing. + */ +static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler) +{ + GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead; + + if (!(InputsRead & FRAG_BIT_WPOS)) + return; + + static gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 + }; + struct prog_instruction *fpi; + GLuint window_index; + int i = 0; + GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); + + _mesa_insert_instructions(compiler->program, 0, 3); + fpi = compiler->program->Instructions; + + /* perspective divide */ + fpi[i].Opcode = OPCODE_RCP; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_W; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; + i++; + + fpi[i].Opcode = OPCODE_MUL; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[1].Index = tempregi; + fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; + i++; + + /* viewport transformation */ + window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); + + fpi[i].Opcode = OPCODE_MAD; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[0].Index = tempregi; + fpi[i].SrcReg[0].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[1].Index = window_index; + fpi[i].SrcReg[1].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[2].Index = window_index; + fpi[i].SrcReg[2].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + i++; + + for (; i < compiler->program->NumInstructions; ++i) { + int reg; + for (reg = 0; reg < 3; reg++) { + if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && + fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { + fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[reg].Index = tempregi; + } + } + } +} + + +static void nqssadce_init(struct nqssadce_state* s) +{ + s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; + s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; +} + +static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) +{ + GLuint relevant; + int i; + + if (opcode == OPCODE_TEX || + opcode == OPCODE_TXB || + opcode == OPCODE_TXP || + opcode == OPCODE_KIL) { + if (reg.Abs) + return GL_FALSE; + + if (reg.NegateAbs) + reg.NegateBase ^= 15; + + if (opcode == OPCODE_KIL) { + if (reg.Swizzle != SWIZZLE_NOOP) + return GL_FALSE; + } else { + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(reg.Swizzle, i); + if (swz == SWIZZLE_NIL) { + reg.NegateBase &= ~(1 << i); + continue; + } + if (swz >= 4) + return GL_FALSE; + } + } + + if (reg.NegateBase) + return GL_FALSE; + + return GL_TRUE; + } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) { + /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; + * if it doesn't fit perfectly into a .xyzw case... */ + if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs + && !reg.NegateBase && !reg.NegateAbs) + return GL_TRUE; + + return GL_FALSE; + } else { + /* ALU instructions support almost everything */ + if (reg.Abs) + return GL_TRUE; + + relevant = 0; + for(i = 0; i < 3; ++i) { + GLuint swz = GET_SWZ(reg.Swizzle, i); + if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO) + relevant |= 1 << i; + } + if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant)) + return GL_FALSE; + + return GL_TRUE; + } +} + +/** + * Implement a MOV with a potentially non-native swizzle. + * + * The only thing we *cannot* do in an ALU instruction is per-component + * negation. Therefore, we split the MOV into two instructions when necessary. + */ +static void nqssadce_build_swizzle(struct nqssadce_state *s, + struct prog_dst_register dst, struct prog_src_register src) +{ + struct prog_instruction *inst; + GLuint negatebase[2] = { 0, 0 }; + int i; + + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(src.Swizzle, i); + if (swz == SWIZZLE_NIL) + continue; + negatebase[GET_BIT(src.NegateBase, i)] |= 1 << i; + } + + _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0)); + inst = s->Program->Instructions + s->IP; + + for(i = 0; i <= 1; ++i) { + if (!negatebase[i]) + continue; + + inst->Opcode = OPCODE_MOV; + inst->DstReg = dst; + inst->DstReg.WriteMask = negatebase[i]; + inst->SrcReg[0] = src; + inst++; + s->IP++; + } +} + +static GLuint build_dtm(GLuint depthmode) +{ + switch(depthmode) { + default: + case GL_LUMINANCE: return 0; + case GL_INTENSITY: return 1; + case GL_ALPHA: return 2; + } +} + +static GLuint build_func(GLuint comparefunc) +{ + return comparefunc - GL_NEVER; +} + + +/** + * Collect all external state that is relevant for compiling the given + * fragment program. + */ +static void build_state( + r300ContextPtr r300, + struct r500_fragment_program *fp, + struct r500_fragment_program_external_state *state) +{ + int unit; + + _mesa_bzero(state, sizeof(*state)); + + for(unit = 0; unit < 16; ++unit) { + if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) { + struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; + + state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); + state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); + } + } +} + +static void dump_program(struct r500_fragment_program_code *code); + +void r500TranslateFragmentShader(r300ContextPtr r300, + struct r500_fragment_program *fp) +{ + struct r500_fragment_program_external_state state; + + build_state(r300, fp, &state); + if (_mesa_memcmp(&fp->state, &state, sizeof(state))) { + /* TODO: cache compiled programs */ + fp->translated = GL_FALSE; + _mesa_memcpy(&fp->state, &state, sizeof(state)); + } + + if (!fp->translated) { + struct r500_fragment_program_compiler compiler; + + compiler.r300 = r300; + compiler.fp = fp; + compiler.code = &fp->code; + compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base); + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Compiler: Initial program:\n"); + _mesa_print_program(compiler.program); + } + + insert_WPOS_trailer(&compiler); + + struct radeon_program_transformation transformations[] = { + { &transform_TEX, &compiler }, + { &radeonTransformALU, 0 }, + { &radeonTransformDeriv, 0 }, + { &radeonTransformTrigScale, 0 } + }; + radeonLocalTransform(r300->radeon.glCtx, compiler.program, + 4, transformations); + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Compiler: after native rewrite:\n"); + _mesa_print_program(compiler.program); + } + + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &is_native_swizzle, + .BuildSwizzle = &nqssadce_build_swizzle, + .RewriteDepthOut = GL_TRUE + }; + radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce); + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Compiler: after NqSSA-DCE:\n"); + _mesa_print_program(compiler.program); + } + + fp->translated = r500FragmentProgramEmit(&compiler); + + /* Subtle: Rescue any parameters that have been added during transformations */ + _mesa_free_parameter_list(fp->mesa_program.Base.Parameters); + fp->mesa_program.Base.Parameters = compiler.program->Parameters; + compiler.program->Parameters = 0; + + _mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0); + + r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); + + if (RADEON_DEBUG & DEBUG_PIXEL) { + if (fp->translated) { + _mesa_printf("Machine-readable code:\n"); + dump_program(&fp->code); + } + } + + } + + update_params(r300, fp); + +} + +static char *toswiz(int swiz_val) { + switch(swiz_val) { + case 0: return "R"; + case 1: return "G"; + case 2: return "B"; + case 3: return "A"; + case 4: return "0"; + case 5: return "1/2"; + case 6: return "1"; + case 7: return "U"; + } + return NULL; +} + +static char *toop(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP3"; break; + case 2: str = "DP4"; break; + case 3: str = "D2A"; break; + case 4: str = "MIN"; break; + case 5: str = "MAX"; break; + case 6: str = "Reserved"; break; + case 7: str = "CND"; break; + case 8: str = "CMP"; break; + case 9: str = "FRC"; break; + case 10: str = "SOP"; break; + case 11: str = "MDH"; break; + case 12: str = "MDV"; break; + } + return str; +} + +static char *to_alpha_op(int op_val) +{ + char *str = NULL; + switch (op_val) { + case 0: str = "MAD"; break; + case 1: str = "DP"; break; + case 2: str = "MIN"; break; + case 3: str = "MAX"; break; + case 4: str = "Reserved"; break; + case 5: str = "CND"; break; + case 6: str = "CMP"; break; + case 7: str = "FRC"; break; + case 8: str = "EX2"; break; + case 9: str = "LN2"; break; + case 10: str = "RCP"; break; + case 11: str = "RSQ"; break; + case 12: str = "SIN"; break; + case 13: str = "COS"; break; + case 14: str = "MDH"; break; + case 15: str = "MDV"; break; + } + return str; +} + +static char *to_mask(int val) +{ + char *str = NULL; + switch(val) { + case 0: str = "NONE"; break; + case 1: str = "R"; break; + case 2: str = "G"; break; + case 3: str = "RG"; break; + case 4: str = "B"; break; + case 5: str = "RB"; break; + case 6: str = "GB"; break; + case 7: str = "RGB"; break; + case 8: str = "A"; break; + case 9: str = "AR"; break; + case 10: str = "AG"; break; + case 11: str = "ARG"; break; + case 12: str = "AB"; break; + case 13: str = "ARB"; break; + case 14: str = "AGB"; break; + case 15: str = "ARGB"; break; + } + return str; +} + +static char *to_texop(int val) +{ + switch(val) { + case 0: return "NOP"; + case 1: return "LD"; + case 2: return "TEXKILL"; + case 3: return "PROJ"; + case 4: return "LODBIAS"; + case 5: return "LOD"; + case 6: return "DXDY"; + } + return NULL; +} + +static void dump_program(struct r500_fragment_program_code *code) +{ + + fprintf(stderr, "R500 Fragment Program:\n--------\n"); + + int n; + uint32_t inst; + uint32_t inst0; + char *str = NULL; + + if (code->const_nr) { + fprintf(stderr, "--------\nConstants:\n"); + for (n = 0; n < code->const_nr; n++) { + fprintf(stderr, "Constant %d: %i[%i]\n", n, + code->constant[n].File, code->constant[n].Index); + } + fprintf(stderr, "--------\n"); + } + + for (n = 0; n < code->inst_end+1; n++) { + inst0 = inst = code->inst[n].inst0; + fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); + switch(inst & 0x3) { + case R500_INST_TYPE_ALU: str = "ALU"; break; + case R500_INST_TYPE_OUT: str = "OUT"; break; + case R500_INST_TYPE_FC: str = "FC"; break; + case R500_INST_TYPE_TEX: str = "TEX"; break; + }; + fprintf(stderr,"%s %s %s %s %s ", str, + inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "", + inst & R500_INST_LAST ? "LAST" : "", + inst & R500_INST_NOP ? "NOP" : "", + inst & R500_INST_ALU_WAIT ? "ALU WAIT" : ""); + fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf), + to_mask((inst >> 15) & 0xf)); + + switch(inst0 & 0x3) { + case 0: + case 1: + fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1); + inst = code->inst[n].inst1; + + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + + fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2); + inst = code->inst[n].inst2; + fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", + inst & 0xff, (inst & (1<<8)) ? 'c' : 't', + (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', + (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', + (inst >> 30)); + fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); + inst = code->inst[n].inst3; + fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n", + (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), + (inst >> 11) & 0x3, + (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7), + (inst >> 24) & 0x3); + + + fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); + inst = code->inst[n].inst4; + fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, + (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, + (inst >> 31) & 0x1); + + fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); + inst = code->inst[n].inst5; + fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), + (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", + (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), + (inst >> 23) & 0x3, + (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3); + break; + case 2: + break; + case 3: + inst = code->inst[n].inst1; + fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, + to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", + (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED"); + inst = code->inst[n].inst2; + fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, + inst & 127, inst & (1<<7) ? "(rel)" : "", + toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3), + toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3), + (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "", + toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3), + toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3)); + + fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3); + break; + } + fprintf(stderr,"\n"); + } + +} diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.h b/src/mesa/drivers/dri/r600/r700_fragprog.h new file mode 100644 index 00000000000..e48cb60a546 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_fragprog.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs <[email protected]> + * Jerome Glisse <[email protected]> + */ +#ifndef __R700_FRAGPROG_H_ +#define __R700_FRAGPROG_H_ + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/program.h" +#include "shader/prog_instruction.h" + +#include "r600_context.h" +#include "r600_state.h" +#include "radeon_program.h" + +struct r500_fragment_program; + +extern void r500TranslateFragmentShader(r300ContextPtr r300, + struct r500_fragment_program *fp); + +struct r500_fragment_program_compiler { + r300ContextPtr r300; + struct r500_fragment_program *fp; + struct r500_fragment_program_code *code; + struct gl_program *program; +}; + +extern GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler); + +#endif diff --git a/src/mesa/drivers/dri/r600/r700_fragprog_emit.c b/src/mesa/drivers/dri/r600/r700_fragprog_emit.c new file mode 100644 index 00000000000..ccffd7c28ef --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_fragprog_emit.c @@ -0,0 +1,327 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * Copyright 2008 Corbin Simpson <[email protected]> + * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * \author Ben Skeggs <[email protected]> + * + * \author Jerome Glisse <[email protected]> + * + * \author Corbin Simpson <[email protected]> + * + * \todo Depth write, WPOS/FOGC inputs + * + * \todo FogOption + * + */ + +#include "r700_fragprog.h" + +#include "radeon_program_pair.h" + + +#define PROG_CODE \ + struct r500_fragment_program_compiler *c = (struct r500_fragment_program_compiler*)data; \ + struct r500_fragment_program_code *code = c->code + +#define error(fmt, args...) do { \ + fprintf(stderr, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + } while(0) + + +/** + * Callback to register hardware constants. + */ +static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex) +{ + PROG_CODE; + + for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) { + if (code->constant[*hwindex].File == file && + code->constant[*hwindex].Index == idx) + break; + } + + if (*hwindex >= code->const_nr) { + if (*hwindex >= PFS_NUM_CONST_REGS) { + error("Out of hw constants!\n"); + return GL_FALSE; + } + + code->const_nr++; + code->constant[*hwindex].File = file; + code->constant[*hwindex].Index = idx; + } + + return GL_TRUE; +} + +static GLuint translate_rgb_op(GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; + case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; + case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; + case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; + case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; + case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; + default: + error("translate_rgb_op(%d): unknown opcode\n", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; + case OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; + case OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; + case OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; + } +} + +static GLuint translate_alpha_op(GLuint opcode) +{ + switch(opcode) { + case OPCODE_CMP: return R500_ALPHA_OP_CMP; + case OPCODE_COS: return R500_ALPHA_OP_COS; + case OPCODE_DDX: return R500_ALPHA_OP_MDH; + case OPCODE_DDY: return R500_ALPHA_OP_MDV; + case OPCODE_DP3: return R500_ALPHA_OP_DP; + case OPCODE_DP4: return R500_ALPHA_OP_DP; + case OPCODE_EX2: return R500_ALPHA_OP_EX2; + case OPCODE_FRC: return R500_ALPHA_OP_FRC; + case OPCODE_LG2: return R500_ALPHA_OP_LN2; + default: + error("translate_alpha_op(%d): unknown opcode\n", opcode); + /* fall through */ + case OPCODE_NOP: + /* fall through */ + case OPCODE_MAD: return R500_ALPHA_OP_MAD; + case OPCODE_MAX: return R500_ALPHA_OP_MAX; + case OPCODE_MIN: return R500_ALPHA_OP_MIN; + case OPCODE_RCP: return R500_ALPHA_OP_RCP; + case OPCODE_RSQ: return R500_ALPHA_OP_RSQ; + case OPCODE_SIN: return R500_ALPHA_OP_SIN; + } +} + +static GLuint fix_hw_swizzle(GLuint swz) +{ + if (swz == 5) swz = 6; + if (swz == SWIZZLE_NIL) swz = 4; + return swz; +} + +static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg) +{ + GLuint t = inst->RGB.Arg[arg].Source; + int comp; + t |= inst->RGB.Arg[arg].Negate << 11; + t |= inst->RGB.Arg[arg].Abs << 12; + + for(comp = 0; comp < 3; ++comp) + t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2); + + return t; +} + +static GLuint translate_arg_alpha(struct radeon_pair_instruction *inst, int i) +{ + GLuint t = inst->Alpha.Arg[i].Source; + t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2; + t |= inst->Alpha.Arg[i].Negate << 5; + t |= inst->Alpha.Arg[i].Abs << 6; + return t; +} + +static void use_temporary(struct r500_fragment_program_code* code, GLuint index) +{ + if (index > code->max_temp_idx) + code->max_temp_idx = index; +} + +static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src) +{ + if (!src.Constant) + use_temporary(code, src.Index); + return src.Index | src.Constant << 8; +} + + +/** + * Emit a paired ALU instruction. + */ +static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) +{ + PROG_CODE; + + if (code->inst_end >= 511) { + error("emit_alu: Too many instructions"); + return GL_FALSE; + } + + int ip = ++code->inst_end; + + code->inst[ip].inst5 = translate_rgb_op(inst->RGB.Opcode); + code->inst[ip].inst4 = translate_alpha_op(inst->Alpha.Opcode); + + if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) + code->inst[ip].inst0 = R500_INST_TYPE_OUT; + else + code->inst[ip].inst0 = R500_INST_TYPE_ALU; + code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT; + + code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14); + code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); + if (inst->Alpha.DepthWriteMask) { + code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; + c->fp->writes_depth = GL_TRUE; + } + + code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); + code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex); + use_temporary(code, inst->Alpha.DestIndex); + use_temporary(code, inst->RGB.DestIndex); + + if (inst->RGB.Saturate) + code->inst[ip].inst0 |= R500_INST_RGB_CLAMP; + if (inst->Alpha.Saturate) + code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP; + + code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0])); + code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1])); + code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2])); + + code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1])); + code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2])); + + code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT; + code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT; + + code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT; + code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; + code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; + + return GL_TRUE; +} + +static GLuint translate_strq_swizzle(struct prog_src_register src) +{ + GLuint swiz = 0; + int i; + for (i = 0; i < 4; i++) + swiz |= (GET_SWZ(src.Swizzle, i) & 0x3) << i*2; + return swiz; +} + +/** + * Emit a single TEX instruction + */ +static GLboolean emit_tex(void *data, struct prog_instruction *inst) +{ + PROG_CODE; + + if (code->inst_end >= 511) { + error("emit_tex: Too many instructions"); + return GL_FALSE; + } + + int ip = ++code->inst_end; + + code->inst[ip].inst0 = R500_INST_TYPE_TEX + | (inst->DstReg.WriteMask << 11) + | R500_INST_TEX_SEM_WAIT; + code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) + | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; + + if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) + code->inst[ip].inst1 |= R500_TEX_UNSCALED; + + switch (inst->Opcode) { + case OPCODE_KIL: + code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; + break; + case OPCODE_TEX: + code->inst[ip].inst1 |= R500_TEX_INST_LD; + break; + case OPCODE_TXB: + code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; + break; + case OPCODE_TXP: + code->inst[ip].inst1 |= R500_TEX_INST_PROJ; + break; + default: + error("emit_tex can't handle opcode %x\n", inst->Opcode); + } + + code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) + | (translate_strq_swizzle(inst->SrcReg[0]) << 8) + | R500_TEX_DST_ADDR(inst->DstReg.Index) + | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G + | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; + + return GL_TRUE; +} + +static const struct radeon_pair_handler pair_handler = { + .EmitConst = emit_const, + .EmitPaired = emit_paired, + .EmitTex = emit_tex, + .MaxHwTemps = 128 +}; + +GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler) +{ + struct r500_fragment_program_code *code = compiler->code; + + _mesa_bzero(code, sizeof(*code)); + code->max_temp_idx = 1; + code->inst_offset = 0; + code->inst_end = -1; + + if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler)) + return GL_FALSE; + + if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { + /* This may happen when dead-code elimination is disabled or + * when most of the fragment program logic is leading to a KIL */ + if (code->inst_end >= 511) { + error("Introducing fake OUT: Too many instructions"); + return GL_FALSE; + } + + int ip = ++code->inst_end; + code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; + } + + return GL_TRUE; +} diff --git a/src/mesa/drivers/dri/r600/radeon_context.h b/src/mesa/drivers/dri/r600/radeon_context.h new file mode 100644 index 00000000000..250570f6b89 --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_context.h @@ -0,0 +1,76 @@ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Gareth Hughes <[email protected]> + * Keith Whitwell <[email protected]> + * Kevin E. Martin <[email protected]> + * Nicolai Haehnle <[email protected]> + */ + +#ifndef __RADEON_CONTEXT_H__ +#define __RADEON_CONTEXT_H__ + +#include "main/mtypes.h" +#include "main/colormac.h" +#include "radeon_screen.h" +#include "drm.h" +#include "dri_util.h" + +#include "radeon_screen.h" + +#if R200_MERGED +extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode); + +#define FALLBACK( radeon, bit, mode ) do { \ + if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \ + __FUNCTION__, bit, mode ); \ + radeonFallback( (radeon)->glCtx, bit, mode ); \ +} while (0) +#else +#define FALLBACK( radeon, bit, mode ) fprintf(stderr, "%s:%s\n", __LINE__, __FILE__); +#endif + +/* TCL fallbacks */ +extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode); + +#if R200_MERGED +#define TCL_FALLBACK( ctx, bit, mode ) radeonTclFallback( ctx, bit, mode ) +#else +#define TCL_FALLBACK( ctx, bit, mode ) ; +#endif + + +#endif /* __RADEON_CONTEXT_H__ */ diff --git a/src/mesa/drivers/dri/r600/radeon_nqssadce.c b/src/mesa/drivers/dri/r600/radeon_nqssadce.c new file mode 100644 index 00000000000..a083c3d2436 --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_nqssadce.c @@ -0,0 +1,284 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * "Not-quite SSA" and Dead-Code Elimination. + * + * @note This code uses SWIZZLE_NIL in a source register to indicate that + * the corresponding component is ignored by the corresponding instruction. + */ + +#include "radeon_nqssadce.h" + + +/** + * Return the @ref register_state for the given register (or 0 for untracked + * registers, i.e. constants). + */ +static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index) +{ + switch(file) { + case PROGRAM_TEMPORARY: return &s->Temps[index]; + case PROGRAM_OUTPUT: return &s->Outputs[index]; + default: return 0; + } +} + + +/** + * Left multiplication of a register with a swizzle + * + * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles. + */ +static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg) +{ + struct prog_src_register tmp = srcreg; + int i; + tmp.Swizzle = 0; + tmp.NegateBase = 0; + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(swizzle, i); + if (swz < 4) { + tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); + tmp.NegateBase |= GET_BIT(srcreg.NegateBase, swz) << i; + } else { + tmp.Swizzle |= swz << (i*3); + } + } + return tmp; +} + + +static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, + struct prog_instruction *inst, GLint src, GLuint sourced) +{ + int i; + GLuint deswz_source = 0; + + for(i = 0; i < 4; ++i) { + if (GET_BIT(sourced, i)) { + GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i); + deswz_source |= 1 << swz; + } else { + inst->SrcReg[src].Swizzle &= ~(7 << (3*i)); + inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); + } + } + + if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) { + struct prog_dst_register dstreg = inst->DstReg; + dstreg.File = PROGRAM_TEMPORARY; + dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); + dstreg.WriteMask = sourced; + + s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]); + + inst = s->Program->Instructions + s->IP; + inst->SrcReg[src].File = PROGRAM_TEMPORARY; + inst->SrcReg[src].Index = dstreg.Index; + inst->SrcReg[src].Swizzle = 0; + inst->SrcReg[src].NegateBase = 0; + inst->SrcReg[src].Abs = 0; + inst->SrcReg[src].NegateAbs = 0; + for(i = 0; i < 4; ++i) { + if (GET_BIT(sourced, i)) + inst->SrcReg[src].Swizzle |= i << (3*i); + else + inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); + } + deswz_source = sourced; + } + + struct register_state *regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); + if (regstate) + regstate->Sourced |= deswz_source & 0xf; + + return inst; +} + + +static void rewrite_depth_out(struct prog_instruction *inst) +{ + if (inst->DstReg.WriteMask & WRITEMASK_Z) { + inst->DstReg.WriteMask = WRITEMASK_W; + } else { + inst->DstReg.WriteMask = 0; + return; + } + + switch (inst->Opcode) { + case OPCODE_FRC: + case OPCODE_MOV: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + break; + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + break; + case OPCODE_CMP: + case OPCODE_MAD: + inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); + break; + default: + // Scalar instructions needn't be reswizzled + break; + } +} + +static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex) +{ + int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int i; + for(i = 0; i < nsrc; ++i) + if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex) + inst->SrcReg[i].Index = newindex; +} + +static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) +{ + GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); + int ip; + for(ip = 0; ip < s->IP; ++ip) { + struct prog_instruction* inst = s->Program->Instructions + ip; + if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex) + inst->DstReg.Index = newindex; + unalias_srcregs(inst, oldindex, newindex); + } + unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex); +} + + +/** + * Handle one instruction. + */ +static void process_instruction(struct nqssadce_state* s) +{ + struct prog_instruction *inst = s->Program->Instructions + s->IP; + + if (inst->Opcode == OPCODE_END) + return; + + if (inst->Opcode != OPCODE_KIL) { + if (s->Descr->RewriteDepthOut) { + if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPTH) + rewrite_depth_out(inst); + } + + struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index); + if (!regstate) { + _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n", + inst->DstReg.File, inst->DstReg.Index); + return; + } + + inst->DstReg.WriteMask &= regstate->Sourced; + regstate->Sourced &= ~inst->DstReg.WriteMask; + + if (inst->DstReg.WriteMask == 0) { + _mesa_delete_instructions(s->Program, s->IP, 1); + return; + } + + if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced) + unalias_temporary(s, inst->DstReg.Index); + } + + /* Attention: Due to swizzle emulation code, the following + * might change the instruction stream under us, so we have + * to be careful with the inst pointer. */ + switch (inst->Opcode) { + case OPCODE_DDX: + case OPCODE_DDY: + case OPCODE_FRC: + case OPCODE_MOV: + inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); + break; + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); + inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); + break; + case OPCODE_CMP: + case OPCODE_MAD: + inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); + inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); + inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask); + break; + case OPCODE_COS: + case OPCODE_EX2: + case OPCODE_LG2: + case OPCODE_RCP: + case OPCODE_RSQ: + case OPCODE_SIN: + inst = track_used_srcreg(s, inst, 0, 0x1); + break; + case OPCODE_DP3: + inst = track_used_srcreg(s, inst, 0, 0x7); + inst = track_used_srcreg(s, inst, 1, 0x7); + break; + case OPCODE_DP4: + inst = track_used_srcreg(s, inst, 0, 0xf); + inst = track_used_srcreg(s, inst, 1, 0xf); + break; + case OPCODE_KIL: + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: + inst = track_used_srcreg(s, inst, 0, 0xf); + break; + default: + _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode); + return; + } +} + + +void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr) +{ + struct nqssadce_state s; + + _mesa_bzero(&s, sizeof(s)); + s.Ctx = ctx; + s.Program = p; + s.Descr = descr; + s.Descr->Init(&s); + s.IP = p->NumInstructions; + + while(s.IP > 0) { + s.IP--; + process_instruction(&s); + } +} diff --git a/src/mesa/drivers/dri/r600/radeon_nqssadce.h b/src/mesa/drivers/dri/r600/radeon_nqssadce.h new file mode 100644 index 00000000000..a4f94abcb62 --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_nqssadce.h @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_NQSSADCE_H_ +#define __RADEON_PROGRAM_NQSSADCE_H_ + +#include "radeon_program.h" + + +struct register_state { + /** + * Bitmask indicating which components of the register are sourced + * by later instructions. + */ + GLuint Sourced : 4; +}; + +/** + * Maintain state such as which registers are used, which registers are + * read from, etc. + */ +struct nqssadce_state { + GLcontext *Ctx; + struct gl_program *Program; + struct radeon_nqssadce_descr *Descr; + + /** + * All instructions after this instruction pointer have been dealt with. + */ + int IP; + + /** + * Which registers are read by subsequent instructions? + */ + struct register_state Temps[MAX_PROGRAM_TEMPS]; + struct register_state Outputs[VERT_RESULT_MAX]; +}; + + +/** + * This structure contains a description of the hardware in-so-far as + * it is required for the NqSSA-DCE pass. + */ +struct radeon_nqssadce_descr { + /** + * Fill in which outputs + */ + void (*Init)(struct nqssadce_state *); + + /** + * Check whether the given swizzle, absolute and negate combination + * can be implemented natively by the hardware for this opcode. + */ + GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg); + + /** + * Emit (at the current IP) the instruction MOV dst, src; + * The transformation will work recursively on the emitted instruction(s). + */ + void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); + + /** + * Rewrite instructions that write to DEPR.z to write to DEPR.w + * instead (rewriting is done *before* the WriteMask test). + */ + GLboolean RewriteDepthOut; + void *Data; +}; + +void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr); + +#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */ diff --git a/src/mesa/drivers/dri/r600/radeon_program.c b/src/mesa/drivers/dri/r600/radeon_program.c new file mode 100644 index 00000000000..da5e7aefce5 --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_program.c @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program.h" + +#include "shader/prog_print.h" + + +/** + * Transform the given clause in the following way: + * 1. Replace it with an empty clause + * 2. For every instruction in the original clause, try the given + * transformations in order. + * 3. If one of the transformations returns GL_TRUE, assume that it + * has emitted the appropriate instruction(s) into the new clause; + * otherwise, copy the instruction verbatim. + * + * \note The transformation is currently not recursive; in other words, + * instructions emitted by transformations are not transformed. + * + * \note The transform is called 'local' because it can only look at + * one instruction at a time. + */ +void radeonLocalTransform( + GLcontext *Ctx, + struct gl_program *program, + int num_transformations, + struct radeon_program_transformation* transformations) +{ + struct radeon_transform_context ctx; + int ip; + + ctx.Ctx = Ctx; + ctx.Program = program; + ctx.OldInstructions = program->Instructions; + ctx.OldNumInstructions = program->NumInstructions; + + program->Instructions = 0; + program->NumInstructions = 0; + + for(ip = 0; ip < ctx.OldNumInstructions; ++ip) { + struct prog_instruction *instr = ctx.OldInstructions + ip; + int i; + + for(i = 0; i < num_transformations; ++i) { + struct radeon_program_transformation* t = transformations + i; + + if (t->function(&ctx, instr, t->userData)) + break; + } + + if (i >= num_transformations) { + struct prog_instruction* dest = radeonAppendInstructions(program, 1); + _mesa_copy_instructions(dest, instr, 1); + } + } + + _mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions); +} + + +static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count) +{ + GLuint i; + for (i = 0; i < count; i++) { + const struct prog_instruction *inst = insts + i; + const GLuint n = _mesa_num_inst_src_regs(inst->Opcode); + GLuint k; + + for (k = 0; k < n; k++) { + if (inst->SrcReg[k].File == PROGRAM_TEMPORARY) + used[inst->SrcReg[k].Index] = GL_TRUE; + } + } +} + +GLint radeonFindFreeTemporary(struct radeon_transform_context *t) +{ + GLboolean used[MAX_PROGRAM_TEMPS]; + GLuint i; + + _mesa_memset(used, 0, sizeof(used)); + scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions); + scan_instructions(used, t->OldInstructions, t->OldNumInstructions); + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + if (!used[i]) + return i; + } + + return -1; +} + + +/** + * Append the given number of instructions to the program and return a + * pointer to the first new instruction. + */ +struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count) +{ + int oldnum = program->NumInstructions; + _mesa_insert_instructions(program, oldnum, count); + return program->Instructions + oldnum; +} diff --git a/src/mesa/drivers/dri/r600/radeon_program.h b/src/mesa/drivers/dri/r600/radeon_program.h new file mode 100644 index 00000000000..b411f69bc88 --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_program.h @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_H_ +#define __RADEON_PROGRAM_H_ + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" +#include "shader/program.h" +#include "shader/prog_instruction.h" + + +enum { + CLAUSE_MIXED = 0, + CLAUSE_ALU, + CLAUSE_TEX +}; + +enum { + PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */ +}; + +enum { + OPCODE_REPL_ALPHA = MAX_OPCODE /**< used in paired instructions */ +}; + +#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO) +#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE) + +/** + * Transformation context that is passed to local transformations. + * + * Care must be taken with some operations during transformation, + * e.g. finding new temporary registers must use @ref radeonFindFreeTemporary + */ +struct radeon_transform_context { + GLcontext *Ctx; + struct gl_program *Program; + struct prog_instruction *OldInstructions; + GLuint OldNumInstructions; +}; + +/** + * A transformation that can be passed to \ref radeonLocalTransform. + * + * The function will be called once for each instruction. + * It has to either emit the appropriate transformed code for the instruction + * and return GL_TRUE, or return GL_FALSE if it doesn't understand the + * instruction. + * + * The function gets passed the userData as last parameter. + */ +struct radeon_program_transformation { + GLboolean (*function)( + struct radeon_transform_context*, + struct prog_instruction*, + void*); + void *userData; +}; + +void radeonLocalTransform( + GLcontext* ctx, + struct gl_program *program, + int num_transformations, + struct radeon_program_transformation* transformations); + +/** + * Find a usable free temporary register during program transformation + */ +GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx); + +struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count); + +#endif diff --git a/src/mesa/drivers/dri/r600/radeon_program_alu.c b/src/mesa/drivers/dri/r600/radeon_program_alu.c new file mode 100644 index 00000000000..1ef71e74dcf --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_program_alu.c @@ -0,0 +1,658 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * Shareable transformations that transform "special" ALU instructions + * into ALU instructions that are supported by hardware. + * + */ + +#include "radeon_program_alu.h" + +#include "shader/prog_parameter.h" + + +static struct prog_instruction *emit1(struct gl_program* p, + gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, + struct prog_src_register SrcReg) +{ + struct prog_instruction *fpi = radeonAppendInstructions(p, 1); + + fpi->Opcode = Opcode; + fpi->SaturateMode = Saturate; + fpi->DstReg = DstReg; + fpi->SrcReg[0] = SrcReg; + return fpi; +} + +static struct prog_instruction *emit2(struct gl_program* p, + gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, + struct prog_src_register SrcReg0, struct prog_src_register SrcReg1) +{ + struct prog_instruction *fpi = radeonAppendInstructions(p, 1); + + fpi->Opcode = Opcode; + fpi->SaturateMode = Saturate; + fpi->DstReg = DstReg; + fpi->SrcReg[0] = SrcReg0; + fpi->SrcReg[1] = SrcReg1; + return fpi; +} + +static struct prog_instruction *emit3(struct gl_program* p, + gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, + struct prog_src_register SrcReg0, struct prog_src_register SrcReg1, + struct prog_src_register SrcReg2) +{ + struct prog_instruction *fpi = radeonAppendInstructions(p, 1); + + fpi->Opcode = Opcode; + fpi->SaturateMode = Saturate; + fpi->DstReg = DstReg; + fpi->SrcReg[0] = SrcReg0; + fpi->SrcReg[1] = SrcReg1; + fpi->SrcReg[2] = SrcReg2; + return fpi; +} + +static void set_swizzle(struct prog_src_register *SrcReg, int coordinate, int swz) +{ + SrcReg->Swizzle &= ~(7 << (3*coordinate)); + SrcReg->Swizzle |= swz << (3*coordinate); +} + +static void set_negate_base(struct prog_src_register *SrcReg, int coordinate, int negate) +{ + SrcReg->NegateBase &= ~(1 << coordinate); + SrcReg->NegateBase |= (negate << coordinate); +} + +static struct prog_dst_register dstreg(int file, int index) +{ + struct prog_dst_register dst; + dst.File = file; + dst.Index = index; + dst.WriteMask = WRITEMASK_XYZW; + dst.CondMask = COND_TR; + dst.CondSwizzle = SWIZZLE_NOOP; + dst.CondSrc = 0; + dst.pad = 0; + return dst; +} + +static struct prog_dst_register dstregtmpmask(int index, int mask) +{ + struct prog_dst_register dst; + dst.File = PROGRAM_TEMPORARY; + dst.Index = index; + dst.WriteMask = mask; + dst.CondMask = COND_TR; + dst.CondSwizzle = SWIZZLE_NOOP; + dst.CondSrc = 0; + dst.pad = 0; + return dst; +} + +static const struct prog_src_register builtin_zero = { + .File = PROGRAM_BUILTIN, + .Index = 0, + .Swizzle = SWIZZLE_0000 +}; +static const struct prog_src_register builtin_one = { + .File = PROGRAM_BUILTIN, + .Index = 0, + .Swizzle = SWIZZLE_1111 +}; +static const struct prog_src_register srcreg_undefined = { + .File = PROGRAM_UNDEFINED, + .Index = 0, + .Swizzle = SWIZZLE_NOOP +}; + +static struct prog_src_register srcreg(int file, int index) +{ + struct prog_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + return src; +} + +static struct prog_src_register srcregswz(int file, int index, int swz) +{ + struct prog_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + src.Swizzle = swz; + return src; +} + +static struct prog_src_register absolute(struct prog_src_register reg) +{ + struct prog_src_register newreg = reg; + newreg.Abs = 1; + newreg.NegateBase = 0; + newreg.NegateAbs = 0; + return newreg; +} + +static struct prog_src_register negate(struct prog_src_register reg) +{ + struct prog_src_register newreg = reg; + newreg.NegateAbs = !newreg.NegateAbs; + return newreg; +} + +static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w) +{ + struct prog_src_register swizzled = reg; + swizzled.Swizzle = MAKE_SWIZZLE4( + x >= 4 ? x : GET_SWZ(reg.Swizzle, x), + y >= 4 ? y : GET_SWZ(reg.Swizzle, y), + z >= 4 ? z : GET_SWZ(reg.Swizzle, z), + w >= 4 ? w : GET_SWZ(reg.Swizzle, w)); + return swizzled; +} + +static struct prog_src_register scalar(struct prog_src_register reg) +{ + return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); +} + +static void transform_ABS(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + struct prog_src_register src = inst->SrcReg[0]; + src.Abs = 1; + src.NegateBase = 0; + src.NegateAbs = 0; + emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src); +} + +static void transform_DPH(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + struct prog_src_register src0 = inst->SrcReg[0]; + if (src0.NegateAbs) { + if (src0.Abs) { + int tempreg = radeonFindFreeTemporary(t); + emit1(t->Program, OPCODE_MOV, 0, dstreg(PROGRAM_TEMPORARY, tempreg), src0); + src0 = srcreg(src0.File, src0.Index); + } else { + src0.NegateAbs = 0; + src0.NegateBase ^= NEGATE_XYZW; + } + } + set_swizzle(&src0, 3, SWIZZLE_ONE); + set_negate_base(&src0, 3, 0); + emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]); +} + +/** + * [1, src0.y*src1.y, src0.z, src1.w] + * So basically MUL with lotsa swizzling. + */ +static void transform_DST(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg, + swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE), + swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W)); +} + +static void transform_FLR(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]); + emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, + inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); +} + +/** + * Definition of LIT (from ARB_fragment_program): + * + * tmp = VectorLoad(op0); + * if (tmp.x < 0) tmp.x = 0; + * if (tmp.y < 0) tmp.y = 0; + * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + * result.x = 1.0; + * result.y = tmp.x; + * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + * result.w = 1.0; + * + * The longest path of computation is the one leading to result.z, + * consisting of 5 operations. This implementation of LIT takes + * 5 slots, if the subsequent optimization passes are clever enough + * to pair instructions correctly. + */ +static void transform_LIT(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + static const GLfloat LitConst[4] = { -127.999999 }; + + GLuint constant; + GLuint constant_swizzle; + GLuint temp; + int needTemporary = 0; + struct prog_src_register srctemp; + + constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle); + + if (inst->DstReg.WriteMask != WRITEMASK_XYZW) { + needTemporary = 1; + } else if (inst->DstReg.File != PROGRAM_TEMPORARY) { + // LIT is typically followed by DP3/DP4, so there's no point + // in creating special code for this case + needTemporary = 1; + } + + if (needTemporary) { + temp = radeonFindFreeTemporary(t); + } else { + temp = inst->DstReg.Index; + } + srctemp = srcreg(PROGRAM_TEMPORARY, temp); + + // tmp.x = max(0.0, Src.x); + // tmp.y = max(0.0, Src.y); + // tmp.w = clamp(Src.z, -128+eps, 128-eps); + emit2(t->Program, OPCODE_MAX, 0, + dstregtmpmask(temp, WRITEMASK_XYW), + inst->SrcReg[0], + swizzle(srcreg(PROGRAM_CONSTANT, constant), + SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3)); + emit2(t->Program, OPCODE_MIN, 0, + dstregtmpmask(temp, WRITEMASK_Z), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle))); + + // tmp.w = Pow(tmp.y, tmp.w) + emit1(t->Program, OPCODE_LG2, 0, + dstregtmpmask(temp, WRITEMASK_W), + swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); + emit2(t->Program, OPCODE_MUL, 0, + dstregtmpmask(temp, WRITEMASK_W), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)); + emit1(t->Program, OPCODE_EX2, 0, + dstregtmpmask(temp, WRITEMASK_W), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + + // tmp.z = (tmp.x > 0) ? tmp.w : 0.0 + emit3(t->Program, OPCODE_CMP, inst->SaturateMode, + dstregtmpmask(temp, WRITEMASK_Z), + negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), + swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + builtin_zero); + + // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 + emit1(t->Program, OPCODE_MOV, inst->SaturateMode, + dstregtmpmask(temp, WRITEMASK_XYW), + swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE)); + + if (needTemporary) + emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp); +} + +static void transform_LRP(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_ADD, 0, + dstreg(PROGRAM_TEMPORARY, tempreg), + inst->SrcReg[1], negate(inst->SrcReg[2])); + emit3(t->Program, OPCODE_MAD, inst->SaturateMode, + inst->DstReg, + inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]); +} + +static void transform_POW(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg); + struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg); + tempdst.WriteMask = WRITEMASK_W; + tempsrc.Swizzle = SWIZZLE_WWWW; + + emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0])); + emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1])); + emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc); +} + +static void transform_RSQ(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0])); +} + +static void transform_SGE(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); + emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, + srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one); +} + +static void transform_SLT(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); + emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, + srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero); +} + +static void transform_SUB(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1])); +} + +static void transform_SWZ(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]); +} + +static void transform_XPD(struct radeon_transform_context* t, + struct prog_instruction* inst) +{ + int tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg), + swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); + emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg, + swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), + swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + negate(srcreg(PROGRAM_TEMPORARY, tempreg))); +} + + +/** + * Can be used as a transformation for @ref radeonClauseLocalTransform, + * no userData necessary. + * + * Eliminates the following ALU instructions: + * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD + * using: + * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP + * + * Transforms RSQ to Radeon's native RSQ by explicitly setting + * absolute value. + * + * @note should be applicable to R300 and R500 fragment programs. + */ +GLboolean radeonTransformALU(struct radeon_transform_context* t, + struct prog_instruction* inst, + void* unused) +{ + switch(inst->Opcode) { + case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE; + case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE; + case OPCODE_DST: transform_DST(t, inst); return GL_TRUE; + case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE; + case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE; + case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE; + case OPCODE_POW: transform_POW(t, inst); return GL_TRUE; + case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE; + case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE; + case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE; + case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE; + case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE; + case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE; + default: + return GL_FALSE; + } +} + + +static void sincos_constants(struct radeon_transform_context* t, GLuint *constants) +{ + static const GLfloat SinCosConsts[2][4] = { + { + 1.273239545, // 4/PI + -0.405284735, // -4/(PI*PI) + 3.141592654, // PI + 0.2225 // weight + }, + { + 0.75, + 0.5, + 0.159154943, // 1/(2*PI) + 6.283185307 // 2*PI + } + }; + int i; + + for(i = 0; i < 2; ++i) { + GLuint swz; + constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz); + ASSERT(swz == SWIZZLE_NOOP); + } +} + +/** + * Approximate sin(x), where x is clamped to (-pi/2, pi/2). + * + * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } + * MAD tmp.x, tmp.y, |src|, tmp.x + * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x + * MAD dest, tmp.y, weight, tmp.x + */ +static void sin_approx(struct radeon_transform_context* t, + struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants) +{ + GLuint tempreg = radeonFindFreeTemporary(t); + + emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + srcreg(PROGRAM_CONSTANT, constants[0])); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), + negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X))); + emit3(t->Program, OPCODE_MAD, 0, dst, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); +} + +/** + * Translate the trigonometric functions COS, SIN, and SCS + * using only the basic instructions + * MOV, ADD, MUL, MAD, FRC + */ +GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t, + struct prog_instruction* inst, + void* unused) +{ + if (inst->Opcode != OPCODE_COS && + inst->Opcode != OPCODE_SIN && + inst->Opcode != OPCODE_SCS) + return GL_FALSE; + + GLuint constants[2]; + GLuint tempreg = radeonFindFreeTemporary(t); + + sincos_constants(t, constants); + + if (inst->Opcode == OPCODE_COS) { + // MAD tmp.x, src, 1/(2*PI), 0.75 + // FRC tmp.x, tmp.x + // MAD tmp.z, tmp.x, 2*PI, -PI + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); + emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + + sin_approx(t, inst->DstReg, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + constants); + } else if (inst->Opcode == OPCODE_SIN) { + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); + emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + + sin_approx(t, inst->DstReg, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + constants); + } else { + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W)); + emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + srcreg(PROGRAM_TEMPORARY, tempreg)); + emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), + srcreg(PROGRAM_TEMPORARY, tempreg), + swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); + + struct prog_dst_register dst = inst->DstReg; + + dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X; + sin_approx(t, dst, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + constants); + + dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y; + sin_approx(t, dst, + swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), + constants); + } + + return GL_TRUE; +} + + +/** + * Transform the trigonometric functions COS, SIN, and SCS + * to include pre-scaling by 1/(2*PI) and taking the fractional + * part, so that the input to COS and SIN is always in the range [0,1). + * SCS is replaced by one COS and one SIN instruction. + * + * @warning This transformation implicitly changes the semantics of SIN and COS! + */ +GLboolean radeonTransformTrigScale(struct radeon_transform_context* t, + struct prog_instruction* inst, + void* unused) +{ + if (inst->Opcode != OPCODE_COS && + inst->Opcode != OPCODE_SIN && + inst->Opcode != OPCODE_SCS) + return GL_FALSE; + + static const GLfloat RCP_2PI[] = { 0.15915494309189535 }; + GLuint temp; + GLuint constant; + GLuint constant_swizzle; + + temp = radeonFindFreeTemporary(t); + constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle); + + emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), + swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)); + emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W), + srcreg(PROGRAM_TEMPORARY, temp)); + + if (inst->Opcode == OPCODE_COS) { + emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg, + srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } else if (inst->Opcode == OPCODE_SIN) { + emit1(t->Program, OPCODE_SIN, inst->SaturateMode, + inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } else if (inst->Opcode == OPCODE_SCS) { + struct prog_dst_register moddst = inst->DstReg; + + if (inst->DstReg.WriteMask & WRITEMASK_X) { + moddst.WriteMask = WRITEMASK_X; + emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst, + srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } + if (inst->DstReg.WriteMask & WRITEMASK_Y) { + moddst.WriteMask = WRITEMASK_Y; + emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst, + srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + } + } + + return GL_TRUE; +} + +/** + * Rewrite DDX/DDY instructions to properly work with r5xx shaders. + * The r5xx MDH/MDV instruction provides per-quad partial derivatives. + * It takes the form A*B+C. A and C are set by setting src0. B should be -1. + * + * @warning This explicitly changes the form of DDX and DDY! + */ + +GLboolean radeonTransformDeriv(struct radeon_transform_context* t, + struct prog_instruction* inst, + void* unused) +{ + if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY) + return GL_FALSE; + + struct prog_src_register B = inst->SrcReg[1]; + + B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, + SWIZZLE_ONE, SWIZZLE_ONE); + B.NegateBase = NEGATE_XYZW; + + emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg, + inst->SrcReg[0], B); + + return GL_TRUE; +} diff --git a/src/mesa/drivers/dri/r600/radeon_program_alu.h b/src/mesa/drivers/dri/r600/radeon_program_alu.h new file mode 100644 index 00000000000..b45958115cf --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_program_alu.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_ALU_H_ +#define __RADEON_PROGRAM_ALU_H_ + +#include "radeon_program.h" + +GLboolean radeonTransformALU( + struct radeon_transform_context *t, + struct prog_instruction*, + void*); + +GLboolean radeonTransformTrigSimple( + struct radeon_transform_context *t, + struct prog_instruction*, + void*); + +GLboolean radeonTransformTrigScale( + struct radeon_transform_context *t, + struct prog_instruction*, + void*); + +GLboolean radeonTransformDeriv( + struct radeon_transform_context *t, + struct prog_instruction*, + void*); + +#endif /* __RADEON_PROGRAM_ALU_H_ */ diff --git a/src/mesa/drivers/dri/r600/radeon_program_pair.c b/src/mesa/drivers/dri/r600/radeon_program_pair.c new file mode 100644 index 00000000000..49aa90dd94a --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_program_pair.c @@ -0,0 +1,1006 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * Perform temporary register allocation and attempt to pair off instructions + * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction + * vs. ALU instruction scheduling. + */ + +#include "radeon_program_pair.h" + +#include "radeon_common.h" + +#include "shader/prog_print.h" + +#define error(fmt, args...) do { \ + _mesa_problem(s->Ctx, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + s->Error = GL_TRUE; \ +} while(0) + +struct pair_state_instruction { + GLuint IsTex:1; /**< Is a texture instruction */ + GLuint NeedRGB:1; /**< Needs the RGB ALU */ + GLuint NeedAlpha:1; /**< Needs the Alpha ALU */ + GLuint IsTranscendent:1; /**< Is a special transcendent instruction */ + + /** + * Number of (read and write) dependencies that must be resolved before + * this instruction can be scheduled. + */ + GLuint NumDependencies:5; + + /** + * Next instruction in the linked list of ready instructions. + */ + struct pair_state_instruction *NextReady; + + /** + * Values that this instruction writes + */ + struct reg_value *Values[4]; +}; + + +/** + * Used to keep track of which instructions read a value. + */ +struct reg_value_reader { + GLuint IP; /**< IP of the instruction that performs this access */ + struct reg_value_reader *Next; +}; + +/** + * Used to keep track which values are stored in each component of a + * PROGRAM_TEMPORARY. + */ +struct reg_value { + GLuint IP; /**< IP of the instruction that writes this value */ + struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */ + + /** + * Unordered linked list of instructions that read from this value. + */ + struct reg_value_reader *Readers; + + /** + * Number of readers of this value. This is calculated during @ref scan_instructions + * and continually decremented during code emission. + * When this count reaches zero, the instruction that writes the @ref Next value + * can be scheduled. + */ + GLuint NumReaders; +}; + +/** + * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register + * to the proper hardware temporary. + */ +struct pair_register_translation { + GLuint Allocated:1; + GLuint HwIndex:8; + GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */ + + /** + * Notes the value that is currently contained in each component + * (only used for PROGRAM_TEMPORARY registers). + */ + struct reg_value *Value[4]; +}; + +struct pair_state { + GLcontext *Ctx; + struct gl_program *Program; + const struct radeon_pair_handler *Handler; + GLboolean Error; + GLboolean Debug; + GLboolean Verbose; + void *UserData; + + /** + * Translate Mesa registers to hardware registers + */ + struct pair_register_translation Inputs[FRAG_ATTRIB_MAX]; + struct pair_register_translation Temps[MAX_PROGRAM_TEMPS]; + + /** + * Derived information about program instructions. + */ + struct pair_state_instruction *Instructions; + + struct { + GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */ + } HwTemps[128]; + + /** + * Linked list of instructions that can be scheduled right now, + * based on which ALU/TEX resources they require. + */ + struct pair_state_instruction *ReadyFullALU; + struct pair_state_instruction *ReadyRGB; + struct pair_state_instruction *ReadyAlpha; + struct pair_state_instruction *ReadyTEX; + + /** + * Pool of @ref reg_value structures for fast allocation. + */ + struct reg_value *ValuePool; + GLuint ValuePoolUsed; + struct reg_value_reader *ReaderPool; + GLuint ReaderPoolUsed; +}; + + +static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index) +{ + switch(file) { + case PROGRAM_TEMPORARY: return &s->Temps[index]; + case PROGRAM_INPUT: return &s->Inputs[index]; + default: return 0; + } +} + +static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex) +{ + struct pair_register_translation *t = get_register(s, file, index); + ASSERT(!s->HwTemps[hwindex].RefCount); + ASSERT(!t->Allocated); + s->HwTemps[hwindex].RefCount = t->RefCount; + t->Allocated = 1; + t->HwIndex = hwindex; +} + +static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index) +{ + GLuint hwindex; + + struct pair_register_translation *t = get_register(s, file, index); + if (!t) { + _mesa_problem(s->Ctx, "get_hw_reg: %i[%i]\n", file, index); + return 0; + } + + if (t->Allocated) + return t->HwIndex; + + for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex) + if (!s->HwTemps[hwindex].RefCount) + break; + + if (hwindex >= s->Handler->MaxHwTemps) { + error("Ran out of hardware temporaries"); + return 0; + } + + alloc_hw_reg(s, file, index, hwindex); + return hwindex; +} + + +static void deref_hw_reg(struct pair_state *s, GLuint hwindex) +{ + if (!s->HwTemps[hwindex].RefCount) { + error("Hwindex %i refcount error", hwindex); + return; + } + + s->HwTemps[hwindex].RefCount--; +} + +static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst) +{ + pairinst->NextReady = *list; + *list = pairinst; +} + +/** + * The instruction at the given IP has become ready. Link it into the ready + * instructions. + */ +static void instruction_ready(struct pair_state *s, int ip) +{ + struct pair_state_instruction *pairinst = s->Instructions + ip; + + if (s->Verbose) + _mesa_printf("instruction_ready(%i)\n", ip); + + if (pairinst->IsTex) + add_pairinst_to_list(&s->ReadyTEX, pairinst); + else if (!pairinst->NeedAlpha) + add_pairinst_to_list(&s->ReadyRGB, pairinst); + else if (!pairinst->NeedRGB) + add_pairinst_to_list(&s->ReadyAlpha, pairinst); + else + add_pairinst_to_list(&s->ReadyFullALU, pairinst); +} + + +/** + * Finally rewrite ADD, MOV, MUL as the appropriate native instruction + * and reverse the order of arguments for CMP. + */ +static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) +{ + struct prog_src_register tmp; + + switch(inst->Opcode) { + case OPCODE_ADD: + inst->SrcReg[2] = inst->SrcReg[1]; + inst->SrcReg[1].File = PROGRAM_BUILTIN; + inst->SrcReg[1].Swizzle = SWIZZLE_1111; + inst->SrcReg[1].NegateBase = 0; + inst->SrcReg[1].NegateAbs = 0; + inst->Opcode = OPCODE_MAD; + break; + case OPCODE_CMP: + tmp = inst->SrcReg[2]; + inst->SrcReg[2] = inst->SrcReg[0]; + inst->SrcReg[0] = tmp; + break; + case OPCODE_MOV: + /* AMD say we should use CMP. + * However, when we transform + * KIL -r0; + * into + * CMP tmp, -r0, -r0, 0; + * KIL tmp; + * we get incorrect behaviour on R500 when r0 == 0.0. + * It appears that the R500 KIL hardware treats -0.0 as less + * than zero. + */ + inst->SrcReg[1].File = PROGRAM_BUILTIN; + inst->SrcReg[1].Swizzle = SWIZZLE_1111; + inst->SrcReg[2].File = PROGRAM_BUILTIN; + inst->SrcReg[2].Swizzle = SWIZZLE_0000; + inst->Opcode = OPCODE_MAD; + break; + case OPCODE_MUL: + inst->SrcReg[2].File = PROGRAM_BUILTIN; + inst->SrcReg[2].Swizzle = SWIZZLE_0000; + inst->Opcode = OPCODE_MAD; + break; + default: + /* nothing to do */ + break; + } +} + + +/** + * Classify an instruction according to which ALUs etc. it needs + */ +static void classify_instruction(struct pair_state *s, + struct prog_instruction *inst, struct pair_state_instruction *pairinst) +{ + pairinst->NeedRGB = (inst->DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0; + pairinst->NeedAlpha = (inst->DstReg.WriteMask & WRITEMASK_W) ? 1 : 0; + + switch(inst->Opcode) { + case OPCODE_ADD: + case OPCODE_CMP: + case OPCODE_DDX: + case OPCODE_DDY: + case OPCODE_FRC: + case OPCODE_MAD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MOV: + case OPCODE_MUL: + break; + case OPCODE_COS: + case OPCODE_EX2: + case OPCODE_LG2: + case OPCODE_RCP: + case OPCODE_RSQ: + case OPCODE_SIN: + pairinst->IsTranscendent = 1; + pairinst->NeedAlpha = 1; + break; + case OPCODE_DP4: + pairinst->NeedAlpha = 1; + /* fall through */ + case OPCODE_DP3: + pairinst->NeedRGB = 1; + break; + case OPCODE_KIL: + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: + case OPCODE_END: + pairinst->IsTex = 1; + break; + default: + error("Unknown opcode %d\n", inst->Opcode); + break; + } +} + + +/** + * Count which (input, temporary) register is read and written how often, + * and scan the instruction stream to find dependencies. + */ +static void scan_instructions(struct pair_state *s) +{ + struct prog_instruction *inst; + struct pair_state_instruction *pairinst; + GLuint ip; + + for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0; + inst->Opcode != OPCODE_END; + ++inst, ++pairinst, ++ip) { + final_rewrite(s, inst); + classify_instruction(s, inst, pairinst); + + int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int j; + for(j = 0; j < nsrc; j++) { + struct pair_register_translation *t = + get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index); + if (!t) + continue; + + t->RefCount++; + + if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { + int i; + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i); + if (swz >= 4) + continue; /* constant or NIL swizzle */ + if (!t->Value[swz]) + continue; /* this is an undefined read */ + + /* Do not add a dependency if this instruction + * also rewrites the value. The code below adds + * a dependency for the DstReg, which is a superset + * of the SrcReg dependency. */ + if (inst->DstReg.File == PROGRAM_TEMPORARY && + inst->DstReg.Index == inst->SrcReg[j].Index && + GET_BIT(inst->DstReg.WriteMask, swz)) + continue; + + struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++]; + pairinst->NumDependencies++; + t->Value[swz]->NumReaders++; + r->IP = ip; + r->Next = t->Value[swz]->Readers; + t->Value[swz]->Readers = r; + } + } + } + + int ndst = _mesa_num_inst_dst_regs(inst->Opcode); + if (ndst) { + struct pair_register_translation *t = + get_register(s, inst->DstReg.File, inst->DstReg.Index); + if (t) { + t->RefCount++; + + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + int j; + for(j = 0; j < 4; ++j) { + if (!GET_BIT(inst->DstReg.WriteMask, j)) + continue; + + struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++]; + v->IP = ip; + if (t->Value[j]) { + pairinst->NumDependencies++; + t->Value[j]->Next = v; + } + t->Value[j] = v; + pairinst->Values[j] = v; + } + } + } + } + + if (s->Verbose) + _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies); + + if (!pairinst->NumDependencies) + instruction_ready(s, ip); + } + + /* Clear the PROGRAM_TEMPORARY state */ + int i, j; + for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) { + for(j = 0; j < 4; ++j) + s->Temps[i].Value[j] = 0; + } +} + + +/** + * Reserve hardware temporary registers for the program inputs. + * + * @note This allocation is performed explicitly, because the order of inputs + * is determined by the RS hardware. + */ +static void allocate_input_registers(struct pair_state *s) +{ + GLuint InputsRead = s->Program->InputsRead; + int i; + GLuint hwindex = 0; + + /* Primary colour */ + if (InputsRead & FRAG_BIT_COL0) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++); + InputsRead &= ~FRAG_BIT_COL0; + + /* Secondary color */ + if (InputsRead & FRAG_BIT_COL1) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++); + InputsRead &= ~FRAG_BIT_COL1; + + /* Texcoords */ + for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++); + } + InputsRead &= ~FRAG_BITS_TEX_ANY; + + /* Fogcoords treated as a texcoord */ + if (InputsRead & FRAG_BIT_FOGC) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++); + InputsRead &= ~FRAG_BIT_FOGC; + + /* fragment position treated as a texcoord */ + if (InputsRead & FRAG_BIT_WPOS) + alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++); + InputsRead &= ~FRAG_BIT_WPOS; + + /* Anything else */ + if (InputsRead) + error("Don't know how to handle inputs 0x%x\n", InputsRead); +} + + +static void decrement_dependencies(struct pair_state *s, int ip) +{ + struct pair_state_instruction *pairinst = s->Instructions + ip; + ASSERT(pairinst->NumDependencies > 0); + if (!--pairinst->NumDependencies) + instruction_ready(s, ip); +} + +/** + * Update the dependency tracking state based on what the instruction + * at the given IP does. + */ +static void commit_instruction(struct pair_state *s, int ip) +{ + struct prog_instruction *inst = s->Program->Instructions + ip; + struct pair_state_instruction *pairinst = s->Instructions + ip; + + if (s->Verbose) + _mesa_printf("commit_instruction(%i)\n", ip); + + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + struct pair_register_translation *t = &s->Temps[inst->DstReg.Index]; + deref_hw_reg(s, t->HwIndex); + + int i; + for(i = 0; i < 4; ++i) { + if (!GET_BIT(inst->DstReg.WriteMask, i)) + continue; + + t->Value[i] = pairinst->Values[i]; + if (t->Value[i]->NumReaders) { + struct reg_value_reader *r; + for(r = pairinst->Values[i]->Readers; r; r = r->Next) + decrement_dependencies(s, r->IP); + } else if (t->Value[i]->Next) { + /* This happens when the only reader writes + * the register at the same time */ + decrement_dependencies(s, t->Value[i]->Next->IP); + } + } + } + + int nsrc = _mesa_num_inst_src_regs(inst->Opcode); + int i; + for(i = 0; i < nsrc; i++) { + struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index); + if (!t) + continue; + + deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index)); + + if (inst->SrcReg[i].File != PROGRAM_TEMPORARY) + continue; + + int j; + for(j = 0; j < 4; ++j) { + GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); + if (swz >= 4) + continue; + if (!t->Value[swz]) + continue; + + /* Do not free a dependency if this instruction + * also rewrites the value. See scan_instructions. */ + if (inst->DstReg.File == PROGRAM_TEMPORARY && + inst->DstReg.Index == inst->SrcReg[i].Index && + GET_BIT(inst->DstReg.WriteMask, swz)) + continue; + + if (!--t->Value[swz]->NumReaders) { + if (t->Value[swz]->Next) + decrement_dependencies(s, t->Value[swz]->Next->IP); + } + } + } +} + + +/** + * Emit all ready texture instructions in a single block. + * + * Emit as a single block to (hopefully) sample many textures in parallel, + * and to avoid hardware indirections on R300. + * + * In R500, we don't really know when the result of a texture instruction + * arrives. So allocate all destinations first, to make sure they do not + * arrive early and overwrite a texture coordinate we're going to use later + * in the block. + */ +static void emit_all_tex(struct pair_state *s) +{ + struct pair_state_instruction *readytex; + struct pair_state_instruction *pairinst; + + ASSERT(s->ReadyTEX); + + // Don't let the ready list change under us! + readytex = s->ReadyTEX; + s->ReadyTEX = 0; + + // Allocate destination hardware registers in one block to avoid conflicts. + for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { + int ip = pairinst - s->Instructions; + struct prog_instruction *inst = s->Program->Instructions + ip; + if (inst->Opcode != OPCODE_KIL) + get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); + } + + if (s->Debug) + _mesa_printf(" BEGIN_TEX\n"); + + if (s->Handler->BeginTexBlock) + s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData); + + for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { + int ip = pairinst - s->Instructions; + struct prog_instruction *inst = s->Program->Instructions + ip; + commit_instruction(s, ip); + + if (inst->Opcode != OPCODE_KIL) + inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); + inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index); + + if (s->Debug) { + _mesa_printf(" "); + _mesa_print_instruction(inst); + } + s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst); + } + + if (s->Debug) + _mesa_printf(" END_TEX\n"); +} + + +static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair, + struct prog_src_register src, GLboolean rgb, GLboolean alpha) +{ + int candidate = -1; + int candidate_quality = -1; + int i; + + if (!rgb && !alpha) + return 0; + + GLuint constant; + GLuint index; + + if (src.File == PROGRAM_TEMPORARY || src.File == PROGRAM_INPUT) { + constant = 0; + index = get_hw_reg(s, src.File, src.Index); + } else { + constant = 1; + s->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index); + } + + for(i = 0; i < 3; ++i) { + int q = 0; + if (rgb) { + if (pair->RGB.Src[i].Used) { + if (pair->RGB.Src[i].Constant != constant || + pair->RGB.Src[i].Index != index) + continue; + q++; + } + } + if (alpha) { + if (pair->Alpha.Src[i].Used) { + if (pair->Alpha.Src[i].Constant != constant || + pair->Alpha.Src[i].Index != index) + continue; + q++; + } + } + if (q > candidate_quality) { + candidate_quality = q; + candidate = i; + } + } + + if (candidate >= 0) { + if (rgb) { + pair->RGB.Src[candidate].Used = 1; + pair->RGB.Src[candidate].Constant = constant; + pair->RGB.Src[candidate].Index = index; + } + if (alpha) { + pair->Alpha.Src[candidate].Used = 1; + pair->Alpha.Src[candidate].Constant = constant; + pair->Alpha.Src[candidate].Index = index; + } + } + + return candidate; +} + +/** + * Fill the given ALU instruction's opcodes and source operands into the given pair, + * if possible. + */ +static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) +{ + struct pair_state_instruction *pairinst = s->Instructions + ip; + struct prog_instruction *inst = s->Program->Instructions + ip; + + ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP); + ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP); + + if (pairinst->NeedRGB) { + if (pairinst->IsTranscendent) + pair->RGB.Opcode = OPCODE_REPL_ALPHA; + else + pair->RGB.Opcode = inst->Opcode; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + pair->RGB.Saturate = 1; + } + if (pairinst->NeedAlpha) { + pair->Alpha.Opcode = inst->Opcode; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + pair->Alpha.Saturate = 1; + } + + int nargs = _mesa_num_inst_src_regs(inst->Opcode); + int i; + + /* Special case for DDX/DDY (MDH/MDV). */ + if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) { + if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used) + return GL_FALSE; + else + nargs++; + } + + for(i = 0; i < nargs; ++i) { + int source; + if (pairinst->NeedRGB && !pairinst->IsTranscendent) { + GLboolean srcrgb = GL_FALSE; + GLboolean srcalpha = GL_FALSE; + GLuint negatebase = 0; + int j; + for(j = 0; j < 3; ++j) { + GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); + if (swz < 3) + srcrgb = GL_TRUE; + else if (swz < 4) + srcalpha = GL_TRUE; + if (swz != SWIZZLE_NIL && GET_BIT(inst->SrcReg[i].NegateBase, j)) + negatebase = 1; + } + source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); + if (source < 0) + return GL_FALSE; + pair->RGB.Arg[i].Source = source; + pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; + pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->RGB.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs; + } + if (pairinst->NeedAlpha) { + GLboolean srcrgb = GL_FALSE; + GLboolean srcalpha = GL_FALSE; + GLuint negatebase = GET_BIT(inst->SrcReg[i].NegateBase, pairinst->IsTranscendent ? 0 : 3); + GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3); + if (swz < 3) + srcrgb = GL_TRUE; + else if (swz < 4) + srcalpha = GL_TRUE; + source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); + if (source < 0) + return GL_FALSE; + pair->Alpha.Arg[i].Source = source; + pair->Alpha.Arg[i].Swizzle = swz; + pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->Alpha.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs; + } + } + + return GL_TRUE; +} + + +/** + * Fill in the destination register information. + * + * This is split from filling in source registers because we want + * to avoid allocating hardware temporaries for destinations until + * we are absolutely certain that we're going to emit a certain + * instruction pairing. + */ +static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip) +{ + struct pair_state_instruction *pairinst = s->Instructions + ip; + struct prog_instruction *inst = s->Program->Instructions + ip; + + if (inst->DstReg.File == PROGRAM_OUTPUT) { + if (inst->DstReg.Index == FRAG_RESULT_COLOR) { + pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; + pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } else if (inst->DstReg.Index == FRAG_RESULT_DEPTH) { + pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } + } else { + GLuint hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); + if (pairinst->NeedRGB) { + pair->RGB.DestIndex = hwindex; + pair->RGB.WriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; + } + if (pairinst->NeedAlpha) { + pair->Alpha.DestIndex = hwindex; + pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } + } +} + + +/** + * Find a good ALU instruction or pair of ALU instruction and emit it. + * + * Prefer emitting full ALU instructions, so that when we reach a point + * where no full ALU instruction can be emitted, we have more candidates + * for RGB/Alpha pairing. + */ +static void emit_alu(struct pair_state *s) +{ + struct radeon_pair_instruction pair; + + if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { + int ip; + if (s->ReadyFullALU) { + ip = s->ReadyFullALU - s->Instructions; + s->ReadyFullALU = s->ReadyFullALU->NextReady; + } else if (s->ReadyRGB) { + ip = s->ReadyRGB - s->Instructions; + s->ReadyRGB = s->ReadyRGB->NextReady; + } else { + ip = s->ReadyAlpha - s->Instructions; + s->ReadyAlpha = s->ReadyAlpha->NextReady; + } + + _mesa_bzero(&pair, sizeof(pair)); + fill_instruction_into_pair(s, &pair, ip); + fill_dest_into_pair(s, &pair, ip); + commit_instruction(s, ip); + } else { + struct pair_state_instruction **prgb; + struct pair_state_instruction **palpha; + + /* Some pairings might fail because they require too + * many source slots; try all possible pairings if necessary */ + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { + int rgbip = *prgb - s->Instructions; + int alphaip = *palpha - s->Instructions; + _mesa_bzero(&pair, sizeof(pair)); + fill_instruction_into_pair(s, &pair, rgbip); + if (!fill_instruction_into_pair(s, &pair, alphaip)) + continue; + *prgb = (*prgb)->NextReady; + *palpha = (*palpha)->NextReady; + fill_dest_into_pair(s, &pair, rgbip); + fill_dest_into_pair(s, &pair, alphaip); + commit_instruction(s, rgbip); + commit_instruction(s, alphaip); + goto success; + } + } + + /* No success in pairing; just take the first RGB instruction */ + int ip = s->ReadyRGB - s->Instructions; + s->ReadyRGB = s->ReadyRGB->NextReady; + _mesa_bzero(&pair, sizeof(pair)); + fill_instruction_into_pair(s, &pair, ip); + fill_dest_into_pair(s, &pair, ip); + commit_instruction(s, ip); + success: ; + } + + if (s->Debug) + radeonPrintPairInstruction(&pair); + + s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair); +} + + +GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, + const struct radeon_pair_handler* handler, void *userdata) +{ + struct pair_state s; + + _mesa_bzero(&s, sizeof(s)); + s.Ctx = ctx; + s.Program = program; + s.Handler = handler; + s.UserData = userdata; + s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE; + s.Verbose = GL_FALSE && s.Debug; + + s.Instructions = (struct pair_state_instruction*)_mesa_calloc( + sizeof(struct pair_state_instruction)*s.Program->NumInstructions); + s.ValuePool = (struct reg_value*)_mesa_calloc(sizeof(struct reg_value)*s.Program->NumInstructions*4); + s.ReaderPool = (struct reg_value_reader*)_mesa_calloc( + sizeof(struct reg_value_reader)*s.Program->NumInstructions*12); + + if (s.Debug) + _mesa_printf("Emit paired program\n"); + + scan_instructions(&s); + allocate_input_registers(&s); + + while(!s.Error && + (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { + if (s.ReadyTEX) + emit_all_tex(&s); + + while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha) + emit_alu(&s); + } + + if (s.Debug) + _mesa_printf(" END\n"); + + _mesa_free(s.Instructions); + _mesa_free(s.ValuePool); + _mesa_free(s.ReaderPool); + + return !s.Error; +} + + +static void print_pair_src(int i, struct radeon_pair_instruction_source* src) +{ + _mesa_printf(" Src%i = %s[%i]", i, src->Constant ? "CNST" : "TEMP", src->Index); +} + +static const char* opcode_string(GLuint opcode) +{ + if (opcode == OPCODE_REPL_ALPHA) + return "SOP"; + else + return _mesa_opcode_string(opcode); +} + +static int num_pairinst_args(GLuint opcode) +{ + if (opcode == OPCODE_REPL_ALPHA) + return 0; + else + return _mesa_num_inst_src_regs(opcode); +} + +static char swizzle_char(GLuint swz) +{ + switch(swz) { + case SWIZZLE_X: return 'x'; + case SWIZZLE_Y: return 'y'; + case SWIZZLE_Z: return 'z'; + case SWIZZLE_W: return 'w'; + case SWIZZLE_ZERO: return '0'; + case SWIZZLE_ONE: return '1'; + case SWIZZLE_NIL: return '_'; + default: return '?'; + } +} + +void radeonPrintPairInstruction(struct radeon_pair_instruction *inst) +{ + int nargs; + int i; + + _mesa_printf(" RGB: "); + for(i = 0; i < 3; ++i) { + if (inst->RGB.Src[i].Used) + print_pair_src(i, inst->RGB.Src + i); + } + _mesa_printf("\n"); + _mesa_printf(" Alpha:"); + for(i = 0; i < 3; ++i) { + if (inst->Alpha.Src[i].Used) + print_pair_src(i, inst->Alpha.Src + i); + } + _mesa_printf("\n"); + + _mesa_printf(" %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : ""); + if (inst->RGB.WriteMask) + _mesa_printf(" TEMP[%i].%s%s%s", inst->RGB.DestIndex, + (inst->RGB.WriteMask & 1) ? "x" : "", + (inst->RGB.WriteMask & 2) ? "y" : "", + (inst->RGB.WriteMask & 4) ? "z" : ""); + if (inst->RGB.OutputWriteMask) + _mesa_printf(" COLOR.%s%s%s", + (inst->RGB.OutputWriteMask & 1) ? "x" : "", + (inst->RGB.OutputWriteMask & 2) ? "y" : "", + (inst->RGB.OutputWriteMask & 4) ? "z" : ""); + nargs = num_pairinst_args(inst->RGB.Opcode); + for(i = 0; i < nargs; ++i) { + const char* abs = inst->RGB.Arg[i].Abs ? "|" : ""; + const char* neg = inst->RGB.Arg[i].Negate ? "-" : ""; + _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source, + swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)), + swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)), + swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)), + abs); + } + _mesa_printf("\n"); + + _mesa_printf(" %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : ""); + if (inst->Alpha.WriteMask) + _mesa_printf(" TEMP[%i].w", inst->Alpha.DestIndex); + if (inst->Alpha.OutputWriteMask) + _mesa_printf(" COLOR.w"); + if (inst->Alpha.DepthWriteMask) + _mesa_printf(" DEPTH.w"); + nargs = num_pairinst_args(inst->Alpha.Opcode); + for(i = 0; i < nargs; ++i) { + const char* abs = inst->Alpha.Arg[i].Abs ? "|" : ""; + const char* neg = inst->Alpha.Arg[i].Negate ? "-" : ""; + _mesa_printf(", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source, + swizzle_char(inst->Alpha.Arg[i].Swizzle), abs); + } + _mesa_printf("\n"); +} diff --git a/src/mesa/drivers/dri/r600/radeon_program_pair.h b/src/mesa/drivers/dri/r600/radeon_program_pair.h new file mode 100644 index 00000000000..4624a246298 --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_program_pair.h @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_PAIR_H_ +#define __RADEON_PROGRAM_PAIR_H_ + +#include "radeon_program.h" + + +/** + * Represents a paired instruction, as found in R300 and R500 + * fragment programs. + */ +struct radeon_pair_instruction_source { + GLuint Index:8; + GLuint Constant:1; + GLuint Used:1; +}; + +struct radeon_pair_instruction_rgb { + GLuint Opcode:8; + GLuint DestIndex:8; + GLuint WriteMask:3; + GLuint OutputWriteMask:3; + GLuint Saturate:1; + + struct radeon_pair_instruction_source Src[3]; + + struct { + GLuint Source:2; + GLuint Swizzle:9; + GLuint Abs:1; + GLuint Negate:1; + } Arg[3]; +}; + +struct radeon_pair_instruction_alpha { + GLuint Opcode:8; + GLuint DestIndex:8; + GLuint WriteMask:1; + GLuint OutputWriteMask:1; + GLuint DepthWriteMask:1; + GLuint Saturate:1; + + struct radeon_pair_instruction_source Src[3]; + + struct { + GLuint Source:2; + GLuint Swizzle:3; + GLuint Abs:1; + GLuint Negate:1; + } Arg[3]; +}; + +struct radeon_pair_instruction { + struct radeon_pair_instruction_rgb RGB; + struct radeon_pair_instruction_alpha Alpha; +}; + + +/** + * + */ +struct radeon_pair_handler { + /** + * Fill in the proper hardware index for the given constant register. + * + * @return GL_FALSE on error. + */ + GLboolean (*EmitConst)(void*, GLuint file, GLuint index, GLuint *hwindex); + + /** + * Write a paired instruction to the hardware. + * + * @return GL_FALSE on error. + */ + GLboolean (*EmitPaired)(void*, struct radeon_pair_instruction*); + + /** + * Write a texture instruction to the hardware. + * Register indices have already been rewritten to the allocated + * hardware register numbers. + * + * @return GL_FALSE on error. + */ + GLboolean (*EmitTex)(void*, struct prog_instruction*); + + /** + * Called before a block of contiguous, independent texture + * instructions is emitted. + */ + GLboolean (*BeginTexBlock)(void*); + + GLuint MaxHwTemps; +}; + +GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, + const struct radeon_pair_handler*, void *userdata); + +void radeonPrintPairInstruction(struct radeon_pair_instruction *inst); + +#endif /* __RADEON_PROGRAM_PAIR_H_ */ diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index f6bd1eb83fb..26edf3d544c 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -255,6 +255,124 @@ #define PCI_CHIP_RS740_796E 0x796E #define PCI_CHIP_RS740_796F 0x796F +#define PCI_CHIP_R600_9400 0x9400 +#define PCI_CHIP_R600_9401 0x9401 +#define PCI_CHIP_R600_9402 0x9402 +#define PCI_CHIP_R600_9403 0x9403 +#define PCI_CHIP_R600_9405 0x9405 +#define PCI_CHIP_R600_940A 0x940A +#define PCI_CHIP_R600_940B 0x940B +#define PCI_CHIP_R600_940F 0x940F + +#define PCI_CHIP_RV610_94C0 0x94C0 +#define PCI_CHIP_RV610_94C1 0x94C1 +#define PCI_CHIP_RV610_94C3 0x94C3 +#define PCI_CHIP_RV610_94C4 0x94C4 +#define PCI_CHIP_RV610_94C5 0x94C5 +#define PCI_CHIP_RV610_94C6 0x94C6 +#define PCI_CHIP_RV610_94C7 0x94C7 +#define PCI_CHIP_RV610_94C8 0x94C8 +#define PCI_CHIP_RV610_94C9 0x94C9 +#define PCI_CHIP_RV610_94CB 0x94CB +#define PCI_CHIP_RV610_94CC 0x94CC +#define PCI_CHIP_RV610_94CD 0x94CD + +#define PCI_CHIP_RV630_9580 0x9580 +#define PCI_CHIP_RV630_9581 0x9581 +#define PCI_CHIP_RV630_9583 0x9583 +#define PCI_CHIP_RV630_9586 0x9586 +#define PCI_CHIP_RV630_9587 0x9587 +#define PCI_CHIP_RV630_9588 0x9588 +#define PCI_CHIP_RV630_9589 0x9589 +#define PCI_CHIP_RV630_958A 0x958A +#define PCI_CHIP_RV630_958B 0x958B +#define PCI_CHIP_RV630_958C 0x958C +#define PCI_CHIP_RV630_958D 0x958D +#define PCI_CHIP_RV630_958E 0x958E +#define PCI_CHIP_RV630_958F 0x958F + +#define PCI_CHIP_RV670_9500 0x9500 +#define PCI_CHIP_RV670_9501 0x9501 +#define PCI_CHIP_RV670_9504 0x9504 +#define PCI_CHIP_RV670_9505 0x9505 +#define PCI_CHIP_RV670_9506 0x9506 +#define PCI_CHIP_RV670_9507 0x9507 +#define PCI_CHIP_RV670_9508 0x9508 +#define PCI_CHIP_RV670_9509 0x9509 +#define PCI_CHIP_RV670_950F 0x950F +#define PCI_CHIP_RV670_9511 0x9511 +#define PCI_CHIP_RV670_9515 0x9515 +#define PCI_CHIP_RV670_9517 0x9517 +#define PCI_CHIP_RV670_9519 0x9519 + +#define PCI_CHIP_RV620_95C0 0x95C0 +#define PCI_CHIP_RV620_95C2 0x95C2 +#define PCI_CHIP_RV620_95C4 0x95C4 +#define PCI_CHIP_RV620_95C5 0x95C5 +#define PCI_CHIP_RV620_95C6 0x95C6 +#define PCI_CHIP_RV620_95C7 0x95C7 +#define PCI_CHIP_RV620_95C9 0x95C9 +#define PCI_CHIP_RV620_95CC 0x95CC +#define PCI_CHIP_RV620_95CD 0x95CD +#define PCI_CHIP_RV620_95CE 0x95CE +#define PCI_CHIP_RV620_95CF 0x95CF + +#define PCI_CHIP_RV635_9590 0x9590 +#define PCI_CHIP_RV635_9591 0x9591 +#define PCI_CHIP_RV635_9593 0x9593 +#define PCI_CHIP_RV635_9595 0x9595 +#define PCI_CHIP_RV635_9596 0x9596 +#define PCI_CHIP_RV635_9597 0x9597 +#define PCI_CHIP_RV635_9598 0x9598 +#define PCI_CHIP_RV635_9599 0x9599 +#define PCI_CHIP_RV635_959B 0x959B + +#define PCI_CHIP_RS780_9611 0x9611 +#define PCI_CHIP_RS780_9612 0x9612 +#define PCI_CHIP_RS780_9613 0x9613 +#define PCI_CHIP_RS780_9614 0x9614 +#define PCI_CHIP_RS780_9615 0x9615 +#define PCI_CHIP_RS780_9616 0x9616 + +#define PCI_CHIP_RV770_9440 0x9440 +#define PCI_CHIP_RV770_9441 0x9441 +#define PCI_CHIP_RV770_9442 0x9442 +#define PCI_CHIP_RV770_9444 0x9444 +#define PCI_CHIP_RV770_9446 0x9446 +#define PCI_CHIP_RV770_944A 0x944A +#define PCI_CHIP_RV770_944B 0x944B +#define PCI_CHIP_RV770_944C 0x944C +#define PCI_CHIP_RV770_944E 0x944E +#define PCI_CHIP_RV770_9450 0x9450 +#define PCI_CHIP_RV770_9452 0x9452 +#define PCI_CHIP_RV770_9456 0x9456 +#define PCI_CHIP_RV770_945A 0x945A +#define PCI_CHIP_RV770_945B 0x945B +#define PCI_CHIP_RV790_9460 0x9460 +#define PCI_CHIP_RV790_9462 0x9462 +#define PCI_CHIP_RV770_946A 0x946A +#define PCI_CHIP_RV770_946B 0x946B +#define PCI_CHIP_RV770_947A 0x947A +#define PCI_CHIP_RV770_947B 0x947B + +#define PCI_CHIP_RV730_9487 0x9487 +#define PCI_CHIP_RV730_9489 0x9489 +#define PCI_CHIP_RV730_948F 0x948F +#define PCI_CHIP_RV730_9490 0x9490 +#define PCI_CHIP_RV730_9491 0x9491 +#define PCI_CHIP_RV730_9498 0x9498 +#define PCI_CHIP_RV730_949C 0x949C +#define PCI_CHIP_RV730_949E 0x949E +#define PCI_CHIP_RV730_949F 0x949F + +#define PCI_CHIP_RV710_9540 0x9540 +#define PCI_CHIP_RV710_9541 0x9541 +#define PCI_CHIP_RV710_9542 0x9542 +#define PCI_CHIP_RV710_954E 0x954E +#define PCI_CHIP_RV710_954F 0x954F +#define PCI_CHIP_RV710_9552 0x9552 +#define PCI_CHIP_RV710_9553 0x9553 +#define PCI_CHIP_RV710_9555 0x9555 enum { CHIP_FAMILY_R100, @@ -282,6 +400,16 @@ enum { CHIP_FAMILY_R580, CHIP_FAMILY_RV560, CHIP_FAMILY_RV570, + CHIP_FAMILY_R600, + CHIP_FAMILY_RV610, + CHIP_FAMILY_RV630, + CHIP_FAMILY_RV670, + CHIP_FAMILY_RV620, + CHIP_FAMILY_RV635, + CHIP_FAMILY_RS780, + CHIP_FAMILY_RV770, + CHIP_FAMILY_RV730, + CHIP_FAMILY_RV710, CHIP_FAMILY_LAST }; @@ -289,6 +417,7 @@ enum { #define RADEON_CLASS_R100 (0 << 0) #define RADEON_CLASS_R200 (1 << 0) #define RADEON_CLASS_R300 (2 << 0) +#define RADEON_CLASS_R600 (3 << 0) #define RADEON_CLASS_MASK (3 << 0) #define RADEON_CHIPSET_TCL (1 << 2) /* tcl support - any radeon */ diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index ba74c97f2cb..f8a29fdba08 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -60,7 +60,9 @@ static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name) switch (name) { case GL_VENDOR: - if (IS_R300_CLASS(radeon->radeonScreen)) + if (IS_R600_CLASS(radeon->radeonScreen)) + return (GLubyte *) "Advanced Micro Devices, Inc."; + else if (IS_R300_CLASS(radeon->radeonScreen)) return (GLubyte *) "DRI R300 Project"; else return (GLubyte *) "Tungsten Graphics, Inc."; @@ -72,7 +74,9 @@ static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name) radeon->radeonScreen->AGPMode; const char* chipname; - if (IS_R300_CLASS(radeon->radeonScreen)) + if (IS_R600_CLASS(radeon->radeonScreen)) + chipname = "R600"; + else if (IS_R300_CLASS(radeon->radeonScreen)) chipname = "R300"; else if (IS_R200_CLASS(radeon->radeonScreen)) chipname = "R200"; @@ -82,7 +86,9 @@ static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name) offset = driGetRendererString(buffer, chipname, DRIVER_DATE, agp_mode); - if (IS_R300_CLASS(radeon->radeonScreen)) { + if (IS_R600_CLASS(radeon->radeonScreen)) { + sprintf(&buffer[offset], " TCL"); + } else if (IS_R300_CLASS(radeon->radeonScreen)) { sprintf(&buffer[offset], " %sTCL", (radeon->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) ? "" : "NO-"); diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 49c7eae6d2c..ef1f84634b3 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -59,6 +59,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" #include "r300_fragprog.h" #include "r300_tex.h" +#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) +#include "r600_context.h" +#include "r600_fragprog.h" +#include "r600_tex.h" #endif #include "utils.h" @@ -144,7 +148,7 @@ extern const struct dri_extension NV_vp_extension[]; extern const struct dri_extension ATI_fs_extension[]; extern const struct dri_extension point_extensions[]; -#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) +#elif RADEON_COMMON && (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600)) /* TODO: integrate these into xmlpool.h! */ #define DRI_CONF_MAX_TEXTURE_IMAGE_UNITS(def,min,max) \ @@ -393,6 +397,19 @@ static const __DRItexBufferExtension r300TexBufferExtension = { }; #endif +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) +static const __DRItexOffsetExtension r300texOffsetExtension = { + { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION }, + r300SetTexOffset, +}; + +static const __DRItexBufferExtension r300TexBufferExtension = { + { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION }, + r300SetTexBuffer, + r300SetTexBuffer2, +}; +#endif + static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) { screen->chip_flags = 0; @@ -732,6 +749,155 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) screen->chip_flags = RADEON_CHIPSET_TCL; break; + case PCI_CHIP_R600_9400: + case PCI_CHIP_R600_9401: + case PCI_CHIP_R600_9402: + case PCI_CHIP_R600_9403: + case PCI_CHIP_R600_9405: + case PCI_CHIP_R600_940A: + case PCI_CHIP_R600_940B: + case PCI_CHIP_R600_940F: + screen->chip_family = CHIP_FAMILY_R600; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RV610_94C0: + case PCI_CHIP_RV610_94C1: + case PCI_CHIP_RV610_94C3: + case PCI_CHIP_RV610_94C4: + case PCI_CHIP_RV610_94C5: + case PCI_CHIP_RV610_94C6: + case PCI_CHIP_RV610_94C7: + case PCI_CHIP_RV610_94C8: + case PCI_CHIP_RV610_94C9: + case PCI_CHIP_RV610_94CB: + case PCI_CHIP_RV610_94CC: + case PCI_CHIP_RV610_94CD: + screen->chip_family = CHIP_FAMILY_RV610; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RV630_9580: + case PCI_CHIP_RV630_9581: + case PCI_CHIP_RV630_9583: + case PCI_CHIP_RV630_9586: + case PCI_CHIP_RV630_9587: + case PCI_CHIP_RV630_9588: + case PCI_CHIP_RV630_9589: + case PCI_CHIP_RV630_958A: + case PCI_CHIP_RV630_958B: + case PCI_CHIP_RV630_958C: + case PCI_CHIP_RV630_958D: + case PCI_CHIP_RV630_958E: + case PCI_CHIP_RV630_958F: + screen->chip_family = CHIP_FAMILY_RV630; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RV670_9500: + case PCI_CHIP_RV670_9501: + case PCI_CHIP_RV670_9504: + case PCI_CHIP_RV670_9505: + case PCI_CHIP_RV670_9506: + case PCI_CHIP_RV670_9507: + case PCI_CHIP_RV670_9508: + case PCI_CHIP_RV670_9509: + case PCI_CHIP_RV670_950F: + case PCI_CHIP_RV670_9511: + case PCI_CHIP_RV670_9515: + case PCI_CHIP_RV670_9517: + case PCI_CHIP_RV670_9519: + screen->chip_family = CHIP_FAMILY_RV670; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RV620_95C0: + case PCI_CHIP_RV620_95C2: + case PCI_CHIP_RV620_95C4: + case PCI_CHIP_RV620_95C5: + case PCI_CHIP_RV620_95C6: + case PCI_CHIP_RV620_95C7: + case PCI_CHIP_RV620_95C9: + case PCI_CHIP_RV620_95CC: + case PCI_CHIP_RV620_95CD: + case PCI_CHIP_RV620_95CE: + case PCI_CHIP_RV620_95CF: + screen->chip_family = CHIP_FAMILY_RV620; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RV635_9590: + case PCI_CHIP_RV635_9591: + case PCI_CHIP_RV635_9593: + case PCI_CHIP_RV635_9595: + case PCI_CHIP_RV635_9596: + case PCI_CHIP_RV635_9597: + case PCI_CHIP_RV635_9598: + case PCI_CHIP_RV635_9599: + case PCI_CHIP_RV635_959B: + screen->chip_family = CHIP_FAMILY_RV635; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RS780_9611: + case PCI_CHIP_RS780_9612: + case PCI_CHIP_RS780_9613: + case PCI_CHIP_RS780_9614: + case PCI_CHIP_RS780_9615: + case PCI_CHIP_RS780_9616: + screen->chip_family = CHIP_FAMILY_RS780; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RV770_9440: + case PCI_CHIP_RV770_9441: + case PCI_CHIP_RV770_9442: + case PCI_CHIP_RV770_9444: + case PCI_CHIP_RV770_9446: + case PCI_CHIP_RV770_944A: + case PCI_CHIP_RV770_944B: + case PCI_CHIP_RV770_944C: + case PCI_CHIP_RV770_944E: + case PCI_CHIP_RV770_9450: + case PCI_CHIP_RV770_9452: + case PCI_CHIP_RV770_9456: + case PCI_CHIP_RV770_945A: + case PCI_CHIP_RV770_945B: + case PCI_CHIP_RV790_9460: + case PCI_CHIP_RV790_9462: + case PCI_CHIP_RV770_946A: + case PCI_CHIP_RV770_946B: + case PCI_CHIP_RV770_947A: + case PCI_CHIP_RV770_947B: + screen->chip_family = CHIP_FAMILY_RV770; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RV730_9487: + case PCI_CHIP_RV730_9489: + case PCI_CHIP_RV730_948F: + case PCI_CHIP_RV730_9490: + case PCI_CHIP_RV730_9491: + case PCI_CHIP_RV730_9498: + case PCI_CHIP_RV730_949C: + case PCI_CHIP_RV730_949E: + case PCI_CHIP_RV730_949F: + screen->chip_family = CHIP_FAMILY_RV730; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_RV710_9540: + case PCI_CHIP_RV710_9541: + case PCI_CHIP_RV710_9542: + case PCI_CHIP_RV710_954E: + case PCI_CHIP_RV710_954F: + case PCI_CHIP_RV710_9552: + case PCI_CHIP_RV710_9553: + case PCI_CHIP_RV710_9555: + screen->chip_family = CHIP_FAMILY_RV710; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + default: fprintf(stderr, "unknown chip id 0x%x, can't guess.\n", device_id); @@ -901,14 +1067,16 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) } if (getenv("R300_NO_TCL")) - screen->chip_flags &= ~RADEON_CHIPSET_TCL; + screen->chip_flags &= ~RADEON_CHIPSET_TCL; if (screen->chip_family <= CHIP_FAMILY_RS200) - screen->chip_flags |= RADEON_CLASS_R100; + screen->chip_flags |= RADEON_CLASS_R100; else if (screen->chip_family <= CHIP_FAMILY_RV280) - screen->chip_flags |= RADEON_CLASS_R200; + screen->chip_flags |= RADEON_CLASS_R200; + else if (screen->chip_family <= CHIP_FAMILY_RV570) + screen->chip_flags |= RADEON_CLASS_R300; else - screen->chip_flags |= RADEON_CLASS_R300; + screen->chip_flags |= RADEON_CLASS_R600; screen->cpp = dri_priv->bpp / 8; screen->AGPMode = dri_priv->AGPMode; @@ -926,7 +1094,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->fbLocation = (temp & 0xffff) << 16; } - if (screen->chip_family >= CHIP_FAMILY_R300) { + if (IS_R300_CLASS(screen)) { ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp); if (ret) { fprintf(stderr, "Unable to get num_pipes, need newer drm\n"); @@ -1031,6 +1199,10 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) screen->extensions[i++] = &r300texOffsetExtension.base; #endif + +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) + screen->extensions[i++] = &r300texOffsetExtension.base; +#endif } screen->extensions[i++] = NULL; @@ -1095,7 +1267,19 @@ radeonCreateScreen2(__DRIscreenPrivate *sPriv) if (ret == -1) return NULL; - if (screen->chip_family >= CHIP_FAMILY_R300) { + if (getenv("R300_NO_TCL")) + screen->chip_flags &= ~RADEON_CHIPSET_TCL; + + if (screen->chip_family <= CHIP_FAMILY_RS200) + screen->chip_flags |= RADEON_CLASS_R100; + else if (screen->chip_family <= CHIP_FAMILY_RV280) + screen->chip_flags |= RADEON_CLASS_R200; + else if (screen->chip_family <= CHIP_FAMILY_RV570) + screen->chip_flags |= RADEON_CLASS_R300; + else + screen->chip_flags |= RADEON_CLASS_R600; + + if (IS_R300_CLASS(screen)) { ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp); if (ret) { fprintf(stderr, "Unable to get num_pipes, need newer drm\n"); @@ -1124,16 +1308,6 @@ radeonCreateScreen2(__DRIscreenPrivate *sPriv) } } - if (screen->chip_family <= CHIP_FAMILY_RS200) - screen->chip_flags |= RADEON_CLASS_R100; - else if (screen->chip_family <= CHIP_FAMILY_RV280) - screen->chip_flags |= RADEON_CLASS_R200; - else - screen->chip_flags |= RADEON_CLASS_R300; - - if (getenv("R300_NO_TCL")) - screen->chip_flags &= ~RADEON_CHIPSET_TCL; - i = 0; screen->extensions[i++] = &driCopySubBufferExtension.base; screen->extensions[i++] = &driFrameTrackingExtension.base; @@ -1159,6 +1333,10 @@ radeonCreateScreen2(__DRIscreenPrivate *sPriv) screen->extensions[i++] = &r300TexBufferExtension.base; #endif +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) + screen->extensions[i++] = &r300TexBufferExtension.base; +#endif + screen->extensions[i++] = NULL; sPriv->extensions = screen->extensions; @@ -1352,6 +1530,11 @@ static GLboolean radeonCreateContext(const __GLcontextModes * glVisual, { __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) + if (IS_R600_CLASS(screen)) + return r300CreateContext(glVisual, driContextPriv, sharedContextPriv); +#endif + #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) if (IS_R300_CLASS(screen)) return r300CreateContext(glVisual, driContextPriv, sharedContextPriv); @@ -1394,6 +1577,11 @@ radeonInitScreen(__DRIscreenPrivate *psp) static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 }; static const __DRIversion dri_expected = { 4, 0, 0 }; static const __DRIversion drm_expected = { 1, 24, 0 }; +#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) + static const char *driver_name = "R600"; + static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 }; + static const __DRIversion dri_expected = { 4, 0, 0 }; + static const __DRIversion drm_expected = { 1, 24, 0 }; #endif RADEONDRIPtr dri_priv = (RADEONDRIPtr) psp->pDevPriv; @@ -1421,7 +1609,7 @@ radeonInitScreen(__DRIscreenPrivate *psp) driInitSingleExtension( NULL, NV_vp_extension ); driInitSingleExtension( NULL, ATI_fs_extension ); driInitExtensions( NULL, point_extensions, GL_FALSE ); -#elif defined(RADEON_COMMON_FOR_R300) +#elif (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600)) driInitSingleExtension( NULL, gl_20_extension ); #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h index 8605eb4f075..5194224acb7 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.h +++ b/src/mesa/drivers/dri/radeon/radeon_screen.h @@ -117,6 +117,8 @@ typedef struct radeon_screen { ((screen->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R200) #define IS_R300_CLASS(screen) \ ((screen->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R300) +#define IS_R600_CLASS(screen) \ + ((screen->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R600) extern void radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv); #endif /* __RADEON_SCREEN_H__ */ |