diff options
Diffstat (limited to 'src/gallium/drivers/nv40')
25 files changed, 6269 insertions, 0 deletions
diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile new file mode 100644 index 00000000000..9c8eadf7e44 --- /dev/null +++ b/src/gallium/drivers/nv40/Makefile @@ -0,0 +1,37 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nv40 + +DRIVER_SOURCES = \ + nv40_clear.c \ + nv40_context.c \ + nv40_draw.c \ + nv40_fragprog.c \ + nv40_fragtex.c \ + nv40_miptree.c \ + nv40_query.c \ + nv40_screen.c \ + nv40_state.c \ + nv40_state_blend.c \ + nv40_state_emit.c \ + nv40_state_fb.c \ + nv40_state_rasterizer.c \ + nv40_state_scissor.c \ + nv40_state_stipple.c \ + nv40_state_viewport.c \ + nv40_state_zsa.c \ + nv40_surface.c \ + nv40_vbo.c \ + nv40_vertprog.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/nv40/nv40_clear.c b/src/gallium/drivers/nv40/nv40_clear.c new file mode 100644 index 00000000000..59efd620e32 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_clear.c @@ -0,0 +1,13 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv40_context.h" + +void +nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); + ps->status = PIPE_SURFACE_STATUS_CLEAR; +} diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c new file mode 100644 index 00000000000..5d325f5067f --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -0,0 +1,72 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" + +#include "nv40_context.h" +#include "nv40_screen.h" + +static void +nv40_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + BEGIN_RING(curie, 0x1fd8, 1); + OUT_RING (2); + BEGIN_RING(curie, 0x1fd8, 1); + OUT_RING (1); + } + + FIRE_RING(fence); +} + +static void +nv40_destroy(struct pipe_context *pipe) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + if (nv40->draw) + draw_destroy(nv40->draw); + FREE(nv40); +} + +struct pipe_context * +nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ + struct nv40_screen *screen = nv40_screen(pscreen); + struct pipe_winsys *ws = pscreen->winsys; + struct nv40_context *nv40; + struct nouveau_winsys *nvws = screen->nvws; + + nv40 = CALLOC(1, sizeof(struct nv40_context)); + if (!nv40) + return NULL; + nv40->screen = screen; + nv40->pctx_id = pctx_id; + + nv40->nvws = nvws; + + nv40->pipe.winsys = ws; + nv40->pipe.screen = pscreen; + nv40->pipe.destroy = nv40_destroy; + nv40->pipe.draw_arrays = nv40_draw_arrays; + nv40->pipe.draw_elements = nv40_draw_elements; + nv40->pipe.clear = nv40_clear; + nv40->pipe.flush = nv40_flush; + + nv40_init_query_functions(nv40); + nv40_init_surface_functions(nv40); + nv40_init_state_functions(nv40); + + /* Create, configure, and install fallback swtnl path */ + nv40->draw = draw_create(); + draw_wide_point_threshold(nv40->draw, 9999999.0); + draw_wide_line_threshold(nv40->draw, 9999999.0); + draw_enable_line_stipple(nv40->draw, FALSE); + draw_enable_point_sprites(nv40->draw, FALSE); + draw_set_rasterize_stage(nv40->draw, nv40_draw_render_stage(nv40)); + + return &nv40->pipe; +} + diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h new file mode 100644 index 00000000000..adcfbdd85a8 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -0,0 +1,233 @@ +#ifndef __NV40_CONTEXT_H__ +#define __NV40_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx) \ + struct nv40_screen *ctx = nv40->screen +#include "nouveau/nouveau_push.h" +#include "nouveau/nouveau_stateobj.h" + +#include "nv40_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ + fprintf(stderr, "nouveau: "fmt, ##args); + +enum nv40_state_index { + NV40_STATE_FB = 0, + NV40_STATE_VIEWPORT = 1, + NV40_STATE_BLEND = 2, + NV40_STATE_RAST = 3, + NV40_STATE_ZSA = 4, + NV40_STATE_BCOL = 5, + NV40_STATE_CLIP = 6, + NV40_STATE_SCISSOR = 7, + NV40_STATE_STIPPLE = 8, + NV40_STATE_FRAGPROG = 9, + NV40_STATE_VERTPROG = 10, + NV40_STATE_FRAGTEX0 = 11, + NV40_STATE_FRAGTEX1 = 12, + NV40_STATE_FRAGTEX2 = 13, + NV40_STATE_FRAGTEX3 = 14, + NV40_STATE_FRAGTEX4 = 15, + NV40_STATE_FRAGTEX5 = 16, + NV40_STATE_FRAGTEX6 = 17, + NV40_STATE_FRAGTEX7 = 18, + NV40_STATE_FRAGTEX8 = 19, + NV40_STATE_FRAGTEX9 = 20, + NV40_STATE_FRAGTEX10 = 21, + NV40_STATE_FRAGTEX11 = 22, + NV40_STATE_FRAGTEX12 = 23, + NV40_STATE_FRAGTEX13 = 24, + NV40_STATE_FRAGTEX14 = 25, + NV40_STATE_FRAGTEX15 = 26, + NV40_STATE_VERTTEX0 = 27, + NV40_STATE_VERTTEX1 = 28, + NV40_STATE_VERTTEX2 = 29, + NV40_STATE_VERTTEX3 = 30, + NV40_STATE_VTXBUF = 31, + NV40_STATE_VTXFMT = 32, + NV40_STATE_VTXATTR = 33, + NV40_STATE_MAX = 34 +}; + +#include "nv40_screen.h" + +#define NV40_NEW_BLEND (1 << 0) +#define NV40_NEW_RAST (1 << 1) +#define NV40_NEW_ZSA (1 << 2) +#define NV40_NEW_SAMPLER (1 << 3) +#define NV40_NEW_FB (1 << 4) +#define NV40_NEW_STIPPLE (1 << 5) +#define NV40_NEW_SCISSOR (1 << 6) +#define NV40_NEW_VIEWPORT (1 << 7) +#define NV40_NEW_BCOL (1 << 8) +#define NV40_NEW_VERTPROG (1 << 9) +#define NV40_NEW_FRAGPROG (1 << 10) +#define NV40_NEW_ARRAYS (1 << 11) +#define NV40_NEW_UCP (1 << 12) + +struct nv40_rasterizer_state { + struct pipe_rasterizer_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv40_zsa_state { + struct pipe_depth_stencil_alpha_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv40_blend_state { + struct pipe_blend_state pipe; + struct nouveau_stateobj *so; +}; + + +struct nv40_state { + unsigned scissor_enabled; + unsigned stipple_enabled; + unsigned viewport_bypass; + unsigned fp_samplers; + + uint64_t dirty; + struct nouveau_stateobj *hw[NV40_STATE_MAX]; +}; + +struct nv40_context { + struct pipe_context pipe; + + struct nouveau_winsys *nvws; + struct nv40_screen *screen; + unsigned pctx_id; + + struct draw_context *draw; + + /* HW state derived from pipe states */ + struct nv40_state state; + struct { + struct nv40_vertex_program *vertprog; + + unsigned nr_attribs; + unsigned hw[PIPE_MAX_SHADER_INPUTS]; + unsigned draw[PIPE_MAX_SHADER_INPUTS]; + unsigned emit[PIPE_MAX_SHADER_INPUTS]; + } swtnl; + + enum { + HW, SWTNL, SWRAST + } render_mode; + unsigned fallback_swtnl; + unsigned fallback_swrast; + + /* Context state */ + unsigned dirty, draw_dirty; + struct pipe_scissor_state scissor; + unsigned stipple[32]; + struct pipe_clip_state clip; + struct nv40_vertex_program *vertprog; + struct nv40_fragment_program *fragprog; + struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + unsigned constbuf_nr[PIPE_SHADER_TYPES]; + struct nv40_rasterizer_state *rasterizer; + struct nv40_zsa_state *zsa; + struct nv40_blend_state *blend; + struct pipe_blend_color blend_colour; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; + struct pipe_buffer *idxbuf; + unsigned idxbuf_format; + struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; + struct nv40_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + unsigned nr_textures; + unsigned dirty_samplers; + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned vtxbuf_nr; + struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; + unsigned vtxelt_nr; + const unsigned *edgeflags; +}; + +static INLINE struct nv40_context * +nv40_context(struct pipe_context *pipe) +{ + return (struct nv40_context *)pipe; +} + +struct nv40_state_entry { + boolean (*validate)(struct nv40_context *nv40); + struct { + unsigned pipe; + unsigned hw; + } dirty; +}; + +extern void nv40_init_state_functions(struct nv40_context *nv40); +extern void nv40_init_surface_functions(struct nv40_context *nv40); +extern void nv40_init_query_functions(struct nv40_context *nv40); + +extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen); + +/* nv40_draw.c */ +extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40); +extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe, + struct pipe_buffer *idxbuf, + unsigned ib_size, unsigned mode, + unsigned start, unsigned count); + +/* nv40_vertprog.c */ +extern void nv40_vertprog_destroy(struct nv40_context *, + struct nv40_vertex_program *); + +/* nv40_fragprog.c */ +extern void nv40_fragprog_destroy(struct nv40_context *, + struct nv40_fragment_program *); + +/* nv40_fragtex.c */ +extern void nv40_fragtex_bind(struct nv40_context *); + +/* nv40_state.c and friends */ +extern boolean nv40_state_validate(struct nv40_context *nv40); +extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40); +extern void nv40_state_emit(struct nv40_context *nv40); +extern struct nv40_state_entry nv40_state_rasterizer; +extern struct nv40_state_entry nv40_state_scissor; +extern struct nv40_state_entry nv40_state_stipple; +extern struct nv40_state_entry nv40_state_fragprog; +extern struct nv40_state_entry nv40_state_vertprog; +extern struct nv40_state_entry nv40_state_blend; +extern struct nv40_state_entry nv40_state_blend_colour; +extern struct nv40_state_entry nv40_state_zsa; +extern struct nv40_state_entry nv40_state_viewport; +extern struct nv40_state_entry nv40_state_framebuffer; +extern struct nv40_state_entry nv40_state_fragtex; +extern struct nv40_state_entry nv40_state_vbo; +extern struct nv40_state_entry nv40_state_vtxfmt; + +/* nv40_vbo.c */ +extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv40_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, + unsigned count); + +/* nv40_clear.c */ +extern void nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +#endif diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c new file mode 100644 index 00000000000..c83ff91d7e3 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -0,0 +1,349 @@ +#include "pipe/p_shader_tokens.h" + +#include "util/u_pack_color.h" + +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pipe.h" + +#include "nv40_context.h" +#define NV40_SHADER_NO_FUCKEDNESS +#include "nv40_shader.h" + +/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very + * often at all. Uses "quadro style" vertex submission + a fixed vertex + * layout to avoid the need to generate a vertex program or vtxfmt. + */ + +struct nv40_render_stage { + struct draw_stage stage; + struct nv40_context *nv40; + unsigned prim; +}; + +static INLINE struct nv40_render_stage * +nv40_render_stage(struct draw_stage *stage) +{ + return (struct nv40_render_stage *)stage; +} + +static INLINE void +nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) +{ + unsigned i; + + for (i = 0; i < nv40->swtnl.nr_attribs; i++) { + unsigned idx = nv40->swtnl.draw[i]; + unsigned hw = nv40->swtnl.hw[i]; + + switch (nv40->swtnl.emit[i]) { + case EMIT_OMIT: + break; + case EMIT_1F: + BEGIN_RING(curie, NV40TCL_VTX_ATTR_1F(hw), 1); + OUT_RING (fui(v->data[idx][0])); + break; + case EMIT_2F: + BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(hw), 2); + OUT_RING (fui(v->data[idx][0])); + OUT_RING (fui(v->data[idx][1])); + break; + case EMIT_3F: + BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(hw), 3); + OUT_RING (fui(v->data[idx][0])); + OUT_RING (fui(v->data[idx][1])); + OUT_RING (fui(v->data[idx][2])); + break; + case EMIT_4F: + BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(hw), 4); + OUT_RING (fui(v->data[idx][0])); + OUT_RING (fui(v->data[idx][1])); + OUT_RING (fui(v->data[idx][2])); + OUT_RING (fui(v->data[idx][3])); + break; + case EMIT_4UB: + BEGIN_RING(curie, NV40TCL_VTX_ATTR_4UB(hw), 1); + OUT_RING (pack_ub4(float_to_ubyte(v->data[idx][0]), + float_to_ubyte(v->data[idx][1]), + float_to_ubyte(v->data[idx][2]), + float_to_ubyte(v->data[idx][3]))); + break; + default: + assert(0); + break; + } + } +} + +static INLINE void +nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, + unsigned mode, unsigned count) +{ + struct nv40_render_stage *rs = nv40_render_stage(stage); + struct nv40_context *nv40 = rs->nv40; + struct nouveau_pushbuf *pb = nv40->nvws->channel->pushbuf; + unsigned i; + + /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ + if (pb->remaining < ((count * 20) + 6)) { + if (rs->prim != NV40TCL_BEGIN_END_STOP) { + NOUVEAU_ERR("AIII, missed flush\n"); + assert(0); + } + FIRE_RING(NULL); + nv40_state_emit(nv40); + } + + /* Switch primitive modes if necessary */ + if (rs->prim != mode) { + if (rs->prim != NV40TCL_BEGIN_END_STOP) { + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (NV40TCL_BEGIN_END_STOP); + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (mode); + rs->prim = mode; + } + + /* Emit vertex data */ + for (i = 0; i < count; i++) + nv40_render_vertex(nv40, prim->v[i]); + + /* If it's likely we'll need to empty the push buffer soon, finish + * off the primitive now. + */ + if (pb->remaining < ((count * 20) + 6)) { + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (NV40TCL_BEGIN_END_STOP); + rs->prim = NV40TCL_BEGIN_END_STOP; + } +} + +static void +nv40_render_point(struct draw_stage *draw, struct prim_header *prim) +{ + nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_POINTS, 1); +} + +static void +nv40_render_line(struct draw_stage *draw, struct prim_header *prim) +{ + nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_LINES, 2); +} + +static void +nv40_render_tri(struct draw_stage *draw, struct prim_header *prim) +{ + nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_TRIANGLES, 3); +} + +static void +nv40_render_flush(struct draw_stage *draw, unsigned flags) +{ + struct nv40_render_stage *rs = nv40_render_stage(draw); + struct nv40_context *nv40 = rs->nv40; + + if (rs->prim != NV40TCL_BEGIN_END_STOP) { + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (NV40TCL_BEGIN_END_STOP); + rs->prim = NV40TCL_BEGIN_END_STOP; + } +} + +static void +nv40_render_reset_stipple_counter(struct draw_stage *draw) +{ +} + +static void +nv40_render_destroy(struct draw_stage *draw) +{ + FREE(draw); +} + +static INLINE void +emit_mov(struct nv40_vertex_program *vp, + unsigned dst, unsigned src, unsigned vor, unsigned mask) +{ + struct nv40_vertex_program_exec *inst; + + vp->insns = realloc(vp->insns, + sizeof(struct nv40_vertex_program_exec) * + ++vp->nr_insns); + inst = &vp->insns[vp->nr_insns - 1]; + + inst->data[0] = 0x401f9c6c; + inst->data[1] = 0x0040000d | (src << 8); + inst->data[2] = 0x8106c083; + inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13); + inst->const_index = -1; + inst->has_branch_offset = FALSE; + + vp->ir |= (1 << src); + if (vor != ~0) + vp->or |= (1 << vor); +} + +static struct nv40_vertex_program * +create_drawvp(struct nv40_context *nv40) +{ + struct nv40_vertex_program *vp = CALLOC_STRUCT(nv40_vertex_program); + unsigned i; + + emit_mov(vp, NV40_VP_INST_DEST_POS, 0, ~0, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_COL0, 3, 0, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_COL1, 4, 1, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_BFC0, 3, 2, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_BFC1, 4, 3, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_FOGC, 5, 4, 0x8); + for (i = 0; i < 8; i++) + emit_mov(vp, NV40_VP_INST_DEST_TC(i), 8 + i, 14 + i, 0xf); + + vp->insns[vp->nr_insns - 1].data[3] |= 1; + vp->translated = TRUE; + return vp; +} + +struct draw_stage * +nv40_draw_render_stage(struct nv40_context *nv40) +{ + struct nv40_render_stage *render = CALLOC_STRUCT(nv40_render_stage); + + if (!nv40->swtnl.vertprog) + nv40->swtnl.vertprog = create_drawvp(nv40); + + render->nv40 = nv40; + render->stage.draw = nv40->draw; + render->stage.point = nv40_render_point; + render->stage.line = nv40_render_line; + render->stage.tri = nv40_render_tri; + render->stage.flush = nv40_render_flush; + render->stage.reset_stipple_counter = nv40_render_reset_stipple_counter; + render->stage.destroy = nv40_render_destroy; + + return &render->stage; +} + +boolean +nv40_draw_elements_swtnl(struct pipe_context *pipe, + struct pipe_buffer *idxbuf, unsigned idxbuf_size, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + unsigned i; + void *map; + + if (!nv40_state_validate_swtnl(nv40)) + return FALSE; + nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF); + nv40_state_emit(nv40); + + for (i = 0; i < nv40->vtxbuf_nr; i++) { + map = ws->buffer_map(ws, nv40->vtxbuf[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(nv40->draw, i, map); + } + + if (idxbuf) { + map = ws->buffer_map(ws, idxbuf, PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(nv40->draw, idxbuf_size, map); + } else { + draw_set_mapped_element_buffer(nv40->draw, 0, NULL); + } + + if (nv40->constbuf[PIPE_SHADER_VERTEX]) { + const unsigned nr = nv40->constbuf_nr[PIPE_SHADER_VERTEX]; + + map = ws->buffer_map(ws, nv40->constbuf[PIPE_SHADER_VERTEX], + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_constant_buffer(nv40->draw, map, nr); + } + + draw_arrays(nv40->draw, mode, start, count); + + for (i = 0; i < nv40->vtxbuf_nr; i++) + ws->buffer_unmap(ws, nv40->vtxbuf[i].buffer); + + if (idxbuf) + ws->buffer_unmap(ws, idxbuf); + + if (nv40->constbuf[PIPE_SHADER_VERTEX]) + ws->buffer_unmap(ws, nv40->constbuf[PIPE_SHADER_VERTEX]); + + draw_flush(nv40->draw); + pipe->flush(pipe, 0, NULL); + + return TRUE; +} + +static INLINE void +emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit, + unsigned semantic, unsigned index) +{ + unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index); + unsigned a = nv40->swtnl.nr_attribs++; + + nv40->swtnl.hw[a] = hw; + nv40->swtnl.emit[a] = emit; + nv40->swtnl.draw[a] = draw_out; +} + +static boolean +nv40_state_vtxfmt_validate(struct nv40_context *nv40) +{ + struct nv40_fragment_program *fp = nv40->fragprog; + unsigned colour = 0, texcoords = 0, fog = 0, i; + + /* Determine needed fragprog inputs */ + for (i = 0; i < fp->info.num_inputs; i++) { + switch (fp->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + break; + case TGSI_SEMANTIC_COLOR: + colour |= (1 << fp->info.input_semantic_index[i]); + break; + case TGSI_SEMANTIC_GENERIC: + texcoords |= (1 << fp->info.input_semantic_index[i]); + break; + case TGSI_SEMANTIC_FOG: + fog = 1; + break; + default: + assert(0); + } + } + + nv40->swtnl.nr_attribs = 0; + + /* Map draw vtxprog output to hw attribute IDs */ + for (i = 0; i < 2; i++) { + if (!(colour & (1 << i))) + continue; + emit_attrib(nv40, 3 + i, EMIT_4UB, TGSI_SEMANTIC_COLOR, i); + } + + for (i = 0; i < 8; i++) { + if (!(texcoords & (1 << i))) + continue; + emit_attrib(nv40, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i); + } + + if (fog) { + emit_attrib(nv40, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0); + } + + emit_attrib(nv40, 0, EMIT_3F, TGSI_SEMANTIC_POSITION, 0); + + return FALSE; +} + +struct nv40_state_entry nv40_state_vtxfmt = { + .validate = nv40_state_vtxfmt_validate, + .dirty = { + .pipe = NV40_NEW_ARRAYS | NV40_NEW_FRAGPROG, + .hw = 0 + } +}; + diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c new file mode 100644 index 00000000000..91dcbebda0d --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -0,0 +1,991 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv40_context.h" + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 1 +#define MASK_Y 2 +#define MASK_Z 4 +#define MASK_W 8 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X +#define DEF_CTEST NV40_FP_OP_COND_TR +#include "nv40_shader.h" + +#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv40_sr_neg((s)) +#define abs(s) nv40_sr_abs((s)) +#define scale(s,v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v) + +#define MAX_CONSTS 128 +#define MAX_IMM 32 +struct nv40_fpc { + struct nv40_fragment_program *fp; + + uint attrib_map[PIPE_MAX_SHADER_INPUTS]; + + unsigned r_temps; + unsigned r_temps_discard; + struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nv40_sreg *r_temp; + + int num_regs; + + unsigned inst_offset; + unsigned have_const; + + struct { + int pipe; + float vals[4]; + } consts[MAX_CONSTS]; + int nr_consts; + + struct nv40_sreg imm[MAX_IMM]; + unsigned nr_imm; +}; + +static INLINE struct nv40_sreg +temp(struct nv40_fpc *fpc) +{ + int idx = ffs(~fpc->r_temps) - 1; + + if (idx < 0) { + NOUVEAU_ERR("out of temps!!\n"); + assert(0); + return nv40_sr(NV40SR_TEMP, 0); + } + + fpc->r_temps |= (1 << idx); + fpc->r_temps_discard |= (1 << idx); + return nv40_sr(NV40SR_TEMP, idx); +} + +static INLINE void +release_temps(struct nv40_fpc *fpc) +{ + fpc->r_temps &= ~fpc->r_temps_discard; + fpc->r_temps_discard = 0; +} + +static INLINE struct nv40_sreg +constant(struct nv40_fpc *fpc, int pipe, float vals[4]) +{ + int idx; + + if (fpc->nr_consts == MAX_CONSTS) + assert(0); + idx = fpc->nr_consts++; + + fpc->consts[idx].pipe = pipe; + if (pipe == -1) + memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); + return nv40_sr(NV40SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \ + (d), (m), (s0), (s1), (s2)) +#define tex(cc,s,o,u,d,m,s0,s1,s2) \ + nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \ + (d), (m), (s0), none, none) + +static void +grow_insns(struct nv40_fpc *fpc, int size) +{ + struct nv40_fragment_program *fp = fpc->fp; + + fp->insn_len += size; + fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); +} + +static void +emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) +{ + struct nv40_fragment_program *fp = fpc->fp; + uint32_t *hw = &fp->insn[fpc->inst_offset]; + uint32_t sr = 0; + + switch (src.type) { + case NV40SR_INPUT: + sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); + hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT); + break; + case NV40SR_OUTPUT: + sr |= NV40_FP_REG_SRC_HALF; + /* fall-through */ + case NV40SR_TEMP: + sr |= (NV40_FP_REG_TYPE_TEMP << NV40_FP_REG_TYPE_SHIFT); + sr |= (src.index << NV40_FP_REG_SRC_SHIFT); + break; + case NV40SR_CONST: + if (!fpc->have_const) { + grow_insns(fpc, 4); + fpc->have_const = 1; + } + + hw = &fp->insn[fpc->inst_offset]; + if (fpc->consts[src.index].pipe >= 0) { + struct nv40_fragment_program_data *fpd; + + fp->consts = realloc(fp->consts, ++fp->nr_consts * + sizeof(*fpd)); + fpd = &fp->consts[fp->nr_consts - 1]; + fpd->offset = fpc->inst_offset + 4; + fpd->index = fpc->consts[src.index].pipe; + memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); + } else { + memcpy(&fp->insn[fpc->inst_offset + 4], + fpc->consts[src.index].vals, + sizeof(uint32_t) * 4); + } + + sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT); + break; + case NV40SR_NONE: + sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NV40_FP_REG_NEGATE; + + if (src.abs) + hw[1] |= (1 << (29 + pos)); + + sr |= ((src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT) | + (src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT) | + (src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT) | + (src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT)); + + hw[pos + 1] |= sr; +} + +static void +emit_dst(struct nv40_fpc *fpc, struct nv40_sreg dst) +{ + struct nv40_fragment_program *fp = fpc->fp; + uint32_t *hw = &fp->insn[fpc->inst_offset]; + + switch (dst.type) { + case NV40SR_TEMP: + if (fpc->num_regs < (dst.index + 1)) + fpc->num_regs = dst.index + 1; + break; + case NV40SR_OUTPUT: + if (dst.index == 1) { + fp->fp_control |= 0xe; + } else { + hw[0] |= NV40_FP_OP_OUT_REG_HALF; + } + break; + case NV40SR_NONE: + hw[0] |= (1 << 30); + break; + default: + assert(0); + } + + hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT); +} + +static void +nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, + struct nv40_sreg dst, int mask, + struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +{ + struct nv40_fragment_program *fp = fpc->fp; + uint32_t *hw; + + fpc->inst_offset = fp->insn_len; + fpc->have_const = 0; + grow_insns(fpc, 4); + hw = &fp->insn[fpc->inst_offset]; + memset(hw, 0, sizeof(uint32_t) * 4); + + if (op == NV40_FP_OP_OPCODE_KIL) + fp->fp_control |= NV40TCL_FP_CONTROL_KIL; + hw[0] |= (op << NV40_FP_OP_OPCODE_SHIFT); + hw[0] |= (mask << NV40_FP_OP_OUTMASK_SHIFT); + hw[2] |= (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT); + + if (sat) + hw[0] |= NV40_FP_OP_OUT_SAT; + + if (dst.cc_update) + hw[0] |= NV40_FP_OP_COND_WRITE_ENABLE; + hw[1] |= (dst.cc_test << NV40_FP_OP_COND_SHIFT); + hw[1] |= ((dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) | + (dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) | + (dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) | + (dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT)); + + emit_dst(fpc, dst); + emit_src(fpc, 0, s0); + emit_src(fpc, 1, s1); + emit_src(fpc, 2, s2); +} + +static void +nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit, + struct nv40_sreg dst, int mask, + struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +{ + struct nv40_fragment_program *fp = fpc->fp; + + nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); + + fp->insn[fpc->inst_offset] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT); + fp->samplers |= (1 << unit); +} + +static INLINE struct nv40_sreg +tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) +{ + struct nv40_sreg src; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + src = nv40_sr(NV40SR_INPUT, + fpc->attrib_map[fsrc->SrcRegister.Index]); + break; + case TGSI_FILE_CONSTANT: + src = constant(fpc, fsrc->SrcRegister.Index, NULL); + break; + case TGSI_FILE_IMMEDIATE: + assert(fsrc->SrcRegister.Index < fpc->nr_imm); + src = fpc->imm[fsrc->SrcRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + src = fpc->r_temp[fsrc->SrcRegister.Index]; + break; + /* NV40 fragprog result regs are just temps, so this is simple */ + case TGSI_FILE_OUTPUT: + src = fpc->r_result[fsrc->SrcRegister.Index]; + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.negate = fsrc->SrcRegister.Negate; + src.swz[0] = fsrc->SrcRegister.SwizzleX; + src.swz[1] = fsrc->SrcRegister.SwizzleY; + src.swz[2] = fsrc->SrcRegister.SwizzleZ; + src.swz[3] = fsrc->SrcRegister.SwizzleW; + return src; +} + +static INLINE struct nv40_sreg +tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) { + switch (fdst->DstRegister.File) { + case TGSI_FILE_OUTPUT: + return fpc->r_result[fdst->DstRegister.Index]; + case TGSI_FILE_TEMPORARY: + return fpc->r_temp[fdst->DstRegister.Index]; + case TGSI_FILE_NULL: + return nv40_sr(NV40SR_NONE, 0); + default: + NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); + return nv40_sr(NV40SR_NONE, 0); + } +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc, + struct nv40_sreg *src) +{ + const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + struct nv40_sreg tgsi = tgsi_src(fpc, fsrc); + uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; + uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, + fsrc->SrcRegisterExtSwz.NegateY, + fsrc->SrcRegisterExtSwz.NegateZ, + fsrc->SrcRegisterExtSwz.NegateW }; + uint c; + + for (c = 0; c < 4; c++) { + switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + mask |= (1 << c); + break; + case TGSI_EXTSWIZZLE_ZERO: + zero_mask |= (1 << c); + tgsi.swz[c] = SWZ_X; + break; + case TGSI_EXTSWIZZLE_ONE: + one_mask |= (1 << c); + tgsi.swz[c] = SWZ_X; + break; + default: + assert(0); + } + + if (!tgsi.negate && neg[c]) + neg_mask |= (1 << c); + } + + if (mask == MASK_ALL && !neg_mask) + return TRUE; + + *src = temp(fpc); + + if (mask) + arith(fpc, 0, MOV, *src, mask, tgsi, none, none); + + if (zero_mask) + arith(fpc, 0, SFL, *src, zero_mask, *src, none, none); + + if (one_mask) + arith(fpc, 0, STR, *src, one_mask, *src, none, none); + + if (neg_mask) { + struct nv40_sreg one = temp(fpc); + arith(fpc, 0, STR, one, neg_mask, one, none, none); + arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none); + } + + return FALSE; +} + +static boolean +nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, + const struct tgsi_full_instruction *finst) +{ + const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + struct nv40_sreg src[3], dst, tmp; + int mask, sat, unit; + int ai = -1, ci = -1, ii = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(fpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + if (!src_native_swz(fpc, fsrc, &src[i])) + continue; + break; + default: + break; + } + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->SrcRegister.Index) { + ai = fsrc->SrcRegister.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_CONSTANT: + if ((ci == -1 && ii == -1) || + ci == fsrc->SrcRegister.Index) { + ci = fsrc->SrcRegister.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_IMMEDIATE: + if ((ci == -1 && ii == -1) || + ii == fsrc->SrcRegister.Index) { + ii = fsrc->SrcRegister.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + case TGSI_FILE_SAMPLER: + unit = fsrc->SrcRegister.Index; + break; + case TGSI_FILE_OUTPUT: + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]); + mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_CMP: + tmp = temp(fpc); + arith(fpc, sat, MOV, dst, mask, src[2], none, none); + tmp.cc_update = 1; + arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); + dst.cc_test = NV40_VP_INST_COND_LT; + arith(fpc, sat, MOV, dst, mask, src[1], none, none); + break; + case TGSI_OPCODE_COS: + arith(fpc, sat, COS, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DDX: + if (mask & (MASK_Z | MASK_W)) { + tmp = temp(fpc); + arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, + swz(src[0], Z, W, Z, W), none, none); + arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, + swz(tmp, X, Y, X, Y), none, none); + arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, src[0], + none, none); + arith(fpc, 0, MOV, dst, mask, tmp, none, none); + } else { + arith(fpc, sat, DDX, dst, mask, src[0], none, none); + } + break; + case TGSI_OPCODE_DDY: + if (mask & (MASK_Z | MASK_W)) { + tmp = temp(fpc); + arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, + swz(src[0], Z, W, Z, W), none, none); + arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, + swz(tmp, X, Y, X, Y), none, none); + arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, src[0], + none, none); + arith(fpc, 0, MOV, dst, mask, tmp, none, none); + } else { + arith(fpc, sat, DDY, dst, mask, src[0], none, none); + } + break; + case TGSI_OPCODE_DP3: + arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DP4: + arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + tmp = temp(fpc); + arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); + arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), + swz(src[1], W, W, W, W), none); + break; + case TGSI_OPCODE_DST: + arith(fpc, sat, DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(fpc, sat, EX2, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FLR: + arith(fpc, sat, FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(fpc, sat, FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_KILP: + arith(fpc, 0, KIL, none, 0, none, none, none); + break; + case TGSI_OPCODE_KIL: + dst = nv40_sr(NV40SR_NONE, 0); + dst.cc_update = 1; + arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); + dst.cc_update = 0; dst.cc_test = NV40_FP_OP_COND_LT; + arith(fpc, 0, KIL, dst, 0, none, none, none); + break; + case TGSI_OPCODE_LG2: + arith(fpc, sat, LG2, dst, mask, src[0], none, none); + break; +// case TGSI_OPCODE_LIT: + case TGSI_OPCODE_LRP: + tmp = temp(fpc); + arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]); + arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp); + break; + case TGSI_OPCODE_MAD: + arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(fpc, sat, MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_NOISE1: + case TGSI_OPCODE_NOISE2: + case TGSI_OPCODE_NOISE3: + case TGSI_OPCODE_NOISE4: + arith(fpc, sat, SFL, dst, mask, none, none, none); + break; + case TGSI_OPCODE_POW: + tmp = temp(fpc); + arith(fpc, 0, LG2, tmp, MASK_X, + swz(src[0], X, X, X, X), none, none); + arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(fpc, sat, EX2, dst, mask, + swz(tmp, X, X, X, X), none, none); + break; + case TGSI_OPCODE_RCP: + arith(fpc, sat, RCP, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_RET: + assert(0); + break; + case TGSI_OPCODE_RFL: + tmp = temp(fpc); + arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none); + arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none); + arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z, + swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); + arith(fpc, sat, MAD, dst, mask, + swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])); + break; + case TGSI_OPCODE_RSQ: + tmp = temp(fpc); + arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X, + abs(swz(src[0], X, X, X, X)), none, none); + arith(fpc, sat, EX2, dst, mask, + neg(swz(tmp, X, X, X, X)), none, none); + break; + case TGSI_OPCODE_SCS: + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + break; + case TGSI_OPCODE_SEQ: + arith(fpc, sat, SEQ, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SFL: + arith(fpc, sat, SFL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SGE: + arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SGT: + arith(fpc, sat, SGT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SIN: + arith(fpc, sat, SIN, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_SLE: + arith(fpc, sat, SLE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SNE: + arith(fpc, sat, SNE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_STR: + arith(fpc, sat, STR, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); + break; + case TGSI_OPCODE_TEX: + tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_TXB: + tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_TXP: + tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_XPD: + tmp = temp(fpc); + arith(fpc, 0, MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(fpc, sat, MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + release_temps(fpc); + return TRUE; +} + +static boolean +nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + int hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = NV40_FP_OP_INPUT_SRC_POSITION; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV40_FP_OP_INPUT_SRC_COL0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV40_FP_OP_INPUT_SRC_COL1; + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NV40_FP_OP_INPUT_SRC_FOGC; + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.SemanticIndex <= 7) { + hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic. + SemanticIndex); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad input semantic\n"); + return FALSE; + } + + fpc->attrib_map[fdec->DeclarationRange.First] = hw; + return TRUE; +} + +static boolean +nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + unsigned idx = fdec->DeclarationRange.First; + unsigned hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = 1; + break; + case TGSI_SEMANTIC_COLOR: + switch (fdec->Semantic.SemanticIndex) { + case 0: hw = 0; break; + case 1: hw = 2; break; + case 2: hw = 3; break; + case 3: hw = 4; break; + default: + NOUVEAU_ERR("bad rcol index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + fpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); + fpc->r_temps |= (1 << hw); + return TRUE; +} + +static boolean +nv40_fragprog_prepare(struct nv40_fpc *fpc) +{ + struct tgsi_parse_context p; + int high_temp = -1, i; + + tgsi_parse_init(&p, fpc->fp->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch(tok->Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + fdec = &p.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_INPUT: + if (!nv40_fragprog_parse_decl_attrib(fpc, fdec)) + goto out_err; + break; + case TGSI_FILE_OUTPUT: + if (!nv40_fragprog_parse_decl_output(fpc, fdec)) + goto out_err; + break; + case TGSI_FILE_TEMPORARY: + if (fdec->DeclarationRange.Last > high_temp) { + high_temp = + fdec->DeclarationRange.Last; + } + break; + default: + break; + } + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + struct tgsi_full_immediate *imm; + float vals[4]; + + imm = &p.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); + assert(fpc->nr_imm < MAX_IMM); + + vals[0] = imm->u.ImmediateFloat32[0].Float; + vals[1] = imm->u.ImmediateFloat32[1].Float; + vals[2] = imm->u.ImmediateFloat32[2].Float; + vals[3] = imm->u.ImmediateFloat32[3].Float; + fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); + } + break; + default: + break; + } + } + tgsi_parse_free(&p); + + if (++high_temp) { + fpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg)); + for (i = 0; i < high_temp; i++) + fpc->r_temp[i] = temp(fpc); + fpc->r_temps_discard = 0; + } + + return TRUE; + +out_err: + if (fpc->r_temp) + FREE(fpc->r_temp); + tgsi_parse_free(&p); + return FALSE; +} + +static void +nv40_fragprog_translate(struct nv40_context *nv40, + struct nv40_fragment_program *fp) +{ + struct tgsi_parse_context parse; + struct nv40_fpc *fpc = NULL; + + fpc = CALLOC(1, sizeof(struct nv40_fpc)); + if (!fpc) + return; + fpc->fp = fp; + fpc->num_regs = 2; + + if (!nv40_fragprog_prepare(fpc)) { + FREE(fpc); + return; + } + + tgsi_parse_init(&parse, fp->pipe.tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + + finst = &parse.FullToken.FullInstruction; + if (!nv40_fragprog_parse_instruction(fpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT; + + /* Terminate final instruction */ + fp->insn[fpc->inst_offset] |= 0x00000001; + + /* Append NOP + END instruction, may or may not be necessary. */ + fpc->inst_offset = fp->insn_len; + grow_insns(fpc, 4); + fp->insn[fpc->inst_offset + 0] = 0x00000001; + fp->insn[fpc->inst_offset + 1] = 0x00000000; + fp->insn[fpc->inst_offset + 2] = 0x00000000; + fp->insn[fpc->inst_offset + 3] = 0x00000000; + + fp->translated = TRUE; +out_err: + tgsi_parse_free(&parse); + if (fpc->r_temp) + FREE(fpc->r_temp); + FREE(fpc); +} + +static void +nv40_fragprog_upload(struct nv40_context *nv40, + struct nv40_fragment_program *fp) +{ + struct pipe_winsys *ws = nv40->pipe.winsys; + const uint32_t le = 1; + uint32_t *map; + int i; + + map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + +#if 0 + for (i = 0; i < fp->insn_len; i++) { + fflush(stdout); fflush(stderr); + NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); + fflush(stdout); fflush(stderr); + } +#endif + + if ((*(const uint8_t *)&le)) { + for (i = 0; i < fp->insn_len; i++) { + map[i] = fp->insn[i]; + } + } else { + /* Weird swapping for big-endian chips */ + for (i = 0; i < fp->insn_len; i++) { + map[i] = ((fp->insn[i] & 0xffff) << 16) | + ((fp->insn[i] >> 16) & 0xffff); + } + } + + ws->buffer_unmap(ws, fp->buffer); +} + +static boolean +nv40_fragprog_validate(struct nv40_context *nv40) +{ + struct nv40_fragment_program *fp = nv40->fragprog; + struct pipe_buffer *constbuf = + nv40->constbuf[PIPE_SHADER_FRAGMENT]; + struct pipe_winsys *ws = nv40->pipe.winsys; + struct nouveau_stateobj *so; + boolean new_consts = FALSE; + int i; + + if (fp->translated) + goto update_constants; + + nv40->fallback_swrast &= ~NV40_NEW_FRAGPROG; + nv40_fragprog_translate(nv40, fp); + if (!fp->translated) { + nv40->fallback_swrast |= NV40_NEW_FRAGPROG; + return FALSE; + } + + fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); + nv40_fragprog_upload(nv40, fp); + + so = so_new(4, 1); + so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1); + so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1); + so_method(so, nv40->screen->curie, NV40TCL_FP_CONTROL, 1); + so_data (so, fp->fp_control); + so_ref(so, &fp->so); + +update_constants: + if (fp->nr_consts) { + float *map; + + map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); + for (i = 0; i < fp->nr_consts; i++) { + struct nv40_fragment_program_data *fpd = &fp->consts[i]; + uint32_t *p = &fp->insn[fpd->offset]; + uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; + + if (!memcmp(p, cb, 4 * sizeof(float))) + continue; + memcpy(p, cb, 4 * sizeof(float)); + new_consts = TRUE; + } + ws->buffer_unmap(ws, constbuf); + + if (new_consts) + nv40_fragprog_upload(nv40, fp); + } + + if (new_consts || fp->so != nv40->state.hw[NV40_STATE_FRAGPROG]) { + so_ref(fp->so, &nv40->state.hw[NV40_STATE_FRAGPROG]); + return TRUE; + } + + return FALSE; +} + +void +nv40_fragprog_destroy(struct nv40_context *nv40, + struct nv40_fragment_program *fp) +{ + if (fp->insn_len) + FREE(fp->insn); +} + +struct nv40_state_entry nv40_state_fragprog = { + .validate = nv40_fragprog_validate, + .dirty = { + .pipe = NV40_NEW_FRAGPROG, + .hw = NV40_STATE_FRAGPROG + } +}; + diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c new file mode 100644 index 00000000000..0227d22620d --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -0,0 +1,168 @@ +#include "nv40_context.h" + +#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw) \ +{ \ + TRUE, \ + PIPE_FORMAT_##m, \ + NV40TCL_TEX_FORMAT_FORMAT_##tf, \ + (NV40TCL_TEX_SWIZZLE_S0_X_##ts0x | NV40TCL_TEX_SWIZZLE_S0_Y_##ts0y | \ + NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w | \ + NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y | \ + NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w), \ + ((NV40TCL_TEX_FILTER_SIGNED_RED*sx) | (NV40TCL_TEX_FILTER_SIGNED_GREEN*sy) | \ + (NV40TCL_TEX_FILTER_SIGNED_BLUE*sz) | (NV40TCL_TEX_FILTER_SIGNED_ALPHA*sw)) \ +} + +struct nv40_texture_format { + boolean defined; + uint pipe; + int format; + int swizzle; + int sign; +}; + +static struct nv40_texture_format +nv40_texture_formats[] = { + _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), + _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), + _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X, 0, 0, 0, 0), + _(R16_SNORM , A16 , ZERO, ZERO, S1, ONE, X, X, X, Y, 1, 1, 1, 1), + _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X, 0, 0, 0, 0), + _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y, 0, 0, 0, 0), + _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), + _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), + _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), + _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + {}, +}; + +static struct nv40_texture_format * +nv40_fragtex_format(uint pipe_format) +{ + struct nv40_texture_format *tf = nv40_texture_formats; + + while (tf->defined) { + if (tf->pipe == pipe_format) + return tf; + tf++; + } + + NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format)); + return NULL; +} + + +static struct nouveau_stateobj * +nv40_fragtex_build(struct nv40_context *nv40, int unit) +{ + struct nv40_sampler_state *ps = nv40->tex_sampler[unit]; + struct nv40_miptree *nv40mt = nv40->tex_miptree[unit]; + struct pipe_texture *pt = &nv40mt->base; + struct nv40_texture_format *tf; + struct nouveau_stateobj *so; + uint32_t txf, txs, txp; + unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + + tf = nv40_fragtex_format(pt->format); + if (!tf) + assert(0); + + txf = ps->fmt; + txf |= tf->format | 0x8000; + txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT); + + if (1) /* XXX */ + txf |= NV40TCL_TEX_FORMAT_NO_BORDER; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: + txf |= NV40TCL_TEX_FORMAT_CUBIC; + /* fall-through */ + case PIPE_TEXTURE_2D: + txf |= NV40TCL_TEX_FORMAT_DIMS_2D; + break; + case PIPE_TEXTURE_3D: + txf |= NV40TCL_TEX_FORMAT_DIMS_3D; + break; + case PIPE_TEXTURE_1D: + txf |= NV40TCL_TEX_FORMAT_DIMS_1D; + break; + default: + NOUVEAU_ERR("Unknown target %d\n", pt->target); + return NULL; + } + + if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + txp = 0; + } else { + txp = nv40mt->level[0].pitch; + txf |= NV40TCL_TEX_FORMAT_LINEAR; + } + + txs = tf->swizzle; + + so = so_new(16, 2); + so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8); + so_reloc (so, nv40mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); + so_reloc (so, nv40mt->buffer, txf, tex_flags | NOUVEAU_BO_OR, + NV40TCL_TEX_FORMAT_DMA0, NV40TCL_TEX_FORMAT_DMA1); + so_data (so, ps->wrap); + so_data (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en); + so_data (so, txs); + so_data (so, ps->filt | tf->sign | 0x2000 /*voodoo*/); + so_data (so, (pt->width[0] << NV40TCL_TEX_SIZE0_W_SHIFT) | + pt->height[0]); + so_data (so, ps->bcol); + so_method(so, nv40->screen->curie, NV40TCL_TEX_SIZE1(unit), 1); + so_data (so, (pt->depth[0] << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); + + return so; +} + +static boolean +nv40_fragtex_validate(struct nv40_context *nv40) +{ + struct nv40_fragment_program *fp = nv40->fragprog; + struct nv40_state *state = &nv40->state; + struct nouveau_stateobj *so; + unsigned samplers, unit; + + samplers = state->fp_samplers & ~fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + so = so_new(2, 0); + so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1); + so_data (so, 0); + so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); + state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit)); + } + + samplers = nv40->dirty_samplers & fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + so = nv40_fragtex_build(nv40, unit); + so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); + state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit)); + } + + nv40->state.fp_samplers = fp->samplers; + return FALSE; +} + +struct nv40_state_entry nv40_state_fragtex = { + .validate = nv40_fragtex_validate, + .dirty = { + .pipe = NV40_NEW_SAMPLER | NV40_NEW_FRAGPROG, + .hw = 0 + } +}; + diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c new file mode 100644 index 00000000000..9bef23ad1f6 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -0,0 +1,221 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv40_context.h" + +static void +nv40_miptree_layout(struct nv40_miptree *mt) +{ + struct pipe_texture *pt = &mt->base; + uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + uint offset = 0; + int nr_faces, l, f, pitch; + + if (pt->target == PIPE_TEXTURE_CUBE) { + nr_faces = 6; + } else + if (pt->target == PIPE_TEXTURE_3D) { + nr_faces = pt->depth[0]; + } else { + nr_faces = 1; + } + + pitch = pt->width[0]; + for (l = 0; l <= pt->last_level; l++) { + pt->width[l] = width; + pt->height[l] = height; + pt->depth[l] = depth; + pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + + if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) + pitch = pt->nblocksx[l]; + pitch = align(pitch, 64); + + mt->level[l].pitch = pitch * pt->block.size; + mt->level[l].image_offset = + CALLOC(nr_faces, sizeof(unsigned)); + + width = MAX2(1, width >> 1); + height = MAX2(1, height >> 1); + depth = MAX2(1, depth >> 1); + } + + for (f = 0; f < nr_faces; f++) { + for (l = 0; l <= pt->last_level; l++) { + mt->level[l].image_offset[f] = offset; + offset += mt->level[l].pitch * pt->height[l]; + } + } + + mt->total_size = offset; +} + +static struct pipe_texture * +nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) +{ + struct pipe_winsys *ws = pscreen->winsys; + struct nv40_miptree *mt; + unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL | + NOUVEAU_BUFFER_USAGE_TEXTURE; + + mt = MALLOC(sizeof(struct nv40_miptree)); + if (!mt) + return NULL; + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = pscreen; + mt->shadow_tex = NULL; + mt->shadow_surface = NULL; + + /* Swizzled textures must be POT */ + if (pt->width[0] & (pt->width[0] - 1) || + pt->height[0] & (pt->height[0] - 1)) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else + if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | + PIPE_TEXTURE_USAGE_DISPLAY_TARGET)) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else + if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else { + switch (pt->format) { + /* TODO: Figure out which formats can be swizzled */ + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_R16_SNORM: + break; + default: + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + } + } + + if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) + buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + + nv40_miptree_layout(mt); + + mt->buffer = ws->buffer_create(ws, 256, buf_usage, mt->total_size); + if (!mt->buffer) { + FREE(mt); + return NULL; + } + + return &mt->base; +} + +static struct pipe_texture * +nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, + const unsigned *stride, struct pipe_buffer *pb) +{ + struct nv40_miptree *mt; + + /* Only supports 2D, non-mipmapped textures for the moment */ + if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || + pt->depth[0] != 1) + return NULL; + + mt = CALLOC_STRUCT(nv40_miptree); + if (!mt) + return NULL; + + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = pscreen; + mt->level[0].pitch = stride[0]; + mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + + pipe_buffer_reference(pscreen, &mt->buffer, pb); + return &mt->base; +} + +static void +nv40_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt) +{ + struct pipe_texture *pt = *ppt; + struct nv40_miptree *mt = (struct nv40_miptree *)pt; + int l; + + *ppt = NULL; + if (--pt->refcount) + return; + + pipe_buffer_reference(pscreen, &mt->buffer, NULL); + for (l = 0; l <= pt->last_level; l++) { + if (mt->level[l].image_offset) + FREE(mt->level[l].image_offset); + } + + if (mt->shadow_tex) { + assert(mt->shadow_surface); + pscreen->tex_surface_release(pscreen, &mt->shadow_surface); + nv40_miptree_release(pscreen, &mt->shadow_tex); + } + + FREE(mt); +} + +static struct pipe_surface * +nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) +{ + struct nv40_miptree *mt = (struct nv40_miptree *)pt; + struct pipe_surface *ps; + + ps = CALLOC_STRUCT(pipe_surface); + if (!ps) + return NULL; + pipe_texture_reference(&ps->texture, pt); + ps->format = pt->format; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = mt->level[level].pitch; + ps->usage = flags; + ps->status = PIPE_SURFACE_STATUS_DEFINED; + ps->refcount = 1; + ps->face = face; + ps->level = level; + ps->zslice = zslice; + + if (pt->target == PIPE_TEXTURE_CUBE) { + ps->offset = mt->level[level].image_offset[face]; + } else + if (pt->target == PIPE_TEXTURE_3D) { + ps->offset = mt->level[level].image_offset[zslice]; + } else { + ps->offset = mt->level[level].image_offset[0]; + } + + return ps; +} + +static void +nv40_miptree_surface_del(struct pipe_screen *pscreen, + struct pipe_surface **psurface) +{ + struct pipe_surface *ps = *psurface; + + *psurface = NULL; + if (--ps->refcount > 0) + return; + + pipe_texture_reference(&ps->texture, NULL); + FREE(ps); +} + +void +nv40_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv40_miptree_create; + pscreen->texture_blanket = nv40_miptree_blanket; + pscreen->texture_release = nv40_miptree_release; + pscreen->get_tex_surface = nv40_miptree_surface_new; + pscreen->tex_surface_release = nv40_miptree_surface_del; +} + diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c new file mode 100644 index 00000000000..9b9a43f49df --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_query.c @@ -0,0 +1,122 @@ +#include "pipe/p_context.h" + +#include "nv40_context.h" + +struct nv40_query { + struct nouveau_resource *object; + unsigned type; + boolean ready; + uint64_t result; +}; + +static INLINE struct nv40_query * +nv40_query(struct pipe_query *pipe) +{ + return (struct nv40_query *)pipe; +} + +static struct pipe_query * +nv40_query_create(struct pipe_context *pipe, unsigned query_type) +{ + struct nv40_query *q; + + q = CALLOC(1, sizeof(struct nv40_query)); + q->type = query_type; + + return (struct pipe_query *)q; +} + +static void +nv40_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_query *q = nv40_query(pq); + + if (q->object) + nv40->nvws->res_free(&q->object); + FREE(q); +} + +static void +nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_query *q = nv40_query(pq); + + assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + /* Happens when end_query() is called, then another begin_query() + * without querying the result in-between. For now we'll wait for + * the existing query to notify completion, but it could be better. + */ + if (q->object) { + uint64_t tmp; + pipe->get_query_result(pipe, pq, 1, &tmp); + } + + if (nv40->nvws->res_alloc(nv40->screen->query_heap, 1, NULL, &q->object)) + assert(0); + nv40->nvws->notifier_reset(nv40->screen->query, q->object->start); + + BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1); + OUT_RING (1); + BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1); + OUT_RING (1); + + q->ready = FALSE; +} + +static void +nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_query *q = nv40_query(pq); + + BEGIN_RING(curie, NV40TCL_QUERY_GET, 1); + OUT_RING ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | + ((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT)); + FIRE_RING(NULL); +} + +static boolean +nv40_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, uint64_t *result) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_query *q = nv40_query(pq); + struct nouveau_winsys *nvws = nv40->nvws; + + assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + if (!q->ready) { + unsigned status; + + status = nvws->notifier_status(nv40->screen->query, + q->object->start); + if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { + if (wait == FALSE) + return FALSE; + nvws->notifier_wait(nv40->screen->query, q->object->start, + NV_NOTIFY_STATE_STATUS_COMPLETED, + 0); + } + + q->result = nvws->notifier_retval(nv40->screen->query, + q->object->start); + q->ready = TRUE; + nvws->res_free(&q->object); + } + + *result = q->result; + return TRUE; +} + +void +nv40_init_query_functions(struct nv40_context *nv40) +{ + nv40->pipe.create_query = nv40_query_create; + nv40->pipe.destroy_query = nv40_query_destroy; + nv40->pipe.begin_query = nv40_query_begin; + nv40->pipe.end_query = nv40_query_end; + nv40->pipe.get_query_result = nv40_query_result; +} diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c new file mode 100644 index 00000000000..88a329ea245 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -0,0 +1,381 @@ +#include "pipe/p_screen.h" +#include "util/u_simple_screen.h" + +#include "nv40_context.h" +#include "nv40_screen.h" + +#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf +#define NV4X_GRCLASS4497_CHIPSETS 0x00005450 +#define NV6X_GRCLASS4497_CHIPSETS 0x00000088 + +static const char * +nv40_screen_get_name(struct pipe_screen *pscreen) +{ + struct nv40_screen *screen = nv40_screen(pscreen); + struct nouveau_device *dev = screen->nvws->channel->device; + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); + return buffer; +} + +static const char * +nv40_screen_get_vendor(struct pipe_screen *pscreen) +{ + return "nouveau"; +} + +static int +nv40_screen_get_param(struct pipe_screen *pscreen, int param) +{ + struct nv40_screen *screen = nv40_screen(pscreen); + + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 16; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 1; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 4; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 0; /* We have 4 - but unsupported currently */ + case NOUVEAU_CAP_HW_VTXBUF: + return 1; + case NOUVEAU_CAP_HW_IDXBUF: + if (screen->curie->grclass == NV40TCL) + return 1; + return 0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv40_screen_get_paramf(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static boolean +nv40_screen_surface_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) +{ + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + } else { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_A8L8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + return TRUE; + default: + break; + } + } + + return FALSE; +} + +static struct pipe_buffer * +nv40_surface_buffer(struct pipe_surface *surf) +{ + struct nv40_miptree *mt = (struct nv40_miptree *)surf->texture; + + return mt->buffer; +} + +static void * +nv40_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, + unsigned flags ) +{ + struct pipe_winsys *ws = screen->winsys; + struct pipe_surface *surface_to_map; + void *map; + + if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture; + + if (!mt->shadow_tex) { + unsigned old_tex_usage = surface->texture->tex_usage; + surface->texture->tex_usage = NOUVEAU_TEXTURE_USAGE_LINEAR | + PIPE_TEXTURE_USAGE_DYNAMIC; + mt->shadow_tex = screen->texture_create(screen, surface->texture); + surface->texture->tex_usage = old_tex_usage; + + assert(mt->shadow_tex->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR); + } + + mt->shadow_surface = screen->get_tex_surface + ( + screen, mt->shadow_tex, + surface->face, surface->level, surface->zslice, + surface->usage + ); + + surface_to_map = mt->shadow_surface; + } + else + surface_to_map = surface; + + assert(surface_to_map); + map = ws->buffer_map(ws, nv40_surface_buffer(surface_to_map), flags); + if (!map) + return NULL; + + return map + surface_to_map->offset; +} + +static void +nv40_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ + struct pipe_winsys *ws = screen->winsys; + struct pipe_surface *surface_to_unmap; + + /* TODO: Copy from shadow just before push buffer is flushed instead. + There are probably some programs that map/unmap excessively + before rendering. */ + if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture; + + assert(mt->shadow_tex); + + surface_to_unmap = mt->shadow_surface; + } + else + surface_to_unmap = surface; + + assert(surface_to_unmap); + + ws->buffer_unmap(ws, nv40_surface_buffer(surface_to_unmap)); + + if (surface_to_unmap != surface) { + struct nv40_screen *nvscreen = nv40_screen(screen); + + nvscreen->eng2d->copy(nvscreen->eng2d, surface, 0, 0, + surface_to_unmap, 0, 0, + surface->width, surface->height); + } +} + +static void +nv40_screen_destroy(struct pipe_screen *pscreen) +{ + struct nv40_screen *screen = nv40_screen(pscreen); + struct nouveau_winsys *nvws = screen->nvws; + + nvws->res_free(&screen->vp_exec_heap); + nvws->res_free(&screen->vp_data_heap); + nvws->res_free(&screen->query_heap); + nvws->notifier_free(&screen->query); + nvws->notifier_free(&screen->sync); + nvws->grobj_free(&screen->curie); + + FREE(pscreen); +} + +struct pipe_screen * +nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ + struct nv40_screen *screen = CALLOC_STRUCT(nv40_screen); + struct nouveau_stateobj *so; + unsigned curie_class; + unsigned chipset = nvws->channel->device->chipset; + int ret; + + if (!screen) + return NULL; + screen->nvws = nvws; + + /* 2D engine setup */ + screen->eng2d = nv04_surface_2d_init(nvws); + screen->eng2d->buf = nv40_surface_buffer; + + /* 3D object */ + switch (chipset & 0xf0) { + case 0x40: + if (NV4X_GRCLASS4097_CHIPSETS & (1 << (chipset & 0x0f))) + curie_class = NV40TCL; + else + if (NV4X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) + curie_class = NV44TCL; + break; + case 0x60: + if (NV6X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) + curie_class = NV44TCL; + break; + default: + break; + } + + if (!curie_class) { + NOUVEAU_ERR("Unknown nv4x chipset: nv%02x\n", chipset); + return NULL; + } + + ret = nvws->grobj_alloc(nvws, curie_class, &screen->curie); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return FALSE; + } + + /* Notifier for sync purposes */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + /* Query objects */ + ret = nvws->notifier_alloc(nvws, 32, &screen->query); + if (ret) { + NOUVEAU_ERR("Error initialising query objects: %d\n", ret); + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + ret = nvws->res_init(&screen->query_heap, 0, 32); + if (ret) { + NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + /* Vtxprog resources */ + if (nvws->res_init(&screen->vp_exec_heap, 0, 512) || + nvws->res_init(&screen->vp_data_heap, 0, 256)) { + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + /* Static curie initialisation */ + so = so_new(128, 0); + so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1); + so_data (so, screen->sync->handle); + so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->gart->handle); + so_method(so, screen->curie, NV40TCL_DMA_COLOR1, 1); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->curie, NV40TCL_DMA_COLOR0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->curie, NV40TCL_DMA_VTXBUF0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->gart->handle); + so_method(so, screen->curie, NV40TCL_DMA_FENCE, 2); + so_data (so, 0); + so_data (so, screen->query->handle); + so_method(so, screen->curie, NV40TCL_DMA_UNK01AC, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->curie, NV40TCL_DMA_COLOR2, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + + so_method(so, screen->curie, 0x1ea4, 3); + so_data (so, 0x00000010); + so_data (so, 0x01000100); + so_data (so, 0xff800006); + + /* vtxprog output routing */ + so_method(so, screen->curie, 0x1fc4, 1); + so_data (so, 0x06144321); + so_method(so, screen->curie, 0x1fc8, 2); + so_data (so, 0xedcba987); + so_data (so, 0x00000021); + so_method(so, screen->curie, 0x1fd0, 1); + so_data (so, 0x00171615); + so_method(so, screen->curie, 0x1fd4, 1); + so_data (so, 0x001b1a19); + + so_method(so, screen->curie, 0x1ef8, 1); + so_data (so, 0x0020ffff); + so_method(so, screen->curie, 0x1d64, 1); + so_data (so, 0x00d30000); + so_method(so, screen->curie, 0x1e94, 1); + so_data (so, 0x00000001); + + so_emit(nvws, so); + so_ref(NULL, &so); + nvws->push_flush(nvws, 0, NULL); + + screen->pipe.winsys = ws; + screen->pipe.destroy = nv40_screen_destroy; + + screen->pipe.get_name = nv40_screen_get_name; + screen->pipe.get_vendor = nv40_screen_get_vendor; + screen->pipe.get_param = nv40_screen_get_param; + screen->pipe.get_paramf = nv40_screen_get_paramf; + + screen->pipe.is_format_supported = nv40_screen_surface_format_supported; + + screen->pipe.surface_map = nv40_surface_map; + screen->pipe.surface_unmap = nv40_surface_unmap; + + nv40_screen_init_miptree_functions(&screen->pipe); + u_simple_screen_init(&screen->pipe); + + return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv40/nv40_screen.h b/src/gallium/drivers/nv40/nv40_screen.h new file mode 100644 index 00000000000..4500aa0e5cc --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_screen.h @@ -0,0 +1,37 @@ +#ifndef __NV40_SCREEN_H__ +#define __NV40_SCREEN_H__ + +#include "pipe/p_screen.h" +#include "nv04/nv04_surface_2d.h" + +struct nv40_screen { + struct pipe_screen pipe; + + struct nouveau_winsys *nvws; + + unsigned cur_pctx; + + /* HW graphics objects */ + struct nv04_surface_2d *eng2d; + struct nouveau_grobj *curie; + struct nouveau_notifier *sync; + + /* Query object resources */ + struct nouveau_notifier *query; + struct nouveau_resource *query_heap; + + /* Vtxprog resources */ + struct nouveau_resource *vp_exec_heap; + struct nouveau_resource *vp_data_heap; + + /* Current 3D state of channel */ + struct nouveau_stateobj *state[NV40_STATE_MAX]; +}; + +static INLINE struct nv40_screen * +nv40_screen(struct pipe_screen *screen) +{ + return (struct nv40_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv40/nv40_shader.h b/src/gallium/drivers/nv40/nv40_shader.h new file mode 100644 index 00000000000..854dccf5486 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_shader.h @@ -0,0 +1,556 @@ +#ifndef __NV40_SHADER_H__ +#define __NV40_SHADER_H__ + +/* Vertex programs instruction set + * + * The NV40 instruction set is very similar to NV30. Most fields are in + * a slightly different position in the instruction however. + * + * Merged instructions + * In some cases it is possible to put two instructions into one opcode + * slot. The rules for when this is OK is not entirely clear to me yet. + * + * There are separate writemasks and dest temp register fields for each + * grouping of instructions. There is however only one field with the + * ID of a result register. Writing to temp/result regs is selected by + * setting VEC_RESULT/SCA_RESULT. + * + * Temporary registers + * The source/dest temp register fields have been extended by 1 bit, to + * give a total of 32 temporary registers. + * + * Relative Addressing + * NV40 can use an address register to index into vertex attribute regs. + * This is done by putting the offset value into INPUT_SRC and setting + * the INDEX_INPUT flag. + * + * Conditional execution (see NV_vertex_program{2,3} for details) + * There is a second condition code register on NV40, it's use is enabled + * by setting the COND_REG_SELECT_1 flag. + * + * Texture lookup + * TODO + */ + +/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */ +#define NV40_VP_INST_VEC_RESULT (1 << 30) +/* uncertain.. */ +#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29) +/* use address reg as index into attribs */ +#define NV40_VP_INST_INDEX_INPUT (1 << 27) +#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25) +#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24) +#define NV40_VP_INST_SRC2_ABS (1 << 23) +#define NV40_VP_INST_SRC1_ABS (1 << 22) +#define NV40_VP_INST_SRC0_ABS (1 << 21) +#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15 +#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15) +#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13) +#define NV40_VP_INST_COND_SHIFT 10 +#define NV40_VP_INST_COND_MASK (0x7 << 10) +# define NV40_VP_INST_COND_FL 0 +# define NV40_VP_INST_COND_LT 1 +# define NV40_VP_INST_COND_EQ 2 +# define NV40_VP_INST_COND_LE 3 +# define NV40_VP_INST_COND_GT 4 +# define NV40_VP_INST_COND_NE 5 +# define NV40_VP_INST_COND_GE 6 +# define NV40_VP_INST_COND_TR 7 +#define NV40_VP_INST_COND_SWZ_X_SHIFT 8 +#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8) +#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6 +#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6) +#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4 +#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4) +#define NV40_VP_INST_COND_SWZ_W_SHIFT 2 +#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2) +#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2 +#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2) +#define NV40_VP_INST_ADDR_SWZ_SHIFT 0 +#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0) +#define NV40_VP_INST0_KNOWN ( \ + NV40_VP_INST_INDEX_INPUT | \ + NV40_VP_INST_COND_REG_SELECT_1 | \ + NV40_VP_INST_ADDR_REG_SELECT_1 | \ + NV40_VP_INST_SRC2_ABS | \ + NV40_VP_INST_SRC1_ABS | \ + NV40_VP_INST_SRC0_ABS | \ + NV40_VP_INST_VEC_DEST_TEMP_MASK | \ + NV40_VP_INST_COND_TEST_ENABLE | \ + NV40_VP_INST_COND_MASK | \ + NV40_VP_INST_COND_SWZ_ALL_MASK | \ + NV40_VP_INST_ADDR_SWZ_MASK) + +/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */ +#define NV40_VP_INST_VEC_OPCODE_SHIFT 22 +#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22) +# define NV40_VP_INST_OP_NOP 0x00 +# define NV40_VP_INST_OP_MOV 0x01 +# define NV40_VP_INST_OP_MUL 0x02 +# define NV40_VP_INST_OP_ADD 0x03 +# define NV40_VP_INST_OP_MAD 0x04 +# define NV40_VP_INST_OP_DP3 0x05 +# define NV40_VP_INST_OP_DPH 0x06 +# define NV40_VP_INST_OP_DP4 0x07 +# define NV40_VP_INST_OP_DST 0x08 +# define NV40_VP_INST_OP_MIN 0x09 +# define NV40_VP_INST_OP_MAX 0x0A +# define NV40_VP_INST_OP_SLT 0x0B +# define NV40_VP_INST_OP_SGE 0x0C +# define NV40_VP_INST_OP_ARL 0x0D +# define NV40_VP_INST_OP_FRC 0x0E +# define NV40_VP_INST_OP_FLR 0x0F +# define NV40_VP_INST_OP_SEQ 0x10 +# define NV40_VP_INST_OP_SFL 0x11 +# define NV40_VP_INST_OP_SGT 0x12 +# define NV40_VP_INST_OP_SLE 0x13 +# define NV40_VP_INST_OP_SNE 0x14 +# define NV40_VP_INST_OP_STR 0x15 +# define NV40_VP_INST_OP_SSG 0x16 +# define NV40_VP_INST_OP_ARR 0x17 +# define NV40_VP_INST_OP_ARA 0x18 +# define NV40_VP_INST_OP_TXL 0x19 +#define NV40_VP_INST_SCA_OPCODE_SHIFT 27 +#define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27) +# define NV40_VP_INST_OP_NOP 0x00 +# define NV40_VP_INST_OP_MOV 0x01 +# define NV40_VP_INST_OP_RCP 0x02 +# define NV40_VP_INST_OP_RCC 0x03 +# define NV40_VP_INST_OP_RSQ 0x04 +# define NV40_VP_INST_OP_EXP 0x05 +# define NV40_VP_INST_OP_LOG 0x06 +# define NV40_VP_INST_OP_LIT 0x07 +# define NV40_VP_INST_OP_BRA 0x09 +# define NV40_VP_INST_OP_CAL 0x0B +# define NV40_VP_INST_OP_RET 0x0C +# define NV40_VP_INST_OP_LG2 0x0D +# define NV40_VP_INST_OP_EX2 0x0E +# define NV40_VP_INST_OP_SIN 0x0F +# define NV40_VP_INST_OP_COS 0x10 +# define NV40_VP_INST_OP_PUSHA 0x13 +# define NV40_VP_INST_OP_POPA 0x14 +#define NV40_VP_INST_CONST_SRC_SHIFT 12 +#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12) +#define NV40_VP_INST_INPUT_SRC_SHIFT 8 +#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8) +# define NV40_VP_INST_IN_POS 0 +# define NV40_VP_INST_IN_WEIGHT 1 +# define NV40_VP_INST_IN_NORMAL 2 +# define NV40_VP_INST_IN_COL0 3 +# define NV40_VP_INST_IN_COL1 4 +# define NV40_VP_INST_IN_FOGC 5 +# define NV40_VP_INST_IN_TC0 8 +# define NV40_VP_INST_IN_TC(n) (8+n) +#define NV40_VP_INST_SRC0H_SHIFT 0 +#define NV40_VP_INST_SRC0H_MASK (0xFF << 0) +#define NV40_VP_INST1_KNOWN ( \ + NV40_VP_INST_VEC_OPCODE_MASK | \ + NV40_VP_INST_SCA_OPCODE_MASK | \ + NV40_VP_INST_CONST_SRC_MASK | \ + NV40_VP_INST_INPUT_SRC_MASK | \ + NV40_VP_INST_SRC0H_MASK \ + ) + +/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */ +#define NV40_VP_INST_SRC0L_SHIFT 23 +#define NV40_VP_INST_SRC0L_MASK (0x1FF << 23) +#define NV40_VP_INST_SRC1_SHIFT 6 +#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6) +#define NV40_VP_INST_SRC2H_SHIFT 0 +#define NV40_VP_INST_SRC2H_MASK (0x3F << 0) +#define NV40_VP_INST_IADDRH_SHIFT 0 +#define NV40_VP_INST_IADDRH_MASK (0x1F << 0) + +/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */ +#define NV40_VP_INST_IADDRL_SHIFT 29 +#define NV40_VP_INST_IADDRL_MASK (7 << 29) +#define NV40_VP_INST_SRC2L_SHIFT 21 +#define NV40_VP_INST_SRC2L_MASK (0x7FF << 21) +#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17 +#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17) +# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20) +# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19) +# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18) +# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17) +#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13 +#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13) +# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16) +# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15) +# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14) +# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13) +#define NV40_VP_INST_SCA_RESULT (1 << 12) +#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7 +#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7) +#define NV40_VP_INST_DEST_SHIFT 2 +#define NV40_VP_INST_DEST_MASK (31 << 2) +# define NV40_VP_INST_DEST_POS 0 +# define NV40_VP_INST_DEST_COL0 1 +# define NV40_VP_INST_DEST_COL1 2 +# define NV40_VP_INST_DEST_BFC0 3 +# define NV40_VP_INST_DEST_BFC1 4 +# define NV40_VP_INST_DEST_FOGC 5 +# define NV40_VP_INST_DEST_PSZ 6 +# define NV40_VP_INST_DEST_TC0 7 +# define NV40_VP_INST_DEST_TC(n) (7+n) +# define NV40_VP_INST_DEST_TEMP 0x1F +#define NV40_VP_INST_INDEX_CONST (1 << 1) +#define NV40_VP_INST_LAST (1 << 0) +#define NV40_VP_INST3_KNOWN ( \ + NV40_VP_INST_SRC2L_MASK |\ + NV40_VP_INST_SCA_WRITEMASK_MASK |\ + NV40_VP_INST_VEC_WRITEMASK_MASK |\ + NV40_VP_INST_SCA_DEST_TEMP_MASK |\ + NV40_VP_INST_DEST_MASK |\ + NV40_VP_INST_INDEX_CONST) + +/* Useful to split the source selection regs into their pieces */ +#define NV40_VP_SRC0_HIGH_SHIFT 9 +#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00 +#define NV40_VP_SRC0_LOW_MASK 0x000001FF +#define NV40_VP_SRC2_HIGH_SHIFT 11 +#define NV40_VP_SRC2_HIGH_MASK 0x0001F800 +#define NV40_VP_SRC2_LOW_MASK 0x000007FF + +/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */ +#define NV40_VP_SRC_NEGATE (1 << 16) +#define NV40_VP_SRC_SWZ_X_SHIFT 14 +#define NV40_VP_SRC_SWZ_X_MASK (3 << 14) +#define NV40_VP_SRC_SWZ_Y_SHIFT 12 +#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12) +#define NV40_VP_SRC_SWZ_Z_SHIFT 10 +#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10) +#define NV40_VP_SRC_SWZ_W_SHIFT 8 +#define NV40_VP_SRC_SWZ_W_MASK (3 << 8) +#define NV40_VP_SRC_SWZ_ALL_SHIFT 8 +#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8) +#define NV40_VP_SRC_TEMP_SRC_SHIFT 2 +#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2) +#define NV40_VP_SRC_REG_TYPE_SHIFT 0 +#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0) +# define NV40_VP_SRC_REG_TYPE_UNK0 0 +# define NV40_VP_SRC_REG_TYPE_TEMP 1 +# define NV40_VP_SRC_REG_TYPE_INPUT 2 +# define NV40_VP_SRC_REG_TYPE_CONST 3 + + +/* + * Each fragment program opcode appears to be comprised of 4 32-bit values. + * + * 0 - Opcode, output reg/mask, ATTRIB source + * 1 - Source 0 + * 2 - Source 1 + * 3 - Source 2 + * + * There appears to be no special difference between result regs and temp regs. + * result.color == R0.xyzw + * result.depth == R1.z + * When the fragprog contains instructions to write depth, + * NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1. + * + * Constants are inserted directly after the instruction that uses them. + * + * It appears that it's not possible to use two input registers in one + * instruction as the input sourcing is done in the instruction dword + * and not the source selection dwords. As such instructions such as: + * + * ADD result.color, fragment.color, fragment.texcoord[0]; + * + * must be split into two MOV's and then an ADD (nvidia does this) but + * I'm not sure why it's not just one MOV and then source the second input + * in the ADD instruction.. + * + * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary + * negation requires multiplication with a const. + * + * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and + * SWIZZLE_ONE. + * + * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as + * SWIZZLE_ZERO is implemented simply by not writing to the relevant components + * of the destination. + * + * Looping + * Loops appear to be fairly expensive on NV40 at least, the proprietary + * driver goes to a lot of effort to avoid using the native looping + * instructions. If the total number of *executed* instructions between + * REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop. + * The maximum loop count is 255. + * + * Conditional execution + * TODO + * + * Non-native instructions: + * LIT + * LRP - MAD+MAD + * SUB - ADD, negate second source + * RSQ - LG2 + EX2 + * POW - LG2 + MUL + EX2 + * SCS - COS + SIN + * XPD + * DP2 - MUL + ADD + * NRM + */ + +//== Opcode / Destination selection == +#define NV40_FP_OP_PROGRAM_END (1 << 0) +#define NV40_FP_OP_OUT_REG_SHIFT 1 +#define NV40_FP_OP_OUT_REG_MASK (63 << 1) +/* Needs to be set when writing outputs to get expected result.. */ +#define NV40_FP_OP_OUT_REG_HALF (1 << 7) +#define NV40_FP_OP_COND_WRITE_ENABLE (1 << 8) +#define NV40_FP_OP_OUTMASK_SHIFT 9 +#define NV40_FP_OP_OUTMASK_MASK (0xF << 9) +# define NV40_FP_OP_OUT_X (1 << 9) +# define NV40_FP_OP_OUT_Y (1 <<10) +# define NV40_FP_OP_OUT_Z (1 <<11) +# define NV40_FP_OP_OUT_W (1 <<12) +/* Uncertain about these, especially the input_src values.. it's possible that + * they can be dynamically changed. + */ +#define NV40_FP_OP_INPUT_SRC_SHIFT 13 +#define NV40_FP_OP_INPUT_SRC_MASK (15 << 13) +# define NV40_FP_OP_INPUT_SRC_POSITION 0x0 +# define NV40_FP_OP_INPUT_SRC_COL0 0x1 +# define NV40_FP_OP_INPUT_SRC_COL1 0x2 +# define NV40_FP_OP_INPUT_SRC_FOGC 0x3 +# define NV40_FP_OP_INPUT_SRC_TC0 0x4 +# define NV40_FP_OP_INPUT_SRC_TC(n) (0x4 + n) +# define NV40_FP_OP_INPUT_SRC_FACING 0xE +#define NV40_FP_OP_TEX_UNIT_SHIFT 17 +#define NV40_FP_OP_TEX_UNIT_MASK (0xF << 17) +#define NV40_FP_OP_PRECISION_SHIFT 22 +#define NV40_FP_OP_PRECISION_MASK (3 << 22) +# define NV40_FP_PRECISION_FP32 0 +# define NV40_FP_PRECISION_FP16 1 +# define NV40_FP_PRECISION_FX12 2 +#define NV40_FP_OP_OPCODE_SHIFT 24 +#define NV40_FP_OP_OPCODE_MASK (0x3F << 24) +# define NV40_FP_OP_OPCODE_NOP 0x00 +# define NV40_FP_OP_OPCODE_MOV 0x01 +# define NV40_FP_OP_OPCODE_MUL 0x02 +# define NV40_FP_OP_OPCODE_ADD 0x03 +# define NV40_FP_OP_OPCODE_MAD 0x04 +# define NV40_FP_OP_OPCODE_DP3 0x05 +# define NV40_FP_OP_OPCODE_DP4 0x06 +# define NV40_FP_OP_OPCODE_DST 0x07 +# define NV40_FP_OP_OPCODE_MIN 0x08 +# define NV40_FP_OP_OPCODE_MAX 0x09 +# define NV40_FP_OP_OPCODE_SLT 0x0A +# define NV40_FP_OP_OPCODE_SGE 0x0B +# define NV40_FP_OP_OPCODE_SLE 0x0C +# define NV40_FP_OP_OPCODE_SGT 0x0D +# define NV40_FP_OP_OPCODE_SNE 0x0E +# define NV40_FP_OP_OPCODE_SEQ 0x0F +# define NV40_FP_OP_OPCODE_FRC 0x10 +# define NV40_FP_OP_OPCODE_FLR 0x11 +# define NV40_FP_OP_OPCODE_KIL 0x12 +# define NV40_FP_OP_OPCODE_PK4B 0x13 +# define NV40_FP_OP_OPCODE_UP4B 0x14 +/* DDX/DDY can only write to XY */ +# define NV40_FP_OP_OPCODE_DDX 0x15 +# define NV40_FP_OP_OPCODE_DDY 0x16 +# define NV40_FP_OP_OPCODE_TEX 0x17 +# define NV40_FP_OP_OPCODE_TXP 0x18 +# define NV40_FP_OP_OPCODE_TXD 0x19 +# define NV40_FP_OP_OPCODE_RCP 0x1A +# define NV40_FP_OP_OPCODE_EX2 0x1C +# define NV40_FP_OP_OPCODE_LG2 0x1D +# define NV40_FP_OP_OPCODE_STR 0x20 +# define NV40_FP_OP_OPCODE_SFL 0x21 +# define NV40_FP_OP_OPCODE_COS 0x22 +# define NV40_FP_OP_OPCODE_SIN 0x23 +# define NV40_FP_OP_OPCODE_PK2H 0x24 +# define NV40_FP_OP_OPCODE_UP2H 0x25 +# define NV40_FP_OP_OPCODE_PK4UB 0x27 +# define NV40_FP_OP_OPCODE_UP4UB 0x28 +# define NV40_FP_OP_OPCODE_PK2US 0x29 +# define NV40_FP_OP_OPCODE_UP2US 0x2A +# define NV40_FP_OP_OPCODE_DP2A 0x2E +# define NV40_FP_OP_OPCODE_TXL 0x2F +# define NV40_FP_OP_OPCODE_TXB 0x31 +# define NV40_FP_OP_OPCODE_DIV 0x3A +# define NV40_FP_OP_OPCODE_UNK_LIT 0x3C +/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ +# define NV40_FP_OP_BRA_OPCODE_BRK 0x0 +# define NV40_FP_OP_BRA_OPCODE_CAL 0x1 +# define NV40_FP_OP_BRA_OPCODE_IF 0x2 +# define NV40_FP_OP_BRA_OPCODE_LOOP 0x3 +# define NV40_FP_OP_BRA_OPCODE_REP 0x4 +# define NV40_FP_OP_BRA_OPCODE_RET 0x5 +#define NV40_FP_OP_OUT_SAT (1 << 31) + +/* high order bits of SRC0 */ +#define NV40_FP_OP_OUT_ABS (1 << 29) +#define NV40_FP_OP_COND_SWZ_W_SHIFT 27 +#define NV40_FP_OP_COND_SWZ_W_MASK (3 << 27) +#define NV40_FP_OP_COND_SWZ_Z_SHIFT 25 +#define NV40_FP_OP_COND_SWZ_Z_MASK (3 << 25) +#define NV40_FP_OP_COND_SWZ_Y_SHIFT 23 +#define NV40_FP_OP_COND_SWZ_Y_MASK (3 << 23) +#define NV40_FP_OP_COND_SWZ_X_SHIFT 21 +#define NV40_FP_OP_COND_SWZ_X_MASK (3 << 21) +#define NV40_FP_OP_COND_SWZ_ALL_SHIFT 21 +#define NV40_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) +#define NV40_FP_OP_COND_SHIFT 18 +#define NV40_FP_OP_COND_MASK (0x07 << 18) +# define NV40_FP_OP_COND_FL 0 +# define NV40_FP_OP_COND_LT 1 +# define NV40_FP_OP_COND_EQ 2 +# define NV40_FP_OP_COND_LE 3 +# define NV40_FP_OP_COND_GT 4 +# define NV40_FP_OP_COND_NE 5 +# define NV40_FP_OP_COND_GE 6 +# define NV40_FP_OP_COND_TR 7 + +/* high order bits of SRC1 */ +#define NV40_FP_OP_OPCODE_IS_BRANCH (1<<31) +#define NV40_FP_OP_DST_SCALE_SHIFT 28 +#define NV40_FP_OP_DST_SCALE_MASK (3 << 28) +#define NV40_FP_OP_DST_SCALE_1X 0 +#define NV40_FP_OP_DST_SCALE_2X 1 +#define NV40_FP_OP_DST_SCALE_4X 2 +#define NV40_FP_OP_DST_SCALE_8X 3 +#define NV40_FP_OP_DST_SCALE_INV_2X 5 +#define NV40_FP_OP_DST_SCALE_INV_4X 6 +#define NV40_FP_OP_DST_SCALE_INV_8X 7 + +/* SRC1 LOOP */ +#define NV40_FP_OP_LOOP_INCR_SHIFT 19 +#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19) +#define NV40_FP_OP_LOOP_INDEX_SHIFT 10 +#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10) +#define NV40_FP_OP_LOOP_COUNT_SHIFT 2 +#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2) + +/* SRC1 IF */ +#define NV40_FP_OP_ELSE_ID_SHIFT 2 +#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2) + +/* SRC1 CAL */ +#define NV40_FP_OP_IADDR_SHIFT 2 +#define NV40_FP_OP_IADDR_MASK (0xFF << 2) + +/* SRC1 REP + * I have no idea why there are 3 count values here.. but they + * have always been filled with the same value in my tests so + * far.. + */ +#define NV40_FP_OP_REP_COUNT1_SHIFT 2 +#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2) +#define NV40_FP_OP_REP_COUNT2_SHIFT 10 +#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10) +#define NV40_FP_OP_REP_COUNT3_SHIFT 19 +#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19) + +/* SRC2 REP/IF */ +#define NV40_FP_OP_END_ID_SHIFT 2 +#define NV40_FP_OP_END_ID_MASK (0xFF << 2) + +// SRC2 high-order +#define NV40_FP_OP_INDEX_INPUT (1 << 30) +#define NV40_FP_OP_ADDR_INDEX_SHIFT 19 +#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19) + +//== Register selection == +#define NV40_FP_REG_TYPE_SHIFT 0 +#define NV40_FP_REG_TYPE_MASK (3 << 0) +# define NV40_FP_REG_TYPE_TEMP 0 +# define NV40_FP_REG_TYPE_INPUT 1 +# define NV40_FP_REG_TYPE_CONST 2 +#define NV40_FP_REG_SRC_SHIFT 2 +#define NV40_FP_REG_SRC_MASK (63 << 2) +#define NV40_FP_REG_SRC_HALF (1 << 8) +#define NV40_FP_REG_SWZ_ALL_SHIFT 9 +#define NV40_FP_REG_SWZ_ALL_MASK (255 << 9) +#define NV40_FP_REG_SWZ_X_SHIFT 9 +#define NV40_FP_REG_SWZ_X_MASK (3 << 9) +#define NV40_FP_REG_SWZ_Y_SHIFT 11 +#define NV40_FP_REG_SWZ_Y_MASK (3 << 11) +#define NV40_FP_REG_SWZ_Z_SHIFT 13 +#define NV40_FP_REG_SWZ_Z_MASK (3 << 13) +#define NV40_FP_REG_SWZ_W_SHIFT 15 +#define NV40_FP_REG_SWZ_W_MASK (3 << 15) +# define NV40_FP_SWIZZLE_X 0 +# define NV40_FP_SWIZZLE_Y 1 +# define NV40_FP_SWIZZLE_Z 2 +# define NV40_FP_SWIZZLE_W 3 +#define NV40_FP_REG_NEGATE (1 << 17) + +#ifndef NV40_SHADER_NO_FUCKEDNESS +#define NV40SR_NONE 0 +#define NV40SR_OUTPUT 1 +#define NV40SR_INPUT 2 +#define NV40SR_TEMP 3 +#define NV40SR_CONST 4 + +struct nv40_sreg { + int type; + int index; + + int dst_scale; + + int negate; + int abs; + int swz[4]; + + int cc_update; + int cc_update_reg; + int cc_test; + int cc_test_reg; + int cc_swz[4]; +}; + +static INLINE struct nv40_sreg +nv40_sr(int type, int index) +{ + struct nv40_sreg temp = { + .type = type, + .index = index, + .dst_scale = DEF_SCALE, + .abs = 0, + .negate = 0, + .swz = { 0, 1, 2, 3 }, + .cc_update = 0, + .cc_update_reg = 0, + .cc_test = DEF_CTEST, + .cc_test_reg = 0, + .cc_swz = { 0, 1, 2, 3 }, + }; + return temp; +} + +static INLINE struct nv40_sreg +nv40_sr_swz(struct nv40_sreg src, int x, int y, int z, int w) +{ + struct nv40_sreg dst = src; + + dst.swz[SWZ_X] = src.swz[x]; + dst.swz[SWZ_Y] = src.swz[y]; + dst.swz[SWZ_Z] = src.swz[z]; + dst.swz[SWZ_W] = src.swz[w]; + return dst; +} + +static INLINE struct nv40_sreg +nv40_sr_neg(struct nv40_sreg src) +{ + src.negate = !src.negate; + return src; +} + +static INLINE struct nv40_sreg +nv40_sr_abs(struct nv40_sreg src) +{ + src.abs = 1; + return src; +} + +static INLINE struct nv40_sreg +nv40_sr_scale(struct nv40_sreg src, int scale) +{ + src.dst_scale = scale; + return src; +} +#endif + +#endif diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c new file mode 100644 index 00000000000..2eff25aa836 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -0,0 +1,740 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "draw/draw_context.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv40_context.h" +#include "nv40_state.h" + +static void * +nv40_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_grobj *curie = nv40->screen->curie; + struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso)); + struct nouveau_stateobj *so = so_new(16, 0); + + if (cso->blend_enable) { + so_method(so, curie, NV40TCL_BLEND_ENABLE, 3); + so_data (so, 1); + so_data (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) | + nvgl_blend_func(cso->rgb_src_factor)); + so_data (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 | + nvgl_blend_func(cso->rgb_dst_factor)); + so_method(so, curie, NV40TCL_BLEND_EQUATION, 1); + so_data (so, nvgl_blend_eqn(cso->alpha_func) << 16 | + nvgl_blend_eqn(cso->rgb_func)); + } else { + so_method(so, curie, NV40TCL_BLEND_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, curie, NV40TCL_COLOR_MASK, 1); + so_data (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | + ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | + ((cso->colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | + ((cso->colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); + + if (cso->logicop_enable) { + so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 2); + so_data (so, 1); + so_data (so, nvgl_logicop_func(cso->logicop_func)); + } else { + so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, curie, NV40TCL_DITHER_ENABLE, 1); + so_data (so, cso->dither ? 1 : 0); + + so_ref(so, &bso->so); + bso->pipe = *cso; + return (void *)bso; +} + +static void +nv40_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->blend = hwcso; + nv40->dirty |= NV40_NEW_BLEND; +} + +static void +nv40_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_blend_state *bso = hwcso; + + so_ref(NULL, &bso->so); + FREE(bso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { + unsigned ret; + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + ret = NV40TCL_TEX_WRAP_S_REPEAT; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + ret = NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_CLAMP: + ret = NV40TCL_TEX_WRAP_S_CLAMP; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP; + break; + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + ret = NV40TCL_TEX_WRAP_S_REPEAT; + break; + } + + return ret >> NV40TCL_TEX_WRAP_S_SHIFT; +} + +static void * +nv40_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nv40_sampler_state *ps; + uint32_t filter = 0; + + ps = MALLOC(sizeof(struct nv40_sampler_state)); + + ps->fmt = 0; + if (!cso->normalized_coords) + ps->fmt |= NV40TCL_TEX_FORMAT_RECT; + + ps->wrap = ((wrap_mode(cso->wrap_s) << NV40TCL_TEX_WRAP_S_SHIFT) | + (wrap_mode(cso->wrap_t) << NV40TCL_TEX_WRAP_T_SHIFT) | + (wrap_mode(cso->wrap_r) << NV40TCL_TEX_WRAP_R_SHIFT)); + + ps->en = 0; + if (cso->max_anisotropy >= 2.0) { + /* no idea, binary driver sets it, works without it.. meh.. */ + ps->wrap |= (1 << 5); + + if (cso->max_anisotropy >= 16.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X; + } else + if (cso->max_anisotropy >= 12.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X; + } else + if (cso->max_anisotropy >= 10.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X; + } else + if (cso->max_anisotropy >= 8.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 6.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X; + } else + if (cso->max_anisotropy >= 4.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X; + } else { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X; + } + } + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter |= NV40TCL_TEX_FILTER_MAG_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + filter |= NV40TCL_TEX_FILTER_MAG_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV40TCL_TEX_FILTER_MIN_LINEAR; + break; + } + break; + case PIPE_TEX_FILTER_NEAREST: + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV40TCL_TEX_FILTER_MIN_NEAREST; + break; + } + break; + } + + ps->filt = filter; + + { + float limit; + + limit = CLAMP(cso->lod_bias, -16.0, 15.0); + ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + + limit = CLAMP(cso->max_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 7; + + limit = CLAMP(cso->min_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 19; + } + + + if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + switch (cso->compare_func) { + case PIPE_FUNC_NEVER: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NEVER; + break; + case PIPE_FUNC_GREATER: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GREATER; + break; + case PIPE_FUNC_EQUAL: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_EQUAL; + break; + case PIPE_FUNC_GEQUAL: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GEQUAL; + break; + case PIPE_FUNC_LESS: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LESS; + break; + case PIPE_FUNC_NOTEQUAL: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NOTEQUAL; + break; + case PIPE_FUNC_LEQUAL: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LEQUAL; + break; + case PIPE_FUNC_ALWAYS: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_ALWAYS; + break; + default: + break; + } + } + + ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | + (float_to_ubyte(cso->border_color[0]) << 16) | + (float_to_ubyte(cso->border_color[1]) << 8) | + (float_to_ubyte(cso->border_color[2]) << 0)); + + return (void *)ps; +} + +static void +nv40_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nv40_context *nv40 = nv40_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv40->tex_sampler[unit] = sampler[unit]; + nv40->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nv40->nr_samplers; unit++) { + nv40->tex_sampler[unit] = NULL; + nv40->dirty_samplers |= (1 << unit); + } + + nv40->nr_samplers = nr; + nv40->dirty |= NV40_NEW_SAMPLER; +} + +static void +nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nv40_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **miptree) +{ + struct nv40_context *nv40 = nv40_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nv40->tex_miptree[unit], miptree[unit]); + nv40->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nv40->nr_textures; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nv40->tex_miptree[unit], NULL); + nv40->dirty_samplers |= (1 << unit); + } + + nv40->nr_textures = nr; + nv40->dirty |= NV40_NEW_SAMPLER; +} + +static void * +nv40_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); + struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_grobj *curie = nv40->screen->curie; + + /*XXX: ignored: + * light_twoside + * point_smooth -nohw + * multisample + */ + + so_method(so, curie, NV40TCL_SHADE_MODEL, 1); + so_data (so, cso->flatshade ? NV40TCL_SHADE_MODEL_FLAT : + NV40TCL_SHADE_MODEL_SMOOTH); + + so_method(so, curie, NV40TCL_LINE_WIDTH, 2); + so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); + so_data (so, cso->line_smooth ? 1 : 0); + so_method(so, curie, NV40TCL_LINE_STIPPLE_ENABLE, 2); + so_data (so, cso->line_stipple_enable ? 1 : 0); + so_data (so, (cso->line_stipple_pattern << 16) | + cso->line_stipple_factor); + + so_method(so, curie, NV40TCL_POINT_SIZE, 1); + so_data (so, fui(cso->point_size)); + + so_method(so, curie, NV40TCL_POLYGON_MODE_FRONT, 6); + if (cso->front_winding == PIPE_WINDING_CCW) { + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV40TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_CW: + so_data(so, NV40TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV40TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV40TCL_FRONT_FACE_CCW); + } else { + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV40TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_CW: + so_data(so, NV40TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV40TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV40TCL_FRONT_FACE_CW); + } + so_data(so, cso->poly_smooth ? 1 : 0); + so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); + + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, cso->poly_stipple_enable ? 1 : 0); + + so_method(so, curie, NV40TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) + so_data(so, 1); + else + so_data(so, 0); + if (cso->offset_cw || cso->offset_ccw) { + so_method(so, curie, NV40TCL_POLYGON_OFFSET_FACTOR, 2); + so_data (so, fui(cso->offset_scale)); + so_data (so, fui(cso->offset_units * 2)); + } + + so_method(so, curie, NV40TCL_POINT_SPRITE, 1); + if (cso->point_sprite) { + unsigned psctl = (1 << 0), i; + + for (i = 0; i < 8; i++) { + if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) + psctl |= (1 << (8 + i)); + } + + so_data(so, psctl); + } else { + so_data(so, 0); + } + + so_ref(so, &rsso->so); + rsso->pipe = *cso; + return (void *)rsso; +} + +static void +nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->rasterizer = hwcso; + nv40->dirty |= NV40_NEW_RAST; + nv40->draw_dirty |= NV40_NEW_RAST; +} + +static void +nv40_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_rasterizer_state *rsso = hwcso; + + so_ref(NULL, &rsso->so); + FREE(rsso); +} + +static void * +nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); + struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_grobj *curie = nv40->screen->curie; + + so_method(so, curie, NV40TCL_DEPTH_FUNC, 3); + so_data (so, nvgl_comparison_op(cso->depth.func)); + so_data (so, cso->depth.writemask ? 1 : 0); + so_data (so, cso->depth.enabled ? 1 : 0); + + so_method(so, curie, NV40TCL_ALPHA_TEST_ENABLE, 3); + so_data (so, cso->alpha.enabled ? 1 : 0); + so_data (so, nvgl_comparison_op(cso->alpha.func)); + so_data (so, float_to_ubyte(cso->alpha.ref_value)); + + if (cso->stencil[0].enabled) { + so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 8); + so_data (so, cso->stencil[0].enabled ? 1 : 0); + so_data (so, cso->stencil[0].writemask); + so_data (so, nvgl_comparison_op(cso->stencil[0].func)); + so_data (so, cso->stencil[0].ref_value); + so_data (so, cso->stencil[0].valuemask); + so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + } else { + so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 1); + so_data (so, 0); + } + + if (cso->stencil[1].enabled) { + so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 8); + so_data (so, cso->stencil[1].enabled ? 1 : 0); + so_data (so, cso->stencil[1].writemask); + so_data (so, nvgl_comparison_op(cso->stencil[1].func)); + so_data (so, cso->stencil[1].ref_value); + so_data (so, cso->stencil[1].valuemask); + so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + } else { + so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &zsaso->so); + zsaso->pipe = *cso; + return (void *)zsaso; +} + +static void +nv40_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->zsa = hwcso; + nv40->dirty |= NV40_NEW_ZSA; +} + +static void +nv40_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_zsa_state *zsaso = hwcso; + + so_ref(NULL, &zsaso->so); + FREE(zsaso); +} + +static void * +nv40_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_vertex_program *vp; + + vp = CALLOC(1, sizeof(struct nv40_vertex_program)); + vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + vp->draw = draw_create_vertex_shader(nv40->draw, &vp->pipe); + + return (void *)vp; +} + +static void +nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->vertprog = hwcso; + nv40->dirty |= NV40_NEW_VERTPROG; + nv40->draw_dirty |= NV40_NEW_VERTPROG; +} + +static void +nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_vertex_program *vp = hwcso; + + draw_delete_vertex_shader(nv40->draw, vp->draw); + nv40_vertprog_destroy(nv40, vp); + FREE((void*)vp->pipe.tokens); + FREE(vp); +} + +static void * +nv40_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv40_fragment_program *fp; + + fp = CALLOC(1, sizeof(struct nv40_fragment_program)); + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + tgsi_scan_shader(fp->pipe.tokens, &fp->info); + + return (void *)fp; +} + +static void +nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->fragprog = hwcso; + nv40->dirty |= NV40_NEW_FRAGPROG; +} + +static void +nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_fragment_program *fp = hwcso; + + nv40_fragprog_destroy(nv40, fp); + FREE((void*)fp->pipe.tokens); + FREE(fp); +} + +static void +nv40_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->blend_colour = *bcol; + nv40->dirty |= NV40_NEW_BCOL; +} + +static void +nv40_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->clip = *clip; + nv40->dirty |= NV40_NEW_UCP; + nv40->draw_dirty |= NV40_NEW_UCP; +} + +static void +nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->constbuf[shader] = buf->buffer; + nv40->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float)); + + if (shader == PIPE_SHADER_VERTEX) { + nv40->dirty |= NV40_NEW_VERTPROG; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + nv40->dirty |= NV40_NEW_FRAGPROG; + } +} + +static void +nv40_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->framebuffer = *fb; + nv40->dirty |= NV40_NEW_FB; +} + +static void +nv40_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + memcpy(nv40->stipple, stipple->stipple, 4 * 32); + nv40->dirty |= NV40_NEW_STIPPLE; +} + +static void +nv40_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->scissor = *s; + nv40->dirty |= NV40_NEW_SCISSOR; +} + +static void +nv40_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->viewport = *vpt; + nv40->dirty |= NV40_NEW_VIEWPORT; + nv40->draw_dirty |= NV40_NEW_VIEWPORT; +} + +static void +nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + memcpy(nv40->vtxbuf, vb, sizeof(*vb) * count); + nv40->vtxbuf_nr = count; + + nv40->dirty |= NV40_NEW_ARRAYS; + nv40->draw_dirty |= NV40_NEW_ARRAYS; +} + +static void +nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_element *ve) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + memcpy(nv40->vtxelt, ve, sizeof(*ve) * count); + nv40->vtxelt_nr = count; + + nv40->dirty |= NV40_NEW_ARRAYS; + nv40->draw_dirty |= NV40_NEW_ARRAYS; +} + +static void +nv40_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->edgeflags = bitfield; + nv40->dirty |= NV40_NEW_ARRAYS; + nv40->draw_dirty |= NV40_NEW_ARRAYS; +} + +void +nv40_init_state_functions(struct nv40_context *nv40) +{ + nv40->pipe.create_blend_state = nv40_blend_state_create; + nv40->pipe.bind_blend_state = nv40_blend_state_bind; + nv40->pipe.delete_blend_state = nv40_blend_state_delete; + + nv40->pipe.create_sampler_state = nv40_sampler_state_create; + nv40->pipe.bind_sampler_states = nv40_sampler_state_bind; + nv40->pipe.delete_sampler_state = nv40_sampler_state_delete; + nv40->pipe.set_sampler_textures = nv40_set_sampler_texture; + + nv40->pipe.create_rasterizer_state = nv40_rasterizer_state_create; + nv40->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind; + nv40->pipe.delete_rasterizer_state = nv40_rasterizer_state_delete; + + nv40->pipe.create_depth_stencil_alpha_state = + nv40_depth_stencil_alpha_state_create; + nv40->pipe.bind_depth_stencil_alpha_state = + nv40_depth_stencil_alpha_state_bind; + nv40->pipe.delete_depth_stencil_alpha_state = + nv40_depth_stencil_alpha_state_delete; + + nv40->pipe.create_vs_state = nv40_vp_state_create; + nv40->pipe.bind_vs_state = nv40_vp_state_bind; + nv40->pipe.delete_vs_state = nv40_vp_state_delete; + + nv40->pipe.create_fs_state = nv40_fp_state_create; + nv40->pipe.bind_fs_state = nv40_fp_state_bind; + nv40->pipe.delete_fs_state = nv40_fp_state_delete; + + nv40->pipe.set_blend_color = nv40_set_blend_color; + nv40->pipe.set_clip_state = nv40_set_clip_state; + nv40->pipe.set_constant_buffer = nv40_set_constant_buffer; + nv40->pipe.set_framebuffer_state = nv40_set_framebuffer_state; + nv40->pipe.set_polygon_stipple = nv40_set_polygon_stipple; + nv40->pipe.set_scissor_state = nv40_set_scissor_state; + nv40->pipe.set_viewport_state = nv40_set_viewport_state; + + nv40->pipe.set_edgeflags = nv40_set_edgeflags; + nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers; + nv40->pipe.set_vertex_elements = nv40_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h new file mode 100644 index 00000000000..9c55903ae30 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state.h @@ -0,0 +1,91 @@ +#ifndef __NV40_STATE_H__ +#define __NV40_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv40_sampler_state { + uint32_t fmt; + uint32_t wrap; + uint32_t en; + uint32_t filt; + uint32_t bcol; +}; + +struct nv40_vertex_program_exec { + uint32_t data[4]; + boolean has_branch_offset; + int const_index; +}; + +struct nv40_vertex_program_data { + int index; /* immediates == -1 */ + float value[4]; +}; + +struct nv40_vertex_program { + struct pipe_shader_state pipe; + + struct draw_vertex_shader *draw; + + boolean translated; + + struct pipe_clip_state ucp; + + struct nv40_vertex_program_exec *insns; + unsigned nr_insns; + struct nv40_vertex_program_data *consts; + unsigned nr_consts; + + struct nouveau_resource *exec; + unsigned exec_start; + struct nouveau_resource *data; + unsigned data_start; + unsigned data_start_min; + + uint32_t ir; + uint32_t or; + uint32_t clip_ctrl; + struct nouveau_stateobj *so; +}; + +struct nv40_fragment_program_data { + unsigned offset; + unsigned index; +}; + +struct nv40_fragment_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + + boolean translated; + unsigned samplers; + + uint32_t *insn; + int insn_len; + + struct nv40_fragment_program_data *consts; + unsigned nr_consts; + + struct pipe_buffer *buffer; + + uint32_t fp_control; + struct nouveau_stateobj *so; +}; + +struct nv40_miptree { + struct pipe_texture base; + + struct pipe_buffer *buffer; + uint total_size; + + struct pipe_texture *shadow_tex; + struct pipe_surface *shadow_surface; + + struct { + uint pitch; + uint *image_offset; + } level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +#endif diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c new file mode 100644 index 00000000000..95e6d7394f4 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_blend.c @@ -0,0 +1,40 @@ +#include "nv40_context.h" + +static boolean +nv40_state_blend_validate(struct nv40_context *nv40) +{ + so_ref(nv40->blend->so, &nv40->state.hw[NV40_STATE_BLEND]); + return TRUE; +} + +struct nv40_state_entry nv40_state_blend = { + .validate = nv40_state_blend_validate, + .dirty = { + .pipe = NV40_NEW_BLEND, + .hw = NV40_STATE_BLEND + } +}; + +static boolean +nv40_state_blend_colour_validate(struct nv40_context *nv40) +{ + struct nouveau_stateobj *so = so_new(2, 0); + struct pipe_blend_color *bcol = &nv40->blend_colour; + + so_method(so, nv40->screen->curie, NV40TCL_BLEND_COLOR, 1); + so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | + (float_to_ubyte(bcol->color[0]) << 16) | + (float_to_ubyte(bcol->color[1]) << 8) | + (float_to_ubyte(bcol->color[2]) << 0))); + + so_ref(so, &nv40->state.hw[NV40_STATE_BCOL]); + return TRUE; +} + +struct nv40_state_entry nv40_state_blend_colour = { + .validate = nv40_state_blend_colour_validate, + .dirty = { + .pipe = NV40_NEW_BCOL, + .hw = NV40_STATE_BCOL + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c new file mode 100644 index 00000000000..ce859def108 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -0,0 +1,184 @@ +#include "nv40_context.h" +#include "nv40_state.h" +#include "draw/draw_context.h" + +static struct nv40_state_entry *render_states[] = { + &nv40_state_framebuffer, + &nv40_state_rasterizer, + &nv40_state_scissor, + &nv40_state_stipple, + &nv40_state_fragprog, + &nv40_state_fragtex, + &nv40_state_vertprog, + &nv40_state_blend, + &nv40_state_blend_colour, + &nv40_state_zsa, + &nv40_state_viewport, + &nv40_state_vbo, + NULL +}; + +static struct nv40_state_entry *swtnl_states[] = { + &nv40_state_framebuffer, + &nv40_state_rasterizer, + &nv40_state_scissor, + &nv40_state_stipple, + &nv40_state_fragprog, + &nv40_state_fragtex, + &nv40_state_vertprog, + &nv40_state_blend, + &nv40_state_blend_colour, + &nv40_state_zsa, + &nv40_state_viewport, + &nv40_state_vtxfmt, + NULL +}; + +static void +nv40_state_do_validate(struct nv40_context *nv40, + struct nv40_state_entry **states) +{ + const struct pipe_framebuffer_state *fb = &nv40->framebuffer; + unsigned i; + + for (i = 0; i < fb->nr_cbufs; i++) + fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; + if (fb->zsbuf) + fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + + while (*states) { + struct nv40_state_entry *e = *states; + + if (nv40->dirty & e->dirty.pipe) { + if (e->validate(nv40)) + nv40->state.dirty |= (1ULL << e->dirty.hw); + } + + states++; + } + nv40->dirty = 0; +} + +void +nv40_state_emit(struct nv40_context *nv40) +{ + struct nv40_state *state = &nv40->state; + struct nv40_screen *screen = nv40->screen; + unsigned i, samplers; + uint64_t states; + + if (nv40->pctx_id != screen->cur_pctx) { + for (i = 0; i < NV40_STATE_MAX; i++) { + if (state->hw[i] && screen->state[i] != state->hw[i]) + state->dirty |= (1ULL << i); + } + + screen->cur_pctx = nv40->pctx_id; + } + + for (i = 0, states = state->dirty; states; i++) { + if (!(states & (1ULL << i))) + continue; + so_ref (state->hw[i], &nv40->screen->state[i]); + if (state->hw[i]) + so_emit(nv40->nvws, nv40->screen->state[i]); + states &= ~(1ULL << i); + } + + if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) | + (1ULL << NV40_STATE_FRAGTEX0))) { + BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (2); + BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (1); + } + + state->dirty = 0; + + so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FB]); + for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { + if (!(samplers & (1 << i))) + continue; + so_emit_reloc_markers(nv40->nvws, + state->hw[NV40_STATE_FRAGTEX0+i]); + samplers &= ~(1ULL << i); + } + so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FRAGPROG]); + if (state->hw[NV40_STATE_VTXBUF] && nv40->render_mode == HW) + so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_VTXBUF]); +} + +boolean +nv40_state_validate(struct nv40_context *nv40) +{ + boolean was_sw = nv40->fallback_swtnl ? TRUE : FALSE; + + if (nv40->render_mode != HW) { + /* Don't even bother trying to go back to hw if none + * of the states that caused swtnl previously have changed. + */ + if ((nv40->fallback_swtnl & nv40->dirty) + != nv40->fallback_swtnl) + return FALSE; + + /* Attempt to go to hwtnl again */ + nv40->pipe.flush(&nv40->pipe, 0, NULL); + nv40->dirty |= (NV40_NEW_VIEWPORT | + NV40_NEW_VERTPROG | + NV40_NEW_ARRAYS); + nv40->render_mode = HW; + } + + nv40_state_do_validate(nv40, render_states); + if (nv40->fallback_swtnl || nv40->fallback_swrast) + return FALSE; + + if (was_sw) + NOUVEAU_ERR("swtnl->hw\n"); + + return TRUE; +} + +boolean +nv40_state_validate_swtnl(struct nv40_context *nv40) +{ + struct draw_context *draw = nv40->draw; + + /* Setup for swtnl */ + if (nv40->render_mode == HW) { + NOUVEAU_ERR("hw->swtnl 0x%08x\n", nv40->fallback_swtnl); + nv40->pipe.flush(&nv40->pipe, 0, NULL); + nv40->dirty |= (NV40_NEW_VIEWPORT | + NV40_NEW_VERTPROG | + NV40_NEW_ARRAYS); + nv40->render_mode = SWTNL; + } + + if (nv40->draw_dirty & NV40_NEW_VERTPROG) + draw_bind_vertex_shader(draw, nv40->vertprog->draw); + + if (nv40->draw_dirty & NV40_NEW_RAST) + draw_set_rasterizer_state(draw, &nv40->rasterizer->pipe); + + if (nv40->draw_dirty & NV40_NEW_UCP) + draw_set_clip_state(draw, &nv40->clip); + + if (nv40->draw_dirty & NV40_NEW_VIEWPORT) + draw_set_viewport_state(draw, &nv40->viewport); + + if (nv40->draw_dirty & NV40_NEW_ARRAYS) { + draw_set_edgeflags(draw, nv40->edgeflags); + draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf); + draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt); + } + + nv40_state_do_validate(nv40, swtnl_states); + if (nv40->fallback_swrast) { + NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nv40->fallback_swrast); + return FALSE; + } + + nv40->draw_dirty = 0; + return TRUE; +} + diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c new file mode 100644 index 00000000000..454abad31f4 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -0,0 +1,162 @@ +#include "nv40_context.h" +#include "nouveau/nouveau_util.h" + +static struct pipe_buffer * +nv40_surface_buffer(struct pipe_surface *surface) +{ + struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture; + return mt->buffer; +} + +static boolean +nv40_state_framebuffer_validate(struct nv40_context *nv40) +{ + struct pipe_framebuffer_state *fb = &nv40->framebuffer; + struct pipe_surface *rt[4], *zeta; + uint32_t rt_enable, rt_format; + int i, colour_format = 0, zeta_format = 0; + struct nouveau_stateobj *so = so_new(64, 10); + unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; + unsigned w = fb->width; + unsigned h = fb->height; + + rt_enable = 0; + for (i = 0; i < fb->nr_cbufs; i++) { + if (colour_format) { + assert(colour_format == fb->cbufs[i]->format); + } else { + colour_format = fb->cbufs[i]->format; + rt_enable |= (NV40TCL_RT_ENABLE_COLOR0 << i); + rt[i] = fb->cbufs[i]; + } + } + + if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 | + NV40TCL_RT_ENABLE_COLOR3)) + rt_enable |= NV40TCL_RT_ENABLE_MRT; + + if (fb->zsbuf) { + zeta_format = fb->zsbuf->format; + zeta = fb->zsbuf; + } + + if (!(rt[0]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); + for (i = 1; i < fb->nr_cbufs; i++) + assert(!(rt[i]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); + + rt_format = NV40TCL_RT_FORMAT_TYPE_SWIZZLED | + log2i(fb->width) << NV40TCL_RT_FORMAT_LOG2_WIDTH_SHIFT | + log2i(fb->height) << NV40TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT; + } + else + rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR; + + switch (colour_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case 0: + rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_R5G6B5_UNORM: + rt_format |= NV40TCL_RT_FORMAT_COLOR_R5G6B5; + break; + default: + assert(0); + } + + switch (zeta_format) { + case PIPE_FORMAT_Z16_UNORM: + rt_format |= NV40TCL_RT_FORMAT_ZETA_Z16; + break; + case PIPE_FORMAT_Z24S8_UNORM: + case 0: + rt_format |= NV40TCL_RT_FORMAT_ZETA_Z24S8; + break; + default: + assert(0); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) { + so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR0, 1); + so_reloc (so, nv40_surface_buffer(rt[0]), 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->screen->curie, NV40TCL_COLOR0_PITCH, 2); + so_data (so, rt[0]->stride); + so_reloc (so, nv40_surface_buffer(rt[0]), rt[0]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) { + so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR1, 1); + so_reloc (so, nv40_surface_buffer(rt[1]), 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->screen->curie, NV40TCL_COLOR1_OFFSET, 2); + so_reloc (so, nv40_surface_buffer(rt[1]), rt[1]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_data (so, rt[1]->stride); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { + so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR2, 1); + so_reloc (so, nv40_surface_buffer(rt[2]), 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->screen->curie, NV40TCL_COLOR2_OFFSET, 1); + so_reloc (so, nv40_surface_buffer(rt[2]), rt[2]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, nv40->screen->curie, NV40TCL_COLOR2_PITCH, 1); + so_data (so, rt[2]->stride); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { + so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR3, 1); + so_reloc (so, nv40_surface_buffer(rt[3]), 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->screen->curie, NV40TCL_COLOR3_OFFSET, 1); + so_reloc (so, nv40_surface_buffer(rt[3]), rt[3]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, nv40->screen->curie, NV40TCL_COLOR3_PITCH, 1); + so_data (so, rt[3]->stride); + } + + if (zeta_format) { + so_method(so, nv40->screen->curie, NV40TCL_DMA_ZETA, 1); + so_reloc (so, nv40_surface_buffer(zeta), 0, rt_flags | NOUVEAU_BO_OR, + nv40->nvws->channel->vram->handle, + nv40->nvws->channel->gart->handle); + so_method(so, nv40->screen->curie, NV40TCL_ZETA_OFFSET, 1); + so_reloc (so, nv40_surface_buffer(zeta), zeta->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, nv40->screen->curie, NV40TCL_ZETA_PITCH, 1); + so_data (so, zeta->stride); + } + + so_method(so, nv40->screen->curie, NV40TCL_RT_ENABLE, 1); + so_data (so, rt_enable); + so_method(so, nv40->screen->curie, NV40TCL_RT_HORIZ, 3); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_data (so, rt_format); + so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_HORIZ, 2); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2); + so_data (so, ((w - 1) << 16) | 0); + so_data (so, ((h - 1) << 16) | 0); + so_method(so, nv40->screen->curie, 0x1d88, 1); + so_data (so, (1 << 12) | h); + + so_ref(so, &nv40->state.hw[NV40_STATE_FB]); + return TRUE; +} + +struct nv40_state_entry nv40_state_framebuffer = { + .validate = nv40_state_framebuffer_validate, + .dirty = { + .pipe = NV40_NEW_FB, + .hw = NV40_STATE_FB + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_rasterizer.c b/src/gallium/drivers/nv40/nv40_state_rasterizer.c new file mode 100644 index 00000000000..9ecda5990f0 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_rasterizer.c @@ -0,0 +1,17 @@ +#include "nv40_context.h" + +static boolean +nv40_state_rasterizer_validate(struct nv40_context *nv40) +{ + so_ref(nv40->rasterizer->so, + &nv40->state.hw[NV40_STATE_RAST]); + return TRUE; +} + +struct nv40_state_entry nv40_state_rasterizer = { + .validate = nv40_state_rasterizer_validate, + .dirty = { + .pipe = NV40_NEW_RAST, + .hw = NV40_STATE_RAST + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c new file mode 100644 index 00000000000..285239ef419 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_scissor.c @@ -0,0 +1,35 @@ +#include "nv40_context.h" + +static boolean +nv40_state_scissor_validate(struct nv40_context *nv40) +{ + struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; + struct pipe_scissor_state *s = &nv40->scissor; + struct nouveau_stateobj *so; + + if (nv40->state.hw[NV40_STATE_SCISSOR] && + (rast->scissor == 0 && nv40->state.scissor_enabled == 0)) + return FALSE; + nv40->state.scissor_enabled = rast->scissor; + + so = so_new(3, 0); + so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2); + if (nv40->state.scissor_enabled) { + so_data (so, ((s->maxx - s->minx) << 16) | s->minx); + so_data (so, ((s->maxy - s->miny) << 16) | s->miny); + } else { + so_data (so, 4096 << 16); + so_data (so, 4096 << 16); + } + + so_ref(so, &nv40->state.hw[NV40_STATE_SCISSOR]); + return TRUE; +} + +struct nv40_state_entry nv40_state_scissor = { + .validate = nv40_state_scissor_validate, + .dirty = { + .pipe = NV40_NEW_SCISSOR | NV40_NEW_RAST, + .hw = NV40_STATE_SCISSOR + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c new file mode 100644 index 00000000000..b51024ad9b2 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_stipple.c @@ -0,0 +1,39 @@ +#include "nv40_context.h" + +static boolean +nv40_state_stipple_validate(struct nv40_context *nv40) +{ + struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; + struct nouveau_grobj *curie = nv40->screen->curie; + struct nouveau_stateobj *so; + + if (nv40->state.hw[NV40_STATE_STIPPLE] && + (rast->poly_stipple_enable == 0 && nv40->state.stipple_enabled == 0)) + return FALSE; + + if (rast->poly_stipple_enable) { + unsigned i; + + so = so_new(35, 0); + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 1); + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); + for (i = 0; i < 32; i++) + so_data(so, nv40->stipple[i]); + } else { + so = so_new(2, 0); + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &nv40->state.hw[NV40_STATE_STIPPLE]); + return TRUE; +} + +struct nv40_state_entry nv40_state_stipple = { + .validate = nv40_state_stipple_validate, + .dirty = { + .pipe = NV40_NEW_STIPPLE | NV40_NEW_RAST, + .hw = NV40_STATE_STIPPLE, + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c new file mode 100644 index 00000000000..869a55b4053 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_viewport.c @@ -0,0 +1,67 @@ +#include "nv40_context.h" + +static boolean +nv40_state_viewport_validate(struct nv40_context *nv40) +{ + struct pipe_viewport_state *vpt = &nv40->viewport; + struct nouveau_stateobj *so; + unsigned bypass; + + if (nv40->render_mode == HW && !nv40->rasterizer->pipe.bypass_clipping) + bypass = 0; + else + bypass = 1; + + if (nv40->state.hw[NV40_STATE_VIEWPORT] && + (bypass || !(nv40->dirty & NV40_NEW_VIEWPORT)) && + nv40->state.viewport_bypass == bypass) + return FALSE; + nv40->state.viewport_bypass = bypass; + + so = so_new(11, 0); + if (!bypass) { + so_method(so, nv40->screen->curie, + NV40TCL_VIEWPORT_TRANSLATE_X, 8); + so_data (so, fui(vpt->translate[0])); + so_data (so, fui(vpt->translate[1])); + so_data (so, fui(vpt->translate[2])); + so_data (so, fui(vpt->translate[3])); + so_data (so, fui(vpt->scale[0])); + so_data (so, fui(vpt->scale[1])); + so_data (so, fui(vpt->scale[2])); + so_data (so, fui(vpt->scale[3])); + so_method(so, nv40->screen->curie, 0x1d78, 1); + so_data (so, 1); + } else { + so_method(so, nv40->screen->curie, + NV40TCL_VIEWPORT_TRANSLATE_X, 8); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + so_data (so, fui(1.0)); + so_data (so, fui(1.0)); + so_data (so, fui(0.0)); + /* Not entirely certain what this is yet. The DDX uses this + * value also as it fixes rendering when you pass + * pre-transformed vertices to the GPU. My best gusss is that + * this bypasses some culling/clipping stage. Might be worth + * noting that points/lines are uneffected by whatever this + * value fixes, only filled polygons are effected. + */ + so_method(so, nv40->screen->curie, 0x1d78, 1); + so_data (so, 0x110); + } + + so_ref(so, &nv40->state.hw[NV40_STATE_VIEWPORT]); + return TRUE; +} + +struct nv40_state_entry nv40_state_viewport = { + .validate = nv40_state_viewport_validate, + .dirty = { + .pipe = NV40_NEW_VIEWPORT | NV40_NEW_RAST, + .hw = NV40_STATE_VIEWPORT + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_zsa.c b/src/gallium/drivers/nv40/nv40_state_zsa.c new file mode 100644 index 00000000000..fb760677c88 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_zsa.c @@ -0,0 +1,17 @@ +#include "nv40_context.h" + +static boolean +nv40_state_zsa_validate(struct nv40_context *nv40) +{ + so_ref(nv40->zsa->so, + &nv40->state.hw[NV40_STATE_ZSA]); + return TRUE; +} + +struct nv40_state_entry nv40_state_zsa = { + .validate = nv40_state_zsa_validate, + .dirty = { + .pipe = NV40_NEW_ZSA, + .hw = NV40_STATE_ZSA + } +}; diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/nv40/nv40_surface.c new file mode 100644 index 00000000000..c4a5fb20d97 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_surface.c @@ -0,0 +1,72 @@ + +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "nv40_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" + +static void +nv40_surface_copy(struct pipe_context *pipe, boolean do_flip, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv04_surface_2d *eng2d = nv40->screen->eng2d; + + if (do_flip) { + desty += height; + while (height--) { + eng2d->copy(eng2d, dest, destx, desty--, src, + srcx, srcy++, width, 1); + } + return; + } + + eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); +} + +static void +nv40_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv04_surface_2d *eng2d = nv40->screen->eng2d; + + eng2d->fill(eng2d, dest, destx, desty, width, height, value); +} + +void +nv40_init_surface_functions(struct nv40_context *nv40) +{ + nv40->pipe.surface_copy = nv40_surface_copy; + nv40->pipe.surface_fill = nv40_surface_fill; +} diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c new file mode 100644 index 00000000000..8f1834628f7 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -0,0 +1,555 @@ +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv40_context.h" +#include "nv40_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" +#include "nouveau/nouveau_util.h" + +#define FORCE_SWTNL 0 + +static INLINE int +nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) +{ + switch (pipe) { + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + *fmt = NV40TCL_VTXFMT_TYPE_FLOAT; + break; + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + *fmt = NV40TCL_VTXFMT_TYPE_UBYTE; + break; + case PIPE_FORMAT_R16_SSCALED: + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + *fmt = NV40TCL_VTXFMT_TYPE_USHORT; + break; + default: + NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe)); + return 1; + } + + switch (pipe) { + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R16_SSCALED: + *ncomp = 1; + break; + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R16G16_SSCALED: + *ncomp = 2; + break; + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R16G16B16_SSCALED: + *ncomp = 3; + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + *ncomp = 4; + break; + default: + NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe)); + return 1; + } + + return 0; +} + +static boolean +nv40_vbo_set_idxbuf(struct nv40_context *nv40, struct pipe_buffer *ib, + unsigned ib_size) +{ + struct pipe_screen *pscreen = &nv40->screen->pipe; + unsigned type; + + if (!ib) { + nv40->idxbuf = NULL; + nv40->idxbuf_format = 0xdeadbeef; + return FALSE; + } + + if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1) + return FALSE; + + switch (ib_size) { + case 2: + type = NV40TCL_IDXBUF_FORMAT_TYPE_U16; + break; + case 4: + type = NV40TCL_IDXBUF_FORMAT_TYPE_U32; + break; + default: + return FALSE; + } + + if (ib != nv40->idxbuf || + type != nv40->idxbuf_format) { + nv40->dirty |= NV40_NEW_ARRAYS; + nv40->idxbuf = ib; + nv40->idxbuf_format = type; + } + + return TRUE; +} + +static boolean +nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so, + int attrib, struct pipe_vertex_element *ve, + struct pipe_vertex_buffer *vb) +{ + struct pipe_winsys *ws = nv40->pipe.winsys; + struct nouveau_grobj *curie = nv40->screen->curie; + unsigned type, ncomp; + void *map; + + if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) + return FALSE; + + map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map += vb->buffer_offset + ve->src_offset; + + switch (type) { + case NV40TCL_VTXFMT_TYPE_FLOAT: + { + float *v = map; + + switch (ncomp) { + case 4: + so_method(so, curie, NV40TCL_VTX_ATTR_4F_X(attrib), 4); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + so_data (so, fui(v[2])); + so_data (so, fui(v[3])); + break; + case 3: + so_method(so, curie, NV40TCL_VTX_ATTR_3F_X(attrib), 3); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + so_data (so, fui(v[2])); + break; + case 2: + so_method(so, curie, NV40TCL_VTX_ATTR_2F_X(attrib), 2); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + break; + case 1: + so_method(so, curie, NV40TCL_VTX_ATTR_1F(attrib), 1); + so_data (so, fui(v[0])); + break; + default: + ws->buffer_unmap(ws, vb->buffer); + return FALSE; + } + } + break; + default: + ws->buffer_unmap(ws, vb->buffer); + return FALSE; + } + + ws->buffer_unmap(ws, vb->buffer); + + return TRUE; +} + +boolean +nv40_draw_arrays(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_channel *chan = nv40->nvws->channel; + unsigned restart; + + nv40_vbo_set_idxbuf(nv40, NULL, 0); + if (FORCE_SWTNL || !nv40_state_validate(nv40)) { + return nv40_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + } + + while (count) { + unsigned vc, nr; + + nv40_state_emit(nv40); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, + mode, start, count, &restart); + if (!vc) { + FIRE_RING(NULL); + continue; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + nr = (vc & 0xff); + if (nr) { + BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1); + OUT_RING (((nr - 1) << 24) | start); + start += nr; + } + + nr = vc >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; + + nr -= push; + + BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push); + while (push--) { + OUT_RING(((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + count -= vc; + start = restart; + } + + pipe->flush(pipe, 0, NULL); + return TRUE; +} + +static INLINE void +nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv40->nvws->channel; + + while (count) { + uint8_t *elts = (uint8_t *)ib + start; + unsigned vc, push, restart; + + nv40_state_emit(nv40); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(NULL); + continue; + } + count -= vc; + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + if (vc & 1) { + BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (elts[0]); + elts++; vc--; + } + + while (vc) { + unsigned i; + + push = MIN2(vc, 2047 * 2); + + BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((elts[i+1] << 16) | elts[i]); + + vc -= push; + elts += push; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + start = restart; + } +} + +static INLINE void +nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv40->nvws->channel; + + while (count) { + uint16_t *elts = (uint16_t *)ib + start; + unsigned vc, push, restart; + + nv40_state_emit(nv40); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(NULL); + continue; + } + count -= vc; + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + if (vc & 1) { + BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (elts[0]); + elts++; vc--; + } + + while (vc) { + unsigned i; + + push = MIN2(vc, 2047 * 2); + + BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((elts[i+1] << 16) | elts[i]); + + vc -= push; + elts += push; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + start = restart; + } +} + +static INLINE void +nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv40->nvws->channel; + + while (count) { + uint32_t *elts = (uint32_t *)ib + start; + unsigned vc, push, restart; + + nv40_state_emit(nv40); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(NULL); + continue; + } + count -= vc; + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + while (vc) { + push = MIN2(vc, 2047); + + BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push); + OUT_RINGp (elts, push); + + vc -= push; + elts += push; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + start = restart; + } +} + +static boolean +nv40_draw_elements_inline(struct pipe_context *pipe, + struct pipe_buffer *ib, unsigned ib_size, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + void *map; + + map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); + if (!ib) { + NOUVEAU_ERR("failed mapping ib\n"); + return FALSE; + } + + switch (ib_size) { + case 1: + nv40_draw_elements_u08(nv40, map, mode, start, count); + break; + case 2: + nv40_draw_elements_u16(nv40, map, mode, start, count); + break; + case 4: + nv40_draw_elements_u32(nv40, map, mode, start, count); + break; + default: + NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); + break; + } + + ws->buffer_unmap(ws, ib); + return TRUE; +} + +static boolean +nv40_draw_elements_vbo(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_channel *chan = nv40->nvws->channel; + unsigned restart; + + while (count) { + unsigned nr, vc; + + nv40_state_emit(nv40); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, + mode, start, count, &restart); + if (!vc) { + FIRE_RING(NULL); + continue; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + nr = (vc & 0xff); + if (nr) { + BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1); + OUT_RING (((nr - 1) << 24) | start); + start += nr; + } + + nr = vc >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; + + nr -= push; + + BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push); + while (push--) { + OUT_RING(((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + count -= vc; + start = restart; + } + + return TRUE; +} + +boolean +nv40_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + boolean idxbuf; + + idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize); + if (FORCE_SWTNL || !nv40_state_validate(nv40)) { + return nv40_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + } + + if (idxbuf) { + nv40_draw_elements_vbo(pipe, mode, start, count); + } else { + nv40_draw_elements_inline(pipe, indexBuffer, indexSize, + mode, start, count); + } + + pipe->flush(pipe, 0, NULL); + return TRUE; +} + +static boolean +nv40_vbo_validate(struct nv40_context *nv40) +{ + struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL; + struct nouveau_grobj *curie = nv40->screen->curie; + struct pipe_buffer *ib = nv40->idxbuf; + unsigned ib_format = nv40->idxbuf_format; + unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + int hw; + + if (nv40->edgeflags) { + nv40->fallback_swtnl |= NV40_NEW_ARRAYS; + return FALSE; + } + + vtxbuf = so_new(20, 18); + so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr); + vtxfmt = so_new(17, 0); + so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr); + + for (hw = 0; hw < nv40->vtxelt_nr; hw++) { + struct pipe_vertex_element *ve; + struct pipe_vertex_buffer *vb; + unsigned type, ncomp; + + ve = &nv40->vtxelt[hw]; + vb = &nv40->vtxbuf[ve->vertex_buffer_index]; + + if (!vb->stride) { + if (!sattr) + sattr = so_new(16 * 5, 0); + + if (nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) { + so_data(vtxbuf, 0); + so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); + continue; + } + } + + if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { + nv40->fallback_swtnl |= NV40_NEW_ARRAYS; + so_ref(NULL, &vtxbuf); + so_ref(NULL, &vtxfmt); + return FALSE; + } + + so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV40TCL_VTXBUF_ADDRESS_DMA1); + so_data (vtxfmt, ((vb->stride << NV40TCL_VTXFMT_STRIDE_SHIFT) | + (ncomp << NV40TCL_VTXFMT_SIZE_SHIFT) | type)); + } + + if (ib) { + so_method(vtxbuf, curie, NV40TCL_IDXBUF_ADDRESS, 2); + so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); + so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR, + 0, NV40TCL_IDXBUF_FORMAT_DMA1); + } + + so_method(vtxbuf, curie, 0x1710, 1); + so_data (vtxbuf, 0); + + so_ref(vtxbuf, &nv40->state.hw[NV40_STATE_VTXBUF]); + nv40->state.dirty |= (1ULL << NV40_STATE_VTXBUF); + so_ref(vtxfmt, &nv40->state.hw[NV40_STATE_VTXFMT]); + nv40->state.dirty |= (1ULL << NV40_STATE_VTXFMT); + so_ref(sattr, &nv40->state.hw[NV40_STATE_VTXATTR]); + nv40->state.dirty |= (1ULL << NV40_STATE_VTXATTR); + return FALSE; +} + +struct nv40_state_entry nv40_state_vbo = { + .validate = nv40_vbo_validate, + .dirty = { + .pipe = NV40_NEW_ARRAYS, + .hw = 0, + } +}; + diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c new file mode 100644 index 00000000000..1392fe956f7 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -0,0 +1,1070 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv40_context.h" +#include "nv40_state.h" + +/* TODO (at least...): + * 1. Indexed consts + ARL + * 3. NV_vp11, NV_vp2, NV_vp3 features + * - extra arith opcodes + * - branching + * - texture sampling + * - indexed attribs + * - indexed results + * 4. bugs + */ + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 8 +#define MASK_Y 4 +#define MASK_Z 2 +#define MASK_W 1 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE 0 +#define DEF_CTEST 0 +#include "nv40_shader.h" + +#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv40_sr_neg((s)) +#define abs(s) nv40_sr_abs((s)) + +#define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) + +struct nv40_vpc { + struct nv40_vertex_program *vp; + + struct nv40_vertex_program_exec *vpi; + + unsigned r_temps; + unsigned r_temps_discard; + struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nv40_sreg *r_address; + struct nv40_sreg *r_temp; + + struct nv40_sreg *imm; + unsigned nr_imm; + + unsigned hpos_idx; +}; + +static struct nv40_sreg +temp(struct nv40_vpc *vpc) +{ + int idx = ffs(~vpc->r_temps) - 1; + + if (idx < 0) { + NOUVEAU_ERR("out of temps!!\n"); + assert(0); + return nv40_sr(NV40SR_TEMP, 0); + } + + vpc->r_temps |= (1 << idx); + vpc->r_temps_discard |= (1 << idx); + return nv40_sr(NV40SR_TEMP, idx); +} + +static INLINE void +release_temps(struct nv40_vpc *vpc) +{ + vpc->r_temps &= ~vpc->r_temps_discard; + vpc->r_temps_discard = 0; +} + +static struct nv40_sreg +constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w) +{ + struct nv40_vertex_program *vp = vpc->vp; + struct nv40_vertex_program_data *vpd; + int idx; + + if (pipe >= 0) { + for (idx = 0; idx < vp->nr_consts; idx++) { + if (vp->consts[idx].index == pipe) + return nv40_sr(NV40SR_CONST, idx); + } + } + + idx = vp->nr_consts++; + vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); + vpd = &vp->consts[idx]; + + vpd->index = pipe; + vpd->value[0] = x; + vpd->value[1] = y; + vpd->value[2] = z; + vpd->value[3] = w; + return nv40_sr(NV40SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nv40_vp_arith((cc), (s), NV40_VP_INST_##o, (d), (m), (s0), (s1), (s2)) + +static void +emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src) +{ + struct nv40_vertex_program *vp = vpc->vp; + uint32_t sr = 0; + + switch (src.type) { + case NV40SR_TEMP: + sr |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT); + sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT); + break; + case NV40SR_INPUT: + sr |= (NV40_VP_SRC_REG_TYPE_INPUT << + NV40_VP_SRC_REG_TYPE_SHIFT); + vp->ir |= (1 << src.index); + hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT); + break; + case NV40SR_CONST: + sr |= (NV40_VP_SRC_REG_TYPE_CONST << + NV40_VP_SRC_REG_TYPE_SHIFT); + assert(vpc->vpi->const_index == -1 || + vpc->vpi->const_index == src.index); + vpc->vpi->const_index = src.index; + break; + case NV40SR_NONE: + sr |= (NV40_VP_SRC_REG_TYPE_INPUT << + NV40_VP_SRC_REG_TYPE_SHIFT); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NV40_VP_SRC_NEGATE; + + if (src.abs) + hw[0] |= (1 << (21 + pos)); + + sr |= ((src.swz[0] << NV40_VP_SRC_SWZ_X_SHIFT) | + (src.swz[1] << NV40_VP_SRC_SWZ_Y_SHIFT) | + (src.swz[2] << NV40_VP_SRC_SWZ_Z_SHIFT) | + (src.swz[3] << NV40_VP_SRC_SWZ_W_SHIFT)); + + switch (pos) { + case 0: + hw[1] |= ((sr & NV40_VP_SRC0_HIGH_MASK) >> + NV40_VP_SRC0_HIGH_SHIFT) << NV40_VP_INST_SRC0H_SHIFT; + hw[2] |= (sr & NV40_VP_SRC0_LOW_MASK) << + NV40_VP_INST_SRC0L_SHIFT; + break; + case 1: + hw[2] |= sr << NV40_VP_INST_SRC1_SHIFT; + break; + case 2: + hw[2] |= ((sr & NV40_VP_SRC2_HIGH_MASK) >> + NV40_VP_SRC2_HIGH_SHIFT) << NV40_VP_INST_SRC2H_SHIFT; + hw[3] |= (sr & NV40_VP_SRC2_LOW_MASK) << + NV40_VP_INST_SRC2L_SHIFT; + break; + default: + assert(0); + } +} + +static void +emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst) +{ + struct nv40_vertex_program *vp = vpc->vp; + + switch (dst.type) { + case NV40SR_TEMP: + hw[3] |= NV40_VP_INST_DEST_MASK; + if (slot == 0) { + hw[0] |= (dst.index << + NV40_VP_INST_VEC_DEST_TEMP_SHIFT); + } else { + hw[3] |= (dst.index << + NV40_VP_INST_SCA_DEST_TEMP_SHIFT); + } + break; + case NV40SR_OUTPUT: + switch (dst.index) { + case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV40_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; + case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; + case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; + case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; + case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; + case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; + case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; + case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; + case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; + case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + case NV40_VP_INST_DEST_CLIP(0): + vp->or |= (1 << 6); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE0; + dst.index = NV40_VP_INST_DEST_FOGC; + break; + case NV40_VP_INST_DEST_CLIP(1): + vp->or |= (1 << 7); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE1; + dst.index = NV40_VP_INST_DEST_FOGC; + break; + case NV40_VP_INST_DEST_CLIP(2): + vp->or |= (1 << 8); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE2; + dst.index = NV40_VP_INST_DEST_FOGC; + break; + case NV40_VP_INST_DEST_CLIP(3): + vp->or |= (1 << 9); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE3; + dst.index = NV40_VP_INST_DEST_PSZ; + break; + case NV40_VP_INST_DEST_CLIP(4): + vp->or |= (1 << 10); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE4; + dst.index = NV40_VP_INST_DEST_PSZ; + break; + case NV40_VP_INST_DEST_CLIP(5): + vp->or |= (1 << 11); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE5; + dst.index = NV40_VP_INST_DEST_PSZ; + break; + default: + break; + } + + hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); + if (slot == 0) { + hw[0] |= NV40_VP_INST_VEC_RESULT; + hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + } else { + hw[3] |= NV40_VP_INST_SCA_RESULT; + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + } + break; + default: + assert(0); + } +} + +static void +nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op, + struct nv40_sreg dst, int mask, + struct nv40_sreg s0, struct nv40_sreg s1, + struct nv40_sreg s2) +{ + struct nv40_vertex_program *vp = vpc->vp; + uint32_t *hw; + + vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); + vpc->vpi = &vp->insns[vp->nr_insns - 1]; + memset(vpc->vpi, 0, sizeof(*vpc->vpi)); + vpc->vpi->const_index = -1; + + hw = vpc->vpi->data; + + hw[0] |= (NV40_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT); + hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) | + (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) | + (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) | + (3 << NV40_VP_INST_COND_SWZ_W_SHIFT)); + + if (slot == 0) { + hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); + } else { + hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); + hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20)); + hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); + } + + emit_dst(vpc, hw, slot, dst); + emit_src(vpc, hw, 0, s0); + emit_src(vpc, hw, 1, s1); + emit_src(vpc, hw, 2, s2); +} + +static INLINE struct nv40_sreg +tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { + struct nv40_sreg src; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + src = nv40_sr(NV40SR_INPUT, fsrc->SrcRegister.Index); + break; + case TGSI_FILE_CONSTANT: + src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); + break; + case TGSI_FILE_IMMEDIATE: + src = vpc->imm[fsrc->SrcRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + src = vpc->r_temp[fsrc->SrcRegister.Index]; + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.negate = fsrc->SrcRegister.Negate; + src.swz[0] = fsrc->SrcRegister.SwizzleX; + src.swz[1] = fsrc->SrcRegister.SwizzleY; + src.swz[2] = fsrc->SrcRegister.SwizzleZ; + src.swz[3] = fsrc->SrcRegister.SwizzleW; + return src; +} + +static INLINE struct nv40_sreg +tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) { + struct nv40_sreg dst; + + switch (fdst->DstRegister.File) { + case TGSI_FILE_OUTPUT: + dst = vpc->r_result[fdst->DstRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + dst = vpc->r_temp[fdst->DstRegister.Index]; + break; + case TGSI_FILE_ADDRESS: + dst = vpc->r_address[fdst->DstRegister.Index]; + break; + default: + NOUVEAU_ERR("bad dst file\n"); + break; + } + + return dst; +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc, + struct nv40_sreg *src) +{ + const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + struct nv40_sreg tgsi = tgsi_src(vpc, fsrc); + uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; + uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, + fsrc->SrcRegisterExtSwz.NegateY, + fsrc->SrcRegisterExtSwz.NegateZ, + fsrc->SrcRegisterExtSwz.NegateW }; + uint c; + + for (c = 0; c < 4; c++) { + switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + mask |= tgsi_mask(1 << c); + break; + case TGSI_EXTSWIZZLE_ZERO: + zero_mask |= tgsi_mask(1 << c); + tgsi.swz[c] = SWZ_X; + break; + case TGSI_EXTSWIZZLE_ONE: + one_mask |= tgsi_mask(1 << c); + tgsi.swz[c] = SWZ_X; + break; + default: + assert(0); + } + + if (!tgsi.negate && neg[c]) + neg_mask |= tgsi_mask(1 << c); + } + + if (mask == MASK_ALL && !neg_mask) + return TRUE; + + *src = temp(vpc); + + if (mask) + arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none); + + if (zero_mask) + arith(vpc, 0, OP_SFL, *src, zero_mask, *src, none, none); + + if (one_mask) + arith(vpc, 0, OP_STR, *src, one_mask, *src, none, none); + + if (neg_mask) { + struct nv40_sreg one = temp(vpc); + arith(vpc, 0, OP_STR, one, neg_mask, one, none, none); + arith(vpc, 0, OP_MUL, *src, neg_mask, *src, neg(one), none); + } + + return FALSE; +} + +static boolean +nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, + const struct tgsi_full_instruction *finst) +{ + struct nv40_sreg src[3], dst, tmp; + struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + int mask; + int ai = -1, ci = -1, ii = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(vpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + if (!src_native_swz(vpc, fsrc, &src[i])) + continue; + break; + default: + break; + } + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->SrcRegister.Index) { + ai = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_CONSTANT: + if ((ci == -1 && ii == -1) || + ci == fsrc->SrcRegister.Index) { + ci = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_IMMEDIATE: + if ((ci == -1 && ii == -1) || + ii == fsrc->SrcRegister.Index) { + ii = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); + mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); + break; + case TGSI_OPCODE_ARL: + arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DP3: + arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DP4: + arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DST: + arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_EXP: + arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_FLR: + arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_LG2: + arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LIT: + arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LOG: + arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_MAD: + arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_POW: + tmp = temp(vpc); + arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, + swz(src[0], X, X, X, X)); + arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(vpc, 1, OP_EX2, dst, mask, none, none, + swz(tmp, X, X, X, X)); + break; + case TGSI_OPCODE_RCP: + arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_RET: + break; + case TGSI_OPCODE_RSQ: + arith(vpc, 1, OP_RSQ, dst, mask, none, none, abs(src[0])); + break; + case TGSI_OPCODE_SGE: + arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); + break; + case TGSI_OPCODE_XPD: + tmp = temp(vpc); + arith(vpc, 0, OP_MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + release_temps(vpc); + return TRUE; +} + +static boolean +nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, + const struct tgsi_full_declaration *fdec) +{ + unsigned idx = fdec->DeclarationRange.First; + int hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = NV40_VP_INST_DEST_POS; + vpc->hpos_idx = idx; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV40_VP_INST_DEST_COL0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV40_VP_INST_DEST_COL1; + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_BCOLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV40_VP_INST_DEST_BFC0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV40_VP_INST_DEST_BFC1; + } else { + NOUVEAU_ERR("bad bcolour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NV40_VP_INST_DEST_FOGC; + break; + case TGSI_SEMANTIC_PSIZE: + hw = NV40_VP_INST_DEST_PSZ; + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.SemanticIndex <= 7) { + hw = NV40_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + vpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); + return TRUE; +} + +static boolean +nv40_vertprog_prepare(struct nv40_vpc *vpc) +{ + struct tgsi_parse_context p; + int high_temp = -1, high_addr = -1, nr_imm = 0, i; + + tgsi_parse_init(&p, vpc->vp->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch(tok->Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + nr_imm++; + break; + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + + fdec = &p.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_TEMPORARY: + if (fdec->DeclarationRange.Last > high_temp) { + high_temp = + fdec->DeclarationRange.Last; + } + break; +#if 0 /* this would be nice.. except gallium doesn't track it */ + case TGSI_FILE_ADDRESS: + if (fdec->DeclarationRange.Last > high_addr) { + high_addr = + fdec->DeclarationRange.Last; + } + break; +#endif + case TGSI_FILE_OUTPUT: + if (!nv40_vertprog_parse_decl_output(vpc, fdec)) + return FALSE; + break; + default: + break; + } + } + break; +#if 1 /* yay, parse instructions looking for address regs instead */ + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + const struct tgsi_full_dst_register *fdst; + + finst = &p.FullToken.FullInstruction; + fdst = &finst->FullDstRegisters[0]; + + if (fdst->DstRegister.File == TGSI_FILE_ADDRESS) { + if (fdst->DstRegister.Index > high_addr) + high_addr = fdst->DstRegister.Index; + } + + } + break; +#endif + default: + break; + } + } + tgsi_parse_free(&p); + + if (nr_imm) { + vpc->imm = CALLOC(nr_imm, sizeof(struct nv40_sreg)); + assert(vpc->imm); + } + + if (++high_temp) { + vpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg)); + for (i = 0; i < high_temp; i++) + vpc->r_temp[i] = temp(vpc); + } + + if (++high_addr) { + vpc->r_address = CALLOC(high_addr, sizeof(struct nv40_sreg)); + for (i = 0; i < high_addr; i++) + vpc->r_address[i] = temp(vpc); + } + + vpc->r_temps_discard = 0; + return TRUE; +} + +static void +nv40_vertprog_translate(struct nv40_context *nv40, + struct nv40_vertex_program *vp) +{ + struct tgsi_parse_context parse; + struct nv40_vpc *vpc = NULL; + struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + int i; + + vpc = CALLOC(1, sizeof(struct nv40_vpc)); + if (!vpc) + return; + vpc->vp = vp; + + if (!nv40_vertprog_prepare(vpc)) { + FREE(vpc); + return; + } + + /* Redirect post-transform vertex position to a temp if user clip + * planes are enabled. We need to append code the the vtxprog + * to handle clip planes later. + */ + if (vp->ucp.nr) { + vpc->r_result[vpc->hpos_idx] = temp(vpc); + vpc->r_temps_discard = 0; + } + + tgsi_parse_init(&parse, vp->pipe.tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm; + + imm = &parse.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); +// assert(imm->Immediate.Size == 4); + vpc->imm[vpc->nr_imm++] = + constant(vpc, -1, + imm->u.ImmediateFloat32[0].Float, + imm->u.ImmediateFloat32[1].Float, + imm->u.ImmediateFloat32[2].Float, + imm->u.ImmediateFloat32[3].Float); + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + finst = &parse.FullToken.FullInstruction; + if (!nv40_vertprog_parse_instruction(vpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + /* Write out HPOS if it was redirected to a temp earlier */ + if (vpc->r_result[vpc->hpos_idx].type != NV40SR_OUTPUT) { + struct nv40_sreg hpos = nv40_sr(NV40SR_OUTPUT, + NV40_VP_INST_DEST_POS); + struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; + + arith(vpc, 0, OP_MOV, hpos, MASK_ALL, htmp, none, none); + } + + /* Insert code to handle user clip planes */ + for (i = 0; i < vp->ucp.nr; i++) { + struct nv40_sreg cdst = nv40_sr(NV40SR_OUTPUT, + NV40_VP_INST_DEST_CLIP(i)); + struct nv40_sreg ceqn = constant(vpc, -1, + nv40->clip.ucp[i][0], + nv40->clip.ucp[i][1], + nv40->clip.ucp[i][2], + nv40->clip.ucp[i][3]); + struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; + unsigned mask; + + switch (i) { + case 0: case 3: mask = MASK_Y; break; + case 1: case 4: mask = MASK_Z; break; + case 2: case 5: mask = MASK_W; break; + default: + NOUVEAU_ERR("invalid clip dist #%d\n", i); + goto out_err; + } + + arith(vpc, 0, OP_DP4, cdst, mask, htmp, ceqn, none); + } + + vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST; + vp->translated = TRUE; +out_err: + tgsi_parse_free(&parse); + if (vpc->r_temp) + FREE(vpc->r_temp); + if (vpc->r_address) + FREE(vpc->r_address); + if (vpc->imm) + FREE(vpc->imm); + FREE(vpc); +} + +static boolean +nv40_vertprog_validate(struct nv40_context *nv40) +{ + struct nouveau_winsys *nvws = nv40->nvws; + struct pipe_winsys *ws = nv40->pipe.winsys; + struct nouveau_grobj *curie = nv40->screen->curie; + struct nv40_vertex_program *vp; + struct pipe_buffer *constbuf; + boolean upload_code = FALSE, upload_data = FALSE; + int i; + + if (nv40->render_mode == HW) { + vp = nv40->vertprog; + constbuf = nv40->constbuf[PIPE_SHADER_VERTEX]; + + if ((nv40->dirty & NV40_NEW_UCP) || + memcmp(&nv40->clip, &vp->ucp, sizeof(vp->ucp))) { + nv40_vertprog_destroy(nv40, vp); + memcpy(&vp->ucp, &nv40->clip, sizeof(vp->ucp)); + } + } else { + vp = nv40->swtnl.vertprog; + constbuf = NULL; + } + + /* Translate TGSI shader into hw bytecode */ + if (vp->translated) + goto check_gpu_resources; + + nv40->fallback_swtnl &= ~NV40_NEW_VERTPROG; + nv40_vertprog_translate(nv40, vp); + if (!vp->translated) { + nv40->fallback_swtnl |= NV40_NEW_VERTPROG; + return FALSE; + } + +check_gpu_resources: + /* Allocate hw vtxprog exec slots */ + if (!vp->exec) { + struct nouveau_resource *heap = nv40->screen->vp_exec_heap; + struct nouveau_stateobj *so; + uint vplen = vp->nr_insns; + + if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { + while (heap->next && heap->size < vplen) { + struct nv40_vertex_program *evict; + + evict = heap->next->priv; + nvws->res_free(&evict->exec); + } + + if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) + assert(0); + } + + so = so_new(7, 0); + so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1); + so_data (so, vp->exec->start); + so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2); + so_data (so, vp->ir); + so_data (so, vp->or); + so_method(so, curie, NV40TCL_CLIP_PLANE_ENABLE, 1); + so_data (so, vp->clip_ctrl); + so_ref(so, &vp->so); + + upload_code = TRUE; + } + + /* Allocate hw vtxprog const slots */ + if (vp->nr_consts && !vp->data) { + struct nouveau_resource *heap = nv40->screen->vp_data_heap; + + if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) { + while (heap->next && heap->size < vp->nr_consts) { + struct nv40_vertex_program *evict; + + evict = heap->next->priv; + nvws->res_free(&evict->data); + } + + if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) + assert(0); + } + + /*XXX: handle this some day */ + assert(vp->data->start >= vp->data_start_min); + + upload_data = TRUE; + if (vp->data_start != vp->data->start) + upload_code = TRUE; + } + + /* If exec or data segments moved we need to patch the program to + * fixup offsets and register IDs. + */ + if (vp->exec_start != vp->exec->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nv40_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->has_branch_offset) { + assert(0); + } + } + + vp->exec_start = vp->exec->start; + } + + if (vp->nr_consts && vp->data_start != vp->data->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nv40_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->const_index >= 0) { + vpi->data[1] &= ~NV40_VP_INST_CONST_SRC_MASK; + vpi->data[1] |= + (vpi->const_index + vp->data->start) << + NV40_VP_INST_CONST_SRC_SHIFT; + + } + } + + vp->data_start = vp->data->start; + } + + /* Update + Upload constant values */ + if (vp->nr_consts) { + float *map = NULL; + + if (constbuf) { + map = ws->buffer_map(ws, constbuf, + PIPE_BUFFER_USAGE_CPU_READ); + } + + for (i = 0; i < vp->nr_consts; i++) { + struct nv40_vertex_program_data *vpd = &vp->consts[i]; + + if (vpd->index >= 0) { + if (!upload_data && + !memcmp(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float))) + continue; + memcpy(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float)); + } + + BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (i + vp->data->start); + OUT_RINGp ((uint32_t *)vpd->value, 4); + } + + if (constbuf) + ws->buffer_unmap(ws, constbuf); + } + + /* Upload vtxprog */ + if (upload_code) { +#if 0 + for (i = 0; i < vp->nr_insns; i++) { + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); + } +#endif + BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (vp->exec->start); + for (i = 0; i < vp->nr_insns; i++) { + BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (vp->insns[i].data, 4); + } + } + + if (vp->so != nv40->state.hw[NV40_STATE_VERTPROG]) { + so_ref(vp->so, &nv40->state.hw[NV40_STATE_VERTPROG]); + return TRUE; + } + + return FALSE; +} + +void +nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp) +{ + struct nouveau_winsys *nvws = nv40->screen->nvws; + + vp->translated = FALSE; + + if (vp->nr_insns) { + FREE(vp->insns); + vp->insns = NULL; + vp->nr_insns = 0; + } + + if (vp->nr_consts) { + FREE(vp->consts); + vp->consts = NULL; + vp->nr_consts = 0; + } + + nvws->res_free(&vp->exec); + vp->exec_start = 0; + nvws->res_free(&vp->data); + vp->data_start = 0; + vp->data_start_min = 0; + + vp->ir = vp->or = vp->clip_ctrl = 0; + so_ref(NULL, &vp->so); +} + +struct nv40_state_entry nv40_state_vertprog = { + .validate = nv40_vertprog_validate, + .dirty = { + .pipe = NV40_NEW_VERTPROG | NV40_NEW_UCP, + .hw = NV40_STATE_VERTPROG, + } +}; + |