From b8e56d4cddbd9c491b940e3ce5974c526802c752 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sat, 20 Feb 2010 18:37:43 +0100 Subject: nv30, nv40: unify all structures and headers, except shaders This patch unifies nv[34]0_screen.h, nv[34]0_context.h and nv[34]0_state.h The unified files are put in a new "nvfx" directory. nv30_context.h and nv40_context.h still exist to hold the function prototypes and include nvfx_context.h nv[34]0_screen.h and nv[34]0_state.h are deleted, replaced by the unified versions. nv40 includes some extra fields for swtnl and user clip planes support. These fields will be unused on nv30 until that functionality gets added to it too (by unification with nv40). --- src/gallium/drivers/nvfx/nvfx_context.h | 182 ++++++++++++++++++++++++++++++++ src/gallium/drivers/nvfx/nvfx_screen.h | 37 +++++++ src/gallium/drivers/nvfx/nvfx_state.h | 91 ++++++++++++++++ 3 files changed, 310 insertions(+) create mode 100644 src/gallium/drivers/nvfx/nvfx_context.h create mode 100644 src/gallium/drivers/nvfx/nvfx_screen.h create mode 100644 src/gallium/drivers/nvfx/nvfx_state.h (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h new file mode 100644 index 00000000000..9e89d8409ff --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -0,0 +1,182 @@ +#ifndef __NVFX_CONTEXT_H__ +#define __NVFX_CONTEXT_H__ + +#include + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_inlines.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" +#include "nouveau/nouveau_context.h" +#include "nouveau/nouveau_stateobj.h" + +#include "nvfx_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ + fprintf(stderr, "nouveau: "fmt, ##args); + +enum nvfx_state_index { + NVFX_STATE_FB = 0, + NVFX_STATE_VIEWPORT = 1, + NVFX_STATE_BLEND = 2, + NVFX_STATE_RAST = 3, + NVFX_STATE_ZSA = 4, + NVFX_STATE_BCOL = 5, + NVFX_STATE_CLIP = 6, + NVFX_STATE_SCISSOR = 7, + NVFX_STATE_STIPPLE = 8, + NVFX_STATE_FRAGPROG = 9, + NVFX_STATE_VERTPROG = 10, + NVFX_STATE_FRAGTEX0 = 11, + NVFX_STATE_FRAGTEX1 = 12, + NVFX_STATE_FRAGTEX2 = 13, + NVFX_STATE_FRAGTEX3 = 14, + NVFX_STATE_FRAGTEX4 = 15, + NVFX_STATE_FRAGTEX5 = 16, + NVFX_STATE_FRAGTEX6 = 17, + NVFX_STATE_FRAGTEX7 = 18, + NVFX_STATE_FRAGTEX8 = 19, + NVFX_STATE_FRAGTEX9 = 20, + NVFX_STATE_FRAGTEX10 = 21, + NVFX_STATE_FRAGTEX11 = 22, + NVFX_STATE_FRAGTEX12 = 23, + NVFX_STATE_FRAGTEX13 = 24, + NVFX_STATE_FRAGTEX14 = 25, + NVFX_STATE_FRAGTEX15 = 26, + NVFX_STATE_VERTTEX0 = 27, + NVFX_STATE_VERTTEX1 = 28, + NVFX_STATE_VERTTEX2 = 29, + NVFX_STATE_VERTTEX3 = 30, + NVFX_STATE_VTXBUF = 31, + NVFX_STATE_VTXFMT = 32, + NVFX_STATE_VTXATTR = 33, + NVFX_STATE_SR = 34, + NVFX_STATE_MAX = 35 +}; + +#include "nvfx_screen.h" + +#define NVFX_NEW_BLEND (1 << 0) +#define NVFX_NEW_RAST (1 << 1) +#define NVFX_NEW_ZSA (1 << 2) +#define NVFX_NEW_SAMPLER (1 << 3) +#define NVFX_NEW_FB (1 << 4) +#define NVFX_NEW_STIPPLE (1 << 5) +#define NVFX_NEW_SCISSOR (1 << 6) +#define NVFX_NEW_VIEWPORT (1 << 7) +#define NVFX_NEW_BCOL (1 << 8) +#define NVFX_NEW_VERTPROG (1 << 9) +#define NVFX_NEW_FRAGPROG (1 << 10) +#define NVFX_NEW_ARRAYS (1 << 11) +#define NVFX_NEW_UCP (1 << 12) +#define NVFX_NEW_SR (1 << 13) + +struct nvfx_rasterizer_state { + struct pipe_rasterizer_state pipe; + struct nouveau_stateobj *so; +}; + +struct nvfx_zsa_state { + struct pipe_depth_stencil_alpha_state pipe; + struct nouveau_stateobj *so; +}; + +struct nvfx_blend_state { + struct pipe_blend_state pipe; + struct nouveau_stateobj *so; +}; + + +struct nvfx_state { + unsigned scissor_enabled; + unsigned stipple_enabled; + unsigned fp_samplers; + + uint64_t dirty; + struct nouveau_stateobj *hw[NVFX_STATE_MAX]; +}; + +struct nvfx_vtxelt_state { + struct pipe_vertex_element pipe[16]; + unsigned num_elements; +}; + +struct nvfx_context { + struct pipe_context pipe; + + struct nouveau_winsys *nvws; + struct nvfx_screen *screen; + + struct draw_context *draw; + + /* HW state derived from pipe states */ + struct nvfx_state state; + struct { + struct nvfx_vertex_program *vertprog; + + unsigned nr_attribs; + unsigned hw[PIPE_MAX_SHADER_INPUTS]; + unsigned draw[PIPE_MAX_SHADER_INPUTS]; + unsigned emit[PIPE_MAX_SHADER_INPUTS]; + } swtnl; + + enum { + HW, SWTNL, SWRAST + } render_mode; + unsigned fallback_swtnl; + unsigned fallback_swrast; + + /* Context state */ + unsigned dirty, draw_dirty; + struct pipe_scissor_state scissor; + unsigned stipple[32]; + struct pipe_clip_state clip; + struct nvfx_vertex_program *vertprog; + struct nvfx_fragment_program *fragprog; + struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + unsigned constbuf_nr[PIPE_SHADER_TYPES]; + struct nvfx_rasterizer_state *rasterizer; + struct nvfx_zsa_state *zsa; + struct nvfx_blend_state *blend; + struct pipe_blend_color blend_colour; + struct pipe_stencil_ref stencil_ref; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; + struct pipe_buffer *idxbuf; + unsigned idxbuf_format; + struct nvfx_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; + struct nvfx_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + unsigned nr_textures; + unsigned dirty_samplers; + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned vtxbuf_nr; + struct nvfx_vtxelt_state *vtxelt; +}; + +static INLINE struct nvfx_context * +nvfx_context(struct pipe_context *pipe) +{ + return (struct nvfx_context *)pipe; +} + +struct nvfx_state_entry { + boolean (*validate)(struct nvfx_context *nvfx); + struct { + unsigned pipe; + unsigned hw; + } dirty; +}; + +#endif diff --git a/src/gallium/drivers/nvfx/nvfx_screen.h b/src/gallium/drivers/nvfx/nvfx_screen.h new file mode 100644 index 00000000000..b56f2d4b3f3 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_screen.h @@ -0,0 +1,37 @@ +#ifndef __NVFX_SCREEN_H__ +#define __NVFX_SCREEN_H__ + +#include "nouveau/nouveau_screen.h" +#include "nouveau/nv04_surface_2d.h" + +struct nvfx_screen { + struct nouveau_screen base; + + struct nouveau_winsys *nvws; + + struct nvfx_context *cur_ctx; + + /* HW graphics objects */ + struct nv04_surface_2d *eng2d; + struct nouveau_grobj *eng3d; + struct nouveau_notifier *sync; + + /* Query object resources */ + struct nouveau_notifier *query; + struct nouveau_resource *query_heap; + + /* Vtxprog resources */ + struct nouveau_resource *vp_exec_heap; + struct nouveau_resource *vp_data_heap; + + /* Current 3D state of channel */ + struct nouveau_stateobj *state[NVFX_STATE_MAX]; +}; + +static INLINE struct nvfx_screen * +nvfx_screen(struct pipe_screen *screen) +{ + return (struct nvfx_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nvfx/nvfx_state.h b/src/gallium/drivers/nvfx/nvfx_state.h new file mode 100644 index 00000000000..b243b1020fb --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state.h @@ -0,0 +1,91 @@ +#ifndef __NVFX_STATE_H__ +#define __NVFX_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nvfx_sampler_state { + uint32_t fmt; + uint32_t wrap; + uint32_t en; + uint32_t filt; + uint32_t bcol; +}; + +struct nvfx_vertex_program_exec { + uint32_t data[4]; + boolean has_branch_offset; + int const_index; +}; + +struct nvfx_vertex_program_data { + int index; /* immediates == -1 */ + float value[4]; +}; + +struct nvfx_vertex_program { + struct pipe_shader_state pipe; + + struct draw_vertex_shader *draw; + + boolean translated; + + struct pipe_clip_state ucp; + + struct nvfx_vertex_program_exec *insns; + unsigned nr_insns; + struct nvfx_vertex_program_data *consts; + unsigned nr_consts; + + struct nouveau_resource *exec; + unsigned exec_start; + struct nouveau_resource *data; + unsigned data_start; + unsigned data_start_min; + + uint32_t ir; + uint32_t or; + uint32_t clip_ctrl; + struct nouveau_stateobj *so; +}; + +struct nvfx_fragment_program_data { + unsigned offset; + unsigned index; +}; + +struct nvfx_fragment_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + + boolean translated; + unsigned samplers; + + uint32_t *insn; + int insn_len; + + struct nvfx_fragment_program_data *consts; + unsigned nr_consts; + + struct pipe_buffer *buffer; + + uint32_t fp_control; + struct nouveau_stateobj *so; +}; + +#define NVFX_MAX_TEXTURE_LEVELS 16 + +struct nvfx_miptree { + struct pipe_texture base; + struct nouveau_bo *bo; + + struct pipe_buffer *buffer; + uint total_size; + + struct { + uint pitch; + uint *image_offset; + } level[NVFX_MAX_TEXTURE_LEVELS]; +}; + +#endif -- cgit v1.2.3 From 6518a1c853e82a42b28027b1304babd4f02f98ef Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 21 Feb 2010 13:40:49 +0100 Subject: nv30, nv40: add is_nv4x member to context and screen structs This will make it faster to check for nv40. --- src/gallium/drivers/nv30/nv30_context.c | 2 ++ src/gallium/drivers/nv40/nv40_context.c | 2 ++ src/gallium/drivers/nv40/nv40_screen.c | 3 +++ src/gallium/drivers/nvfx/nvfx_context.h | 2 ++ src/gallium/drivers/nvfx/nvfx_screen.h | 2 ++ 5 files changed, 11 insertions(+) (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 628b50d8dc3..afed8bb952a 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -71,6 +71,8 @@ nv30_create(struct pipe_screen *pscreen, void *priv) screen->base.channel->user_private = nvfx; screen->base.channel->flush_notify = nv30_state_flush_notify; + nvfx->is_nv4x = screen->is_nv4x; + nv30_init_query_functions(nvfx); nv30_init_surface_functions(nvfx); nv30_init_state_functions(nvfx); diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index 721b5134388..6cc3a339e67 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -71,6 +71,8 @@ nv40_create(struct pipe_screen *pscreen, void *priv) screen->base.channel->user_private = nvfx; screen->base.channel->flush_notify = nv40_state_flush_notify; + nvfx->is_nv4x = screen->is_nv4x; + nv40_init_query_functions(nvfx); nv40_init_surface_functions(nvfx); nv40_init_state_functions(nvfx); diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 0fc8e187504..c64864d58db 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -184,6 +184,9 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) if (!screen) return NULL; + + screen->is_nv4x = ~0; + pscreen = &screen->base.base; ret = nouveau_screen_init(&screen->base, dev); diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 9e89d8409ff..87bad54e630 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -118,6 +118,8 @@ struct nvfx_context { struct nouveau_winsys *nvws; struct nvfx_screen *screen; + unsigned is_nv4x; /* either 0 or ~0 */ + struct draw_context *draw; /* HW state derived from pipe states */ diff --git a/src/gallium/drivers/nvfx/nvfx_screen.h b/src/gallium/drivers/nvfx/nvfx_screen.h index b56f2d4b3f3..e076b876b02 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.h +++ b/src/gallium/drivers/nvfx/nvfx_screen.h @@ -11,6 +11,8 @@ struct nvfx_screen { struct nvfx_context *cur_ctx; + unsigned is_nv4x; /* either 0 or ~0 */ + /* HW graphics objects */ struct nv04_surface_2d *eng2d; struct nouveau_grobj *eng3d; -- cgit v1.2.3 From 6321a183319fdcb2ebee757b7f0922efe3f919db Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sat, 20 Feb 2010 19:52:22 +0100 Subject: nvfx: add nvfx directory to build system Will be used to hold source files unified between nv30 and nv40. Eventually all nv30 and nv40 code will be moved there and the nv30 and nv40 directories will be removed. --- configure.ac | 2 +- src/gallium/drivers/nvfx/Makefile | 11 +++++++++++ src/gallium/winsys/drm/nouveau/dri/Makefile | 7 +++++++ src/gallium/winsys/drm/nouveau/egl/Makefile | 7 +++++++ src/gallium/winsys/drm/nouveau/xorg/Makefile | 7 +++++++ 5 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 src/gallium/drivers/nvfx/Makefile (limited to 'src/gallium/drivers/nvfx') diff --git a/configure.ac b/configure.ac index 35fbcd9d85b..eb271e9d515 100644 --- a/configure.ac +++ b/configure.ac @@ -1360,7 +1360,7 @@ AC_ARG_ENABLE([gallium-nouveau], [enable_gallium_nouveau=no]) if test "x$enable_gallium_nouveau" = xyes; then GALLIUM_WINSYS_DRM_DIRS="$GALLIUM_WINSYS_DRM_DIRS nouveau" - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nv30 nv40 nv50" + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nvfx nv30 nv40 nv50" fi dnl diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile new file mode 100644 index 00000000000..05cdda0cc02 --- /dev/null +++ b/src/gallium/drivers/nvfx/Makefile @@ -0,0 +1,11 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nvfx + +nvfx_dummy.c: + touch nvfx_dummy.c + +C_SOURCES = nvfx_dummy.c + +include ../../Makefile.template diff --git a/src/gallium/winsys/drm/nouveau/dri/Makefile b/src/gallium/winsys/drm/nouveau/dri/Makefile index 7e95f79d03c..0cc60395ffe 100644 --- a/src/gallium/winsys/drm/nouveau/dri/Makefile +++ b/src/gallium/winsys/drm/nouveau/dri/Makefile @@ -3,11 +3,18 @@ include $(TOP)/configs/current LIBNAME = nouveau_dri.so +# hideous hack +-Wl,--start-group: +-Wl,--end-group: + PIPE_DRIVERS = \ $(TOP)/src/gallium/state_trackers/dri/libdridrm.a \ $(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \ + -Wl,--start-group \ $(TOP)/src/gallium/drivers/nv30/libnv30.a \ $(TOP)/src/gallium/drivers/nv40/libnv40.a \ + $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \ + -Wl,--end-group \ $(TOP)/src/gallium/drivers/nv50/libnv50.a \ $(TOP)/src/gallium/drivers/nouveau/libnouveau.a diff --git a/src/gallium/winsys/drm/nouveau/egl/Makefile b/src/gallium/winsys/drm/nouveau/egl/Makefile index 2c352603320..0f5e6d64aa4 100644 --- a/src/gallium/winsys/drm/nouveau/egl/Makefile +++ b/src/gallium/winsys/drm/nouveau/egl/Makefile @@ -5,10 +5,17 @@ EGL_DRIVER_NAME = nouveau EGL_DRIVER_SOURCES = dummy.c EGL_DRIVER_LIBS = -ldrm_nouveau +# hideous hack +-Wl,--start-group: +-Wl,--end-group: + EGL_DRIVER_PIPES = \ $(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \ + -Wl,--start-group \ $(TOP)/src/gallium/drivers/nv30/libnv30.a \ $(TOP)/src/gallium/drivers/nv40/libnv40.a \ + $(TOP)/src/gallium/drivers/nv40/libnvfx.a \ + -Wl,--end-group \ $(TOP)/src/gallium/drivers/nv50/libnv50.a \ $(TOP)/src/gallium/drivers/nouveau/libnouveau.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a diff --git a/src/gallium/winsys/drm/nouveau/xorg/Makefile b/src/gallium/winsys/drm/nouveau/xorg/Makefile index 179b50230b5..0607d82a6e4 100644 --- a/src/gallium/winsys/drm/nouveau/xorg/Makefile +++ b/src/gallium/winsys/drm/nouveau/xorg/Makefile @@ -15,11 +15,18 @@ INCLUDES = \ -I$(TOP)/include \ -I$(TOP)/src/egl/main +# hideous hack +-Wl,--start-group: +-Wl,--end-group: + LIBS = \ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ $(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \ + --Wl,--start-group \ $(TOP)/src/gallium/drivers/nv30/libnv30.a \ $(TOP)/src/gallium/drivers/nv40/libnv40.a \ + $(TOP)/src/gallium/drivers/nv40/libnvfx.a \ + --Wl,--end-group \ $(TOP)/src/gallium/drivers/nv50/libnv50.a \ $(TOP)/src/gallium/drivers/nouveau/libnouveau.a \ $(GALLIUM_AUXILIARIES) -- cgit v1.2.3 From c5c7b69bda3fb49fd88b846feb6e65289a04488a Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sat, 20 Feb 2010 20:04:54 +0100 Subject: nv30, nv40: unify identical nv[34]0_clear.c --- src/gallium/drivers/nv30/Makefile | 1 - src/gallium/drivers/nv30/nv30_clear.c | 14 -------------- src/gallium/drivers/nv30/nv30_context.c | 2 +- src/gallium/drivers/nv30/nv30_context.h | 4 ---- src/gallium/drivers/nv40/Makefile | 1 - src/gallium/drivers/nv40/nv40_clear.c | 14 -------------- src/gallium/drivers/nv40/nv40_context.c | 2 +- src/gallium/drivers/nv40/nv40_context.h | 4 ---- src/gallium/drivers/nvfx/Makefile | 6 ++---- src/gallium/drivers/nvfx/nvfx_clear.c | 14 ++++++++++++++ src/gallium/drivers/nvfx/nvfx_context.h | 4 ++++ 11 files changed, 22 insertions(+), 44 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_clear.c delete mode 100644 src/gallium/drivers/nv40/nv40_clear.c create mode 100644 src/gallium/drivers/nvfx/nvfx_clear.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index ed02075d131..27f19da75cc 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -4,7 +4,6 @@ include $(TOP)/configs/current LIBNAME = nv30 C_SOURCES = \ - nv30_clear.c \ nv30_context.c \ nv30_draw.c \ nv30_fragprog.c \ diff --git a/src/gallium/drivers/nv30/nv30_clear.c b/src/gallium/drivers/nv30/nv30_clear.c deleted file mode 100644 index e7ba73de7ca..00000000000 --- a/src/gallium/drivers/nv30/nv30_clear.c +++ /dev/null @@ -1,14 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "util/u_clear.h" - -#include "nv30_context.h" - -void -nv30_clear(struct pipe_context *pipe, unsigned buffers, - const float *rgba, double depth, unsigned stencil) -{ - util_clear(pipe, &nvfx_context(pipe)->framebuffer, buffers, rgba, depth, - stencil); -} diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index afed8bb952a..74be578705c 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -62,7 +62,7 @@ nv30_create(struct pipe_screen *pscreen, void *priv) nvfx->pipe.destroy = nv30_destroy; nvfx->pipe.draw_arrays = nv30_draw_arrays; nvfx->pipe.draw_elements = nv30_draw_elements; - nvfx->pipe.clear = nv30_clear; + nvfx->pipe.clear = nvfx_clear; nvfx->pipe.flush = nv30_flush; nvfx->pipe.is_texture_referenced = nouveau_is_texture_referenced; diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 2fc148cdedb..8032bcd9797 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -51,10 +51,6 @@ extern void nv30_draw_elements(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count); -/* nv30_clear.c */ -extern void nv30_clear(struct pipe_context *pipe, unsigned buffers, - const float *rgba, double depth, unsigned stencil); - /* nvfx_context.c */ struct pipe_context * nv30_create(struct pipe_screen *pscreen, void *priv); diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 50e5e72b4ea..031c943de52 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -4,7 +4,6 @@ include $(TOP)/configs/current LIBNAME = nv40 C_SOURCES = \ - nv40_clear.c \ nv40_context.c \ nv40_draw.c \ nv40_fragprog.c \ diff --git a/src/gallium/drivers/nv40/nv40_clear.c b/src/gallium/drivers/nv40/nv40_clear.c deleted file mode 100644 index 79de90434de..00000000000 --- a/src/gallium/drivers/nv40/nv40_clear.c +++ /dev/null @@ -1,14 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "util/u_clear.h" - -#include "nv40_context.h" - -void -nv40_clear(struct pipe_context *pipe, unsigned buffers, - const float *rgba, double depth, unsigned stencil) -{ - util_clear(pipe, &nvfx_context(pipe)->framebuffer, buffers, rgba, depth, - stencil); -} diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index 6cc3a339e67..cb249dd5d76 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -62,7 +62,7 @@ nv40_create(struct pipe_screen *pscreen, void *priv) nvfx->pipe.destroy = nv40_destroy; nvfx->pipe.draw_arrays = nv40_draw_arrays; nvfx->pipe.draw_elements = nv40_draw_elements; - nvfx->pipe.clear = nv40_clear; + nvfx->pipe.clear = nvfx_clear; nvfx->pipe.flush = nv40_flush; nvfx->pipe.is_texture_referenced = nouveau_is_texture_referenced; diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 7227c4a438b..f9c0a9eb29e 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -57,10 +57,6 @@ extern void nv40_draw_elements(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count); -/* nv40_clear.c */ -extern void nv40_clear(struct pipe_context *pipe, unsigned buffers, - const float *rgba, double depth, unsigned stencil); - /* nvfx_context.c */ struct pipe_context * nv40_create(struct pipe_screen *pscreen, void *priv); diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index 05cdda0cc02..6959efa390f 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -3,9 +3,7 @@ include $(TOP)/configs/current LIBNAME = nvfx -nvfx_dummy.c: - touch nvfx_dummy.c - -C_SOURCES = nvfx_dummy.c +C_SOURCES = \ + nvfx_clear.c include ../../Makefile.template diff --git a/src/gallium/drivers/nvfx/nvfx_clear.c b/src/gallium/drivers/nvfx/nvfx_clear.c new file mode 100644 index 00000000000..2be70fcee40 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_clear.c @@ -0,0 +1,14 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_clear.h" + +#include "nvfx_context.h" + +void +nvfx_clear(struct pipe_context *pipe, unsigned buffers, + const float *rgba, double depth, unsigned stencil) +{ + util_clear(pipe, &nvfx_context(pipe)->framebuffer, buffers, rgba, depth, + stencil); +} diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 87bad54e630..0aaa4964e2b 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -181,4 +181,8 @@ struct nvfx_state_entry { } dirty; }; +/* nvfx_clear.c */ +extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers, + const float *rgba, double depth, unsigned stencil); + #endif -- cgit v1.2.3 From d084d189d03dc89a3161a131f1b386840c06ad61 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sat, 20 Feb 2010 20:07:10 +0100 Subject: nv30, nv40: unify identical nv[34]0_transfer.c --- src/gallium/drivers/nv30/Makefile | 1 - src/gallium/drivers/nv30/nv30_context.c | 2 +- src/gallium/drivers/nv30/nv30_context.h | 1 - src/gallium/drivers/nv30/nv30_transfer.c | 182 ------------------------------- src/gallium/drivers/nv40/Makefile | 1 - src/gallium/drivers/nv40/nv40_context.c | 2 +- src/gallium/drivers/nv40/nv40_context.h | 1 - src/gallium/drivers/nvfx/Makefile | 3 +- src/gallium/drivers/nvfx/nvfx_context.h | 3 + src/gallium/drivers/nvfx/nvfx_transfer.c | 182 +++++++++++++++++++++++++++++++ 10 files changed, 189 insertions(+), 189 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_transfer.c create mode 100644 src/gallium/drivers/nvfx/nvfx_transfer.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index 27f19da75cc..f18295cefc2 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -21,7 +21,6 @@ C_SOURCES = \ nv30_state_viewport.c \ nv30_state_zsa.c \ nv30_surface.c \ - nv30_transfer.c \ nv30_vbo.c \ nv30_vertprog.c diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 74be578705c..ee2c465bc6d 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -76,7 +76,7 @@ nv30_create(struct pipe_screen *pscreen, void *priv) nv30_init_query_functions(nvfx); nv30_init_surface_functions(nvfx); nv30_init_state_functions(nvfx); - nv30_init_transfer_functions(nvfx); + nvfx_init_transfer_functions(nvfx); /* Create, configure, and install fallback swtnl path */ nvfx->draw = draw_create(); diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 8032bcd9797..9f28d49706f 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -6,7 +6,6 @@ extern void nv30_init_state_functions(struct nvfx_context *nvfx); extern void nv30_init_surface_functions(struct nvfx_context *nvfx); extern void nv30_init_query_functions(struct nvfx_context *nvfx); -extern void nv30_init_transfer_functions(struct nvfx_context *nvfx); extern void nv30_screen_init_miptree_functions(struct pipe_screen *pscreen); diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c deleted file mode 100644 index 3d71df52b90..00000000000 --- a/src/gallium/drivers/nv30/nv30_transfer.c +++ /dev/null @@ -1,182 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" -#include "util/u_format.h" -#include "util/u_memory.h" -#include "util/u_math.h" -#include "nouveau/nouveau_winsys.h" -#include "nv30_context.h" -#include "nvfx_screen.h" -#include "nvfx_state.h" - -struct nv30_transfer { - struct pipe_transfer base; - struct pipe_surface *surface; - boolean direct; -}; - -static void -nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, - struct pipe_texture *template) -{ - memset(template, 0, sizeof(struct pipe_texture)); - template->target = pt->target; - template->format = pt->format; - template->width0 = width; - template->height0 = height; - template->depth0 = 1; - template->last_level = 0; - template->nr_samples = pt->nr_samples; - - template->tex_usage = PIPE_TEXTURE_USAGE_DYNAMIC | - NOUVEAU_TEXTURE_USAGE_LINEAR; -} - -static struct pipe_transfer * -nv30_transfer_new(struct pipe_context *pcontext, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - enum pipe_transfer_usage usage, - unsigned x, unsigned y, unsigned w, unsigned h) -{ - struct pipe_screen *pscreen = pcontext->screen; - struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; - struct nv30_transfer *tx; - struct pipe_texture tx_tex_template, *tx_tex; - - tx = CALLOC_STRUCT(nv30_transfer); - if (!tx) - return NULL; - - pipe_texture_reference(&tx->base.texture, pt); - tx->base.x = x; - tx->base.y = y; - tx->base.width = w; - tx->base.height = h; - tx->base.stride = mt->level[level].pitch; - tx->base.usage = usage; - tx->base.face = face; - tx->base.level = level; - tx->base.zslice = zslice; - - /* Direct access to texture */ - if ((pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC || - debug_get_bool_option("NOUVEAU_NO_TRANSFER", TRUE/*XXX:FALSE*/)) && - pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) - { - tx->direct = true; - tx->surface = pscreen->get_tex_surface(pscreen, pt, - face, level, zslice, - pipe_transfer_buffer_flags(&tx->base)); - return &tx->base; - } - - tx->direct = false; - - nv30_compatible_transfer_tex(pt, w, h, &tx_tex_template); - - tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); - if (!tx_tex) - { - FREE(tx); - return NULL; - } - - tx->base.stride = ((struct nvfx_miptree*)tx_tex)->level[0].pitch; - - tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, - 0, 0, 0, - pipe_transfer_buffer_flags(&tx->base)); - - pipe_texture_reference(&tx_tex, NULL); - - if (!tx->surface) - { - pipe_surface_reference(&tx->surface, NULL); - FREE(tx); - return NULL; - } - - if (usage & PIPE_TRANSFER_READ) { - struct nvfx_screen *nvscreen = nvfx_screen(pscreen); - struct pipe_surface *src; - - src = pscreen->get_tex_surface(pscreen, pt, - face, level, zslice, - PIPE_BUFFER_USAGE_GPU_READ); - - /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ - /* TODO: Check if SIFM can un-swizzle */ - nvscreen->eng2d->copy(nvscreen->eng2d, - tx->surface, 0, 0, - src, x, y, - w, h); - - pipe_surface_reference(&src, NULL); - } - - return &tx->base; -} - -static void -nv30_transfer_del(struct pipe_context *pcontext, - struct pipe_transfer *ptx) -{ - struct nv30_transfer *tx = (struct nv30_transfer *)ptx; - - if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) { - struct pipe_screen *pscreen = pcontext->screen; - struct nvfx_screen *nvscreen = nvfx_screen(pscreen); - struct pipe_surface *dst; - - dst = pscreen->get_tex_surface(pscreen, ptx->texture, - ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); - - /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ - nvscreen->eng2d->copy(nvscreen->eng2d, - dst, tx->base.x, tx->base.y, - tx->surface, 0, 0, - tx->base.width, tx->base.height); - - pipe_surface_reference(&dst, NULL); - } - - pipe_surface_reference(&tx->surface, NULL); - pipe_texture_reference(&ptx->texture, NULL); - FREE(ptx); -} - -static void * -nv30_transfer_map(struct pipe_context *pcontext, struct pipe_transfer *ptx) -{ - struct pipe_screen *pscreen = pcontext->screen; - struct nv30_transfer *tx = (struct nv30_transfer *)ptx; - struct nv04_surface *ns = (struct nv04_surface *)tx->surface; - struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture; - void *map = pipe_buffer_map(pscreen, mt->buffer, - pipe_transfer_buffer_flags(ptx)); - - if(!tx->direct) - return map + ns->base.offset; - else - return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); -} - -static void -nv30_transfer_unmap(struct pipe_context *pcontext, struct pipe_transfer *ptx) -{ - struct pipe_screen *pscreen = pcontext->screen; - struct nv30_transfer *tx = (struct nv30_transfer *)ptx; - struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture; - - pipe_buffer_unmap(pscreen, mt->buffer); -} - -void -nv30_init_transfer_functions(struct nvfx_context *nvfx) -{ - nvfx->pipe.get_tex_transfer = nv30_transfer_new; - nvfx->pipe.tex_transfer_destroy = nv30_transfer_del; - nvfx->pipe.transfer_map = nv30_transfer_map; - nvfx->pipe.transfer_unmap = nv30_transfer_unmap; -} diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 031c943de52..8d09ef807f8 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -21,7 +21,6 @@ C_SOURCES = \ nv40_state_viewport.c \ nv40_state_zsa.c \ nv40_surface.c \ - nv40_transfer.c \ nv40_vbo.c \ nv40_vertprog.c diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index cb249dd5d76..9934b582eef 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -76,7 +76,7 @@ nv40_create(struct pipe_screen *pscreen, void *priv) nv40_init_query_functions(nvfx); nv40_init_surface_functions(nvfx); nv40_init_state_functions(nvfx); - nv40_init_transfer_functions(nvfx); + nvfx_init_transfer_functions(nvfx); /* Create, configure, and install fallback swtnl path */ nvfx->draw = draw_create(); diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index f9c0a9eb29e..e7c6d5ad86d 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -6,7 +6,6 @@ extern void nv40_init_state_functions(struct nvfx_context *nvfx); extern void nv40_init_surface_functions(struct nvfx_context *nvfx); extern void nv40_init_query_functions(struct nvfx_context *nvfx); -extern void nv40_init_transfer_functions(struct nvfx_context *nvfx); extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen); diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index 6959efa390f..699cbedbc84 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -4,6 +4,7 @@ include $(TOP)/configs/current LIBNAME = nvfx C_SOURCES = \ - nvfx_clear.c + nvfx_clear.c \ + nvfx_transfer.c include ../../Makefile.template diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 0aaa4964e2b..38d1142ff97 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -185,4 +185,7 @@ struct nvfx_state_entry { extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, double depth, unsigned stencil); +/* nvfx_transfer.c */ +extern void nvfx_init_transfer_functions(struct nvfx_context *nvfx); + #endif diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.c b/src/gallium/drivers/nvfx/nvfx_transfer.c new file mode 100644 index 00000000000..409b354d582 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_transfer.c @@ -0,0 +1,182 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_memory.h" +#include "util/u_math.h" +#include "nouveau/nouveau_winsys.h" +#include "nvfx_context.h" +#include "nvfx_screen.h" +#include "nvfx_state.h" + +struct nvfx_transfer { + struct pipe_transfer base; + struct pipe_surface *surface; + boolean direct; +}; + +static void +nvfx_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, + struct pipe_texture *template) +{ + memset(template, 0, sizeof(struct pipe_texture)); + template->target = pt->target; + template->format = pt->format; + template->width0 = width; + template->height0 = height; + template->depth0 = 1; + template->last_level = 0; + template->nr_samples = pt->nr_samples; + + template->tex_usage = PIPE_TEXTURE_USAGE_DYNAMIC | + NOUVEAU_TEXTURE_USAGE_LINEAR; +} + +static struct pipe_transfer * +nvfx_transfer_new(struct pipe_context *pcontext, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + enum pipe_transfer_usage usage, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + struct pipe_screen *pscreen = pcontext->screen; + struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; + struct nvfx_transfer *tx; + struct pipe_texture tx_tex_template, *tx_tex; + + tx = CALLOC_STRUCT(nvfx_transfer); + if (!tx) + return NULL; + + pipe_texture_reference(&tx->base.texture, pt); + tx->base.x = x; + tx->base.y = y; + tx->base.width = w; + tx->base.height = h; + tx->base.stride = mt->level[level].pitch; + tx->base.usage = usage; + tx->base.face = face; + tx->base.level = level; + tx->base.zslice = zslice; + + /* Direct access to texture */ + if ((pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC || + debug_get_bool_option("NOUVEAU_NO_TRANSFER", TRUE/*XXX:FALSE*/)) && + pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) + { + tx->direct = true; + tx->surface = pscreen->get_tex_surface(pscreen, pt, + face, level, zslice, + pipe_transfer_buffer_flags(&tx->base)); + return &tx->base; + } + + tx->direct = false; + + nvfx_compatible_transfer_tex(pt, w, h, &tx_tex_template); + + tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); + if (!tx_tex) + { + FREE(tx); + return NULL; + } + + tx->base.stride = ((struct nvfx_miptree*)tx_tex)->level[0].pitch; + + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, + 0, 0, 0, + pipe_transfer_buffer_flags(&tx->base)); + + pipe_texture_reference(&tx_tex, NULL); + + if (!tx->surface) + { + pipe_surface_reference(&tx->surface, NULL); + FREE(tx); + return NULL; + } + + if (usage & PIPE_TRANSFER_READ) { + struct nvfx_screen *nvscreen = nvfx_screen(pscreen); + struct pipe_surface *src; + + src = pscreen->get_tex_surface(pscreen, pt, + face, level, zslice, + PIPE_BUFFER_USAGE_GPU_READ); + + /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ + /* TODO: Check if SIFM can un-swizzle */ + nvscreen->eng2d->copy(nvscreen->eng2d, + tx->surface, 0, 0, + src, x, y, + w, h); + + pipe_surface_reference(&src, NULL); + } + + return &tx->base; +} + +static void +nvfx_transfer_del(struct pipe_context *pcontext, + struct pipe_transfer *ptx) +{ + struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx; + + if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) { + struct pipe_screen *pscreen = pcontext->screen; + struct nvfx_screen *nvscreen = nvfx_screen(pscreen); + struct pipe_surface *dst; + + dst = pscreen->get_tex_surface(pscreen, ptx->texture, + ptx->face, ptx->level, ptx->zslice, + PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); + + /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ + nvscreen->eng2d->copy(nvscreen->eng2d, + dst, tx->base.x, tx->base.y, + tx->surface, 0, 0, + tx->base.width, tx->base.height); + + pipe_surface_reference(&dst, NULL); + } + + pipe_surface_reference(&tx->surface, NULL); + pipe_texture_reference(&ptx->texture, NULL); + FREE(ptx); +} + +static void * +nvfx_transfer_map(struct pipe_context *pcontext, struct pipe_transfer *ptx) +{ + struct pipe_screen *pscreen = pcontext->screen; + struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx; + struct nv04_surface *ns = (struct nv04_surface *)tx->surface; + struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture; + void *map = pipe_buffer_map(pscreen, mt->buffer, + pipe_transfer_buffer_flags(ptx)); + + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); +} + +static void +nvfx_transfer_unmap(struct pipe_context *pcontext, struct pipe_transfer *ptx) +{ + struct pipe_screen *pscreen = pcontext->screen; + struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx; + struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture; + + pipe_buffer_unmap(pscreen, mt->buffer); +} + +void +nvfx_init_transfer_functions(struct nvfx_context *nvfx) +{ + nvfx->pipe.get_tex_transfer = nvfx_transfer_new; + nvfx->pipe.tex_transfer_destroy = nvfx_transfer_del; + nvfx->pipe.transfer_map = nvfx_transfer_map; + nvfx->pipe.transfer_unmap = nvfx_transfer_unmap; +} -- cgit v1.2.3 From 778c64da97272e7508dbcdf0bffdb699d1b04ce0 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 21 Feb 2010 11:33:15 +0100 Subject: nv30, nv40: non-trivially unify nv[34]0_state_emit.c The files are the same except for swtnl support on nv40 and for texture cache flushing on nv40. Unify them, and use a macro to define 4 versions of render_states, for all combinations of nvfx and hwtnl/swtnl. --- src/gallium/drivers/nv30/Makefile | 1 - src/gallium/drivers/nv30/nv30_context.c | 2 +- src/gallium/drivers/nv30/nv30_context.h | 3 - src/gallium/drivers/nv30/nv30_state_emit.c | 121 ------------------ src/gallium/drivers/nv30/nv30_vbo.c | 14 +-- src/gallium/drivers/nv40/Makefile | 1 - src/gallium/drivers/nv40/nv40_context.c | 2 +- src/gallium/drivers/nv40/nv40_context.h | 4 - src/gallium/drivers/nv40/nv40_draw.c | 6 +- src/gallium/drivers/nv40/nv40_state_emit.c | 189 ----------------------------- src/gallium/drivers/nv40/nv40_vbo.c | 14 +-- src/gallium/drivers/nvfx/Makefile | 1 + src/gallium/drivers/nvfx/nvfx_context.h | 6 + src/gallium/drivers/nvfx/nvfx_state_emit.c | 188 ++++++++++++++++++++++++++++ 14 files changed, 214 insertions(+), 338 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_state_emit.c delete mode 100644 src/gallium/drivers/nv40/nv40_state_emit.c create mode 100644 src/gallium/drivers/nvfx/nvfx_state_emit.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index f18295cefc2..3067e450628 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -13,7 +13,6 @@ C_SOURCES = \ nv30_screen.c \ nv30_state.c \ nv30_state_blend.c \ - nv30_state_emit.c \ nv30_state_fb.c \ nv30_state_rasterizer.c \ nv30_state_scissor.c \ diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index ee2c465bc6d..6fe8cb3e324 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -69,7 +69,7 @@ nv30_create(struct pipe_screen *pscreen, void *priv) nvfx->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; screen->base.channel->user_private = nvfx; - screen->base.channel->flush_notify = nv30_state_flush_notify; + screen->base.channel->flush_notify = nvfx_state_flush_notify; nvfx->is_nv4x = screen->is_nv4x; diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 9f28d49706f..a6767da4dc1 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -24,9 +24,6 @@ extern void nv30_fragprog_destroy(struct nvfx_context *, extern void nv30_fragtex_bind(struct nvfx_context *); /* nv30_state.c and friends */ -extern boolean nv30_state_validate(struct nvfx_context *nvfx); -extern void nv30_state_emit(struct nvfx_context *nvfx); -extern void nv30_state_flush_notify(struct nouveau_channel *chan); extern struct nvfx_state_entry nv30_state_rasterizer; extern struct nvfx_state_entry nv30_state_scissor; extern struct nvfx_state_entry nv30_state_stipple; diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c deleted file mode 100644 index 6df93618da8..00000000000 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ /dev/null @@ -1,121 +0,0 @@ -#include "nv30_context.h" -#include "nvfx_state.h" - -static struct nvfx_state_entry *render_states[] = { - &nv30_state_framebuffer, - &nv30_state_rasterizer, - &nv30_state_scissor, - &nv30_state_stipple, - &nv30_state_fragprog, - &nv30_state_fragtex, - &nv30_state_vertprog, - &nv30_state_blend, - &nv30_state_blend_colour, - &nv30_state_zsa, - &nv30_state_sr, - &nv30_state_viewport, - &nv30_state_vbo, - NULL -}; - -static void -nv30_state_do_validate(struct nvfx_context *nvfx, - struct nvfx_state_entry **states) -{ - while (*states) { - struct nvfx_state_entry *e = *states; - - if (nvfx->dirty & e->dirty.pipe) { - if (e->validate(nvfx)) - nvfx->state.dirty |= (1ULL << e->dirty.hw); - } - - states++; - } - nvfx->dirty = 0; -} - -void -nv30_state_emit(struct nvfx_context *nvfx) -{ - struct nouveau_channel *chan = nvfx->screen->base.channel; - struct nvfx_state *state = &nvfx->state; - struct nvfx_screen *screen = nvfx->screen; - unsigned i; - uint64_t states; - - /* XXX: racy! - */ - if (nvfx != screen->cur_ctx) { - for (i = 0; i < NVFX_STATE_MAX; i++) { - if (state->hw[i] && screen->state[i] != state->hw[i]) - state->dirty |= (1ULL << i); - } - - screen->cur_ctx = nvfx; - } - - for (i = 0, states = state->dirty; states; i++) { - if (!(states & (1ULL << i))) - continue; - so_ref (state->hw[i], &nvfx->screen->state[i]); - if (state->hw[i]) - so_emit(chan, nvfx->screen->state[i]); - states &= ~(1ULL << i); - } - - state->dirty = 0; -} - -void -nv30_state_flush_notify(struct nouveau_channel *chan) -{ - struct nvfx_context *nvfx = chan->user_private; - struct nvfx_state *state = &nvfx->state; - unsigned i, samplers; - - so_emit_reloc_markers(chan, state->hw[NVFX_STATE_FB]); - for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { - if (!(samplers & (1 << i))) - continue; - so_emit_reloc_markers(chan, - state->hw[NVFX_STATE_FRAGTEX0+i]); - samplers &= ~(1ULL << i); - } - so_emit_reloc_markers(chan, state->hw[NVFX_STATE_FRAGPROG]); - if (state->hw[NVFX_STATE_VTXBUF] /*&& nvfx->render_mode == HW*/) - so_emit_reloc_markers(chan, state->hw[NVFX_STATE_VTXBUF]); -} - -boolean -nv30_state_validate(struct nvfx_context *nvfx) -{ -#if 0 - boolean was_sw = nvfx->fallback_swtnl ? TRUE : FALSE; - - if (nvfx->render_mode != HW) { - /* Don't even bother trying to go back to hw if none - * of the states that caused swtnl previously have changed. - */ - if ((nvfx->fallback_swtnl & nvfx->dirty) - != nvfx->fallback_swtnl) - return FALSE; - - /* Attempt to go to hwtnl again */ - nvfx->pipe.flush(&nvfx->pipe, 0, NULL); - nvfx->dirty |= (NVFX_NEW_VIEWPORT | - NVFX_NEW_VERTPROG | - NVFX_NEW_ARRAYS); - nvfx->render_mode = HW; - } -#endif - nv30_state_do_validate(nvfx, render_states); -#if 0 - if (nvfx->fallback_swtnl || nvfx->fallback_swrast) - return FALSE; - - if (was_sw) - NOUVEAU_ERR("swtnl->hw\n"); -#endif - return TRUE; -} diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index 119fa59890e..2b4401e5330 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -175,7 +175,7 @@ nv30_draw_arrays(struct pipe_context *pipe, unsigned restart = 0; nv30_vbo_set_idxbuf(nvfx, NULL, 0); - if (FORCE_SWTNL || !nv30_state_validate(nvfx)) { + if (FORCE_SWTNL || !nvfx_state_validate(nvfx)) { /*return nv30_draw_elements_swtnl(pipe, NULL, 0, mode, start, count);*/ return; @@ -184,7 +184,7 @@ nv30_draw_arrays(struct pipe_context *pipe, while (count) { unsigned vc, nr; - nv30_state_emit(nvfx); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, mode, start, count, &restart); @@ -238,7 +238,7 @@ nv30_draw_elements_u08(struct nvfx_context *nvfx, void *ib, uint8_t *elts = (uint8_t *)ib + start; unsigned vc, push, restart = 0; - nv30_state_emit(nvfx); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, mode, start, count, &restart); @@ -289,7 +289,7 @@ nv30_draw_elements_u16(struct nvfx_context *nvfx, void *ib, uint16_t *elts = (uint16_t *)ib + start; unsigned vc, push, restart = 0; - nv30_state_emit(nvfx); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, mode, start, count, &restart); @@ -340,7 +340,7 @@ nv30_draw_elements_u32(struct nvfx_context *nvfx, void *ib, uint32_t *elts = (uint32_t *)ib + start; unsigned vc, push, restart = 0; - nv30_state_emit(nvfx); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 5, 1, mode, start, count, &restart); @@ -416,7 +416,7 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, while (count) { unsigned nr, vc; - nv30_state_emit(nvfx); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, mode, start, count, &restart); @@ -465,7 +465,7 @@ nv30_draw_elements(struct pipe_context *pipe, boolean idxbuf; idxbuf = nv30_vbo_set_idxbuf(nvfx, indexBuffer, indexSize); - if (FORCE_SWTNL || !nv30_state_validate(nvfx)) { + if (FORCE_SWTNL || !nvfx_state_validate(nvfx)) { /*return nv30_draw_elements_swtnl(pipe, NULL, 0, mode, start, count);*/ return; diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 8d09ef807f8..b0c0c09d517 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -13,7 +13,6 @@ C_SOURCES = \ nv40_screen.c \ nv40_state.c \ nv40_state_blend.c \ - nv40_state_emit.c \ nv40_state_fb.c \ nv40_state_rasterizer.c \ nv40_state_scissor.c \ diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index 9934b582eef..12f57377cde 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -69,7 +69,7 @@ nv40_create(struct pipe_screen *pscreen, void *priv) nvfx->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; screen->base.channel->user_private = nvfx; - screen->base.channel->flush_notify = nv40_state_flush_notify; + screen->base.channel->flush_notify = nvfx_state_flush_notify; nvfx->is_nv4x = screen->is_nv4x; diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index e7c6d5ad86d..0b875bcb065 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -28,10 +28,6 @@ extern void nv40_fragprog_destroy(struct nvfx_context *, extern void nv40_fragtex_bind(struct nvfx_context *); /* nv40_state.c and friends */ -extern boolean nv40_state_validate(struct nvfx_context *nvfx); -extern boolean nv40_state_validate_swtnl(struct nvfx_context *nvfx); -extern void nv40_state_emit(struct nvfx_context *nvfx); -extern void nv40_state_flush_notify(struct nouveau_channel *chan); extern struct nvfx_state_entry nv40_state_rasterizer; extern struct nvfx_state_entry nv40_state_scissor; extern struct nvfx_state_entry nv40_state_stipple; diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index cce1c64621d..87d2689d54b 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -98,7 +98,7 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, assert(0); } FIRE_RING(chan); - nv40_state_emit(nvfx); + nvfx_state_emit(nvfx); } /* Switch primitive modes if necessary */ @@ -245,10 +245,10 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, unsigned i; void *map; - if (!nv40_state_validate_swtnl(nvfx)) + if (!nvfx_state_validate_swtnl(nvfx)) return; nvfx->state.dirty &= ~(1ULL << NVFX_STATE_VTXBUF); - nv40_state_emit(nvfx); + nvfx_state_emit(nvfx); for (i = 0; i < nvfx->vtxbuf_nr; i++) { map = pipe_buffer_map(pscreen, nvfx->vtxbuf[i].buffer, diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c deleted file mode 100644 index 5c437f9969b..00000000000 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ /dev/null @@ -1,189 +0,0 @@ -#include "nv40_context.h" -#include "nvfx_state.h" -#include "draw/draw_context.h" - -static struct nvfx_state_entry *render_states[] = { - &nv40_state_framebuffer, - &nv40_state_rasterizer, - &nv40_state_scissor, - &nv40_state_stipple, - &nv40_state_fragprog, - &nv40_state_fragtex, - &nv40_state_vertprog, - &nv40_state_blend, - &nv40_state_blend_colour, - &nv40_state_zsa, - &nv40_state_sr, - &nv40_state_viewport, - &nv40_state_vbo, - NULL -}; - -static struct nvfx_state_entry *swtnl_states[] = { - &nv40_state_framebuffer, - &nv40_state_rasterizer, - &nv40_state_scissor, - &nv40_state_stipple, - &nv40_state_fragprog, - &nv40_state_fragtex, - &nv40_state_vertprog, - &nv40_state_blend, - &nv40_state_blend_colour, - &nv40_state_zsa, - &nv40_state_sr, - &nv40_state_viewport, - &nv40_state_vtxfmt, - NULL -}; - -static void -nv40_state_do_validate(struct nvfx_context *nvfx, - struct nvfx_state_entry **states) -{ - while (*states) { - struct nvfx_state_entry *e = *states; - - if (nvfx->dirty & e->dirty.pipe) { - if (e->validate(nvfx)) - nvfx->state.dirty |= (1ULL << e->dirty.hw); - } - - states++; - } - nvfx->dirty = 0; -} - -void -nv40_state_emit(struct nvfx_context *nvfx) -{ - struct nvfx_state *state = &nvfx->state; - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - unsigned i; - uint64_t states; - - /* XXX: race conditions - */ - if (nvfx != screen->cur_ctx) { - for (i = 0; i < NVFX_STATE_MAX; i++) { - if (state->hw[i] && screen->state[i] != state->hw[i]) - state->dirty |= (1ULL << i); - } - - screen->cur_ctx = nvfx; - } - - for (i = 0, states = state->dirty; states; i++) { - if (!(states & (1ULL << i))) - continue; - so_ref (state->hw[i], &nvfx->screen->state[i]); - if (state->hw[i]) - so_emit(chan, nvfx->screen->state[i]); - states &= ~(1ULL << i); - } - - if (state->dirty & ((1ULL << NVFX_STATE_FRAGPROG) | - (1ULL << NVFX_STATE_FRAGTEX0))) { - BEGIN_RING(chan, eng3d, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (chan, 2); - BEGIN_RING(chan, eng3d, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (chan, 1); - } - - state->dirty = 0; -} - -void -nv40_state_flush_notify(struct nouveau_channel *chan) -{ - struct nvfx_context *nvfx = chan->user_private; - struct nvfx_state *state = &nvfx->state; - unsigned i, samplers; - - so_emit_reloc_markers(chan, state->hw[NVFX_STATE_FB]); - for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { - if (!(samplers & (1 << i))) - continue; - so_emit_reloc_markers(chan, - state->hw[NVFX_STATE_FRAGTEX0+i]); - samplers &= ~(1ULL << i); - } - so_emit_reloc_markers(chan, state->hw[NVFX_STATE_FRAGPROG]); - if (state->hw[NVFX_STATE_VTXBUF] && nvfx->render_mode == HW) - so_emit_reloc_markers(chan, state->hw[NVFX_STATE_VTXBUF]); -} - -boolean -nv40_state_validate(struct nvfx_context *nvfx) -{ - boolean was_sw = nvfx->fallback_swtnl ? TRUE : FALSE; - - if (nvfx->render_mode != HW) { - /* Don't even bother trying to go back to hw if none - * of the states that caused swtnl previously have changed. - */ - if ((nvfx->fallback_swtnl & nvfx->dirty) - != nvfx->fallback_swtnl) - return FALSE; - - /* Attempt to go to hwtnl again */ - nvfx->pipe.flush(&nvfx->pipe, 0, NULL); - nvfx->dirty |= (NVFX_NEW_VIEWPORT | - NVFX_NEW_VERTPROG | - NVFX_NEW_ARRAYS); - nvfx->render_mode = HW; - } - - nv40_state_do_validate(nvfx, render_states); - if (nvfx->fallback_swtnl || nvfx->fallback_swrast) - return FALSE; - - if (was_sw) - NOUVEAU_ERR("swtnl->hw\n"); - - return TRUE; -} - -boolean -nv40_state_validate_swtnl(struct nvfx_context *nvfx) -{ - struct draw_context *draw = nvfx->draw; - - /* Setup for swtnl */ - if (nvfx->render_mode == HW) { - NOUVEAU_ERR("hw->swtnl 0x%08x\n", nvfx->fallback_swtnl); - nvfx->pipe.flush(&nvfx->pipe, 0, NULL); - nvfx->dirty |= (NVFX_NEW_VIEWPORT | - NVFX_NEW_VERTPROG | - NVFX_NEW_ARRAYS); - nvfx->render_mode = SWTNL; - } - - if (nvfx->draw_dirty & NVFX_NEW_VERTPROG) - draw_bind_vertex_shader(draw, nvfx->vertprog->draw); - - if (nvfx->draw_dirty & NVFX_NEW_RAST) - draw_set_rasterizer_state(draw, &nvfx->rasterizer->pipe); - - if (nvfx->draw_dirty & NVFX_NEW_UCP) - draw_set_clip_state(draw, &nvfx->clip); - - if (nvfx->draw_dirty & NVFX_NEW_VIEWPORT) - draw_set_viewport_state(draw, &nvfx->viewport); - - if (nvfx->draw_dirty & NVFX_NEW_ARRAYS) { - draw_set_vertex_buffers(draw, nvfx->vtxbuf_nr, nvfx->vtxbuf); - draw_set_vertex_elements(draw, nvfx->vtxelt->num_elements, nvfx->vtxelt->pipe); - } - - nv40_state_do_validate(nvfx, swtnl_states); - if (nvfx->fallback_swrast) { - NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nvfx->fallback_swrast); - return FALSE; - } - - nvfx->draw_dirty = 0; - return TRUE; -} - diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index 0738d5c93b4..456b508d438 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -176,7 +176,7 @@ nv40_draw_arrays(struct pipe_context *pipe, unsigned restart; nv40_vbo_set_idxbuf(nvfx, NULL, 0); - if (FORCE_SWTNL || !nv40_state_validate(nvfx)) { + if (FORCE_SWTNL || !nvfx_state_validate(nvfx)) { nv40_draw_elements_swtnl(pipe, NULL, 0, mode, start, count); return; @@ -185,7 +185,7 @@ nv40_draw_arrays(struct pipe_context *pipe, while (count) { unsigned vc, nr; - nv40_state_emit(nvfx); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, mode, start, count, &restart); @@ -239,7 +239,7 @@ nv40_draw_elements_u08(struct nvfx_context *nvfx, void *ib, uint8_t *elts = (uint8_t *)ib + start; unsigned vc, push, restart; - nv40_state_emit(nvfx); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, mode, start, count, &restart); @@ -290,7 +290,7 @@ nv40_draw_elements_u16(struct nvfx_context *nvfx, void *ib, uint16_t *elts = (uint16_t *)ib + start; unsigned vc, push, restart; - nv40_state_emit(nvfx); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, mode, start, count, &restart); @@ -341,7 +341,7 @@ nv40_draw_elements_u32(struct nvfx_context *nvfx, void *ib, uint32_t *elts = (uint32_t *)ib + start; unsigned vc, push, restart; - nv40_state_emit(nvfx); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 5, 1, mode, start, count, &restart); @@ -417,7 +417,7 @@ nv40_draw_elements_vbo(struct pipe_context *pipe, while (count) { unsigned nr, vc; - nv40_state_emit(nvfx); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, mode, start, count, &restart); @@ -466,7 +466,7 @@ nv40_draw_elements(struct pipe_context *pipe, boolean idxbuf; idxbuf = nv40_vbo_set_idxbuf(nvfx, indexBuffer, indexSize); - if (FORCE_SWTNL || !nv40_state_validate(nvfx)) { + if (FORCE_SWTNL || !nvfx_state_validate(nvfx)) { nv40_draw_elements_swtnl(pipe, NULL, 0, mode, start, count); return; diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index 699cbedbc84..0eb1bebeb30 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -5,6 +5,7 @@ LIBNAME = nvfx C_SOURCES = \ nvfx_clear.c \ + nvfx_state_emit.c \ nvfx_transfer.c include ../../Makefile.template diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 38d1142ff97..3d4bc6bbc93 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -185,6 +185,12 @@ struct nvfx_state_entry { extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, double depth, unsigned stencil); +/* nvfx_state_emit.c */ +extern void nvfx_state_flush_notify(struct nouveau_channel *chan); +extern boolean nvfx_state_validate(struct nvfx_context *nvfx); +extern boolean nvfx_state_validate_swtnl(struct nvfx_context *nvfx); +extern void nvfx_state_emit(struct nvfx_context *nvfx); + /* nvfx_transfer.c */ extern void nvfx_init_transfer_functions(struct nvfx_context *nvfx); diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c new file mode 100644 index 00000000000..9582ba9648c --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -0,0 +1,188 @@ +#include "nv30/nv30_context.h" +#include "nv40/nv40_context.h" +#include "nvfx_state.h" +#include "draw/draw_context.h" + +#define RENDER_STATES(name, nvxx, vbo) \ +static struct nvfx_state_entry *name##_render_states[] = { \ + &nvxx##_state_framebuffer, \ + &nvxx##_state_rasterizer, \ + &nvxx##_state_scissor, \ + &nvxx##_state_stipple, \ + &nvxx##_state_fragprog, \ + &nvxx##_state_fragtex, \ + &nvxx##_state_vertprog, \ + &nvxx##_state_blend, \ + &nvxx##_state_blend_colour, \ + &nvxx##_state_zsa, \ + &nvxx##_state_sr, \ + &nvxx##_state_viewport, \ + &nvxx##_state_##vbo, \ + NULL \ +} + +RENDER_STATES(nv30, nv30, vbo); +RENDER_STATES(nv30_swtnl, nv30, vbo); /* TODO: replace with vtxfmt once draw is unified */ +RENDER_STATES(nv40, nv40, vbo); +RENDER_STATES(nv40_swtnl, nv40, vtxfmt); + +static void +nvfx_state_do_validate(struct nvfx_context *nvfx, + struct nvfx_state_entry **states) +{ + while (*states) { + struct nvfx_state_entry *e = *states; + + if (nvfx->dirty & e->dirty.pipe) { + if (e->validate(nvfx)) + nvfx->state.dirty |= (1ULL << e->dirty.hw); + } + + states++; + } + nvfx->dirty = 0; +} + +void +nvfx_state_emit(struct nvfx_context *nvfx) +{ + struct nvfx_state *state = &nvfx->state; + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + unsigned i; + uint64_t states; + + /* XXX: race conditions + */ + if (nvfx != screen->cur_ctx) { + for (i = 0; i < NVFX_STATE_MAX; i++) { + if (state->hw[i] && screen->state[i] != state->hw[i]) + state->dirty |= (1ULL << i); + } + + screen->cur_ctx = nvfx; + } + + for (i = 0, states = state->dirty; states; i++) { + if (!(states & (1ULL << i))) + continue; + so_ref (state->hw[i], &nvfx->screen->state[i]); + if (state->hw[i]) + so_emit(chan, nvfx->screen->state[i]); + states &= ~(1ULL << i); + } + + /* TODO: could nv30 need this or something similar too? */ + if(nvfx->is_nv4x) { + if (state->dirty & ((1ULL << NVFX_STATE_FRAGPROG) | + (1ULL << NVFX_STATE_FRAGTEX0))) { + BEGIN_RING(chan, eng3d, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, eng3d, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (chan, 1); + } + } + state->dirty = 0; +} + +void +nvfx_state_flush_notify(struct nouveau_channel *chan) +{ + struct nvfx_context *nvfx = chan->user_private; + struct nvfx_state *state = &nvfx->state; + unsigned i, samplers; + + so_emit_reloc_markers(chan, state->hw[NVFX_STATE_FB]); + for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { + if (!(samplers & (1 << i))) + continue; + so_emit_reloc_markers(chan, + state->hw[NVFX_STATE_FRAGTEX0+i]); + samplers &= ~(1ULL << i); + } + so_emit_reloc_markers(chan, state->hw[NVFX_STATE_FRAGPROG]); + if (state->hw[NVFX_STATE_VTXBUF] && nvfx->render_mode == HW) + so_emit_reloc_markers(chan, state->hw[NVFX_STATE_VTXBUF]); +} + +boolean +nvfx_state_validate(struct nvfx_context *nvfx) +{ + boolean was_sw = nvfx->fallback_swtnl ? TRUE : FALSE; + + if (nvfx->render_mode != HW) { + /* Don't even bother trying to go back to hw if none + * of the states that caused swtnl previously have changed. + */ + if ((nvfx->fallback_swtnl & nvfx->dirty) + != nvfx->fallback_swtnl) + return FALSE; + + /* Attempt to go to hwtnl again */ + nvfx->pipe.flush(&nvfx->pipe, 0, NULL); + nvfx->dirty |= (NVFX_NEW_VIEWPORT | + NVFX_NEW_VERTPROG | + NVFX_NEW_ARRAYS); + nvfx->render_mode = HW; + } + + if(!nvfx->is_nv4x) + nvfx_state_do_validate(nvfx, nv30_render_states); + else + nvfx_state_do_validate(nvfx, nv40_render_states); + + if (nvfx->fallback_swtnl || nvfx->fallback_swrast) + return FALSE; + + if (was_sw) + NOUVEAU_ERR("swtnl->hw\n"); + + return TRUE; +} + +boolean +nvfx_state_validate_swtnl(struct nvfx_context *nvfx) +{ + struct draw_context *draw = nvfx->draw; + + /* Setup for swtnl */ + if (nvfx->render_mode == HW) { + NOUVEAU_ERR("hw->swtnl 0x%08x\n", nvfx->fallback_swtnl); + nvfx->pipe.flush(&nvfx->pipe, 0, NULL); + nvfx->dirty |= (NVFX_NEW_VIEWPORT | + NVFX_NEW_VERTPROG | + NVFX_NEW_ARRAYS); + nvfx->render_mode = SWTNL; + } + + if (nvfx->draw_dirty & NVFX_NEW_VERTPROG) + draw_bind_vertex_shader(draw, nvfx->vertprog->draw); + + if (nvfx->draw_dirty & NVFX_NEW_RAST) + draw_set_rasterizer_state(draw, &nvfx->rasterizer->pipe); + + if (nvfx->draw_dirty & NVFX_NEW_UCP) + draw_set_clip_state(draw, &nvfx->clip); + + if (nvfx->draw_dirty & NVFX_NEW_VIEWPORT) + draw_set_viewport_state(draw, &nvfx->viewport); + + if (nvfx->draw_dirty & NVFX_NEW_ARRAYS) { + draw_set_vertex_buffers(draw, nvfx->vtxbuf_nr, nvfx->vtxbuf); + draw_set_vertex_elements(draw, nvfx->vtxelt->num_elements, nvfx->vtxelt->pipe); + } + + if(!nvfx->is_nv4x) + nvfx_state_do_validate(nvfx, nv30_swtnl_render_states); + else + nvfx_state_do_validate(nvfx, nv40_swtnl_render_states); + + if (nvfx->fallback_swrast) { + NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nvfx->fallback_swrast); + return FALSE; + } + + nvfx->draw_dirty = 0; + return TRUE; +} -- cgit v1.2.3 From 0b55e1cd17801a03d6fbb7ce46f25aa2b086bff4 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sat, 20 Feb 2010 20:37:17 +0100 Subject: nv30, nv40: unify identical nv[34]0_state_blend.c --- src/gallium/drivers/nv30/Makefile | 1 - src/gallium/drivers/nv30/nv30_context.h | 2 -- src/gallium/drivers/nv30/nv30_state_blend.c | 41 ----------------------------- src/gallium/drivers/nv40/Makefile | 1 - src/gallium/drivers/nv40/nv40_context.h | 2 -- src/gallium/drivers/nv40/nv40_state_blend.c | 41 ----------------------------- src/gallium/drivers/nvfx/Makefile | 1 + src/gallium/drivers/nvfx/nvfx_context.h | 3 +++ src/gallium/drivers/nvfx/nvfx_state_blend.c | 41 +++++++++++++++++++++++++++++ src/gallium/drivers/nvfx/nvfx_state_emit.c | 4 +-- 10 files changed, 47 insertions(+), 90 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_state_blend.c delete mode 100644 src/gallium/drivers/nv40/nv40_state_blend.c create mode 100644 src/gallium/drivers/nvfx/nvfx_state_blend.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index 3067e450628..66dad8f3586 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -12,7 +12,6 @@ C_SOURCES = \ nv30_query.c \ nv30_screen.c \ nv30_state.c \ - nv30_state_blend.c \ nv30_state_fb.c \ nv30_state_rasterizer.c \ nv30_state_scissor.c \ diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index a6767da4dc1..e6194b23f5f 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -29,8 +29,6 @@ extern struct nvfx_state_entry nv30_state_scissor; extern struct nvfx_state_entry nv30_state_stipple; extern struct nvfx_state_entry nv30_state_fragprog; extern struct nvfx_state_entry nv30_state_vertprog; -extern struct nvfx_state_entry nv30_state_blend; -extern struct nvfx_state_entry nv30_state_blend_colour; extern struct nvfx_state_entry nv30_state_zsa; extern struct nvfx_state_entry nv30_state_viewport; extern struct nvfx_state_entry nv30_state_framebuffer; diff --git a/src/gallium/drivers/nv30/nv30_state_blend.c b/src/gallium/drivers/nv30/nv30_state_blend.c deleted file mode 100644 index de368e5bd79..00000000000 --- a/src/gallium/drivers/nv30/nv30_state_blend.c +++ /dev/null @@ -1,41 +0,0 @@ -#include "nv30_context.h" - -static boolean -nv30_state_blend_validate(struct nvfx_context *nvfx) -{ - so_ref(nvfx->blend->so, &nvfx->state.hw[NVFX_STATE_BLEND]); - return TRUE; -} - -struct nvfx_state_entry nv30_state_blend = { - .validate = nv30_state_blend_validate, - .dirty = { - .pipe = NVFX_NEW_BLEND, - .hw = NVFX_STATE_BLEND - } -}; - -static boolean -nv30_state_blend_colour_validate(struct nvfx_context *nvfx) -{ - struct nouveau_stateobj *so = so_new(1, 1, 0); - struct pipe_blend_color *bcol = &nvfx->blend_colour; - - so_method(so, nvfx->screen->eng3d, NV34TCL_BLEND_COLOR, 1); - so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | - (float_to_ubyte(bcol->color[0]) << 16) | - (float_to_ubyte(bcol->color[1]) << 8) | - (float_to_ubyte(bcol->color[2]) << 0))); - - so_ref(so, &nvfx->state.hw[NVFX_STATE_BCOL]); - so_ref(NULL, &so); - return TRUE; -} - -struct nvfx_state_entry nv30_state_blend_colour = { - .validate = nv30_state_blend_colour_validate, - .dirty = { - .pipe = NVFX_NEW_BCOL, - .hw = NVFX_STATE_BCOL - } -}; diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index b0c0c09d517..739ca433876 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -12,7 +12,6 @@ C_SOURCES = \ nv40_query.c \ nv40_screen.c \ nv40_state.c \ - nv40_state_blend.c \ nv40_state_fb.c \ nv40_state_rasterizer.c \ nv40_state_scissor.c \ diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 0b875bcb065..28a79e4ecff 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -33,8 +33,6 @@ extern struct nvfx_state_entry nv40_state_scissor; extern struct nvfx_state_entry nv40_state_stipple; extern struct nvfx_state_entry nv40_state_fragprog; extern struct nvfx_state_entry nv40_state_vertprog; -extern struct nvfx_state_entry nv40_state_blend; -extern struct nvfx_state_entry nv40_state_blend_colour; extern struct nvfx_state_entry nv40_state_zsa; extern struct nvfx_state_entry nv40_state_viewport; extern struct nvfx_state_entry nv40_state_framebuffer; diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c deleted file mode 100644 index bb06b4888da..00000000000 --- a/src/gallium/drivers/nv40/nv40_state_blend.c +++ /dev/null @@ -1,41 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_blend_validate(struct nvfx_context *nvfx) -{ - so_ref(nvfx->blend->so, &nvfx->state.hw[NVFX_STATE_BLEND]); - return TRUE; -} - -struct nvfx_state_entry nv40_state_blend = { - .validate = nv40_state_blend_validate, - .dirty = { - .pipe = NVFX_NEW_BLEND, - .hw = NVFX_STATE_BLEND - } -}; - -static boolean -nv40_state_blend_colour_validate(struct nvfx_context *nvfx) -{ - struct nouveau_stateobj *so = so_new(1, 1, 0); - struct pipe_blend_color *bcol = &nvfx->blend_colour; - - so_method(so, nvfx->screen->eng3d, NV34TCL_BLEND_COLOR, 1); - so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | - (float_to_ubyte(bcol->color[0]) << 16) | - (float_to_ubyte(bcol->color[1]) << 8) | - (float_to_ubyte(bcol->color[2]) << 0))); - - so_ref(so, &nvfx->state.hw[NVFX_STATE_BCOL]); - so_ref(NULL, &so); - return TRUE; -} - -struct nvfx_state_entry nv40_state_blend_colour = { - .validate = nv40_state_blend_colour_validate, - .dirty = { - .pipe = NVFX_NEW_BCOL, - .hw = NVFX_STATE_BCOL - } -}; diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index 0eb1bebeb30..d124de89ec8 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -6,6 +6,7 @@ LIBNAME = nvfx C_SOURCES = \ nvfx_clear.c \ nvfx_state_emit.c \ + nvfx_state_blend.c \ nvfx_transfer.c include ../../Makefile.template diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 3d4bc6bbc93..bbffb8a18d2 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -181,6 +181,9 @@ struct nvfx_state_entry { } dirty; }; +extern struct nvfx_state_entry nvfx_state_blend; +extern struct nvfx_state_entry nvfx_state_blend_colour; + /* nvfx_clear.c */ extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, double depth, unsigned stencil); diff --git a/src/gallium/drivers/nvfx/nvfx_state_blend.c b/src/gallium/drivers/nvfx/nvfx_state_blend.c new file mode 100644 index 00000000000..03b6ef8117d --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_blend.c @@ -0,0 +1,41 @@ +#include "nvfx_context.h" + +static boolean +nvfx_state_blend_validate(struct nvfx_context *nvfx) +{ + so_ref(nvfx->blend->so, &nvfx->state.hw[NVFX_STATE_BLEND]); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_blend = { + .validate = nvfx_state_blend_validate, + .dirty = { + .pipe = NVFX_NEW_BLEND, + .hw = NVFX_STATE_BLEND + } +}; + +static boolean +nvfx_state_blend_colour_validate(struct nvfx_context *nvfx) +{ + struct nouveau_stateobj *so = so_new(1, 1, 0); + struct pipe_blend_color *bcol = &nvfx->blend_colour; + + so_method(so, nvfx->screen->eng3d, NV34TCL_BLEND_COLOR, 1); + so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | + (float_to_ubyte(bcol->color[0]) << 16) | + (float_to_ubyte(bcol->color[1]) << 8) | + (float_to_ubyte(bcol->color[2]) << 0))); + + so_ref(so, &nvfx->state.hw[NVFX_STATE_BCOL]); + so_ref(NULL, &so); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_blend_colour = { + .validate = nvfx_state_blend_colour_validate, + .dirty = { + .pipe = NVFX_NEW_BCOL, + .hw = NVFX_STATE_BCOL + } +}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index 9582ba9648c..059bbb14b72 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -12,8 +12,8 @@ static struct nvfx_state_entry *name##_render_states[] = { \ &nvxx##_state_fragprog, \ &nvxx##_state_fragtex, \ &nvxx##_state_vertprog, \ - &nvxx##_state_blend, \ - &nvxx##_state_blend_colour, \ + &nvfx_state_blend, \ + &nvfx_state_blend_colour, \ &nvxx##_state_zsa, \ &nvxx##_state_sr, \ &nvxx##_state_viewport, \ -- cgit v1.2.3 From ada801222b3c984c260165415864a8f511145251 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sat, 20 Feb 2010 23:16:01 +0100 Subject: nv30, nv40: unify identical nv[34]0_state_rasterizer.c --- src/gallium/drivers/nv30/Makefile | 1 - src/gallium/drivers/nv30/nv30_context.h | 1 - src/gallium/drivers/nv30/nv30_state_rasterizer.c | 17 ----------------- src/gallium/drivers/nv40/Makefile | 1 - src/gallium/drivers/nv40/nv40_context.h | 1 - src/gallium/drivers/nv40/nv40_state_rasterizer.c | 17 ----------------- src/gallium/drivers/nvfx/Makefile | 1 + src/gallium/drivers/nvfx/nvfx_context.h | 1 + src/gallium/drivers/nvfx/nvfx_state_emit.c | 2 +- src/gallium/drivers/nvfx/nvfx_state_rasterizer.c | 17 +++++++++++++++++ 10 files changed, 20 insertions(+), 39 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_state_rasterizer.c delete mode 100644 src/gallium/drivers/nv40/nv40_state_rasterizer.c create mode 100644 src/gallium/drivers/nvfx/nvfx_state_rasterizer.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index 66dad8f3586..668f5965b16 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -13,7 +13,6 @@ C_SOURCES = \ nv30_screen.c \ nv30_state.c \ nv30_state_fb.c \ - nv30_state_rasterizer.c \ nv30_state_scissor.c \ nv30_state_stipple.c \ nv30_state_viewport.c \ diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index e6194b23f5f..fbafbec383a 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -24,7 +24,6 @@ extern void nv30_fragprog_destroy(struct nvfx_context *, extern void nv30_fragtex_bind(struct nvfx_context *); /* nv30_state.c and friends */ -extern struct nvfx_state_entry nv30_state_rasterizer; extern struct nvfx_state_entry nv30_state_scissor; extern struct nvfx_state_entry nv30_state_stipple; extern struct nvfx_state_entry nv30_state_fragprog; diff --git a/src/gallium/drivers/nv30/nv30_state_rasterizer.c b/src/gallium/drivers/nv30/nv30_state_rasterizer.c deleted file mode 100644 index 1a83da52047..00000000000 --- a/src/gallium/drivers/nv30/nv30_state_rasterizer.c +++ /dev/null @@ -1,17 +0,0 @@ -#include "nv30_context.h" - -static boolean -nv30_state_rasterizer_validate(struct nvfx_context *nvfx) -{ - so_ref(nvfx->rasterizer->so, - &nvfx->state.hw[NVFX_STATE_RAST]); - return TRUE; -} - -struct nvfx_state_entry nv30_state_rasterizer = { - .validate = nv30_state_rasterizer_validate, - .dirty = { - .pipe = NVFX_NEW_RAST, - .hw = NVFX_STATE_RAST - } -}; diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 739ca433876..25ad2974bf0 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -13,7 +13,6 @@ C_SOURCES = \ nv40_screen.c \ nv40_state.c \ nv40_state_fb.c \ - nv40_state_rasterizer.c \ nv40_state_scissor.c \ nv40_state_stipple.c \ nv40_state_viewport.c \ diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 28a79e4ecff..fc8b06bd0f3 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -28,7 +28,6 @@ extern void nv40_fragprog_destroy(struct nvfx_context *, extern void nv40_fragtex_bind(struct nvfx_context *); /* nv40_state.c and friends */ -extern struct nvfx_state_entry nv40_state_rasterizer; extern struct nvfx_state_entry nv40_state_scissor; extern struct nvfx_state_entry nv40_state_stipple; extern struct nvfx_state_entry nv40_state_fragprog; diff --git a/src/gallium/drivers/nv40/nv40_state_rasterizer.c b/src/gallium/drivers/nv40/nv40_state_rasterizer.c deleted file mode 100644 index d6136a26ebe..00000000000 --- a/src/gallium/drivers/nv40/nv40_state_rasterizer.c +++ /dev/null @@ -1,17 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_rasterizer_validate(struct nvfx_context *nvfx) -{ - so_ref(nvfx->rasterizer->so, - &nvfx->state.hw[NVFX_STATE_RAST]); - return TRUE; -} - -struct nvfx_state_entry nv40_state_rasterizer = { - .validate = nv40_state_rasterizer_validate, - .dirty = { - .pipe = NVFX_NEW_RAST, - .hw = NVFX_STATE_RAST - } -}; diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index d124de89ec8..f1843b61df0 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -7,6 +7,7 @@ C_SOURCES = \ nvfx_clear.c \ nvfx_state_emit.c \ nvfx_state_blend.c \ + nvfx_state_rasterizer.c \ nvfx_transfer.c include ../../Makefile.template diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index bbffb8a18d2..c7ce17ab382 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -183,6 +183,7 @@ struct nvfx_state_entry { extern struct nvfx_state_entry nvfx_state_blend; extern struct nvfx_state_entry nvfx_state_blend_colour; +extern struct nvfx_state_entry nvfx_state_rasterizer; /* nvfx_clear.c */ extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers, diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index 059bbb14b72..60df50aa1cc 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -6,7 +6,7 @@ #define RENDER_STATES(name, nvxx, vbo) \ static struct nvfx_state_entry *name##_render_states[] = { \ &nvxx##_state_framebuffer, \ - &nvxx##_state_rasterizer, \ + &nvfx_state_rasterizer, \ &nvxx##_state_scissor, \ &nvxx##_state_stipple, \ &nvxx##_state_fragprog, \ diff --git a/src/gallium/drivers/nvfx/nvfx_state_rasterizer.c b/src/gallium/drivers/nvfx/nvfx_state_rasterizer.c new file mode 100644 index 00000000000..0d35ecbf209 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_rasterizer.c @@ -0,0 +1,17 @@ +#include "nvfx_context.h" + +static boolean +nvfx_state_rasterizer_validate(struct nvfx_context *nvfx) +{ + so_ref(nvfx->rasterizer->so, + &nvfx->state.hw[NVFX_STATE_RAST]); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_rasterizer = { + .validate = nvfx_state_rasterizer_validate, + .dirty = { + .pipe = NVFX_NEW_RAST, + .hw = NVFX_STATE_RAST + } +}; -- cgit v1.2.3 From 938c6905cfa4e25c8e898c8d91ee0fe8174abe0b Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sat, 20 Feb 2010 23:17:41 +0100 Subject: nv30, nv40: unify identical nv[34]0_state_scissor.c --- src/gallium/drivers/nv30/Makefile | 1 - src/gallium/drivers/nv30/nv30_context.h | 1 - src/gallium/drivers/nv30/nv30_state_scissor.c | 36 --------------------------- src/gallium/drivers/nv40/Makefile | 1 - src/gallium/drivers/nv40/nv40_context.h | 1 - src/gallium/drivers/nv40/nv40_state_scissor.c | 36 --------------------------- src/gallium/drivers/nvfx/Makefile | 1 + src/gallium/drivers/nvfx/nvfx_context.h | 1 + src/gallium/drivers/nvfx/nvfx_state_emit.c | 2 +- src/gallium/drivers/nvfx/nvfx_state_scissor.c | 36 +++++++++++++++++++++++++++ 10 files changed, 39 insertions(+), 77 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_state_scissor.c delete mode 100644 src/gallium/drivers/nv40/nv40_state_scissor.c create mode 100644 src/gallium/drivers/nvfx/nvfx_state_scissor.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index 668f5965b16..bbfca55e495 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -13,7 +13,6 @@ C_SOURCES = \ nv30_screen.c \ nv30_state.c \ nv30_state_fb.c \ - nv30_state_scissor.c \ nv30_state_stipple.c \ nv30_state_viewport.c \ nv30_state_zsa.c \ diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index fbafbec383a..5a5311e5a9c 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -24,7 +24,6 @@ extern void nv30_fragprog_destroy(struct nvfx_context *, extern void nv30_fragtex_bind(struct nvfx_context *); /* nv30_state.c and friends */ -extern struct nvfx_state_entry nv30_state_scissor; extern struct nvfx_state_entry nv30_state_stipple; extern struct nvfx_state_entry nv30_state_fragprog; extern struct nvfx_state_entry nv30_state_vertprog; diff --git a/src/gallium/drivers/nv30/nv30_state_scissor.c b/src/gallium/drivers/nv30/nv30_state_scissor.c deleted file mode 100644 index e91680e2d19..00000000000 --- a/src/gallium/drivers/nv30/nv30_state_scissor.c +++ /dev/null @@ -1,36 +0,0 @@ -#include "nv30_context.h" - -static boolean -nv30_state_scissor_validate(struct nvfx_context *nvfx) -{ - struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe; - struct pipe_scissor_state *s = &nvfx->scissor; - struct nouveau_stateobj *so; - - if (nvfx->state.hw[NVFX_STATE_SCISSOR] && - (rast->scissor == 0 && nvfx->state.scissor_enabled == 0)) - return FALSE; - nvfx->state.scissor_enabled = rast->scissor; - - so = so_new(1, 2, 0); - so_method(so, nvfx->screen->eng3d, NV34TCL_SCISSOR_HORIZ, 2); - if (nvfx->state.scissor_enabled) { - so_data (so, ((s->maxx - s->minx) << 16) | s->minx); - so_data (so, ((s->maxy - s->miny) << 16) | s->miny); - } else { - so_data (so, 4096 << 16); - so_data (so, 4096 << 16); - } - - so_ref(so, &nvfx->state.hw[NVFX_STATE_SCISSOR]); - so_ref(NULL, &so); - return TRUE; -} - -struct nvfx_state_entry nv30_state_scissor = { - .validate = nv30_state_scissor_validate, - .dirty = { - .pipe = NVFX_NEW_SCISSOR | NVFX_NEW_RAST, - .hw = NVFX_STATE_SCISSOR - } -}; diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 25ad2974bf0..02bbb15f81d 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -13,7 +13,6 @@ C_SOURCES = \ nv40_screen.c \ nv40_state.c \ nv40_state_fb.c \ - nv40_state_scissor.c \ nv40_state_stipple.c \ nv40_state_viewport.c \ nv40_state_zsa.c \ diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index fc8b06bd0f3..d3464ad7a3b 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -28,7 +28,6 @@ extern void nv40_fragprog_destroy(struct nvfx_context *, extern void nv40_fragtex_bind(struct nvfx_context *); /* nv40_state.c and friends */ -extern struct nvfx_state_entry nv40_state_scissor; extern struct nvfx_state_entry nv40_state_stipple; extern struct nvfx_state_entry nv40_state_fragprog; extern struct nvfx_state_entry nv40_state_vertprog; diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c deleted file mode 100644 index 11ec5c0878b..00000000000 --- a/src/gallium/drivers/nv40/nv40_state_scissor.c +++ /dev/null @@ -1,36 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_scissor_validate(struct nvfx_context *nvfx) -{ - struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe; - struct pipe_scissor_state *s = &nvfx->scissor; - struct nouveau_stateobj *so; - - if (nvfx->state.hw[NVFX_STATE_SCISSOR] && - (rast->scissor == 0 && nvfx->state.scissor_enabled == 0)) - return FALSE; - nvfx->state.scissor_enabled = rast->scissor; - - so = so_new(1, 2, 0); - so_method(so, nvfx->screen->eng3d, NV34TCL_SCISSOR_HORIZ, 2); - if (nvfx->state.scissor_enabled) { - so_data (so, ((s->maxx - s->minx) << 16) | s->minx); - so_data (so, ((s->maxy - s->miny) << 16) | s->miny); - } else { - so_data (so, 4096 << 16); - so_data (so, 4096 << 16); - } - - so_ref(so, &nvfx->state.hw[NVFX_STATE_SCISSOR]); - so_ref(NULL, &so); - return TRUE; -} - -struct nvfx_state_entry nv40_state_scissor = { - .validate = nv40_state_scissor_validate, - .dirty = { - .pipe = NVFX_NEW_SCISSOR | NVFX_NEW_RAST, - .hw = NVFX_STATE_SCISSOR - } -}; diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index f1843b61df0..8f4edb6543b 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -8,6 +8,7 @@ C_SOURCES = \ nvfx_state_emit.c \ nvfx_state_blend.c \ nvfx_state_rasterizer.c \ + nvfx_state_scissor.c \ nvfx_transfer.c include ../../Makefile.template diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index c7ce17ab382..d5817ac5f89 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -184,6 +184,7 @@ struct nvfx_state_entry { extern struct nvfx_state_entry nvfx_state_blend; extern struct nvfx_state_entry nvfx_state_blend_colour; extern struct nvfx_state_entry nvfx_state_rasterizer; +extern struct nvfx_state_entry nvfx_state_scissor; /* nvfx_clear.c */ extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers, diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index 60df50aa1cc..cf73f81fb2c 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -7,7 +7,7 @@ static struct nvfx_state_entry *name##_render_states[] = { \ &nvxx##_state_framebuffer, \ &nvfx_state_rasterizer, \ - &nvxx##_state_scissor, \ + &nvfx_state_scissor, \ &nvxx##_state_stipple, \ &nvxx##_state_fragprog, \ &nvxx##_state_fragtex, \ diff --git a/src/gallium/drivers/nvfx/nvfx_state_scissor.c b/src/gallium/drivers/nvfx/nvfx_state_scissor.c new file mode 100644 index 00000000000..940d8cb5c0c --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_scissor.c @@ -0,0 +1,36 @@ +#include "nvfx_context.h" + +static boolean +nvfx_state_scissor_validate(struct nvfx_context *nvfx) +{ + struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe; + struct pipe_scissor_state *s = &nvfx->scissor; + struct nouveau_stateobj *so; + + if (nvfx->state.hw[NVFX_STATE_SCISSOR] && + (rast->scissor == 0 && nvfx->state.scissor_enabled == 0)) + return FALSE; + nvfx->state.scissor_enabled = rast->scissor; + + so = so_new(1, 2, 0); + so_method(so, nvfx->screen->eng3d, NV34TCL_SCISSOR_HORIZ, 2); + if (nvfx->state.scissor_enabled) { + so_data (so, ((s->maxx - s->minx) << 16) | s->minx); + so_data (so, ((s->maxy - s->miny) << 16) | s->miny); + } else { + so_data (so, 4096 << 16); + so_data (so, 4096 << 16); + } + + so_ref(so, &nvfx->state.hw[NVFX_STATE_SCISSOR]); + so_ref(NULL, &so); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_scissor = { + .validate = nvfx_state_scissor_validate, + .dirty = { + .pipe = NVFX_NEW_SCISSOR | NVFX_NEW_RAST, + .hw = NVFX_STATE_SCISSOR + } +}; -- cgit v1.2.3 From 64d882637dc97b332eb3c0f457376f86b75c8c5f Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sat, 20 Feb 2010 23:19:40 +0100 Subject: nv30, nv40: unify identical nv[34]0_state_zsa.c --- src/gallium/drivers/nv30/Makefile | 1 - src/gallium/drivers/nv30/nv30_context.h | 2 -- src/gallium/drivers/nv30/nv30_state_zsa.c | 41 ------------------------------ src/gallium/drivers/nv40/Makefile | 1 - src/gallium/drivers/nv40/nv40_context.h | 2 -- src/gallium/drivers/nv40/nv40_state_zsa.c | 41 ------------------------------ src/gallium/drivers/nvfx/Makefile | 1 + src/gallium/drivers/nvfx/nvfx_context.h | 2 ++ src/gallium/drivers/nvfx/nvfx_state_emit.c | 4 +-- src/gallium/drivers/nvfx/nvfx_state_zsa.c | 41 ++++++++++++++++++++++++++++++ 10 files changed, 46 insertions(+), 90 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_state_zsa.c delete mode 100644 src/gallium/drivers/nv40/nv40_state_zsa.c create mode 100644 src/gallium/drivers/nvfx/nvfx_state_zsa.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index bbfca55e495..8f198ff2b70 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -15,7 +15,6 @@ C_SOURCES = \ nv30_state_fb.c \ nv30_state_stipple.c \ nv30_state_viewport.c \ - nv30_state_zsa.c \ nv30_surface.c \ nv30_vbo.c \ nv30_vertprog.c diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 5a5311e5a9c..2ee12ef5d8f 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -27,12 +27,10 @@ extern void nv30_fragtex_bind(struct nvfx_context *); extern struct nvfx_state_entry nv30_state_stipple; extern struct nvfx_state_entry nv30_state_fragprog; extern struct nvfx_state_entry nv30_state_vertprog; -extern struct nvfx_state_entry nv30_state_zsa; extern struct nvfx_state_entry nv30_state_viewport; extern struct nvfx_state_entry nv30_state_framebuffer; extern struct nvfx_state_entry nv30_state_fragtex; extern struct nvfx_state_entry nv30_state_vbo; -extern struct nvfx_state_entry nv30_state_sr; /* nv30_vbo.c */ extern void nv30_draw_arrays(struct pipe_context *, unsigned mode, diff --git a/src/gallium/drivers/nv30/nv30_state_zsa.c b/src/gallium/drivers/nv30/nv30_state_zsa.c deleted file mode 100644 index 0832408edf2..00000000000 --- a/src/gallium/drivers/nv30/nv30_state_zsa.c +++ /dev/null @@ -1,41 +0,0 @@ -#include "nv30_context.h" - -static boolean -nv30_state_zsa_validate(struct nvfx_context *nvfx) -{ - so_ref(nvfx->zsa->so, - &nvfx->state.hw[NVFX_STATE_ZSA]); - return TRUE; -} - -struct nvfx_state_entry nv30_state_zsa = { - .validate = nv30_state_zsa_validate, - .dirty = { - .pipe = NVFX_NEW_ZSA, - .hw = NVFX_STATE_ZSA - } -}; - -static boolean -nv30_state_sr_validate(struct nvfx_context *nvfx) -{ - struct nouveau_stateobj *so = so_new(2, 2, 0); - struct pipe_stencil_ref *sr = &nvfx->stencil_ref; - - so_method(so, nvfx->screen->eng3d, NV34TCL_STENCIL_FRONT_FUNC_REF, 1); - so_data (so, sr->ref_value[0]); - so_method(so, nvfx->screen->eng3d, NV34TCL_STENCIL_BACK_FUNC_REF, 1); - so_data (so, sr->ref_value[1]); - - so_ref(so, &nvfx->state.hw[NVFX_STATE_SR]); - so_ref(NULL, &so); - return TRUE; -} - -struct nvfx_state_entry nv30_state_sr = { - .validate = nv30_state_sr_validate, - .dirty = { - .pipe = NVFX_NEW_SR, - .hw = NVFX_STATE_SR - } -}; diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 02bbb15f81d..1cad9409ed4 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -15,7 +15,6 @@ C_SOURCES = \ nv40_state_fb.c \ nv40_state_stipple.c \ nv40_state_viewport.c \ - nv40_state_zsa.c \ nv40_surface.c \ nv40_vbo.c \ nv40_vertprog.c diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index d3464ad7a3b..3dcbbae30b3 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -31,13 +31,11 @@ extern void nv40_fragtex_bind(struct nvfx_context *); extern struct nvfx_state_entry nv40_state_stipple; extern struct nvfx_state_entry nv40_state_fragprog; extern struct nvfx_state_entry nv40_state_vertprog; -extern struct nvfx_state_entry nv40_state_zsa; extern struct nvfx_state_entry nv40_state_viewport; extern struct nvfx_state_entry nv40_state_framebuffer; extern struct nvfx_state_entry nv40_state_fragtex; extern struct nvfx_state_entry nv40_state_vbo; extern struct nvfx_state_entry nv40_state_vtxfmt; -extern struct nvfx_state_entry nv40_state_sr; /* nv40_vbo.c */ extern void nv40_draw_arrays(struct pipe_context *, unsigned mode, diff --git a/src/gallium/drivers/nv40/nv40_state_zsa.c b/src/gallium/drivers/nv40/nv40_state_zsa.c deleted file mode 100644 index 00facd520af..00000000000 --- a/src/gallium/drivers/nv40/nv40_state_zsa.c +++ /dev/null @@ -1,41 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_zsa_validate(struct nvfx_context *nvfx) -{ - so_ref(nvfx->zsa->so, - &nvfx->state.hw[NVFX_STATE_ZSA]); - return TRUE; -} - -struct nvfx_state_entry nv40_state_zsa = { - .validate = nv40_state_zsa_validate, - .dirty = { - .pipe = NVFX_NEW_ZSA, - .hw = NVFX_STATE_ZSA - } -}; - -static boolean -nv40_state_sr_validate(struct nvfx_context *nvfx) -{ - struct nouveau_stateobj *so = so_new(2, 2, 0); - struct pipe_stencil_ref *sr = &nvfx->stencil_ref; - - so_method(so, nvfx->screen->eng3d, NV34TCL_STENCIL_FRONT_FUNC_REF, 1); - so_data (so, sr->ref_value[0]); - so_method(so, nvfx->screen->eng3d, NV34TCL_STENCIL_BACK_FUNC_REF, 1); - so_data (so, sr->ref_value[1]); - - so_ref(so, &nvfx->state.hw[NVFX_STATE_SR]); - so_ref(NULL, &so); - return TRUE; -} - -struct nvfx_state_entry nv40_state_sr = { - .validate = nv40_state_sr_validate, - .dirty = { - .pipe = NVFX_NEW_SR, - .hw = NVFX_STATE_SR - } -}; diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index 8f4edb6543b..b077713b047 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -9,6 +9,7 @@ C_SOURCES = \ nvfx_state_blend.c \ nvfx_state_rasterizer.c \ nvfx_state_scissor.c \ + nvfx_state_zsa.c \ nvfx_transfer.c include ../../Makefile.template diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index d5817ac5f89..d97cf30a1df 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -185,6 +185,8 @@ extern struct nvfx_state_entry nvfx_state_blend; extern struct nvfx_state_entry nvfx_state_blend_colour; extern struct nvfx_state_entry nvfx_state_rasterizer; extern struct nvfx_state_entry nvfx_state_scissor; +extern struct nvfx_state_entry nvfx_state_sr; +extern struct nvfx_state_entry nvfx_state_zsa; /* nvfx_clear.c */ extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers, diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index cf73f81fb2c..a30af174e3f 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -14,8 +14,8 @@ static struct nvfx_state_entry *name##_render_states[] = { \ &nvxx##_state_vertprog, \ &nvfx_state_blend, \ &nvfx_state_blend_colour, \ - &nvxx##_state_zsa, \ - &nvxx##_state_sr, \ + &nvfx_state_zsa, \ + &nvfx_state_sr, \ &nvxx##_state_viewport, \ &nvxx##_state_##vbo, \ NULL \ diff --git a/src/gallium/drivers/nvfx/nvfx_state_zsa.c b/src/gallium/drivers/nvfx/nvfx_state_zsa.c new file mode 100644 index 00000000000..c84fd041c1e --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_zsa.c @@ -0,0 +1,41 @@ +#include "nvfx_context.h" + +static boolean +nvfx_state_zsa_validate(struct nvfx_context *nvfx) +{ + so_ref(nvfx->zsa->so, + &nvfx->state.hw[NVFX_STATE_ZSA]); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_zsa = { + .validate = nvfx_state_zsa_validate, + .dirty = { + .pipe = NVFX_NEW_ZSA, + .hw = NVFX_STATE_ZSA + } +}; + +static boolean +nvfx_state_sr_validate(struct nvfx_context *nvfx) +{ + struct nouveau_stateobj *so = so_new(2, 2, 0); + struct pipe_stencil_ref *sr = &nvfx->stencil_ref; + + so_method(so, nvfx->screen->eng3d, NV34TCL_STENCIL_FRONT_FUNC_REF, 1); + so_data (so, sr->ref_value[0]); + so_method(so, nvfx->screen->eng3d, NV34TCL_STENCIL_BACK_FUNC_REF, 1); + so_data (so, sr->ref_value[1]); + + so_ref(so, &nvfx->state.hw[NVFX_STATE_SR]); + so_ref(NULL, &so); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_sr = { + .validate = nvfx_state_sr_validate, + .dirty = { + .pipe = NVFX_NEW_SR, + .hw = NVFX_STATE_SR + } +}; -- cgit v1.2.3 From e392e0b148d6b499322e58a84f300e2e0be49e29 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sat, 20 Feb 2010 19:22:57 +0100 Subject: nv30, nv40: unify nv[34]0_state_stipple.c The files are identical, except for the fact that the nv40 version forgets to unreference the stateobj. Unified to the correct nv30 version. --- src/gallium/drivers/nv30/Makefile | 1 - src/gallium/drivers/nv30/nv30_context.h | 1 - src/gallium/drivers/nv30/nv30_state_stipple.c | 40 --------------------------- src/gallium/drivers/nv40/Makefile | 1 - src/gallium/drivers/nv40/nv40_context.h | 1 - src/gallium/drivers/nv40/nv40_state_stipple.c | 39 -------------------------- src/gallium/drivers/nvfx/Makefile | 1 + src/gallium/drivers/nvfx/nvfx_context.h | 1 + src/gallium/drivers/nvfx/nvfx_state_emit.c | 2 +- src/gallium/drivers/nvfx/nvfx_state_stipple.c | 40 +++++++++++++++++++++++++++ 10 files changed, 43 insertions(+), 84 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_state_stipple.c delete mode 100644 src/gallium/drivers/nv40/nv40_state_stipple.c create mode 100644 src/gallium/drivers/nvfx/nvfx_state_stipple.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index 8f198ff2b70..5541a366181 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -13,7 +13,6 @@ C_SOURCES = \ nv30_screen.c \ nv30_state.c \ nv30_state_fb.c \ - nv30_state_stipple.c \ nv30_state_viewport.c \ nv30_surface.c \ nv30_vbo.c \ diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 2ee12ef5d8f..1c2572aa86d 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -24,7 +24,6 @@ extern void nv30_fragprog_destroy(struct nvfx_context *, extern void nv30_fragtex_bind(struct nvfx_context *); /* nv30_state.c and friends */ -extern struct nvfx_state_entry nv30_state_stipple; extern struct nvfx_state_entry nv30_state_fragprog; extern struct nvfx_state_entry nv30_state_vertprog; extern struct nvfx_state_entry nv30_state_viewport; diff --git a/src/gallium/drivers/nv30/nv30_state_stipple.c b/src/gallium/drivers/nv30/nv30_state_stipple.c deleted file mode 100644 index eceb0c57f97..00000000000 --- a/src/gallium/drivers/nv30/nv30_state_stipple.c +++ /dev/null @@ -1,40 +0,0 @@ -#include "nv30_context.h" - -static boolean -nv30_state_stipple_validate(struct nvfx_context *nvfx) -{ - struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe; - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - struct nouveau_stateobj *so; - - if (nvfx->state.hw[NVFX_STATE_STIPPLE] && - (rast->poly_stipple_enable == 0 && nvfx->state.stipple_enabled == 0)) - return FALSE; - - if (rast->poly_stipple_enable) { - unsigned i; - - so = so_new(2, 33, 0); - so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, 1); - so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); - for (i = 0; i < 32; i++) - so_data(so, nvfx->stipple[i]); - } else { - so = so_new(1, 1, 0); - so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, 0); - } - - so_ref(so, &nvfx->state.hw[NVFX_STATE_STIPPLE]); - so_ref(NULL, &so); - return TRUE; -} - -struct nvfx_state_entry nv30_state_stipple = { - .validate = nv30_state_stipple_validate, - .dirty = { - .pipe = NVFX_NEW_STIPPLE | NVFX_NEW_RAST, - .hw = NVFX_STATE_STIPPLE, - } -}; diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 1cad9409ed4..a101cfc80c7 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -13,7 +13,6 @@ C_SOURCES = \ nv40_screen.c \ nv40_state.c \ nv40_state_fb.c \ - nv40_state_stipple.c \ nv40_state_viewport.c \ nv40_surface.c \ nv40_vbo.c \ diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 3dcbbae30b3..127502dd5fd 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -28,7 +28,6 @@ extern void nv40_fragprog_destroy(struct nvfx_context *, extern void nv40_fragtex_bind(struct nvfx_context *); /* nv40_state.c and friends */ -extern struct nvfx_state_entry nv40_state_stipple; extern struct nvfx_state_entry nv40_state_fragprog; extern struct nvfx_state_entry nv40_state_vertprog; extern struct nvfx_state_entry nv40_state_viewport; diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c deleted file mode 100644 index e537e08e1d2..00000000000 --- a/src/gallium/drivers/nv40/nv40_state_stipple.c +++ /dev/null @@ -1,39 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_stipple_validate(struct nvfx_context *nvfx) -{ - struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe; - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - struct nouveau_stateobj *so; - - if (nvfx->state.hw[NVFX_STATE_STIPPLE] && - (rast->poly_stipple_enable == 0 && nvfx->state.stipple_enabled == 0)) - return FALSE; - - if (rast->poly_stipple_enable) { - unsigned i; - - so = so_new(2, 33, 0); - so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, 1); - so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); - for (i = 0; i < 32; i++) - so_data(so, nvfx->stipple[i]); - } else { - so = so_new(1, 1, 0); - so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, 0); - } - - so_ref(so, &nvfx->state.hw[NVFX_STATE_STIPPLE]); - return TRUE; -} - -struct nvfx_state_entry nv40_state_stipple = { - .validate = nv40_state_stipple_validate, - .dirty = { - .pipe = NVFX_NEW_STIPPLE | NVFX_NEW_RAST, - .hw = NVFX_STATE_STIPPLE, - } -}; diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index b077713b047..e96c9aa6bcf 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -9,6 +9,7 @@ C_SOURCES = \ nvfx_state_blend.c \ nvfx_state_rasterizer.c \ nvfx_state_scissor.c \ + nvfx_state_stipple.c \ nvfx_state_zsa.c \ nvfx_transfer.c diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index d97cf30a1df..8f5013a9d6b 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -186,6 +186,7 @@ extern struct nvfx_state_entry nvfx_state_blend_colour; extern struct nvfx_state_entry nvfx_state_rasterizer; extern struct nvfx_state_entry nvfx_state_scissor; extern struct nvfx_state_entry nvfx_state_sr; +extern struct nvfx_state_entry nvfx_state_stipple; extern struct nvfx_state_entry nvfx_state_zsa; /* nvfx_clear.c */ diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index a30af174e3f..04c0429ce19 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -8,7 +8,7 @@ static struct nvfx_state_entry *name##_render_states[] = { \ &nvxx##_state_framebuffer, \ &nvfx_state_rasterizer, \ &nvfx_state_scissor, \ - &nvxx##_state_stipple, \ + &nvfx_state_stipple, \ &nvxx##_state_fragprog, \ &nvxx##_state_fragtex, \ &nvxx##_state_vertprog, \ diff --git a/src/gallium/drivers/nvfx/nvfx_state_stipple.c b/src/gallium/drivers/nvfx/nvfx_state_stipple.c new file mode 100644 index 00000000000..57cd3c936a7 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_stipple.c @@ -0,0 +1,40 @@ +#include "nvfx_context.h" + +static boolean +nvfx_state_stipple_validate(struct nvfx_context *nvfx) +{ + struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct nouveau_stateobj *so; + + if (nvfx->state.hw[NVFX_STATE_STIPPLE] && + (rast->poly_stipple_enable == 0 && nvfx->state.stipple_enabled == 0)) + return FALSE; + + if (rast->poly_stipple_enable) { + unsigned i; + + so = so_new(2, 33, 0); + so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 1); + so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); + for (i = 0; i < 32; i++) + so_data(so, nvfx->stipple[i]); + } else { + so = so_new(1, 1, 0); + so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &nvfx->state.hw[NVFX_STATE_STIPPLE]); + so_ref(NULL, &so); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_stipple = { + .validate = nvfx_state_stipple_validate, + .dirty = { + .pipe = NVFX_NEW_STIPPLE | NVFX_NEW_RAST, + .hw = NVFX_STATE_STIPPLE, + } +}; -- cgit v1.2.3 From 8611a31bb401fcc2bdc0b3624859fffff7236c4b Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sat, 20 Feb 2010 19:32:29 +0100 Subject: nv30, nv40: unify nv[34]0_miptree.c The only difference between nv30 and nv40 is that nv30 allowed swizzling for more texture types. This patch preserves the existing behavior, using conditional code. Note however that this does not make sense, since all texture types can be swizzled on nv40 and probably on nv30 too. However, the handling of swizzled surfaces in the current 2D code is partially broken, so it's best not to touch this. A whole rewrite of the 2D code will be submitted, which will solve this problem. --- src/gallium/drivers/nv30/Makefile | 1 - src/gallium/drivers/nv30/nv30_context.h | 2 - src/gallium/drivers/nv30/nv30_miptree.c | 240 ------------------------------- src/gallium/drivers/nv30/nv30_screen.c | 2 +- src/gallium/drivers/nv40/Makefile | 1 - src/gallium/drivers/nv40/nv40_context.h | 2 - src/gallium/drivers/nv40/nv40_miptree.c | 236 ------------------------------ src/gallium/drivers/nv40/nv40_screen.c | 2 +- src/gallium/drivers/nvfx/Makefile | 1 + src/gallium/drivers/nvfx/nvfx_miptree.c | 247 ++++++++++++++++++++++++++++++++ 10 files changed, 250 insertions(+), 484 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_miptree.c delete mode 100644 src/gallium/drivers/nv40/nv40_miptree.c create mode 100644 src/gallium/drivers/nvfx/nvfx_miptree.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index 5541a366181..d8de297f128 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -8,7 +8,6 @@ C_SOURCES = \ nv30_draw.c \ nv30_fragprog.c \ nv30_fragtex.c \ - nv30_miptree.c \ nv30_query.c \ nv30_screen.c \ nv30_state.c \ diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 1c2572aa86d..46b36ee2e50 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -7,8 +7,6 @@ extern void nv30_init_state_functions(struct nvfx_context *nvfx); extern void nv30_init_surface_functions(struct nvfx_context *nvfx); extern void nv30_init_query_functions(struct nvfx_context *nvfx); -extern void nv30_screen_init_miptree_functions(struct pipe_screen *pscreen); - /* nv30_draw.c */ extern struct draw_stage *nv30_draw_render_stage(struct nvfx_context *nvfx); diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c deleted file mode 100644 index ada355a4440..00000000000 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ /dev/null @@ -1,240 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" -#include "util/u_format.h" -#include "util/u_math.h" - -#include "nv30_context.h" -#include "../nouveau/nv04_surface_2d.h" - -static void -nv30_miptree_layout(struct nvfx_miptree *nv30mt) -{ - struct pipe_texture *pt = &nv30mt->base; - uint width = pt->width0; - uint offset = 0; - int nr_faces, l, f; - uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | - PIPE_TEXTURE_USAGE_DEPTH_STENCIL | - PIPE_TEXTURE_USAGE_RENDER_TARGET | - PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_SCANOUT); - - if (pt->target == PIPE_TEXTURE_CUBE) { - nr_faces = 6; - } else - if (pt->target == PIPE_TEXTURE_3D) { - nr_faces = pt->depth0; - } else { - nr_faces = 1; - } - - for (l = 0; l <= pt->last_level; l++) { - if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) - nv30mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64); - else - nv30mt->level[l].pitch = util_format_get_stride(pt->format, width); - - nv30mt->level[l].image_offset = - CALLOC(nr_faces, sizeof(unsigned)); - - width = u_minify(width, 1); - } - - for (f = 0; f < nr_faces; f++) { - for (l = 0; l < pt->last_level; l++) { - nv30mt->level[l].image_offset[f] = offset; - - if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) && - u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1) - offset += align(nv30mt->level[l].pitch * u_minify(pt->height0, l), 64); - else - offset += nv30mt->level[l].pitch * u_minify(pt->height0, l); - } - - nv30mt->level[l].image_offset[f] = offset; - offset += nv30mt->level[l].pitch * u_minify(pt->height0, l); - } - - nv30mt->total_size = offset; -} - -static struct pipe_texture * -nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) -{ - struct nvfx_miptree *mt; - unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL | - NOUVEAU_BUFFER_USAGE_TEXTURE; - - mt = MALLOC(sizeof(struct nvfx_miptree)); - if (!mt) - return NULL; - mt->base = *pt; - pipe_reference_init(&mt->base.reference, 1); - mt->base.screen = pscreen; - - /* Swizzled textures must be POT */ - if (pt->width0 & (pt->width0 - 1) || - pt->height0 & (pt->height0 - 1)) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - else - if (pt->tex_usage & (PIPE_TEXTURE_USAGE_SCANOUT | - PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_DEPTH_STENCIL)) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - else - if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - else { - switch (pt->format) { - /* TODO: Figure out which formats can be swizzled */ - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_I8_UNORM: - { - if (debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE)) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - break; - } - default: - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - } - } - - if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) - buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; - - /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. - * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. - * This also happens for small mipmaps of large textures. */ - if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - - nv30_miptree_layout(mt); - - mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, - mt->total_size); - if (!mt->buffer) { - FREE(mt); - return NULL; - } - mt->bo = nouveau_bo(mt->buffer); - - return &mt->base; -} - -static struct pipe_texture * -nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, - const unsigned *stride, struct pipe_buffer *pb) -{ - struct nvfx_miptree *mt; - - /* Only supports 2D, non-mipmapped textures for the moment */ - if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth0 != 1) - return NULL; - - mt = CALLOC_STRUCT(nvfx_miptree); - if (!mt) - return NULL; - - mt->base = *pt; - pipe_reference_init(&mt->base.reference, 1); - mt->base.screen = pscreen; - mt->level[0].pitch = stride[0]; - mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); - - /* Assume whoever created this buffer expects it to be linear for now */ - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - - pipe_buffer_reference(&mt->buffer, pb); - mt->bo = nouveau_bo(mt->buffer); - return &mt->base; -} - -static void -nv30_miptree_destroy(struct pipe_texture *pt) -{ - struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; - int l; - - pipe_buffer_reference(&mt->buffer, NULL); - for (l = 0; l <= pt->last_level; l++) { - if (mt->level[l].image_offset) - FREE(mt->level[l].image_offset); - } - - FREE(mt); -} - -static struct pipe_surface * -nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned flags) -{ - struct nvfx_miptree *nv30mt = (struct nvfx_miptree *)pt; - struct nv04_surface *ns; - - ns = CALLOC_STRUCT(nv04_surface); - if (!ns) - return NULL; - pipe_texture_reference(&ns->base.texture, pt); - ns->base.format = pt->format; - ns->base.width = u_minify(pt->width0, level); - ns->base.height = u_minify(pt->height0, level); - ns->base.usage = flags; - pipe_reference_init(&ns->base.reference, 1); - ns->base.face = face; - ns->base.level = level; - ns->base.zslice = zslice; - ns->pitch = nv30mt->level[level].pitch; - - if (pt->target == PIPE_TEXTURE_CUBE) { - ns->base.offset = nv30mt->level[level].image_offset[face]; - } else - if (pt->target == PIPE_TEXTURE_3D) { - ns->base.offset = nv30mt->level[level].image_offset[zslice]; - } else { - ns->base.offset = nv30mt->level[level].image_offset[0]; - } - - /* create a linear temporary that we can render into if necessary. - * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so - * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ - if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) - return &nv04_surface_wrap_for_render(pscreen, ((struct nvfx_screen*)pscreen)->eng2d, ns)->base; - - return &ns->base; -} - -static void -nv30_miptree_surface_del(struct pipe_surface *ps) -{ - struct nv04_surface* ns = (struct nv04_surface*)ps; - if(ns->backing) - { - struct nvfx_screen* screen = (struct nvfx_screen*)ps->texture->screen; - if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) - screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); - nv30_miptree_surface_del(&ns->backing->base); - } - - pipe_texture_reference(&ps->texture, NULL); - FREE(ps); -} - -void -nv30_screen_init_miptree_functions(struct pipe_screen *pscreen) -{ - pscreen->texture_create = nv30_miptree_create; - pscreen->texture_destroy = nv30_miptree_destroy; - pscreen->get_tex_surface = nv30_miptree_surface_new; - pscreen->tex_surface_destroy = nv30_miptree_surface_del; - - nouveau_screen(pscreen)->texture_blanket = nv30_miptree_blanket; -} diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 305dfa83865..3a48255c18e 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -213,7 +213,7 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) pscreen->is_format_supported = nv30_screen_surface_format_supported; pscreen->context_create = nv30_create; - nv30_screen_init_miptree_functions(pscreen); + nvfx_screen_init_miptree_functions(pscreen); /* 3D object */ switch (dev->chipset & 0xf0) { diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index a101cfc80c7..bf68338e3f3 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -8,7 +8,6 @@ C_SOURCES = \ nv40_draw.c \ nv40_fragprog.c \ nv40_fragtex.c \ - nv40_miptree.c \ nv40_query.c \ nv40_screen.c \ nv40_state.c \ diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 127502dd5fd..fe44452f811 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -7,8 +7,6 @@ extern void nv40_init_state_functions(struct nvfx_context *nvfx); extern void nv40_init_surface_functions(struct nvfx_context *nvfx); extern void nv40_init_query_functions(struct nvfx_context *nvfx); -extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen); - /* nv40_draw.c */ extern struct draw_stage *nv40_draw_render_stage(struct nvfx_context *nvfx); extern void nv40_draw_elements_swtnl(struct pipe_context *pipe, diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c deleted file mode 100644 index caec47058fe..00000000000 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ /dev/null @@ -1,236 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" -#include "util/u_format.h" -#include "util/u_math.h" - -#include "nv40_context.h" -#include "../nouveau/nv04_surface_2d.h" - - - -static void -nv40_miptree_layout(struct nvfx_miptree *mt) -{ - struct pipe_texture *pt = &mt->base; - uint width = pt->width0; - uint offset = 0; - int nr_faces, l, f; - uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | - PIPE_TEXTURE_USAGE_DEPTH_STENCIL | - PIPE_TEXTURE_USAGE_RENDER_TARGET | - PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_SCANOUT); - - if (pt->target == PIPE_TEXTURE_CUBE) { - nr_faces = 6; - } else - if (pt->target == PIPE_TEXTURE_3D) { - nr_faces = pt->depth0; - } else { - nr_faces = 1; - } - - for (l = 0; l <= pt->last_level; l++) { - if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) - mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64); - else - mt->level[l].pitch = util_format_get_stride(pt->format, width); - - mt->level[l].image_offset = - CALLOC(nr_faces, sizeof(unsigned)); - - width = u_minify(width, 1); - } - - for (f = 0; f < nr_faces; f++) { - for (l = 0; l < pt->last_level; l++) { - mt->level[l].image_offset[f] = offset; - - if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) && - u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1) - offset += align(mt->level[l].pitch * u_minify(pt->height0, l), 64); - else - offset += mt->level[l].pitch * u_minify(pt->height0, l); - } - - mt->level[l].image_offset[f] = offset; - offset += mt->level[l].pitch * u_minify(pt->height0, l); - } - - mt->total_size = offset; -} - -static struct pipe_texture * -nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) -{ - struct nvfx_miptree *mt; - unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL | - NOUVEAU_BUFFER_USAGE_TEXTURE; - - mt = MALLOC(sizeof(struct nvfx_miptree)); - if (!mt) - return NULL; - mt->base = *pt; - pipe_reference_init(&mt->base.reference, 1); - mt->base.screen = pscreen; - - /* Swizzled textures must be POT */ - if (pt->width0 & (pt->width0 - 1) || - pt->height0 & (pt->height0 - 1)) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - else - if (pt->tex_usage & (PIPE_TEXTURE_USAGE_SCANOUT | - PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_DEPTH_STENCIL)) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - else - if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - else { - switch (pt->format) { - /* TODO: Figure out which formats can be swizzled */ - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_R16_SNORM: - { - if (debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE)) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - break; - } - default: - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - } - } - - if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) - buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; - - /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. - * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. - * This also happens for small mipmaps of large textures. */ - if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - - nv40_miptree_layout(mt); - - mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, mt->total_size); - if (!mt->buffer) { - FREE(mt); - return NULL; - } - mt->bo = nouveau_bo(mt->buffer); - return &mt->base; -} - -static struct pipe_texture * -nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, - const unsigned *stride, struct pipe_buffer *pb) -{ - struct nvfx_miptree *mt; - - /* Only supports 2D, non-mipmapped textures for the moment */ - if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth0 != 1) - return NULL; - - mt = CALLOC_STRUCT(nvfx_miptree); - if (!mt) - return NULL; - - mt->base = *pt; - pipe_reference_init(&mt->base.reference, 1); - mt->base.screen = pscreen; - mt->level[0].pitch = stride[0]; - mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); - - /* Assume whoever created this buffer expects it to be linear for now */ - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - - pipe_buffer_reference(&mt->buffer, pb); - mt->bo = nouveau_bo(mt->buffer); - return &mt->base; -} - -static void -nv40_miptree_destroy(struct pipe_texture *pt) -{ - struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; - int l; - - pipe_buffer_reference(&mt->buffer, NULL); - for (l = 0; l <= pt->last_level; l++) { - if (mt->level[l].image_offset) - FREE(mt->level[l].image_offset); - } - - FREE(mt); -} - -static struct pipe_surface * -nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned flags) -{ - struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; - struct nv04_surface *ns; - - ns = CALLOC_STRUCT(nv04_surface); - if (!ns) - return NULL; - pipe_texture_reference(&ns->base.texture, pt); - ns->base.format = pt->format; - ns->base.width = u_minify(pt->width0, level); - ns->base.height = u_minify(pt->height0, level); - ns->base.usage = flags; - pipe_reference_init(&ns->base.reference, 1); - ns->base.face = face; - ns->base.level = level; - ns->base.zslice = zslice; - ns->pitch = mt->level[level].pitch; - - if (pt->target == PIPE_TEXTURE_CUBE) { - ns->base.offset = mt->level[level].image_offset[face]; - } else - if (pt->target == PIPE_TEXTURE_3D) { - ns->base.offset = mt->level[level].image_offset[zslice]; - } else { - ns->base.offset = mt->level[level].image_offset[0]; - } - - /* create a linear temporary that we can render into if necessary. - * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so - * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ - if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) - return &nv04_surface_wrap_for_render(pscreen, ((struct nvfx_screen*)pscreen)->eng2d, ns)->base; - - return &ns->base; -} - -static void -nv40_miptree_surface_del(struct pipe_surface *ps) -{ - struct nv04_surface* ns = (struct nv04_surface*)ps; - if(ns->backing) - { - struct nvfx_screen* screen = (struct nvfx_screen*)ps->texture->screen; - if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) - screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); - nv40_miptree_surface_del(&ns->backing->base); - } - - pipe_texture_reference(&ps->texture, NULL); - FREE(ps); -} - -void -nv40_screen_init_miptree_functions(struct pipe_screen *pscreen) -{ - pscreen->texture_create = nv40_miptree_create; - pscreen->texture_destroy = nv40_miptree_destroy; - pscreen->get_tex_surface = nv40_miptree_surface_new; - pscreen->tex_surface_destroy = nv40_miptree_surface_del; - - nouveau_screen(pscreen)->texture_blanket = nv40_miptree_blanket; -} - diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index c64864d58db..0603b7d1c1b 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -203,7 +203,7 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) pscreen->is_format_supported = nv40_screen_surface_format_supported; pscreen->context_create = nv40_create; - nv40_screen_init_miptree_functions(pscreen); + nvfx_screen_init_miptree_functions(pscreen); /* 3D object */ switch (dev->chipset & 0xf0) { diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index e96c9aa6bcf..5073168604f 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -6,6 +6,7 @@ LIBNAME = nvfx C_SOURCES = \ nvfx_clear.c \ nvfx_state_emit.c \ + nvfx_miptree.c \ nvfx_state_blend.c \ nvfx_state_rasterizer.c \ nvfx_state_scissor.c \ diff --git a/src/gallium/drivers/nvfx/nvfx_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c new file mode 100644 index 00000000000..5dced8dae47 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_miptree.c @@ -0,0 +1,247 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_math.h" + +#include "nvfx_context.h" +#include "../nouveau/nv04_surface_2d.h" + + + +static void +nvfx_miptree_layout(struct nvfx_miptree *mt) +{ + struct pipe_texture *pt = &mt->base; + uint width = pt->width0; + uint offset = 0; + int nr_faces, l, f; + uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | + PIPE_TEXTURE_USAGE_DEPTH_STENCIL | + PIPE_TEXTURE_USAGE_RENDER_TARGET | + PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + PIPE_TEXTURE_USAGE_SCANOUT); + + if (pt->target == PIPE_TEXTURE_CUBE) { + nr_faces = 6; + } else + if (pt->target == PIPE_TEXTURE_3D) { + nr_faces = pt->depth0; + } else { + nr_faces = 1; + } + + for (l = 0; l <= pt->last_level; l++) { + if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) + mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64); + else + mt->level[l].pitch = util_format_get_stride(pt->format, width); + + mt->level[l].image_offset = + CALLOC(nr_faces, sizeof(unsigned)); + + width = u_minify(width, 1); + } + + for (f = 0; f < nr_faces; f++) { + for (l = 0; l < pt->last_level; l++) { + mt->level[l].image_offset[f] = offset; + + if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) && + u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1) + offset += align(mt->level[l].pitch * u_minify(pt->height0, l), 64); + else + offset += mt->level[l].pitch * u_minify(pt->height0, l); + } + + mt->level[l].image_offset[f] = offset; + offset += mt->level[l].pitch * u_minify(pt->height0, l); + } + + mt->total_size = offset; +} + +static struct pipe_texture * +nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) +{ + struct nvfx_miptree *mt; + unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL | + NOUVEAU_BUFFER_USAGE_TEXTURE; + + mt = MALLOC(sizeof(struct nvfx_miptree)); + if (!mt) + return NULL; + mt->base = *pt; + pipe_reference_init(&mt->base.reference, 1); + mt->base.screen = pscreen; + + /* Swizzled textures must be POT */ + if (pt->width0 & (pt->width0 - 1) || + pt->height0 & (pt->height0 - 1)) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else + if (pt->tex_usage & (PIPE_TEXTURE_USAGE_SCANOUT | + PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + PIPE_TEXTURE_USAGE_DEPTH_STENCIL)) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else + if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else { + switch (pt->format) { + case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_I8_UNORM: + /* TODO: we can actually swizzle these formats on nv40, we + are just preserving the pre-unification behavior. + The whole 2D code is going to be rewritten anyway. */ + if(nvfx_screen(pscreen)->is_nv4x) { + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + break; + } + /* TODO: Figure out which formats can be swizzled */ + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_R16_SNORM: + { + if (debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE)) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + break; + } + default: + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + } + } + + if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) + buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + + /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. + * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. + * This also happens for small mipmaps of large textures. */ + if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + + nvfx_miptree_layout(mt); + + mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, mt->total_size); + if (!mt->buffer) { + FREE(mt); + return NULL; + } + mt->bo = nouveau_bo(mt->buffer); + return &mt->base; +} + +static struct pipe_texture * +nvfx_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, + const unsigned *stride, struct pipe_buffer *pb) +{ + struct nvfx_miptree *mt; + + /* Only supports 2D, non-mipmapped textures for the moment */ + if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || + pt->depth0 != 1) + return NULL; + + mt = CALLOC_STRUCT(nvfx_miptree); + if (!mt) + return NULL; + + mt->base = *pt; + pipe_reference_init(&mt->base.reference, 1); + mt->base.screen = pscreen; + mt->level[0].pitch = stride[0]; + mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + + /* Assume whoever created this buffer expects it to be linear for now */ + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + + pipe_buffer_reference(&mt->buffer, pb); + mt->bo = nouveau_bo(mt->buffer); + return &mt->base; +} + +static void +nvfx_miptree_destroy(struct pipe_texture *pt) +{ + struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; + int l; + + pipe_buffer_reference(&mt->buffer, NULL); + for (l = 0; l <= pt->last_level; l++) { + if (mt->level[l].image_offset) + FREE(mt->level[l].image_offset); + } + + FREE(mt); +} + +static struct pipe_surface * +nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) +{ + struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; + struct nv04_surface *ns; + + ns = CALLOC_STRUCT(nv04_surface); + if (!ns) + return NULL; + pipe_texture_reference(&ns->base.texture, pt); + ns->base.format = pt->format; + ns->base.width = u_minify(pt->width0, level); + ns->base.height = u_minify(pt->height0, level); + ns->base.usage = flags; + pipe_reference_init(&ns->base.reference, 1); + ns->base.face = face; + ns->base.level = level; + ns->base.zslice = zslice; + ns->pitch = mt->level[level].pitch; + + if (pt->target == PIPE_TEXTURE_CUBE) { + ns->base.offset = mt->level[level].image_offset[face]; + } else + if (pt->target == PIPE_TEXTURE_3D) { + ns->base.offset = mt->level[level].image_offset[zslice]; + } else { + ns->base.offset = mt->level[level].image_offset[0]; + } + + /* create a linear temporary that we can render into if necessary. + * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so + * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ + if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) + return &nv04_surface_wrap_for_render(pscreen, ((struct nvfx_screen*)pscreen)->eng2d, ns)->base; + + return &ns->base; +} + +static void +nvfx_miptree_surface_del(struct pipe_surface *ps) +{ + struct nv04_surface* ns = (struct nv04_surface*)ps; + if(ns->backing) + { + struct nvfx_screen* screen = (struct nvfx_screen*)ps->texture->screen; + if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) + screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); + nvfx_miptree_surface_del(&ns->backing->base); + } + + pipe_texture_reference(&ps->texture, NULL); + FREE(ps); +} + +void +nvfx_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nvfx_miptree_create; + pscreen->texture_destroy = nvfx_miptree_destroy; + pscreen->get_tex_surface = nvfx_miptree_surface_new; + pscreen->tex_surface_destroy = nvfx_miptree_surface_del; + + nouveau_screen(pscreen)->texture_blanket = nvfx_miptree_blanket; +} -- cgit v1.2.3 From 9937116c7b15468088a224da478d927347a76f32 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sat, 20 Feb 2010 19:39:24 +0100 Subject: nv30, nv40: unify nv[34]0_query.c The files are identical except formatting. --- src/gallium/drivers/nv30/Makefile | 2 - src/gallium/drivers/nv30/nv30_context.c | 4 +- src/gallium/drivers/nv30/nv30_context.h | 2 - src/gallium/drivers/nv30/nv30_query.c | 127 -------------------------------- src/gallium/drivers/nv30/nv30_surface.c | 62 ---------------- src/gallium/drivers/nv40/Makefile | 2 - src/gallium/drivers/nv40/nv40_context.c | 4 +- src/gallium/drivers/nv40/nv40_context.h | 2 - src/gallium/drivers/nv40/nv40_query.c | 127 -------------------------------- src/gallium/drivers/nv40/nv40_surface.c | 64 ---------------- src/gallium/drivers/nvfx/Makefile | 2 + src/gallium/drivers/nvfx/nvfx_context.h | 3 + src/gallium/drivers/nvfx/nvfx_query.c | 127 ++++++++++++++++++++++++++++++++ src/gallium/drivers/nvfx/nvfx_surface.c | 62 ++++++++++++++++ 14 files changed, 198 insertions(+), 392 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_query.c delete mode 100644 src/gallium/drivers/nv30/nv30_surface.c delete mode 100644 src/gallium/drivers/nv40/nv40_query.c delete mode 100644 src/gallium/drivers/nv40/nv40_surface.c create mode 100644 src/gallium/drivers/nvfx/nvfx_query.c create mode 100644 src/gallium/drivers/nvfx/nvfx_surface.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index d8de297f128..4f9798de0c6 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -8,12 +8,10 @@ C_SOURCES = \ nv30_draw.c \ nv30_fragprog.c \ nv30_fragtex.c \ - nv30_query.c \ nv30_screen.c \ nv30_state.c \ nv30_state_fb.c \ nv30_state_viewport.c \ - nv30_surface.c \ nv30_vbo.c \ nv30_vertprog.c diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 6fe8cb3e324..f13458d50a3 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -73,8 +73,8 @@ nv30_create(struct pipe_screen *pscreen, void *priv) nvfx->is_nv4x = screen->is_nv4x; - nv30_init_query_functions(nvfx); - nv30_init_surface_functions(nvfx); + nvfx_init_query_functions(nvfx); + nvfx_init_surface_functions(nvfx); nv30_init_state_functions(nvfx); nvfx_init_transfer_functions(nvfx); diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 46b36ee2e50..a0a1c335ec1 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -4,8 +4,6 @@ #include "nvfx_context.h" extern void nv30_init_state_functions(struct nvfx_context *nvfx); -extern void nv30_init_surface_functions(struct nvfx_context *nvfx); -extern void nv30_init_query_functions(struct nvfx_context *nvfx); /* nv30_draw.c */ extern struct draw_stage *nv30_draw_render_stage(struct nvfx_context *nvfx); diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c deleted file mode 100644 index 53b11a89430..00000000000 --- a/src/gallium/drivers/nv30/nv30_query.c +++ /dev/null @@ -1,127 +0,0 @@ -#include "pipe/p_context.h" - -#include "nv30_context.h" - -struct nv30_query { - struct nouveau_resource *object; - unsigned type; - boolean ready; - uint64_t result; -}; - -static INLINE struct nv30_query * -nv30_query(struct pipe_query *pipe) -{ - return (struct nv30_query *)pipe; -} - -static struct pipe_query * -nv30_query_create(struct pipe_context *pipe, unsigned query_type) -{ - struct nv30_query *q; - - q = CALLOC(1, sizeof(struct nv30_query)); - q->type = query_type; - - return (struct pipe_query *)q; -} - -static void -nv30_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv30_query *q = nv30_query(pq); - - if (q->object) - nouveau_resource_free(&q->object); - FREE(q); -} - -static void -nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nv30_query *q = nv30_query(pq); - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - - assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); - - /* Happens when end_query() is called, then another begin_query() - * without querying the result in-between. For now we'll wait for - * the existing query to notify completion, but it could be better. - */ - if (q->object) { - uint64_t tmp; - pipe->get_query_result(pipe, pq, 1, &tmp); - } - - if (nouveau_resource_alloc(nvfx->screen->query_heap, 1, NULL, &q->object)) - assert(0); - nouveau_notifier_reset(nvfx->screen->query, q->object->start); - - BEGIN_RING(chan, eng3d, NV34TCL_QUERY_RESET, 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, eng3d, NV34TCL_QUERY_UNK17CC, 1); - OUT_RING (chan, 1); - - q->ready = FALSE; -} - -static void -nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - struct nv30_query *q = nv30_query(pq); - - BEGIN_RING(chan, eng3d, NV34TCL_QUERY_GET, 1); - OUT_RING (chan, (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | - ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(chan); -} - -static boolean -nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq, - boolean wait, uint64_t *result) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nv30_query *q = nv30_query(pq); - - assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); - - if (!q->ready) { - unsigned status; - - status = nouveau_notifier_status(nvfx->screen->query, - q->object->start); - if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { - if (wait == FALSE) - return FALSE; - - nouveau_notifier_wait_status(nvfx->screen->query, - q->object->start, - NV_NOTIFY_STATE_STATUS_COMPLETED, 0); - } - - q->result = nouveau_notifier_return_val(nvfx->screen->query, - q->object->start); - q->ready = TRUE; - nouveau_resource_free(&q->object); - } - - *result = q->result; - return TRUE; -} - -void -nv30_init_query_functions(struct nvfx_context *nvfx) -{ - nvfx->pipe.create_query = nv30_query_create; - nvfx->pipe.destroy_query = nv30_query_destroy; - nvfx->pipe.begin_query = nv30_query_begin; - nvfx->pipe.end_query = nv30_query_end; - nvfx->pipe.get_query_result = nv30_query_result; -} diff --git a/src/gallium/drivers/nv30/nv30_surface.c b/src/gallium/drivers/nv30/nv30_surface.c deleted file mode 100644 index 613a9fa4921..00000000000 --- a/src/gallium/drivers/nv30/nv30_surface.c +++ /dev/null @@ -1,62 +0,0 @@ - -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "nv30_context.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" -#include "util/u_tile.h" - -static void -nv30_surface_copy(struct pipe_context *pipe, - struct pipe_surface *dest, unsigned destx, unsigned desty, - struct pipe_surface *src, unsigned srcx, unsigned srcy, - unsigned width, unsigned height) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nv04_surface_2d *eng2d = nvfx->screen->eng2d; - - eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); -} - -static void -nv30_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, - unsigned destx, unsigned desty, unsigned width, - unsigned height, unsigned value) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nv04_surface_2d *eng2d = nvfx->screen->eng2d; - - eng2d->fill(eng2d, dest, destx, desty, width, height, value); -} - -void -nv30_init_surface_functions(struct nvfx_context *nvfx) -{ - nvfx->pipe.surface_copy = nv30_surface_copy; - nvfx->pipe.surface_fill = nv30_surface_fill; -} diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index bf68338e3f3..0b3607be897 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -8,12 +8,10 @@ C_SOURCES = \ nv40_draw.c \ nv40_fragprog.c \ nv40_fragtex.c \ - nv40_query.c \ nv40_screen.c \ nv40_state.c \ nv40_state_fb.c \ nv40_state_viewport.c \ - nv40_surface.c \ nv40_vbo.c \ nv40_vertprog.c diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index 12f57377cde..441b038b052 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -73,8 +73,8 @@ nv40_create(struct pipe_screen *pscreen, void *priv) nvfx->is_nv4x = screen->is_nv4x; - nv40_init_query_functions(nvfx); - nv40_init_surface_functions(nvfx); + nvfx_init_query_functions(nvfx); + nvfx_init_surface_functions(nvfx); nv40_init_state_functions(nvfx); nvfx_init_transfer_functions(nvfx); diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index fe44452f811..4353d78cd23 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -4,8 +4,6 @@ #include "nvfx_context.h" extern void nv40_init_state_functions(struct nvfx_context *nvfx); -extern void nv40_init_surface_functions(struct nvfx_context *nvfx); -extern void nv40_init_query_functions(struct nvfx_context *nvfx); /* nv40_draw.c */ extern struct draw_stage *nv40_draw_render_stage(struct nvfx_context *nvfx); diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c deleted file mode 100644 index 48cfc4d593d..00000000000 --- a/src/gallium/drivers/nv40/nv40_query.c +++ /dev/null @@ -1,127 +0,0 @@ -#include "pipe/p_context.h" - -#include "nv40_context.h" - -struct nv40_query { - struct nouveau_resource *object; - unsigned type; - boolean ready; - uint64_t result; -}; - -static INLINE struct nv40_query * -nv40_query(struct pipe_query *pipe) -{ - return (struct nv40_query *)pipe; -} - -static struct pipe_query * -nv40_query_create(struct pipe_context *pipe, unsigned query_type) -{ - struct nv40_query *q; - - q = CALLOC(1, sizeof(struct nv40_query)); - q->type = query_type; - - return (struct pipe_query *)q; -} - -static void -nv40_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv40_query *q = nv40_query(pq); - - if (q->object) - nouveau_resource_free(&q->object); - FREE(q); -} - -static void -nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nv40_query *q = nv40_query(pq); - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - - assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); - - /* Happens when end_query() is called, then another begin_query() - * without querying the result in-between. For now we'll wait for - * the existing query to notify completion, but it could be better. - */ - if (q->object) { - uint64_t tmp; - pipe->get_query_result(pipe, pq, 1, &tmp); - } - - if (nouveau_resource_alloc(nvfx->screen->query_heap, 1, NULL, &q->object)) - assert(0); - nouveau_notifier_reset(nvfx->screen->query, q->object->start); - - BEGIN_RING(chan, eng3d, NV34TCL_QUERY_RESET, 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, eng3d, NV34TCL_QUERY_UNK17CC, 1); - OUT_RING (chan, 1); - - q->ready = FALSE; -} - -static void -nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nv40_query *q = nv40_query(pq); - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - - BEGIN_RING(chan, eng3d, NV34TCL_QUERY_GET, 1); - OUT_RING (chan, (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | - ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(chan); -} - -static boolean -nv40_query_result(struct pipe_context *pipe, struct pipe_query *pq, - boolean wait, uint64_t *result) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nv40_query *q = nv40_query(pq); - - assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); - - if (!q->ready) { - unsigned status; - - status = nouveau_notifier_status(nvfx->screen->query, - q->object->start); - if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { - if (wait == FALSE) - return FALSE; - nouveau_notifier_wait_status(nvfx->screen->query, - q->object->start, - NV_NOTIFY_STATE_STATUS_COMPLETED, - 0); - } - - q->result = nouveau_notifier_return_val(nvfx->screen->query, - q->object->start); - q->ready = TRUE; - nouveau_resource_free(&q->object); - } - - *result = q->result; - return TRUE; -} - -void -nv40_init_query_functions(struct nvfx_context *nvfx) -{ - nvfx->pipe.create_query = nv40_query_create; - nvfx->pipe.destroy_query = nv40_query_destroy; - nvfx->pipe.begin_query = nv40_query_begin; - nvfx->pipe.end_query = nv40_query_end; - nvfx->pipe.get_query_result = nv40_query_result; -} diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/nv40/nv40_surface.c deleted file mode 100644 index 328c23b8b4f..00000000000 --- a/src/gallium/drivers/nv40/nv40_surface.c +++ /dev/null @@ -1,64 +0,0 @@ - -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_defines.h" -#include "util/u_inlines.h" - -#include "util/u_tile.h" - -#include "nv40_context.h" - -static void -nv40_surface_copy(struct pipe_context *pipe, - struct pipe_surface *dest, unsigned destx, unsigned desty, - struct pipe_surface *src, unsigned srcx, unsigned srcy, - unsigned width, unsigned height) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nv04_surface_2d *eng2d = nvfx->screen->eng2d; - - eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); -} - -static void -nv40_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, - unsigned destx, unsigned desty, unsigned width, - unsigned height, unsigned value) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nv04_surface_2d *eng2d = nvfx->screen->eng2d; - - eng2d->fill(eng2d, dest, destx, desty, width, height, value); -} - -void -nv40_init_surface_functions(struct nvfx_context *nvfx) -{ - nvfx->pipe.surface_copy = nv40_surface_copy; - nvfx->pipe.surface_fill = nv40_surface_fill; -} diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index 5073168604f..f51ab1856b6 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -7,11 +7,13 @@ C_SOURCES = \ nvfx_clear.c \ nvfx_state_emit.c \ nvfx_miptree.c \ + nvfx_query.c \ nvfx_state_blend.c \ nvfx_state_rasterizer.c \ nvfx_state_scissor.c \ nvfx_state_stipple.c \ nvfx_state_zsa.c \ + nvfx_surface.c \ nvfx_transfer.c include ../../Makefile.template diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 8f5013a9d6b..5f61d4450d1 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -189,6 +189,9 @@ extern struct nvfx_state_entry nvfx_state_sr; extern struct nvfx_state_entry nvfx_state_stipple; extern struct nvfx_state_entry nvfx_state_zsa; +extern void nvfx_init_query_functions(struct nvfx_context *nvfx); +extern void nvfx_init_surface_functions(struct nvfx_context *nvfx); + /* nvfx_clear.c */ extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, double depth, unsigned stencil); diff --git a/src/gallium/drivers/nvfx/nvfx_query.c b/src/gallium/drivers/nvfx/nvfx_query.c new file mode 100644 index 00000000000..acbaf75a236 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_query.c @@ -0,0 +1,127 @@ +#include "pipe/p_context.h" + +#include "nvfx_context.h" + +struct nvfx_query { + struct nouveau_resource *object; + unsigned type; + boolean ready; + uint64_t result; +}; + +static INLINE struct nvfx_query * +nvfx_query(struct pipe_query *pipe) +{ + return (struct nvfx_query *)pipe; +} + +static struct pipe_query * +nvfx_query_create(struct pipe_context *pipe, unsigned query_type) +{ + struct nvfx_query *q; + + q = CALLOC(1, sizeof(struct nvfx_query)); + q->type = query_type; + + return (struct pipe_query *)q; +} + +static void +nvfx_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nvfx_query *q = nvfx_query(pq); + + if (q->object) + nouveau_resource_free(&q->object); + FREE(q); +} + +static void +nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_query *q = nvfx_query(pq); + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + + assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + /* Happens when end_query() is called, then another begin_query() + * without querying the result in-between. For now we'll wait for + * the existing query to notify completion, but it could be better. + */ + if (q->object) { + uint64_t tmp; + pipe->get_query_result(pipe, pq, 1, &tmp); + } + + if (nouveau_resource_alloc(nvfx->screen->query_heap, 1, NULL, &q->object)) + assert(0); + nouveau_notifier_reset(nvfx->screen->query, q->object->start); + + BEGIN_RING(chan, eng3d, NV34TCL_QUERY_RESET, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, eng3d, NV34TCL_QUERY_UNK17CC, 1); + OUT_RING (chan, 1); + + q->ready = FALSE; +} + +static void +nvfx_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + struct nvfx_query *q = nvfx_query(pq); + + BEGIN_RING(chan, eng3d, NV34TCL_QUERY_GET, 1); + OUT_RING (chan, (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | + ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT)); + FIRE_RING(chan); +} + +static boolean +nvfx_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, uint64_t *result) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_query *q = nvfx_query(pq); + + assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + if (!q->ready) { + unsigned status; + + status = nouveau_notifier_status(nvfx->screen->query, + q->object->start); + if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { + if (wait == FALSE) + return FALSE; + + nouveau_notifier_wait_status(nvfx->screen->query, + q->object->start, + NV_NOTIFY_STATE_STATUS_COMPLETED, 0); + } + + q->result = nouveau_notifier_return_val(nvfx->screen->query, + q->object->start); + q->ready = TRUE; + nouveau_resource_free(&q->object); + } + + *result = q->result; + return TRUE; +} + +void +nvfx_init_query_functions(struct nvfx_context *nvfx) +{ + nvfx->pipe.create_query = nvfx_query_create; + nvfx->pipe.destroy_query = nvfx_query_destroy; + nvfx->pipe.begin_query = nvfx_query_begin; + nvfx->pipe.end_query = nvfx_query_end; + nvfx->pipe.get_query_result = nvfx_query_result; +} diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c new file mode 100644 index 00000000000..8a05ad0a571 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_surface.c @@ -0,0 +1,62 @@ + +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "nvfx_context.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_tile.h" + +static void +nvfx_surface_copy(struct pipe_context *pipe, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nv04_surface_2d *eng2d = nvfx->screen->eng2d; + + eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); +} + +static void +nvfx_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nv04_surface_2d *eng2d = nvfx->screen->eng2d; + + eng2d->fill(eng2d, dest, destx, desty, width, height, value); +} + +void +nvfx_init_surface_functions(struct nvfx_context *nvfx) +{ + nvfx->pipe.surface_copy = nvfx_surface_copy; + nvfx->pipe.surface_fill = nvfx_surface_fill; +} -- cgit v1.2.3 From 4d93ef1a015c8ec7341b0793e87ec375014436b3 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sat, 20 Feb 2010 23:30:59 +0100 Subject: nv30, nv40: unify nv[34]0_state_viewport.c The files are identical, except for an extra comment in nv30. --- src/gallium/drivers/nv30/Makefile | 1 - src/gallium/drivers/nv30/nv30_context.h | 1 - src/gallium/drivers/nv30/nv30_state_viewport.c | 42 -------------------------- src/gallium/drivers/nv40/Makefile | 1 - src/gallium/drivers/nv40/nv40_context.h | 1 - src/gallium/drivers/nv40/nv40_state_viewport.c | 38 ----------------------- src/gallium/drivers/nvfx/Makefile | 1 + src/gallium/drivers/nvfx/nvfx_context.h | 1 + src/gallium/drivers/nvfx/nvfx_state_emit.c | 2 +- src/gallium/drivers/nvfx/nvfx_state_viewport.c | 38 +++++++++++++++++++++++ 10 files changed, 41 insertions(+), 85 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_state_viewport.c delete mode 100644 src/gallium/drivers/nv40/nv40_state_viewport.c create mode 100644 src/gallium/drivers/nvfx/nvfx_state_viewport.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index 4f9798de0c6..6ee0d8f248a 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -11,7 +11,6 @@ C_SOURCES = \ nv30_screen.c \ nv30_state.c \ nv30_state_fb.c \ - nv30_state_viewport.c \ nv30_vbo.c \ nv30_vertprog.c diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index a0a1c335ec1..37def47b61d 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -22,7 +22,6 @@ extern void nv30_fragtex_bind(struct nvfx_context *); /* nv30_state.c and friends */ extern struct nvfx_state_entry nv30_state_fragprog; extern struct nvfx_state_entry nv30_state_vertprog; -extern struct nvfx_state_entry nv30_state_viewport; extern struct nvfx_state_entry nv30_state_framebuffer; extern struct nvfx_state_entry nv30_state_fragtex; extern struct nvfx_state_entry nv30_state_vbo; diff --git a/src/gallium/drivers/nv30/nv30_state_viewport.c b/src/gallium/drivers/nv30/nv30_state_viewport.c deleted file mode 100644 index 2e9d5b14c7e..00000000000 --- a/src/gallium/drivers/nv30/nv30_state_viewport.c +++ /dev/null @@ -1,42 +0,0 @@ -#include "nv30_context.h" - -static boolean -nv30_state_viewport_validate(struct nvfx_context *nvfx) -{ - struct pipe_viewport_state *vpt = &nvfx->viewport; - struct nouveau_stateobj *so; - - if (nvfx->state.hw[NVFX_STATE_VIEWPORT] && - !(nvfx->dirty & NVFX_NEW_VIEWPORT)) - return FALSE; - - so = so_new(3, 10, 0); - so_method(so, nvfx->screen->eng3d, - NV34TCL_VIEWPORT_TRANSLATE_X, 8); - so_data (so, fui(vpt->translate[0])); - so_data (so, fui(vpt->translate[1])); - so_data (so, fui(vpt->translate[2])); - so_data (so, fui(vpt->translate[3])); - so_data (so, fui(vpt->scale[0])); - so_data (so, fui(vpt->scale[1])); - so_data (so, fui(vpt->scale[2])); - so_data (so, fui(vpt->scale[3])); -/* so_method(so, nvfx->screen->eng3d, 0x1d78, 1); - so_data (so, 1); -*/ - /* TODO/FIXME: never saw value 0x0110 in renouveau dumps, only 0x0001 */ - so_method(so, nvfx->screen->eng3d, 0x1d78, 1); - so_data (so, 1); - - so_ref(so, &nvfx->state.hw[NVFX_STATE_VIEWPORT]); - so_ref(NULL, &so); - return TRUE; -} - -struct nvfx_state_entry nv30_state_viewport = { - .validate = nv30_state_viewport_validate, - .dirty = { - .pipe = NVFX_NEW_VIEWPORT | NVFX_NEW_RAST, - .hw = NVFX_STATE_VIEWPORT - } -}; diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 0b3607be897..85d6ce98861 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -11,7 +11,6 @@ C_SOURCES = \ nv40_screen.c \ nv40_state.c \ nv40_state_fb.c \ - nv40_state_viewport.c \ nv40_vbo.c \ nv40_vertprog.c diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 4353d78cd23..a3000eeca04 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -26,7 +26,6 @@ extern void nv40_fragtex_bind(struct nvfx_context *); /* nv40_state.c and friends */ extern struct nvfx_state_entry nv40_state_fragprog; extern struct nvfx_state_entry nv40_state_vertprog; -extern struct nvfx_state_entry nv40_state_viewport; extern struct nvfx_state_entry nv40_state_framebuffer; extern struct nvfx_state_entry nv40_state_fragtex; extern struct nvfx_state_entry nv40_state_vbo; diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c deleted file mode 100644 index bf73e1119e6..00000000000 --- a/src/gallium/drivers/nv40/nv40_state_viewport.c +++ /dev/null @@ -1,38 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_viewport_validate(struct nvfx_context *nvfx) -{ - struct pipe_viewport_state *vpt = &nvfx->viewport; - struct nouveau_stateobj *so; - - if (nvfx->state.hw[NVFX_STATE_VIEWPORT] && - !(nvfx->dirty & NVFX_NEW_VIEWPORT)) - return FALSE; - - so = so_new(2, 9, 0); - so_method(so, nvfx->screen->eng3d, - NV34TCL_VIEWPORT_TRANSLATE_X, 8); - so_data (so, fui(vpt->translate[0])); - so_data (so, fui(vpt->translate[1])); - so_data (so, fui(vpt->translate[2])); - so_data (so, fui(vpt->translate[3])); - so_data (so, fui(vpt->scale[0])); - so_data (so, fui(vpt->scale[1])); - so_data (so, fui(vpt->scale[2])); - so_data (so, fui(vpt->scale[3])); - so_method(so, nvfx->screen->eng3d, 0x1d78, 1); - so_data (so, 1); - - so_ref(so, &nvfx->state.hw[NVFX_STATE_VIEWPORT]); - so_ref(NULL, &so); - return TRUE; -} - -struct nvfx_state_entry nv40_state_viewport = { - .validate = nv40_state_viewport_validate, - .dirty = { - .pipe = NVFX_NEW_VIEWPORT | NVFX_NEW_RAST, - .hw = NVFX_STATE_VIEWPORT - } -}; diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index f51ab1856b6..2f198d9d1a2 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -12,6 +12,7 @@ C_SOURCES = \ nvfx_state_rasterizer.c \ nvfx_state_scissor.c \ nvfx_state_stipple.c \ + nvfx_state_viewport.c \ nvfx_state_zsa.c \ nvfx_surface.c \ nvfx_transfer.c diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 5f61d4450d1..b6e6cf8fd95 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -187,6 +187,7 @@ extern struct nvfx_state_entry nvfx_state_rasterizer; extern struct nvfx_state_entry nvfx_state_scissor; extern struct nvfx_state_entry nvfx_state_sr; extern struct nvfx_state_entry nvfx_state_stipple; +extern struct nvfx_state_entry nvfx_state_viewport; extern struct nvfx_state_entry nvfx_state_zsa; extern void nvfx_init_query_functions(struct nvfx_context *nvfx); diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index 04c0429ce19..b86cb44936a 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -16,7 +16,7 @@ static struct nvfx_state_entry *name##_render_states[] = { \ &nvfx_state_blend_colour, \ &nvfx_state_zsa, \ &nvfx_state_sr, \ - &nvxx##_state_viewport, \ + &nvfx_state_viewport, \ &nvxx##_state_##vbo, \ NULL \ } diff --git a/src/gallium/drivers/nvfx/nvfx_state_viewport.c b/src/gallium/drivers/nvfx/nvfx_state_viewport.c new file mode 100644 index 00000000000..72057a80f84 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_viewport.c @@ -0,0 +1,38 @@ +#include "nvfx_context.h" + +static boolean +nvfx_state_viewport_validate(struct nvfx_context *nvfx) +{ + struct pipe_viewport_state *vpt = &nvfx->viewport; + struct nouveau_stateobj *so; + + if (nvfx->state.hw[NVFX_STATE_VIEWPORT] && + !(nvfx->dirty & NVFX_NEW_VIEWPORT)) + return FALSE; + + so = so_new(2, 9, 0); + so_method(so, nvfx->screen->eng3d, + NV34TCL_VIEWPORT_TRANSLATE_X, 8); + so_data (so, fui(vpt->translate[0])); + so_data (so, fui(vpt->translate[1])); + so_data (so, fui(vpt->translate[2])); + so_data (so, fui(vpt->translate[3])); + so_data (so, fui(vpt->scale[0])); + so_data (so, fui(vpt->scale[1])); + so_data (so, fui(vpt->scale[2])); + so_data (so, fui(vpt->scale[3])); + so_method(so, nvfx->screen->eng3d, 0x1d78, 1); + so_data (so, 1); + + so_ref(so, &nvfx->state.hw[NVFX_STATE_VIEWPORT]); + so_ref(NULL, &so); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_viewport = { + .validate = nvfx_state_viewport_validate, + .dirty = { + .pipe = NVFX_NEW_VIEWPORT | NVFX_NEW_RAST, + .hw = NVFX_STATE_VIEWPORT + } +}; -- cgit v1.2.3 From cd0d03adab65a9586e0c5d60e9ee487677914f3b Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 21 Feb 2010 00:59:30 +0100 Subject: nv30, nv40: non-trivially unify nv[34]0_state_fb.c The files are significantly different due to: 1. nv30 support 2 render targets, nv40 4 2. z-buffer pitch is set differently 3. nv30 has a limitation of colour_bits >= zeta_bits. This may not actually exist in the driver though 4. nv30 points color0 at depth in the depth-only case 5. nv30 sets NV34TCL_VIEWPORT_TX_ORIGIN to 0. This is probably unnecessary This patch attempts to unify the two files and preserve the existing behavior. --- src/gallium/drivers/nv30/Makefile | 1 - src/gallium/drivers/nv30/nv30_context.h | 1 - src/gallium/drivers/nv30/nv30_state_fb.c | 173 --------------------- src/gallium/drivers/nv40/Makefile | 1 - src/gallium/drivers/nv40/nv40_context.h | 1 - src/gallium/drivers/nv40/nv40_state_fb.c | 175 --------------------- src/gallium/drivers/nvfx/Makefile | 1 + src/gallium/drivers/nvfx/nvfx_context.h | 1 + src/gallium/drivers/nvfx/nvfx_state_emit.c | 2 +- src/gallium/drivers/nvfx/nvfx_state_fb.c | 234 +++++++++++++++++++++++++++++ 10 files changed, 237 insertions(+), 353 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_state_fb.c delete mode 100644 src/gallium/drivers/nv40/nv40_state_fb.c create mode 100644 src/gallium/drivers/nvfx/nvfx_state_fb.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index 6ee0d8f248a..b5728a34f83 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -10,7 +10,6 @@ C_SOURCES = \ nv30_fragtex.c \ nv30_screen.c \ nv30_state.c \ - nv30_state_fb.c \ nv30_vbo.c \ nv30_vertprog.c diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 37def47b61d..c203425cc91 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -22,7 +22,6 @@ extern void nv30_fragtex_bind(struct nvfx_context *); /* nv30_state.c and friends */ extern struct nvfx_state_entry nv30_state_fragprog; extern struct nvfx_state_entry nv30_state_vertprog; -extern struct nvfx_state_entry nv30_state_framebuffer; extern struct nvfx_state_entry nv30_state_fragtex; extern struct nvfx_state_entry nv30_state_vbo; diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c deleted file mode 100644 index e9e215dccea..00000000000 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ /dev/null @@ -1,173 +0,0 @@ -#include "nv30_context.h" -#include "nouveau/nouveau_util.h" - -static boolean -nv30_state_framebuffer_validate(struct nvfx_context *nvfx) -{ - struct pipe_framebuffer_state *fb = &nvfx->framebuffer; - struct nouveau_channel *chan = nvfx->screen->base.channel; - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - struct nv04_surface *rt[2], *zeta = NULL; - uint32_t rt_enable = 0, rt_format = 0; - int i, colour_format = 0, zeta_format = 0, depth_only = 0; - struct nouveau_stateobj *so = so_new(12, 18, 10); - unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; - unsigned w = fb->width; - unsigned h = fb->height; - struct nvfx_miptree *nv30mt; - int colour_bits = 32, zeta_bits = 32; - - for (i = 0; i < fb->nr_cbufs; i++) { - if (colour_format) { - assert(colour_format == fb->cbufs[i]->format); - } else { - colour_format = fb->cbufs[i]->format; - rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); - rt[i] = (struct nv04_surface *)fb->cbufs[i]; - } - } - - if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) - rt_enable |= NV34TCL_RT_ENABLE_MRT; - - if (fb->zsbuf) { - zeta_format = fb->zsbuf->format; - zeta = (struct nv04_surface *)fb->zsbuf; - } - - if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0|NV34TCL_RT_ENABLE_COLOR1)) { - /* Render to at least a colour buffer */ - if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { - assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); - for (i = 1; i < fb->nr_cbufs; i++) - assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); - - rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | - (log2i(rt[0]->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | - (log2i(rt[0]->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); - } - else - rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; - } else if (fb->zsbuf) { - depth_only = 1; - - /* Render to depth buffer only */ - if (!(zeta->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { - assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); - - rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | - (log2i(zeta->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | - (log2i(zeta->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); - } - else - rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; - } else { - return FALSE; - } - - switch (colour_format) { - case PIPE_FORMAT_B8G8R8X8_UNORM: - rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8; - break; - case PIPE_FORMAT_B8G8R8A8_UNORM: - case 0: - rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; - break; - case PIPE_FORMAT_B5G6R5_UNORM: - rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; - colour_bits = 16; - break; - default: - assert(0); - } - - switch (zeta_format) { - case PIPE_FORMAT_Z16_UNORM: - rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; - zeta_bits = 16; - break; - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - case 0: - rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8; - break; - default: - assert(0); - } - - if (colour_bits > zeta_bits) { - return FALSE; - } - - if (depth_only || (rt_enable & NV34TCL_RT_ENABLE_COLOR0)) { - struct nv04_surface *rt0 = (depth_only ? zeta : rt[0]); - uint32_t pitch = rt0->pitch; - - if (zeta) { - pitch |= (zeta->pitch << 16); - } else { - pitch |= (pitch << 16); - } - - nv30mt = (struct nvfx_miptree *) rt0->base.texture; - so_method(so, eng3d, NV34TCL_DMA_COLOR0, 1); - so_reloc (so, nouveau_bo(nv30mt->buffer), 0, rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, eng3d, NV34TCL_COLOR0_PITCH, 2); - so_data (so, pitch); - so_reloc (so, nouveau_bo(nv30mt->buffer), rt0->base.offset, - rt_flags | NOUVEAU_BO_LOW, 0, 0); - } - - if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { - nv30mt = (struct nvfx_miptree *)rt[1]->base.texture; - so_method(so, eng3d, NV34TCL_DMA_COLOR1, 1); - so_reloc (so, nouveau_bo(nv30mt->buffer), 0, rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, eng3d, NV34TCL_COLOR1_OFFSET, 2); - so_reloc (so, nouveau_bo(nv30mt->buffer), rt[1]->base.offset, - rt_flags | NOUVEAU_BO_LOW, 0, 0); - so_data (so, rt[1]->pitch); - } - - if (zeta_format) { - nv30mt = (struct nvfx_miptree *)zeta->base.texture; - so_method(so, eng3d, NV34TCL_DMA_ZETA, 1); - so_reloc (so, nouveau_bo(nv30mt->buffer), 0, rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, eng3d, NV34TCL_ZETA_OFFSET, 1); - so_reloc (so, nouveau_bo(nv30mt->buffer), zeta->base.offset, - rt_flags | NOUVEAU_BO_LOW, 0, 0); - /* TODO: allocate LMA depth buffer */ - } - - so_method(so, eng3d, NV34TCL_RT_ENABLE, 1); - so_data (so, rt_enable); - so_method(so, eng3d, NV34TCL_RT_HORIZ, 3); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_data (so, rt_format); - so_method(so, eng3d, NV34TCL_VIEWPORT_HORIZ, 2); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_method(so, eng3d, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); - so_data (so, ((w - 1) << 16) | 0); - so_data (so, ((h - 1) << 16) | 0); - so_method(so, eng3d, 0x1d88, 1); - so_data (so, (1 << 12) | h); - /* Wonder why this is needed, context should all be set to zero on init */ - so_method(so, eng3d, NV34TCL_VIEWPORT_TX_ORIGIN, 1); - so_data (so, 0); - - so_ref(so, &nvfx->state.hw[NVFX_STATE_FB]); - so_ref(NULL, &so); - return TRUE; -} - -struct nvfx_state_entry nv30_state_framebuffer = { - .validate = nv30_state_framebuffer_validate, - .dirty = { - .pipe = NVFX_NEW_FB, - .hw = NVFX_STATE_FB - } -}; diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 85d6ce98861..f78e81ac4b3 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -10,7 +10,6 @@ C_SOURCES = \ nv40_fragtex.c \ nv40_screen.c \ nv40_state.c \ - nv40_state_fb.c \ nv40_vbo.c \ nv40_vertprog.c diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index a3000eeca04..3840134ce69 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -26,7 +26,6 @@ extern void nv40_fragtex_bind(struct nvfx_context *); /* nv40_state.c and friends */ extern struct nvfx_state_entry nv40_state_fragprog; extern struct nvfx_state_entry nv40_state_vertprog; -extern struct nvfx_state_entry nv40_state_framebuffer; extern struct nvfx_state_entry nv40_state_fragtex; extern struct nvfx_state_entry nv40_state_vbo; extern struct nvfx_state_entry nv40_state_vtxfmt; diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c deleted file mode 100644 index 95735e40a38..00000000000 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ /dev/null @@ -1,175 +0,0 @@ -#include "nv40_context.h" -#include "nouveau/nouveau_util.h" - -static struct pipe_buffer * -nv40_do_surface_buffer(struct pipe_surface *surface) -{ - struct nvfx_miptree *mt = (struct nvfx_miptree *)surface->texture; - return mt->buffer; -} - -#define nv40_surface_buffer(ps) nouveau_bo(nv40_do_surface_buffer(ps)) - -static boolean -nv40_state_framebuffer_validate(struct nvfx_context *nvfx) -{ - struct nouveau_channel *chan = nvfx->screen->base.channel; - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - struct pipe_framebuffer_state *fb = &nvfx->framebuffer; - struct nv04_surface *rt[4], *zeta; - uint32_t rt_enable, rt_format; - int i, colour_format = 0, zeta_format = 0; - struct nouveau_stateobj *so = so_new(18, 24, 10); - unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; - unsigned w = fb->width; - unsigned h = fb->height; - - rt_enable = 0; - for (i = 0; i < fb->nr_cbufs; i++) { - if (colour_format) { - assert(colour_format == fb->cbufs[i]->format); - } else { - colour_format = fb->cbufs[i]->format; - rt_enable |= (NV40TCL_RT_ENABLE_COLOR0 << i); - rt[i] = (struct nv04_surface *)fb->cbufs[i]; - } - } - - if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 | - NV40TCL_RT_ENABLE_COLOR3)) - rt_enable |= NV40TCL_RT_ENABLE_MRT; - - if (fb->zsbuf) { - zeta_format = fb->zsbuf->format; - zeta = (struct nv04_surface *)fb->zsbuf; - } - - if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { - assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); - for (i = 1; i < fb->nr_cbufs; i++) - assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); - - rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | - log2i(fb->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT | - log2i(fb->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT; - } - else - rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; - - switch (colour_format) { - case PIPE_FORMAT_B8G8R8X8_UNORM: - rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8; - break; - case PIPE_FORMAT_B8G8R8A8_UNORM: - case 0: - rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; - break; - case PIPE_FORMAT_B5G6R5_UNORM: - rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; - break; - default: - assert(0); - } - - switch (zeta_format) { - case PIPE_FORMAT_Z16_UNORM: - rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; - break; - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - case 0: - rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8; - break; - default: - assert(0); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) { - so_method(so, eng3d, NV34TCL_DMA_COLOR0, 1); - so_reloc (so, nv40_surface_buffer(&rt[0]->base), 0, - rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, eng3d, NV34TCL_COLOR0_PITCH, 2); - so_data (so, rt[0]->pitch); - so_reloc (so, nv40_surface_buffer(&rt[0]->base), - rt[0]->base.offset, rt_flags | NOUVEAU_BO_LOW, - 0, 0); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) { - so_method(so, eng3d, NV34TCL_DMA_COLOR1, 1); - so_reloc (so, nv40_surface_buffer(&rt[1]->base), 0, - rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, eng3d, NV34TCL_COLOR1_OFFSET, 2); - so_reloc (so, nv40_surface_buffer(&rt[1]->base), - rt[1]->base.offset, rt_flags | NOUVEAU_BO_LOW, - 0, 0); - so_data (so, rt[1]->pitch); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { - so_method(so, eng3d, NV40TCL_DMA_COLOR2, 1); - so_reloc (so, nv40_surface_buffer(&rt[2]->base), 0, - rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, eng3d, NV40TCL_COLOR2_OFFSET, 1); - so_reloc (so, nv40_surface_buffer(&rt[2]->base), - rt[2]->base.offset, rt_flags | NOUVEAU_BO_LOW, - 0, 0); - so_method(so, eng3d, NV40TCL_COLOR2_PITCH, 1); - so_data (so, rt[2]->pitch); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { - so_method(so, eng3d, NV40TCL_DMA_COLOR3, 1); - so_reloc (so, nv40_surface_buffer(&rt[3]->base), 0, - rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, eng3d, NV40TCL_COLOR3_OFFSET, 1); - so_reloc (so, nv40_surface_buffer(&rt[3]->base), - rt[3]->base.offset, rt_flags | NOUVEAU_BO_LOW, - 0, 0); - so_method(so, eng3d, NV40TCL_COLOR3_PITCH, 1); - so_data (so, rt[3]->pitch); - } - - if (zeta_format) { - so_method(so, eng3d, NV34TCL_DMA_ZETA, 1); - so_reloc (so, nv40_surface_buffer(&zeta->base), 0, - rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, eng3d, NV34TCL_ZETA_OFFSET, 1); - so_reloc (so, nv40_surface_buffer(&zeta->base), - zeta->base.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - so_method(so, eng3d, NV40TCL_ZETA_PITCH, 1); - so_data (so, zeta->pitch); - } - - so_method(so, eng3d, NV40TCL_RT_ENABLE, 1); - so_data (so, rt_enable); - so_method(so, eng3d, NV34TCL_RT_HORIZ, 3); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_data (so, rt_format); - so_method(so, eng3d, NV34TCL_VIEWPORT_HORIZ, 2); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_method(so, eng3d, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); - so_data (so, ((w - 1) << 16) | 0); - so_data (so, ((h - 1) << 16) | 0); - so_method(so, eng3d, 0x1d88, 1); - so_data (so, (1 << 12) | h); - - so_ref(so, &nvfx->state.hw[NVFX_STATE_FB]); - so_ref(NULL, &so); - return TRUE; -} - -struct nvfx_state_entry nv40_state_framebuffer = { - .validate = nv40_state_framebuffer_validate, - .dirty = { - .pipe = NVFX_NEW_FB, - .hw = NVFX_STATE_FB - } -}; diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index 2f198d9d1a2..2f80681e5cf 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -9,6 +9,7 @@ C_SOURCES = \ nvfx_miptree.c \ nvfx_query.c \ nvfx_state_blend.c \ + nvfx_state_fb.c \ nvfx_state_rasterizer.c \ nvfx_state_scissor.c \ nvfx_state_stipple.c \ diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index b6e6cf8fd95..28daa1d2e7a 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -183,6 +183,7 @@ struct nvfx_state_entry { extern struct nvfx_state_entry nvfx_state_blend; extern struct nvfx_state_entry nvfx_state_blend_colour; +extern struct nvfx_state_entry nvfx_state_framebuffer; extern struct nvfx_state_entry nvfx_state_rasterizer; extern struct nvfx_state_entry nvfx_state_scissor; extern struct nvfx_state_entry nvfx_state_sr; diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index b86cb44936a..d3088e42112 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -5,7 +5,7 @@ #define RENDER_STATES(name, nvxx, vbo) \ static struct nvfx_state_entry *name##_render_states[] = { \ - &nvxx##_state_framebuffer, \ + &nvfx_state_framebuffer, \ &nvfx_state_rasterizer, \ &nvfx_state_scissor, \ &nvfx_state_stipple, \ diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c new file mode 100644 index 00000000000..dd64ba4193c --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c @@ -0,0 +1,234 @@ +#include "nvfx_context.h" +#include "nouveau/nouveau_util.h" + +static struct pipe_buffer * +nvfx_do_surface_buffer(struct pipe_surface *surface) +{ + struct nvfx_miptree *mt = (struct nvfx_miptree *)surface->texture; + return mt->buffer; +} + +#define nvfx_surface_buffer(ps) nouveau_bo(nvfx_do_surface_buffer(ps)) + +static boolean +nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) +{ + struct pipe_framebuffer_state *fb = &nvfx->framebuffer; + struct nouveau_channel *chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct nv04_surface *rt[4], *zeta = NULL; + uint32_t rt_enable = 0, rt_format = 0; + int i, colour_format = 0, zeta_format = 0; + int depth_only = 0; + struct nouveau_stateobj *so = so_new(18, 24, 10); + unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; + unsigned w = fb->width; + unsigned h = fb->height; + int colour_bits = 32, zeta_bits = 32; + + if(!nvfx->is_nv4x) + assert(fb->nr_cbufs <= 2); + else + assert(fb->nr_cbufs <= 4); + + for (i = 0; i < fb->nr_cbufs; i++) { + if (colour_format) { + assert(colour_format == fb->cbufs[i]->format); + } else { + colour_format = fb->cbufs[i]->format; + rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); + rt[i] = (struct nv04_surface *)fb->cbufs[i]; + } + } + + if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 | + NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) + rt_enable |= NV34TCL_RT_ENABLE_MRT; + + if (fb->zsbuf) { + zeta_format = fb->zsbuf->format; + zeta = (struct nv04_surface *)fb->zsbuf; + } + + if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0 | NV34TCL_RT_ENABLE_COLOR1 | + NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) { + /* Render to at least a colour buffer */ + if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); + for (i = 1; i < fb->nr_cbufs; i++) + assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); + + rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | + (log2i(rt[0]->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | + (log2i(rt[0]->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); + } + else + rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; + } else if (fb->zsbuf) { + depth_only = 1; + + /* Render to depth buffer only */ + if (!(zeta->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); + + rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | + (log2i(zeta->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | + (log2i(zeta->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); + } + else + rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; + } else { + return FALSE; + } + + switch (colour_format) { + case PIPE_FORMAT_B8G8R8X8_UNORM: + rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + case 0: + rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_B5G6R5_UNORM: + rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; + colour_bits = 16; + break; + default: + assert(0); + } + + switch (zeta_format) { + case PIPE_FORMAT_Z16_UNORM: + rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; + zeta_bits = 16; + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case 0: + rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8; + break; + default: + assert(0); + } + + if ((!nvfx->is_nv4x) && colour_bits > zeta_bits) { + /* TODO: does this limitation really exist? + TODO: can it be worked around somehow? */ + return FALSE; + } + + if ((rt_enable & NV34TCL_RT_ENABLE_COLOR0) + || ((!nvfx->is_nv4x) && depth_only)) { + struct nv04_surface *rt0 = (depth_only ? zeta : rt[0]); + uint32_t pitch = rt0->pitch; + + if(!nvfx->is_nv4x) + { + if (zeta) { + pitch |= (zeta->pitch << 16); + } else { + pitch |= (pitch << 16); + } + } + + so_method(so, eng3d, NV34TCL_DMA_COLOR0, 1); + so_reloc (so, nvfx_surface_buffer(&rt0->base), 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); + so_method(so, eng3d, NV34TCL_COLOR0_PITCH, 2); + so_data (so, pitch); + so_reloc (so, nvfx_surface_buffer(&rt[0]->base), + rt0->base.offset, rt_flags | NOUVEAU_BO_LOW, + 0, 0); + } + + if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { + so_method(so, eng3d, NV34TCL_DMA_COLOR1, 1); + so_reloc (so, nvfx_surface_buffer(&rt[1]->base), 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); + so_method(so, eng3d, NV34TCL_COLOR1_OFFSET, 2); + so_reloc (so, nvfx_surface_buffer(&rt[1]->base), + rt[1]->base.offset, rt_flags | NOUVEAU_BO_LOW, + 0, 0); + so_data (so, rt[1]->pitch); + } + + if(nvfx->is_nv4x) + { + if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { + so_method(so, eng3d, NV40TCL_DMA_COLOR2, 1); + so_reloc (so, nvfx_surface_buffer(&rt[2]->base), 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); + so_method(so, eng3d, NV40TCL_COLOR2_OFFSET, 1); + so_reloc (so, nvfx_surface_buffer(&rt[2]->base), + rt[2]->base.offset, rt_flags | NOUVEAU_BO_LOW, + 0, 0); + so_method(so, eng3d, NV40TCL_COLOR2_PITCH, 1); + so_data (so, rt[2]->pitch); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { + so_method(so, eng3d, NV40TCL_DMA_COLOR3, 1); + so_reloc (so, nvfx_surface_buffer(&rt[3]->base), 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); + so_method(so, eng3d, NV40TCL_COLOR3_OFFSET, 1); + so_reloc (so, nvfx_surface_buffer(&rt[3]->base), + rt[3]->base.offset, rt_flags | NOUVEAU_BO_LOW, + 0, 0); + so_method(so, eng3d, NV40TCL_COLOR3_PITCH, 1); + so_data (so, rt[3]->pitch); + } + } + + if (zeta_format) { + so_method(so, eng3d, NV34TCL_DMA_ZETA, 1); + so_reloc (so, nvfx_surface_buffer(&zeta->base), 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); + so_method(so, eng3d, NV34TCL_ZETA_OFFSET, 1); + /* TODO: reverse engineer LMA */ + so_reloc (so, nvfx_surface_buffer(&zeta->base), + zeta->base.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); + if(nvfx->is_nv4x) { + so_method(so, eng3d, NV40TCL_ZETA_PITCH, 1); + so_data (so, zeta->pitch); + } + } + + so_method(so, eng3d, NV34TCL_RT_ENABLE, 1); + so_data (so, rt_enable); + so_method(so, eng3d, NV34TCL_RT_HORIZ, 3); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_data (so, rt_format); + so_method(so, eng3d, NV34TCL_VIEWPORT_HORIZ, 2); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_method(so, eng3d, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); + so_data (so, ((w - 1) << 16) | 0); + so_data (so, ((h - 1) << 16) | 0); + so_method(so, eng3d, 0x1d88, 1); + so_data (so, (1 << 12) | h); + + if(!nvfx->is_nv4x) { + /* Wonder why this is needed, context should all be set to zero on init */ + /* TODO: we can most likely remove this, after putting it in context init */ + so_method(so, eng3d, NV34TCL_VIEWPORT_TX_ORIGIN, 1); + so_data (so, 0); + } + + so_ref(so, &nvfx->state.hw[NVFX_STATE_FB]); + so_ref(NULL, &so); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_framebuffer = { + .validate = nvfx_state_framebuffer_validate, + .dirty = { + .pipe = NVFX_NEW_FB, + .hw = NVFX_STATE_FB + } +}; -- cgit v1.2.3 From d9e396ce4a124529fa92ad967f2b3ff72534079b Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 21 Feb 2010 02:26:25 +0100 Subject: nv30, nv40: non-trivially partially unify nv[34]0_shader.h shader.h is similar, except for the following differences: 1. The instruction sets are not exactly the same, but mostly similar 2. Vertex program fields are in different bit positions This patch unifies all parts of nv[34]0_shader.h except the vertex program fields. Vertex opcodes are also changed so that the constant names includes SCA if it is a scalar opcode and VEC if it is a vector opcode. --- src/gallium/drivers/nv30/nv30_fragprog.c | 172 ++++++------- src/gallium/drivers/nv30/nv30_shader.h | 323 +----------------------- src/gallium/drivers/nv30/nv30_vertprog.c | 134 +++++----- src/gallium/drivers/nv40/nv40_draw.c | 2 +- src/gallium/drivers/nv40/nv40_fragprog.c | 160 ++++++------ src/gallium/drivers/nv40/nv40_shader.h | 380 +---------------------------- src/gallium/drivers/nv40/nv40_vertprog.c | 166 ++++++------- src/gallium/drivers/nvfx/nvfx_shader.h | 407 +++++++++++++++++++++++++++++++ 8 files changed, 726 insertions(+), 1018 deletions(-) create mode 100644 src/gallium/drivers/nvfx/nvfx_shader.h (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index ae246ffd647..4ce16b8f0e3 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -19,14 +19,14 @@ #define MASK_Z 4 #define MASK_W 8 #define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X -#define DEF_CTEST NV30_FP_OP_COND_TR -#include "nv30_shader.h" +#define DEF_SCALE NVFX_FP_OP_DST_SCALE_1X +#define DEF_CTEST NVFX_FP_OP_COND_TR +#include "nvfx_shader.h" -#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv30_sr_neg((s)) -#define abs(s) nv30_sr_abs((s)) -#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v) +#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nvfx_sr_neg((s)) +#define abs(s) nvfx_sr_abs((s)) +#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v) #define MAX_CONSTS 128 #define MAX_IMM 32 @@ -50,21 +50,21 @@ struct nv30_fpc { } consts[MAX_CONSTS]; int nr_consts; - struct nv30_sreg imm[MAX_IMM]; + struct nvfx_sreg imm[MAX_IMM]; unsigned nr_imm; }; -static INLINE struct nv30_sreg +static INLINE struct nvfx_sreg temp(struct nv30_fpc *fpc) { int idx; idx = fpc->temp_temp_count++; idx += fpc->high_temp + 1; - return nv30_sr(NV30SR_TEMP, idx); + return nvfx_sr(NVFXSR_TEMP, idx); } -static INLINE struct nv30_sreg +static INLINE struct nvfx_sreg constant(struct nv30_fpc *fpc, int pipe, float vals[4]) { int idx; @@ -76,14 +76,14 @@ constant(struct nv30_fpc *fpc, int pipe, float vals[4]) fpc->consts[idx].pipe = pipe; if (pipe == -1) memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); - return nv30_sr(NV30SR_CONST, idx); + return nvfx_sr(NVFXSR_CONST, idx); } #define arith(cc,s,o,d,m,s0,s1,s2) \ - nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \ + nv30_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \ (d), (m), (s0), (s1), (s2)) #define tex(cc,s,o,u,d,m,s0,s1,s2) \ - nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \ + nv30_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \ (d), (m), (s0), none, none) static void @@ -96,25 +96,25 @@ grow_insns(struct nv30_fpc *fpc, int size) } static void -emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src) +emit_src(struct nv30_fpc *fpc, int pos, struct nvfx_sreg src) { struct nvfx_fragment_program *fp = fpc->fp; uint32_t *hw = &fp->insn[fpc->inst_offset]; uint32_t sr = 0; switch (src.type) { - case NV30SR_INPUT: - sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); - hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT); + case NVFXSR_INPUT: + sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); + hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT); break; - case NV30SR_OUTPUT: - sr |= NV30_FP_REG_SRC_HALF; + case NVFXSR_OUTPUT: + sr |= NVFX_FP_REG_SRC_HALF; /* fall-through */ - case NV30SR_TEMP: - sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT); - sr |= (src.index << NV30_FP_REG_SRC_SHIFT); + case NVFXSR_TEMP: + sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT); + sr |= (src.index << NVFX_FP_REG_SRC_SHIFT); break; - case NV30SR_CONST: + case NVFXSR_CONST: grow_insns(fpc, 4); hw = &fp->insn[fpc->inst_offset]; if (fpc->consts[src.index].pipe >= 0) { @@ -132,61 +132,61 @@ emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src) sizeof(uint32_t) * 4); } - sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT); + sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT); break; - case NV30SR_NONE: - sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); + case NVFXSR_NONE: + sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); break; default: assert(0); } if (src.negate) - sr |= NV30_FP_REG_NEGATE; + sr |= NVFX_FP_REG_NEGATE; if (src.abs) hw[1] |= (1 << (29 + pos)); - sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) | - (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) | - (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) | - (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT)); + sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) | + (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) | + (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) | + (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT)); hw[pos + 1] |= sr; } static void -emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst) +emit_dst(struct nv30_fpc *fpc, struct nvfx_sreg dst) { struct nvfx_fragment_program *fp = fpc->fp; uint32_t *hw = &fp->insn[fpc->inst_offset]; switch (dst.type) { - case NV30SR_TEMP: + case NVFXSR_TEMP: if (fpc->num_regs < (dst.index + 1)) fpc->num_regs = dst.index + 1; break; - case NV30SR_OUTPUT: + case NVFXSR_OUTPUT: if (dst.index == 1) { fp->fp_control |= 0xe; } else { - hw[0] |= NV30_FP_OP_OUT_REG_HALF; + hw[0] |= NVFX_FP_OP_OUT_REG_HALF; } break; - case NV30SR_NONE: + case NVFXSR_NONE: hw[0] |= (1 << 30); break; default: assert(0); } - hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT); + hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT); } static void nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op, - struct nv30_sreg dst, int mask, - struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) + struct nvfx_sreg dst, int mask, + struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2) { struct nvfx_fragment_program *fp = fpc->fp; uint32_t *hw; @@ -196,22 +196,22 @@ nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op, hw = &fp->insn[fpc->inst_offset]; memset(hw, 0, sizeof(uint32_t) * 4); - if (op == NV30_FP_OP_OPCODE_KIL) + if (op == NVFX_FP_OP_OPCODE_KIL) fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL; - hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT); - hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT); - hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT); + hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT); + hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT); + hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT); if (sat) - hw[0] |= NV30_FP_OP_OUT_SAT; + hw[0] |= NVFX_FP_OP_OUT_SAT; if (dst.cc_update) - hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE; - hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT); - hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) | - (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) | - (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) | - (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT)); + hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE; + hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT); + hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) | + (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) | + (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) | + (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT)); emit_dst(fpc, dst); emit_src(fpc, 0, s0); @@ -221,25 +221,25 @@ nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op, static void nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit, - struct nv30_sreg dst, int mask, - struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) + struct nvfx_sreg dst, int mask, + struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2) { struct nvfx_fragment_program *fp = fpc->fp; nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); - fp->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT); + fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT); fp->samplers |= (1 << unit); } -static INLINE struct nv30_sreg +static INLINE struct nvfx_sreg tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) { - struct nv30_sreg src; + struct nvfx_sreg src; switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nv30_sr(NV30SR_INPUT, + src = nvfx_sr(NVFXSR_INPUT, fpc->attrib_map[fsrc->Register.Index]); break; case TGSI_FILE_CONSTANT: @@ -250,7 +250,7 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) src = fpc->imm[fsrc->Register.Index]; break; case TGSI_FILE_TEMPORARY: - src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index + 1); + src = nvfx_sr(NVFXSR_TEMP, fsrc->Register.Index + 1); if (fpc->high_temp < src.index) fpc->high_temp = src.index; break; @@ -259,9 +259,9 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) */ case TGSI_FILE_OUTPUT: if (fsrc->Register.Index == fpc->colour_id) - return nv30_sr(NV30SR_OUTPUT, 0); + return nvfx_sr(NVFXSR_OUTPUT, 0); else - return nv30_sr(NV30SR_OUTPUT, 1); + return nvfx_sr(NVFXSR_OUTPUT, 1); break; default: NOUVEAU_ERR("bad src file\n"); @@ -277,27 +277,27 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) return src; } -static INLINE struct nv30_sreg +static INLINE struct nvfx_sreg tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) { int idx; switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: if (fdst->Register.Index == fpc->colour_id) - return nv30_sr(NV30SR_OUTPUT, 0); + return nvfx_sr(NVFXSR_OUTPUT, 0); else - return nv30_sr(NV30SR_OUTPUT, 1); + return nvfx_sr(NVFXSR_OUTPUT, 1); break; case TGSI_FILE_TEMPORARY: idx = fdst->Register.Index + 1; if (fpc->high_temp < idx) fpc->high_temp = idx; - return nv30_sr(NV30SR_TEMP, idx); + return nvfx_sr(NVFXSR_TEMP, idx); case TGSI_FILE_NULL: - return nv30_sr(NV30SR_NONE, 0); + return nvfx_sr(NVFXSR_NONE, 0); default: NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); - return nv30_sr(NV30SR_NONE, 0); + return nvfx_sr(NVFXSR_NONE, 0); } } @@ -315,10 +315,10 @@ tgsi_mask(uint tgsi) static boolean src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, - struct nv30_sreg *src) + struct nvfx_sreg *src) { - const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); - struct nv30_sreg tgsi = tgsi_src(fpc, fsrc); + const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); + struct nvfx_sreg tgsi = tgsi_src(fpc, fsrc); uint mask = 0; uint c; @@ -350,8 +350,8 @@ static boolean nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, const struct tgsi_full_instruction *finst) { - const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); - struct nv30_sreg src[3], dst, tmp; + const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); + struct nvfx_sreg src[3], dst, tmp; int mask, sat, unit = 0; int ai = -1, ci = -1; int i; @@ -435,12 +435,12 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_CMP: - tmp = nv30_sr(NV30SR_NONE, 0); + tmp = nvfx_sr(NVFXSR_NONE, 0); tmp.cc_update = 1; arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); - dst.cc_test = NV30_VP_INST_COND_GE; + dst.cc_test = NVFX_VP_INST_COND_GE; arith(fpc, sat, MOV, dst, mask, src[2], none, none); - dst.cc_test = NV30_VP_INST_COND_LT; + dst.cc_test = NVFX_VP_INST_COND_LT; arith(fpc, sat, MOV, dst, mask, src[1], none, none); break; case TGSI_OPCODE_COS: @@ -474,10 +474,10 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, arith(fpc, 0, KIL, none, 0, none, none, none); break; case TGSI_OPCODE_KIL: - dst = nv30_sr(NV30SR_NONE, 0); + dst = nvfx_sr(NVFXSR_NONE, 0); dst.cc_update = 1; arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); - dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT; + dst.cc_update = 0; dst.cc_test = NVFX_FP_OP_COND_LT; arith(fpc, 0, KIL, dst, 0, none, none, none); break; case TGSI_OPCODE_LG2: @@ -485,7 +485,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, break; // case TGSI_OPCODE_LIT: case TGSI_OPCODE_LRP: - arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]); + arith(fpc, sat, LRP_NV30, dst, mask, src[0], src[1], src[2]); break; case TGSI_OPCODE_MAD: arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); @@ -503,7 +503,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_POW: - arith(fpc, sat, POW, dst, mask, src[0], src[1], none); + arith(fpc, sat, POW_NV30, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_RCP: arith(fpc, sat, RCP, dst, mask, src[0], none, none); @@ -512,10 +512,10 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, assert(0); break; case TGSI_OPCODE_RFL: - arith(fpc, 0, RFL, dst, mask, src[0], src[1], none); + arith(fpc, 0, RFL_NV30, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_RSQ: - arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); + arith(fpc, sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); break; case TGSI_OPCODE_SCS: /* avoid overwriting the source */ @@ -590,25 +590,25 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: - hw = NV30_FP_OP_INPUT_SRC_POSITION; + hw = NVFX_FP_OP_INPUT_SRC_POSITION; break; case TGSI_SEMANTIC_COLOR: if (fdec->Semantic.Index == 0) { - hw = NV30_FP_OP_INPUT_SRC_COL0; + hw = NVFX_FP_OP_INPUT_SRC_COL0; } else if (fdec->Semantic.Index == 1) { - hw = NV30_FP_OP_INPUT_SRC_COL1; + hw = NVFX_FP_OP_INPUT_SRC_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); return FALSE; } break; case TGSI_SEMANTIC_FOG: - hw = NV30_FP_OP_INPUT_SRC_FOGC; + hw = NVFX_FP_OP_INPUT_SRC_FOGC; break; case TGSI_SEMANTIC_GENERIC: if (fdec->Semantic.Index <= 7) { - hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic. + hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic. Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); @@ -702,7 +702,7 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc) tgsi_parse_free(&p); /*if (++high_temp) { - fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg)); + fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg)); for (i = 0; i < high_temp; i++) fpc->r_temp[i] = temp(fpc); fpc->r_temps_discard = 0; diff --git a/src/gallium/drivers/nv30/nv30_shader.h b/src/gallium/drivers/nv30/nv30_shader.h index dd3a36f78f3..f19efb5aa4d 100644 --- a/src/gallium/drivers/nv30/nv30_shader.h +++ b/src/gallium/drivers/nv30/nv30_shader.h @@ -72,14 +72,6 @@ #define NV30_VP_INST_COND_TEST_ENABLE (1<<14) #define NV30_VP_INST_COND_SHIFT 11 #define NV30_VP_INST_COND_MASK (0x07 << 11) -# define NV30_VP_INST_COND_FL 0 /* guess */ -# define NV30_VP_INST_COND_LT 1 -# define NV30_VP_INST_COND_EQ 2 -# define NV30_VP_INST_COND_LE 3 -# define NV30_VP_INST_COND_GT 4 -# define NV30_VP_INST_COND_NE 5 -# define NV30_VP_INST_COND_GE 6 -# define NV30_VP_INST_COND_TR 7 /* guess */ #define NV30_VP_INST_COND_SWZ_X_SHIFT 9 #define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9) #define NV30_VP_INST_COND_SWZ_Y_SHIFT 7 @@ -98,59 +90,12 @@ /* DWORD 1 */ #define NV30_VP_INST_SCA_OPCODEL_SHIFT 28 #define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28) -# define NV30_VP_INST_OP_NOP 0x00 -# define NV30_VP_INST_OP_RCP 0x02 -# define NV30_VP_INST_OP_RCC 0x03 -# define NV30_VP_INST_OP_RSQ 0x04 -# define NV30_VP_INST_OP_EXP 0x05 -# define NV30_VP_INST_OP_LOG 0x06 -# define NV30_VP_INST_OP_LIT 0x07 -# define NV30_VP_INST_OP_BRA 0x09 -# define NV30_VP_INST_OP_CAL 0x0B -# define NV30_VP_INST_OP_RET 0x0C -# define NV30_VP_INST_OP_LG2 0x0D -# define NV30_VP_INST_OP_EX2 0x0E -# define NV30_VP_INST_OP_SIN 0x0F -# define NV30_VP_INST_OP_COS 0x10 #define NV30_VP_INST_VEC_OPCODE_SHIFT 23 #define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23) -# define NV30_VP_INST_OP_NOPV 0x00 -# define NV30_VP_INST_OP_MOV 0x01 -# define NV30_VP_INST_OP_MUL 0x02 -# define NV30_VP_INST_OP_ADD 0x03 -# define NV30_VP_INST_OP_MAD 0x04 -# define NV30_VP_INST_OP_DP3 0x05 -# define NV30_VP_INST_OP_DP4 0x07 -# define NV30_VP_INST_OP_DPH 0x06 -# define NV30_VP_INST_OP_DST 0x08 -# define NV30_VP_INST_OP_MIN 0x09 -# define NV30_VP_INST_OP_MAX 0x0A -# define NV30_VP_INST_OP_SLT 0x0B -# define NV30_VP_INST_OP_SGE 0x0C -# define NV30_VP_INST_OP_ARL 0x0D -# define NV30_VP_INST_OP_FRC 0x0E -# define NV30_VP_INST_OP_FLR 0x0F -# define NV30_VP_INST_OP_SEQ 0x10 -# define NV30_VP_INST_OP_SFL 0x11 -# define NV30_VP_INST_OP_SGT 0x12 -# define NV30_VP_INST_OP_SLE 0x13 -# define NV30_VP_INST_OP_SNE 0x14 -# define NV30_VP_INST_OP_STR 0x15 -# define NV30_VP_INST_OP_SSG 0x16 -# define NV30_VP_INST_OP_ARR 0x17 -# define NV30_VP_INST_OP_ARA 0x18 #define NV30_VP_INST_CONST_SRC_SHIFT 14 #define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14) #define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/ #define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/ -# define NV30_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */ -# define NV30_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */ -# define NV30_VP_INST_IN_NORMAL 2 -# define NV30_VP_INST_IN_COL0 3 /* Should probably confirm them all though */ -# define NV30_VP_INST_IN_COL1 4 -# define NV30_VP_INST_IN_FOGC 5 -# define NV30_VP_INST_IN_TC0 8 -# define NV30_VP_INST_IN_TC(n) (8+n) #define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/ #define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/ @@ -190,8 +135,6 @@ # define NV30_VP_INST_DEST_PSZ 6 # define NV30_VP_INST_DEST_TC(n) (8+n) -#define NV30_VP_INST_LAST (1 << 0) - /* Useful to split the source selection regs into their pieces */ #define NV30_VP_SRC0_HIGH_SHIFT 6 #define NV30_VP_SRC0_HIGH_MASK 0x00007FC0 @@ -221,270 +164,6 @@ #define NV30_VP_SRC_REG_TYPE_INPUT 2 #define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */ -/* - * Each fragment program opcode appears to be comprised of 4 32-bit values. - * - * 0 - Opcode, output reg/mask, ATTRIB source - * 1 - Source 0 - * 2 - Source 1 - * 3 - Source 2 - * - * There appears to be no special difference between result regs and temp regs. - * result.color == R0.xyzw - * result.depth == R1.z - * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0 - * otherwise it is set to 1. - * - * Constants are inserted directly after the instruction that uses them. - * - * It appears that it's not possible to use two input registers in one - * instruction as the input sourcing is done in the instruction dword - * and not the source selection dwords. As such instructions such as: - * - * ADD result.color, fragment.color, fragment.texcoord[0]; - * - * must be split into two MOV's and then an ADD (nvidia does this) but - * I'm not sure why it's not just one MOV and then source the second input - * in the ADD instruction.. - * - * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary - * negation requires multiplication with a const. - * - * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE - * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO - * is implemented simply by not writing to the relevant components of the destination. - * - * Conditional execution - * TODO - * - * Non-native instructions: - * LIT - * LRP - MAD+MAD - * SUB - ADD, negate second source - * RSQ - LG2 + EX2 - * POW - LG2 + MUL + EX2 - * SCS - COS + SIN - * XPD - */ - -//== Opcode / Destination selection == -#define NV30_FP_OP_PROGRAM_END (1 << 0) -#define NV30_FP_OP_OUT_REG_SHIFT 1 -#define NV30_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */ -/* Needs to be set when writing outputs to get expected result.. */ -#define NV30_FP_OP_OUT_REG_HALF (1 << 7) -#define NV30_FP_OP_COND_WRITE_ENABLE (1 << 8) -#define NV30_FP_OP_OUTMASK_SHIFT 9 -#define NV30_FP_OP_OUTMASK_MASK (0xF << 9) -# define NV30_FP_OP_OUT_X (1<<9) -# define NV30_FP_OP_OUT_Y (1<<10) -# define NV30_FP_OP_OUT_Z (1<<11) -# define NV30_FP_OP_OUT_W (1<<12) -/* Uncertain about these, especially the input_src values.. it's possible that - * they can be dynamically changed. - */ -#define NV30_FP_OP_INPUT_SRC_SHIFT 13 -#define NV30_FP_OP_INPUT_SRC_MASK (15 << 13) -# define NV30_FP_OP_INPUT_SRC_POSITION 0x0 -# define NV30_FP_OP_INPUT_SRC_COL0 0x1 -# define NV30_FP_OP_INPUT_SRC_COL1 0x2 -# define NV30_FP_OP_INPUT_SRC_FOGC 0x3 -# define NV30_FP_OP_INPUT_SRC_TC0 0x4 -# define NV30_FP_OP_INPUT_SRC_TC(n) (0x4 + n) -#define NV30_FP_OP_TEX_UNIT_SHIFT 17 -#define NV30_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */ -#define NV30_FP_OP_PRECISION_SHIFT 22 -#define NV30_FP_OP_PRECISION_MASK (3 << 22) -# define NV30_FP_PRECISION_FP32 0 -# define NV30_FP_PRECISION_FP16 1 -# define NV30_FP_PRECISION_FX12 2 -#define NV30_FP_OP_OPCODE_SHIFT 24 -#define NV30_FP_OP_OPCODE_MASK (0x3F << 24) -# define NV30_FP_OP_OPCODE_NOP 0x00 -# define NV30_FP_OP_OPCODE_MOV 0x01 -# define NV30_FP_OP_OPCODE_MUL 0x02 -# define NV30_FP_OP_OPCODE_ADD 0x03 -# define NV30_FP_OP_OPCODE_MAD 0x04 -# define NV30_FP_OP_OPCODE_DP3 0x05 -# define NV30_FP_OP_OPCODE_DP4 0x06 -# define NV30_FP_OP_OPCODE_DST 0x07 -# define NV30_FP_OP_OPCODE_MIN 0x08 -# define NV30_FP_OP_OPCODE_MAX 0x09 -# define NV30_FP_OP_OPCODE_SLT 0x0A -# define NV30_FP_OP_OPCODE_SGE 0x0B -# define NV30_FP_OP_OPCODE_SLE 0x0C -# define NV30_FP_OP_OPCODE_SGT 0x0D -# define NV30_FP_OP_OPCODE_SNE 0x0E -# define NV30_FP_OP_OPCODE_SEQ 0x0F -# define NV30_FP_OP_OPCODE_FRC 0x10 -# define NV30_FP_OP_OPCODE_FLR 0x11 -# define NV30_FP_OP_OPCODE_KIL 0x12 -# define NV30_FP_OP_OPCODE_PK4B 0x13 -# define NV30_FP_OP_OPCODE_UP4B 0x14 -# define NV30_FP_OP_OPCODE_DDX 0x15 /* can only write XY */ -# define NV30_FP_OP_OPCODE_DDY 0x16 /* can only write XY */ -# define NV30_FP_OP_OPCODE_TEX 0x17 -# define NV30_FP_OP_OPCODE_TXP 0x18 -# define NV30_FP_OP_OPCODE_TXD 0x19 -# define NV30_FP_OP_OPCODE_RCP 0x1A -# define NV30_FP_OP_OPCODE_RSQ 0x1B -# define NV30_FP_OP_OPCODE_EX2 0x1C -# define NV30_FP_OP_OPCODE_LG2 0x1D -# define NV30_FP_OP_OPCODE_LIT 0x1E -# define NV30_FP_OP_OPCODE_LRP 0x1F -# define NV30_FP_OP_OPCODE_STR 0x20 -# define NV30_FP_OP_OPCODE_SFL 0x21 -# define NV30_FP_OP_OPCODE_COS 0x22 -# define NV30_FP_OP_OPCODE_SIN 0x23 -# define NV30_FP_OP_OPCODE_PK2H 0x24 -# define NV30_FP_OP_OPCODE_UP2H 0x25 -# define NV30_FP_OP_OPCODE_POW 0x26 -# define NV30_FP_OP_OPCODE_PK4UB 0x27 -# define NV30_FP_OP_OPCODE_UP4UB 0x28 -# define NV30_FP_OP_OPCODE_PK2US 0x29 -# define NV30_FP_OP_OPCODE_UP2US 0x2A -# define NV30_FP_OP_OPCODE_DP2A 0x2E -# define NV30_FP_OP_OPCODE_TXB 0x31 -# define NV30_FP_OP_OPCODE_RFL 0x36 -# define NV30_FP_OP_OPCODE_DIV 0x3A -#define NV30_FP_OP_OUT_SAT (1 << 31) - -/* high order bits of SRC0 */ -#define NV30_FP_OP_OUT_ABS (1 << 29) -#define NV30_FP_OP_COND_SWZ_W_SHIFT 27 -#define NV30_FP_OP_COND_SWZ_W_MASK (3 << 27) -#define NV30_FP_OP_COND_SWZ_Z_SHIFT 25 -#define NV30_FP_OP_COND_SWZ_Z_MASK (3 << 25) -#define NV30_FP_OP_COND_SWZ_Y_SHIFT 23 -#define NV30_FP_OP_COND_SWZ_Y_MASK (3 << 23) -#define NV30_FP_OP_COND_SWZ_X_SHIFT 21 -#define NV30_FP_OP_COND_SWZ_X_MASK (3 << 21) -#define NV30_FP_OP_COND_SWZ_ALL_SHIFT 21 -#define NV30_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) -#define NV30_FP_OP_COND_SHIFT 18 -#define NV30_FP_OP_COND_MASK (0x07 << 18) -# define NV30_FP_OP_COND_FL 0 -# define NV30_FP_OP_COND_LT 1 -# define NV30_FP_OP_COND_EQ 2 -# define NV30_FP_OP_COND_LE 3 -# define NV30_FP_OP_COND_GT 4 -# define NV30_FP_OP_COND_NE 5 -# define NV30_FP_OP_COND_GE 6 -# define NV30_FP_OP_COND_TR 7 - -/* high order bits of SRC1 */ -#define NV30_FP_OP_DST_SCALE_SHIFT 28 -#define NV30_FP_OP_DST_SCALE_MASK (3 << 28) -#define NV30_FP_OP_DST_SCALE_1X 0 -#define NV30_FP_OP_DST_SCALE_2X 1 -#define NV30_FP_OP_DST_SCALE_4X 2 -#define NV30_FP_OP_DST_SCALE_8X 3 -#define NV30_FP_OP_DST_SCALE_INV_2X 5 -#define NV30_FP_OP_DST_SCALE_INV_4X 6 -#define NV30_FP_OP_DST_SCALE_INV_8X 7 - - -/* high order bits of SRC2 */ -#define NV30_FP_OP_INDEX_INPUT (1 << 30) - -//== Register selection == -#define NV30_FP_REG_TYPE_SHIFT 0 -#define NV30_FP_REG_TYPE_MASK (3 << 0) -# define NV30_FP_REG_TYPE_TEMP 0 -# define NV30_FP_REG_TYPE_INPUT 1 -# define NV30_FP_REG_TYPE_CONST 2 -#define NV30_FP_REG_SRC_SHIFT 2 /* uncertain */ -#define NV30_FP_REG_SRC_MASK (31 << 2) -#define NV30_FP_REG_SRC_HALF (1 << 8) -#define NV30_FP_REG_SWZ_ALL_SHIFT 9 -#define NV30_FP_REG_SWZ_ALL_MASK (255 << 9) -#define NV30_FP_REG_SWZ_X_SHIFT 9 -#define NV30_FP_REG_SWZ_X_MASK (3 << 9) -#define NV30_FP_REG_SWZ_Y_SHIFT 11 -#define NV30_FP_REG_SWZ_Y_MASK (3 << 11) -#define NV30_FP_REG_SWZ_Z_SHIFT 13 -#define NV30_FP_REG_SWZ_Z_MASK (3 << 13) -#define NV30_FP_REG_SWZ_W_SHIFT 15 -#define NV30_FP_REG_SWZ_W_MASK (3 << 15) -# define NV30_FP_SWIZZLE_X 0 -# define NV30_FP_SWIZZLE_Y 1 -# define NV30_FP_SWIZZLE_Z 2 -# define NV30_FP_SWIZZLE_W 3 -#define NV30_FP_REG_NEGATE (1 << 17) - -#define NV30SR_NONE 0 -#define NV30SR_OUTPUT 1 -#define NV30SR_INPUT 2 -#define NV30SR_TEMP 3 -#define NV30SR_CONST 4 - -struct nv30_sreg { - int type; - int index; - - int dst_scale; - - int negate; - int abs; - int swz[4]; - - int cc_update; - int cc_update_reg; - int cc_test; - int cc_test_reg; - int cc_swz[4]; -}; - -static INLINE struct nv30_sreg -nv30_sr(int type, int index) -{ - struct nv30_sreg temp = { - .type = type, - .index = index, - .dst_scale = DEF_SCALE, - .abs = 0, - .negate = 0, - .swz = { 0, 1, 2, 3 }, - .cc_update = 0, - .cc_update_reg = 0, - .cc_test = DEF_CTEST, - .cc_test_reg = 0, - .cc_swz = { 0, 1, 2, 3 }, - }; - return temp; -} - -static INLINE struct nv30_sreg -nv30_sr_swz(struct nv30_sreg src, int x, int y, int z, int w) -{ - struct nv30_sreg dst = src; - - dst.swz[SWZ_X] = src.swz[x]; - dst.swz[SWZ_Y] = src.swz[y]; - dst.swz[SWZ_Z] = src.swz[z]; - dst.swz[SWZ_W] = src.swz[w]; - return dst; -} - -static INLINE struct nv30_sreg -nv30_sr_neg(struct nv30_sreg src) -{ - src.negate = !src.negate; - return src; -} - -static INLINE struct nv30_sreg -nv30_sr_abs(struct nv30_sreg src) -{ - src.abs = 1; - return src; -} - -static INLINE struct nv30_sreg -nv30_sr_scale(struct nv30_sreg src, int scale) -{ - src.dst_scale = scale; - return src; -} +#include "nvfx_shader.h" #endif diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index cf910e34b11..ec6d63889bc 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -35,9 +35,9 @@ #define DEF_CTEST 0 #include "nv30_shader.h" -#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv30_sr_neg((s)) -#define abs(s) nv30_sr_abs((s)) +#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nvfx_sr_neg((s)) +#define abs(s) nvfx_sr_abs((s)) struct nv30_vpc { struct nvfx_vertex_program *vp; @@ -49,21 +49,21 @@ struct nv30_vpc { int high_temp; int temp_temp_count; - struct nv30_sreg *imm; + struct nvfx_sreg *imm; unsigned nr_imm; }; -static struct nv30_sreg +static struct nvfx_sreg temp(struct nv30_vpc *vpc) { int idx; idx = vpc->temp_temp_count++; idx += vpc->high_temp + 1; - return nv30_sr(NV30SR_TEMP, idx); + return nvfx_sr(NVFXSR_TEMP, idx); } -static struct nv30_sreg +static struct nvfx_sreg constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w) { struct nvfx_vertex_program *vp = vpc->vp; @@ -73,7 +73,7 @@ constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w) if (pipe >= 0) { for (idx = 0; idx < vp->nr_consts; idx++) { if (vp->consts[idx].index == pipe) - return nv30_sr(NV30SR_CONST, idx); + return nvfx_sr(NVFXSR_CONST, idx); } } @@ -86,37 +86,37 @@ constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w) vpd->value[1] = y; vpd->value[2] = z; vpd->value[3] = w; - return nv30_sr(NV30SR_CONST, idx); + return nvfx_sr(NVFXSR_CONST, idx); } #define arith(cc,s,o,d,m,s0,s1,s2) \ - nv30_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2)) + nv30_vp_arith((cc), NVFX_VP_INST_SLOT_##s, NVFX_VP_INST_##s##_OP_##o, (d), (m), (s0), (s1), (s2)) static void -emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src) +emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nvfx_sreg src) { struct nvfx_vertex_program *vp = vpc->vp; uint32_t sr = 0; switch (src.type) { - case NV30SR_TEMP: + case NVFXSR_TEMP: sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT); sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT); break; - case NV30SR_INPUT: + case NVFXSR_INPUT: sr |= (NV30_VP_SRC_REG_TYPE_INPUT << NV30_VP_SRC_REG_TYPE_SHIFT); vp->ir |= (1 << src.index); hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT); break; - case NV30SR_CONST: + case NVFXSR_CONST: sr |= (NV30_VP_SRC_REG_TYPE_CONST << NV30_VP_SRC_REG_TYPE_SHIFT); assert(vpc->vpi->const_index == -1 || vpc->vpi->const_index == src.index); vpc->vpi->const_index = src.index; break; - case NV30SR_NONE: + case NVFXSR_NONE: sr |= (NV30_VP_SRC_REG_TYPE_INPUT << NV30_VP_SRC_REG_TYPE_SHIFT); break; @@ -164,15 +164,15 @@ emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src) } static void -emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst) +emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nvfx_sreg dst) { struct nvfx_vertex_program *vp = vpc->vp; switch (dst.type) { - case NV30SR_TEMP: + case NVFXSR_TEMP: hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); break; - case NV30SR_OUTPUT: + case NVFXSR_OUTPUT: switch (dst.index) { case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; @@ -207,9 +207,9 @@ emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst) static void nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op, - struct nv30_sreg dst, int mask, - struct nv30_sreg s0, struct nv30_sreg s1, - struct nv30_sreg s2) + struct nvfx_sreg dst, int mask, + struct nvfx_sreg s0, struct nvfx_sreg s1, + struct nvfx_sreg s2) { struct nvfx_vertex_program *vp = vpc->vp; uint32_t *hw; @@ -221,7 +221,7 @@ nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op, hw = vpc->vpi->data; - hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT); + hw[0] |= (NVFX_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT); hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) | (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) | (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) | @@ -231,7 +231,7 @@ nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op, // hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK; // hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT); - if (dst.type == NV30SR_OUTPUT) { + if (dst.type == NVFXSR_OUTPUT) { if (slot) hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); else @@ -249,13 +249,13 @@ nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op, emit_src(vpc, hw, 2, s2); } -static INLINE struct nv30_sreg +static INLINE struct nvfx_sreg tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) { - struct nv30_sreg src; + struct nvfx_sreg src; switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nv30_sr(NV30SR_INPUT, fsrc->Register.Index); + src = nvfx_sr(NVFXSR_INPUT, fsrc->Register.Index); break; case TGSI_FILE_CONSTANT: src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); @@ -266,7 +266,7 @@ tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) { case TGSI_FILE_TEMPORARY: if (vpc->high_temp < fsrc->Register.Index) vpc->high_temp = fsrc->Register.Index; - src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index); + src = nvfx_sr(NVFXSR_TEMP, fsrc->Register.Index); break; default: NOUVEAU_ERR("bad src file\n"); @@ -282,18 +282,18 @@ tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) { return src; } -static INLINE struct nv30_sreg +static INLINE struct nvfx_sreg tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) { - struct nv30_sreg dst; + struct nvfx_sreg dst; switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: - dst = nv30_sr(NV30SR_OUTPUT, + dst = nvfx_sr(NVFXSR_OUTPUT, vpc->output_map[fdst->Register.Index]); break; case TGSI_FILE_TEMPORARY: - dst = nv30_sr(NV30SR_TEMP, fdst->Register.Index); + dst = nvfx_sr(NVFXSR_TEMP, fdst->Register.Index); if (vpc->high_temp < dst.index) vpc->high_temp = dst.index; break; @@ -321,8 +321,8 @@ static boolean nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, const struct tgsi_full_instruction *finst) { - struct nv30_sreg src[3], dst, tmp; - struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); + struct nvfx_sreg src[3], dst, tmp; + struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); int mask; int ai = -1, ci = -1; int i; @@ -351,7 +351,7 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + arith(vpc, VEC, MOV, src[i], MASK_ALL, tgsi_src(vpc, fsrc), none, none); } break; @@ -365,7 +365,7 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + arith(vpc, VEC, MOV, src[i], MASK_ALL, tgsi_src(vpc, fsrc), none, none); } break; @@ -383,96 +383,96 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: - arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); + arith(vpc, VEC, MOV, dst, mask, abs(src[0]), none, none); break; case TGSI_OPCODE_ADD: - arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); + arith(vpc, VEC, ADD, dst, mask, src[0], none, src[1]); break; case TGSI_OPCODE_ARL: - arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); + arith(vpc, VEC, ARL, dst, mask, src[0], none, none); break; case TGSI_OPCODE_DP3: - arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); + arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_DP4: - arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); + arith(vpc, VEC, DP4, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_DPH: - arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); + arith(vpc, VEC, DPH, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_DST: - arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); + arith(vpc, VEC, DST, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_EX2: - arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); + arith(vpc, SCA, EX2, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_EXP: - arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); + arith(vpc, SCA, EXP, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_FLR: - arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); + arith(vpc, VEC, FLR, dst, mask, src[0], none, none); break; case TGSI_OPCODE_FRC: - arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); + arith(vpc, VEC, FRC, dst, mask, src[0], none, none); break; case TGSI_OPCODE_LG2: - arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); + arith(vpc, SCA, LG2, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_LIT: - arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); + arith(vpc, SCA, LIT, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_LOG: - arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); + arith(vpc, SCA, LOG, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_MAD: - arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); + arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]); break; case TGSI_OPCODE_MAX: - arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); + arith(vpc, VEC, MAX, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_MIN: - arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); + arith(vpc, VEC, MIN, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_MOV: - arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); + arith(vpc, VEC, MOV, dst, mask, src[0], none, none); break; case TGSI_OPCODE_MUL: - arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); + arith(vpc, VEC, MUL, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_POW: tmp = temp(vpc); - arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, + arith(vpc, SCA, LG2, tmp, MASK_X, none, none, swz(src[0], X, X, X, X)); - arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + arith(vpc, VEC, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none); - arith(vpc, 1, OP_EX2, dst, mask, none, none, + arith(vpc, SCA, EX2, dst, mask, none, none, swz(tmp, X, X, X, X)); break; case TGSI_OPCODE_RCP: - arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); + arith(vpc, SCA, RCP, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_RET: break; case TGSI_OPCODE_RSQ: - arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]); + arith(vpc, SCA, RSQ, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_SGE: - arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); + arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_SGT: - arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none); + arith(vpc, VEC, SGT, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_SLT: - arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); + arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_SUB: - arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); + arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1])); break; case TGSI_OPCODE_XPD: tmp = temp(vpc); - arith(vpc, 0, OP_MUL, tmp, mask, + arith(vpc, VEC, MUL, tmp, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), + arith(vpc, VEC, MAD, dst, (mask & ~MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)); break; @@ -564,7 +564,7 @@ nv30_vertprog_prepare(struct nv30_vpc *vpc) tgsi_parse_free(&p); if (nr_imm) { - vpc->imm = CALLOC(nr_imm, sizeof(struct nv30_sreg)); + vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_sreg)); assert(vpc->imm); } @@ -639,7 +639,7 @@ nv30_vertprog_translate(struct nvfx_context *nvfx, } } - vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST; + vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; vp->translated = TRUE; out_err: tgsi_parse_free(&parse); diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index 87d2689d54b..4ed87779fd6 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -8,7 +8,7 @@ #include "draw/draw_pipe.h" #include "nv40_context.h" -#define NV40_SHADER_NO_FUCKEDNESS +#define NVFX_SHADER_NO_FUCKEDNESS #include "nv40_shader.h" /* Simple, but crappy, swtnl path, hopefully we wont need to hit this very diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 2a0ab0cf310..e044f367a0b 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -18,14 +18,14 @@ #define MASK_Z 4 #define MASK_W 8 #define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X -#define DEF_CTEST NV40_FP_OP_COND_TR -#include "nv40_shader.h" +#define DEF_SCALE NVFX_FP_OP_DST_SCALE_1X +#define DEF_CTEST NVFX_FP_OP_COND_TR +#include "nvfx_shader.h" -#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv40_sr_neg((s)) -#define abs(s) nv40_sr_abs((s)) -#define scale(s,v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v) +#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nvfx_sr_neg((s)) +#define abs(s) nvfx_sr_abs((s)) +#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v) #define MAX_CONSTS 128 #define MAX_IMM 32 @@ -36,8 +36,8 @@ struct nv40_fpc { unsigned r_temps; unsigned r_temps_discard; - struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; - struct nv40_sreg *r_temp; + struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nvfx_sreg *r_temp; int num_regs; @@ -50,11 +50,11 @@ struct nv40_fpc { } consts[MAX_CONSTS]; int nr_consts; - struct nv40_sreg imm[MAX_IMM]; + struct nvfx_sreg imm[MAX_IMM]; unsigned nr_imm; }; -static INLINE struct nv40_sreg +static INLINE struct nvfx_sreg temp(struct nv40_fpc *fpc) { int idx = ffs(~fpc->r_temps) - 1; @@ -62,12 +62,12 @@ temp(struct nv40_fpc *fpc) if (idx < 0) { NOUVEAU_ERR("out of temps!!\n"); assert(0); - return nv40_sr(NV40SR_TEMP, 0); + return nvfx_sr(NVFXSR_TEMP, 0); } fpc->r_temps |= (1 << idx); fpc->r_temps_discard |= (1 << idx); - return nv40_sr(NV40SR_TEMP, idx); + return nvfx_sr(NVFXSR_TEMP, idx); } static INLINE void @@ -77,7 +77,7 @@ release_temps(struct nv40_fpc *fpc) fpc->r_temps_discard = 0; } -static INLINE struct nv40_sreg +static INLINE struct nvfx_sreg constant(struct nv40_fpc *fpc, int pipe, float vals[4]) { int idx; @@ -89,14 +89,14 @@ constant(struct nv40_fpc *fpc, int pipe, float vals[4]) fpc->consts[idx].pipe = pipe; if (pipe == -1) memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); - return nv40_sr(NV40SR_CONST, idx); + return nvfx_sr(NVFXSR_CONST, idx); } #define arith(cc,s,o,d,m,s0,s1,s2) \ - nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \ + nv40_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \ (d), (m), (s0), (s1), (s2)) #define tex(cc,s,o,u,d,m,s0,s1,s2) \ - nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \ + nv40_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \ (d), (m), (s0), none, none) static void @@ -109,25 +109,25 @@ grow_insns(struct nv40_fpc *fpc, int size) } static void -emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) +emit_src(struct nv40_fpc *fpc, int pos, struct nvfx_sreg src) { struct nvfx_fragment_program *fp = fpc->fp; uint32_t *hw = &fp->insn[fpc->inst_offset]; uint32_t sr = 0; switch (src.type) { - case NV40SR_INPUT: - sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); - hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT); + case NVFXSR_INPUT: + sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); + hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT); break; - case NV40SR_OUTPUT: - sr |= NV40_FP_REG_SRC_HALF; + case NVFXSR_OUTPUT: + sr |= NVFX_FP_REG_SRC_HALF; /* fall-through */ - case NV40SR_TEMP: - sr |= (NV40_FP_REG_TYPE_TEMP << NV40_FP_REG_TYPE_SHIFT); - sr |= (src.index << NV40_FP_REG_SRC_SHIFT); + case NVFXSR_TEMP: + sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT); + sr |= (src.index << NVFX_FP_REG_SRC_SHIFT); break; - case NV40SR_CONST: + case NVFXSR_CONST: if (!fpc->have_const) { grow_insns(fpc, 4); fpc->have_const = 1; @@ -149,61 +149,61 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) sizeof(uint32_t) * 4); } - sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT); + sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT); break; - case NV40SR_NONE: - sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); + case NVFXSR_NONE: + sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); break; default: assert(0); } if (src.negate) - sr |= NV40_FP_REG_NEGATE; + sr |= NVFX_FP_REG_NEGATE; if (src.abs) hw[1] |= (1 << (29 + pos)); - sr |= ((src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT) | - (src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT) | - (src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT) | - (src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT)); + sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) | + (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) | + (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) | + (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT)); hw[pos + 1] |= sr; } static void -emit_dst(struct nv40_fpc *fpc, struct nv40_sreg dst) +emit_dst(struct nv40_fpc *fpc, struct nvfx_sreg dst) { struct nvfx_fragment_program *fp = fpc->fp; uint32_t *hw = &fp->insn[fpc->inst_offset]; switch (dst.type) { - case NV40SR_TEMP: + case NVFXSR_TEMP: if (fpc->num_regs < (dst.index + 1)) fpc->num_regs = dst.index + 1; break; - case NV40SR_OUTPUT: + case NVFXSR_OUTPUT: if (dst.index == 1) { fp->fp_control |= 0xe; } else { - hw[0] |= NV40_FP_OP_OUT_REG_HALF; + hw[0] |= NVFX_FP_OP_OUT_REG_HALF; } break; - case NV40SR_NONE: + case NVFXSR_NONE: hw[0] |= (1 << 30); break; default: assert(0); } - hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT); + hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT); } static void nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, - struct nv40_sreg dst, int mask, - struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) + struct nvfx_sreg dst, int mask, + struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2) { struct nvfx_fragment_program *fp = fpc->fp; uint32_t *hw; @@ -214,22 +214,22 @@ nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, hw = &fp->insn[fpc->inst_offset]; memset(hw, 0, sizeof(uint32_t) * 4); - if (op == NV40_FP_OP_OPCODE_KIL) + if (op == NVFX_FP_OP_OPCODE_KIL) fp->fp_control |= NV40TCL_FP_CONTROL_KIL; - hw[0] |= (op << NV40_FP_OP_OPCODE_SHIFT); - hw[0] |= (mask << NV40_FP_OP_OUTMASK_SHIFT); - hw[2] |= (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT); + hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT); + hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT); + hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT); if (sat) - hw[0] |= NV40_FP_OP_OUT_SAT; + hw[0] |= NVFX_FP_OP_OUT_SAT; if (dst.cc_update) - hw[0] |= NV40_FP_OP_COND_WRITE_ENABLE; - hw[1] |= (dst.cc_test << NV40_FP_OP_COND_SHIFT); - hw[1] |= ((dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) | - (dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) | - (dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) | - (dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT)); + hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE; + hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT); + hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) | + (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) | + (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) | + (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT)); emit_dst(fpc, dst); emit_src(fpc, 0, s0); @@ -239,25 +239,25 @@ nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, static void nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit, - struct nv40_sreg dst, int mask, - struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) + struct nvfx_sreg dst, int mask, + struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2) { struct nvfx_fragment_program *fp = fpc->fp; nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); - fp->insn[fpc->inst_offset] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT); + fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT); fp->samplers |= (1 << unit); } -static INLINE struct nv40_sreg +static INLINE struct nvfx_sreg tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) { - struct nv40_sreg src; + struct nvfx_sreg src; switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nv40_sr(NV40SR_INPUT, + src = nvfx_sr(NVFXSR_INPUT, fpc->attrib_map[fsrc->Register.Index]); break; case TGSI_FILE_CONSTANT: @@ -288,7 +288,7 @@ tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) return src; } -static INLINE struct nv40_sreg +static INLINE struct nvfx_sreg tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) { switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: @@ -296,10 +296,10 @@ tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) { case TGSI_FILE_TEMPORARY: return fpc->r_temp[fdst->Register.Index]; case TGSI_FILE_NULL: - return nv40_sr(NV40SR_NONE, 0); + return nvfx_sr(NVFXSR_NONE, 0); default: NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); - return nv40_sr(NV40SR_NONE, 0); + return nvfx_sr(NVFXSR_NONE, 0); } } @@ -317,10 +317,10 @@ tgsi_mask(uint tgsi) static boolean src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc, - struct nv40_sreg *src) + struct nvfx_sreg *src) { - const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); - struct nv40_sreg tgsi = tgsi_src(fpc, fsrc); + const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); + struct nvfx_sreg tgsi = tgsi_src(fpc, fsrc); uint mask = 0; uint c; @@ -352,8 +352,8 @@ static boolean nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, const struct tgsi_full_instruction *finst) { - const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); - struct nv40_sreg src[3], dst, tmp; + const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); + struct nvfx_sreg src[3], dst, tmp; int mask, sat, unit; int ai = -1, ci = -1, ii = -1; int i; @@ -445,12 +445,12 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_CMP: - tmp = nv40_sr(NV40SR_NONE, 0); + tmp = nvfx_sr(NVFXSR_NONE, 0); tmp.cc_update = 1; arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); - dst.cc_test = NV40_VP_INST_COND_GE; + dst.cc_test = NVFX_VP_INST_COND_GE; arith(fpc, sat, MOV, dst, mask, src[2], none, none); - dst.cc_test = NV40_VP_INST_COND_LT; + dst.cc_test = NVFX_VP_INST_COND_LT; arith(fpc, sat, MOV, dst, mask, src[1], none, none); break; case TGSI_OPCODE_COS: @@ -512,10 +512,10 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, arith(fpc, 0, KIL, none, 0, none, none, none); break; case TGSI_OPCODE_KIL: - dst = nv40_sr(NV40SR_NONE, 0); + dst = nvfx_sr(NVFXSR_NONE, 0); dst.cc_update = 1; arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); - dst.cc_update = 0; dst.cc_test = NV40_FP_OP_COND_LT; + dst.cc_update = 0; dst.cc_test = NVFX_FP_OP_COND_LT; arith(fpc, 0, KIL, dst, 0, none, none, none); break; case TGSI_OPCODE_LG2: @@ -662,25 +662,25 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: - hw = NV40_FP_OP_INPUT_SRC_POSITION; + hw = NVFX_FP_OP_INPUT_SRC_POSITION; break; case TGSI_SEMANTIC_COLOR: if (fdec->Semantic.Index == 0) { - hw = NV40_FP_OP_INPUT_SRC_COL0; + hw = NVFX_FP_OP_INPUT_SRC_COL0; } else if (fdec->Semantic.Index == 1) { - hw = NV40_FP_OP_INPUT_SRC_COL1; + hw = NVFX_FP_OP_INPUT_SRC_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); return FALSE; } break; case TGSI_SEMANTIC_FOG: - hw = NV40_FP_OP_INPUT_SRC_FOGC; + hw = NVFX_FP_OP_INPUT_SRC_FOGC; break; case TGSI_SEMANTIC_GENERIC: if (fdec->Semantic.Index <= 7) { - hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic. + hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic. Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); @@ -723,7 +723,7 @@ nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, return FALSE; } - fpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); + fpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw); fpc->r_temps |= (1 << hw); return TRUE; } @@ -787,7 +787,7 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc) tgsi_parse_free(&p); if (++high_temp) { - fpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg)); + fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg)); for (i = 0; i < high_temp; i++) fpc->r_temp[i] = temp(fpc); fpc->r_temps_discard = 0; diff --git a/src/gallium/drivers/nv40/nv40_shader.h b/src/gallium/drivers/nv40/nv40_shader.h index 854dccf5486..8d28137e9de 100644 --- a/src/gallium/drivers/nv40/nv40_shader.h +++ b/src/gallium/drivers/nv40/nv40_shader.h @@ -48,14 +48,6 @@ #define NV40_VP_INST_COND_TEST_ENABLE (1 << 13) #define NV40_VP_INST_COND_SHIFT 10 #define NV40_VP_INST_COND_MASK (0x7 << 10) -# define NV40_VP_INST_COND_FL 0 -# define NV40_VP_INST_COND_LT 1 -# define NV40_VP_INST_COND_EQ 2 -# define NV40_VP_INST_COND_LE 3 -# define NV40_VP_INST_COND_GT 4 -# define NV40_VP_INST_COND_NE 5 -# define NV40_VP_INST_COND_GE 6 -# define NV40_VP_INST_COND_TR 7 #define NV40_VP_INST_COND_SWZ_X_SHIFT 8 #define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8) #define NV40_VP_INST_COND_SWZ_Y_SHIFT 6 @@ -84,63 +76,12 @@ /* ---- OPCODE BITS 95:64 / data DWORD 1 --- */ #define NV40_VP_INST_VEC_OPCODE_SHIFT 22 #define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22) -# define NV40_VP_INST_OP_NOP 0x00 -# define NV40_VP_INST_OP_MOV 0x01 -# define NV40_VP_INST_OP_MUL 0x02 -# define NV40_VP_INST_OP_ADD 0x03 -# define NV40_VP_INST_OP_MAD 0x04 -# define NV40_VP_INST_OP_DP3 0x05 -# define NV40_VP_INST_OP_DPH 0x06 -# define NV40_VP_INST_OP_DP4 0x07 -# define NV40_VP_INST_OP_DST 0x08 -# define NV40_VP_INST_OP_MIN 0x09 -# define NV40_VP_INST_OP_MAX 0x0A -# define NV40_VP_INST_OP_SLT 0x0B -# define NV40_VP_INST_OP_SGE 0x0C -# define NV40_VP_INST_OP_ARL 0x0D -# define NV40_VP_INST_OP_FRC 0x0E -# define NV40_VP_INST_OP_FLR 0x0F -# define NV40_VP_INST_OP_SEQ 0x10 -# define NV40_VP_INST_OP_SFL 0x11 -# define NV40_VP_INST_OP_SGT 0x12 -# define NV40_VP_INST_OP_SLE 0x13 -# define NV40_VP_INST_OP_SNE 0x14 -# define NV40_VP_INST_OP_STR 0x15 -# define NV40_VP_INST_OP_SSG 0x16 -# define NV40_VP_INST_OP_ARR 0x17 -# define NV40_VP_INST_OP_ARA 0x18 -# define NV40_VP_INST_OP_TXL 0x19 #define NV40_VP_INST_SCA_OPCODE_SHIFT 27 #define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27) -# define NV40_VP_INST_OP_NOP 0x00 -# define NV40_VP_INST_OP_MOV 0x01 -# define NV40_VP_INST_OP_RCP 0x02 -# define NV40_VP_INST_OP_RCC 0x03 -# define NV40_VP_INST_OP_RSQ 0x04 -# define NV40_VP_INST_OP_EXP 0x05 -# define NV40_VP_INST_OP_LOG 0x06 -# define NV40_VP_INST_OP_LIT 0x07 -# define NV40_VP_INST_OP_BRA 0x09 -# define NV40_VP_INST_OP_CAL 0x0B -# define NV40_VP_INST_OP_RET 0x0C -# define NV40_VP_INST_OP_LG2 0x0D -# define NV40_VP_INST_OP_EX2 0x0E -# define NV40_VP_INST_OP_SIN 0x0F -# define NV40_VP_INST_OP_COS 0x10 -# define NV40_VP_INST_OP_PUSHA 0x13 -# define NV40_VP_INST_OP_POPA 0x14 #define NV40_VP_INST_CONST_SRC_SHIFT 12 #define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12) #define NV40_VP_INST_INPUT_SRC_SHIFT 8 #define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8) -# define NV40_VP_INST_IN_POS 0 -# define NV40_VP_INST_IN_WEIGHT 1 -# define NV40_VP_INST_IN_NORMAL 2 -# define NV40_VP_INST_IN_COL0 3 -# define NV40_VP_INST_IN_COL1 4 -# define NV40_VP_INST_IN_FOGC 5 -# define NV40_VP_INST_IN_TC0 8 -# define NV40_VP_INST_IN_TC(n) (8+n) #define NV40_VP_INST_SRC0H_SHIFT 0 #define NV40_VP_INST_SRC0H_MASK (0xFF << 0) #define NV40_VP_INST1_KNOWN ( \ @@ -194,7 +135,6 @@ # define NV40_VP_INST_DEST_TC(n) (7+n) # define NV40_VP_INST_DEST_TEMP 0x1F #define NV40_VP_INST_INDEX_CONST (1 << 1) -#define NV40_VP_INST_LAST (1 << 0) #define NV40_VP_INST3_KNOWN ( \ NV40_VP_INST_SRC2L_MASK |\ NV40_VP_INST_SCA_WRITEMASK_MASK |\ @@ -232,325 +172,7 @@ # define NV40_VP_SRC_REG_TYPE_INPUT 2 # define NV40_VP_SRC_REG_TYPE_CONST 3 +#include "nvfx_shader.h" -/* - * Each fragment program opcode appears to be comprised of 4 32-bit values. - * - * 0 - Opcode, output reg/mask, ATTRIB source - * 1 - Source 0 - * 2 - Source 1 - * 3 - Source 2 - * - * There appears to be no special difference between result regs and temp regs. - * result.color == R0.xyzw - * result.depth == R1.z - * When the fragprog contains instructions to write depth, - * NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1. - * - * Constants are inserted directly after the instruction that uses them. - * - * It appears that it's not possible to use two input registers in one - * instruction as the input sourcing is done in the instruction dword - * and not the source selection dwords. As such instructions such as: - * - * ADD result.color, fragment.color, fragment.texcoord[0]; - * - * must be split into two MOV's and then an ADD (nvidia does this) but - * I'm not sure why it's not just one MOV and then source the second input - * in the ADD instruction.. - * - * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary - * negation requires multiplication with a const. - * - * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and - * SWIZZLE_ONE. - * - * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as - * SWIZZLE_ZERO is implemented simply by not writing to the relevant components - * of the destination. - * - * Looping - * Loops appear to be fairly expensive on NV40 at least, the proprietary - * driver goes to a lot of effort to avoid using the native looping - * instructions. If the total number of *executed* instructions between - * REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop. - * The maximum loop count is 255. - * - * Conditional execution - * TODO - * - * Non-native instructions: - * LIT - * LRP - MAD+MAD - * SUB - ADD, negate second source - * RSQ - LG2 + EX2 - * POW - LG2 + MUL + EX2 - * SCS - COS + SIN - * XPD - * DP2 - MUL + ADD - * NRM - */ - -//== Opcode / Destination selection == -#define NV40_FP_OP_PROGRAM_END (1 << 0) -#define NV40_FP_OP_OUT_REG_SHIFT 1 -#define NV40_FP_OP_OUT_REG_MASK (63 << 1) -/* Needs to be set when writing outputs to get expected result.. */ -#define NV40_FP_OP_OUT_REG_HALF (1 << 7) -#define NV40_FP_OP_COND_WRITE_ENABLE (1 << 8) -#define NV40_FP_OP_OUTMASK_SHIFT 9 -#define NV40_FP_OP_OUTMASK_MASK (0xF << 9) -# define NV40_FP_OP_OUT_X (1 << 9) -# define NV40_FP_OP_OUT_Y (1 <<10) -# define NV40_FP_OP_OUT_Z (1 <<11) -# define NV40_FP_OP_OUT_W (1 <<12) -/* Uncertain about these, especially the input_src values.. it's possible that - * they can be dynamically changed. - */ -#define NV40_FP_OP_INPUT_SRC_SHIFT 13 -#define NV40_FP_OP_INPUT_SRC_MASK (15 << 13) -# define NV40_FP_OP_INPUT_SRC_POSITION 0x0 -# define NV40_FP_OP_INPUT_SRC_COL0 0x1 -# define NV40_FP_OP_INPUT_SRC_COL1 0x2 -# define NV40_FP_OP_INPUT_SRC_FOGC 0x3 -# define NV40_FP_OP_INPUT_SRC_TC0 0x4 -# define NV40_FP_OP_INPUT_SRC_TC(n) (0x4 + n) -# define NV40_FP_OP_INPUT_SRC_FACING 0xE -#define NV40_FP_OP_TEX_UNIT_SHIFT 17 -#define NV40_FP_OP_TEX_UNIT_MASK (0xF << 17) -#define NV40_FP_OP_PRECISION_SHIFT 22 -#define NV40_FP_OP_PRECISION_MASK (3 << 22) -# define NV40_FP_PRECISION_FP32 0 -# define NV40_FP_PRECISION_FP16 1 -# define NV40_FP_PRECISION_FX12 2 -#define NV40_FP_OP_OPCODE_SHIFT 24 -#define NV40_FP_OP_OPCODE_MASK (0x3F << 24) -# define NV40_FP_OP_OPCODE_NOP 0x00 -# define NV40_FP_OP_OPCODE_MOV 0x01 -# define NV40_FP_OP_OPCODE_MUL 0x02 -# define NV40_FP_OP_OPCODE_ADD 0x03 -# define NV40_FP_OP_OPCODE_MAD 0x04 -# define NV40_FP_OP_OPCODE_DP3 0x05 -# define NV40_FP_OP_OPCODE_DP4 0x06 -# define NV40_FP_OP_OPCODE_DST 0x07 -# define NV40_FP_OP_OPCODE_MIN 0x08 -# define NV40_FP_OP_OPCODE_MAX 0x09 -# define NV40_FP_OP_OPCODE_SLT 0x0A -# define NV40_FP_OP_OPCODE_SGE 0x0B -# define NV40_FP_OP_OPCODE_SLE 0x0C -# define NV40_FP_OP_OPCODE_SGT 0x0D -# define NV40_FP_OP_OPCODE_SNE 0x0E -# define NV40_FP_OP_OPCODE_SEQ 0x0F -# define NV40_FP_OP_OPCODE_FRC 0x10 -# define NV40_FP_OP_OPCODE_FLR 0x11 -# define NV40_FP_OP_OPCODE_KIL 0x12 -# define NV40_FP_OP_OPCODE_PK4B 0x13 -# define NV40_FP_OP_OPCODE_UP4B 0x14 -/* DDX/DDY can only write to XY */ -# define NV40_FP_OP_OPCODE_DDX 0x15 -# define NV40_FP_OP_OPCODE_DDY 0x16 -# define NV40_FP_OP_OPCODE_TEX 0x17 -# define NV40_FP_OP_OPCODE_TXP 0x18 -# define NV40_FP_OP_OPCODE_TXD 0x19 -# define NV40_FP_OP_OPCODE_RCP 0x1A -# define NV40_FP_OP_OPCODE_EX2 0x1C -# define NV40_FP_OP_OPCODE_LG2 0x1D -# define NV40_FP_OP_OPCODE_STR 0x20 -# define NV40_FP_OP_OPCODE_SFL 0x21 -# define NV40_FP_OP_OPCODE_COS 0x22 -# define NV40_FP_OP_OPCODE_SIN 0x23 -# define NV40_FP_OP_OPCODE_PK2H 0x24 -# define NV40_FP_OP_OPCODE_UP2H 0x25 -# define NV40_FP_OP_OPCODE_PK4UB 0x27 -# define NV40_FP_OP_OPCODE_UP4UB 0x28 -# define NV40_FP_OP_OPCODE_PK2US 0x29 -# define NV40_FP_OP_OPCODE_UP2US 0x2A -# define NV40_FP_OP_OPCODE_DP2A 0x2E -# define NV40_FP_OP_OPCODE_TXL 0x2F -# define NV40_FP_OP_OPCODE_TXB 0x31 -# define NV40_FP_OP_OPCODE_DIV 0x3A -# define NV40_FP_OP_OPCODE_UNK_LIT 0x3C -/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ -# define NV40_FP_OP_BRA_OPCODE_BRK 0x0 -# define NV40_FP_OP_BRA_OPCODE_CAL 0x1 -# define NV40_FP_OP_BRA_OPCODE_IF 0x2 -# define NV40_FP_OP_BRA_OPCODE_LOOP 0x3 -# define NV40_FP_OP_BRA_OPCODE_REP 0x4 -# define NV40_FP_OP_BRA_OPCODE_RET 0x5 -#define NV40_FP_OP_OUT_SAT (1 << 31) - -/* high order bits of SRC0 */ -#define NV40_FP_OP_OUT_ABS (1 << 29) -#define NV40_FP_OP_COND_SWZ_W_SHIFT 27 -#define NV40_FP_OP_COND_SWZ_W_MASK (3 << 27) -#define NV40_FP_OP_COND_SWZ_Z_SHIFT 25 -#define NV40_FP_OP_COND_SWZ_Z_MASK (3 << 25) -#define NV40_FP_OP_COND_SWZ_Y_SHIFT 23 -#define NV40_FP_OP_COND_SWZ_Y_MASK (3 << 23) -#define NV40_FP_OP_COND_SWZ_X_SHIFT 21 -#define NV40_FP_OP_COND_SWZ_X_MASK (3 << 21) -#define NV40_FP_OP_COND_SWZ_ALL_SHIFT 21 -#define NV40_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) -#define NV40_FP_OP_COND_SHIFT 18 -#define NV40_FP_OP_COND_MASK (0x07 << 18) -# define NV40_FP_OP_COND_FL 0 -# define NV40_FP_OP_COND_LT 1 -# define NV40_FP_OP_COND_EQ 2 -# define NV40_FP_OP_COND_LE 3 -# define NV40_FP_OP_COND_GT 4 -# define NV40_FP_OP_COND_NE 5 -# define NV40_FP_OP_COND_GE 6 -# define NV40_FP_OP_COND_TR 7 - -/* high order bits of SRC1 */ -#define NV40_FP_OP_OPCODE_IS_BRANCH (1<<31) -#define NV40_FP_OP_DST_SCALE_SHIFT 28 -#define NV40_FP_OP_DST_SCALE_MASK (3 << 28) -#define NV40_FP_OP_DST_SCALE_1X 0 -#define NV40_FP_OP_DST_SCALE_2X 1 -#define NV40_FP_OP_DST_SCALE_4X 2 -#define NV40_FP_OP_DST_SCALE_8X 3 -#define NV40_FP_OP_DST_SCALE_INV_2X 5 -#define NV40_FP_OP_DST_SCALE_INV_4X 6 -#define NV40_FP_OP_DST_SCALE_INV_8X 7 - -/* SRC1 LOOP */ -#define NV40_FP_OP_LOOP_INCR_SHIFT 19 -#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19) -#define NV40_FP_OP_LOOP_INDEX_SHIFT 10 -#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10) -#define NV40_FP_OP_LOOP_COUNT_SHIFT 2 -#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2) - -/* SRC1 IF */ -#define NV40_FP_OP_ELSE_ID_SHIFT 2 -#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2) - -/* SRC1 CAL */ -#define NV40_FP_OP_IADDR_SHIFT 2 -#define NV40_FP_OP_IADDR_MASK (0xFF << 2) - -/* SRC1 REP - * I have no idea why there are 3 count values here.. but they - * have always been filled with the same value in my tests so - * far.. - */ -#define NV40_FP_OP_REP_COUNT1_SHIFT 2 -#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2) -#define NV40_FP_OP_REP_COUNT2_SHIFT 10 -#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10) -#define NV40_FP_OP_REP_COUNT3_SHIFT 19 -#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19) - -/* SRC2 REP/IF */ -#define NV40_FP_OP_END_ID_SHIFT 2 -#define NV40_FP_OP_END_ID_MASK (0xFF << 2) - -// SRC2 high-order -#define NV40_FP_OP_INDEX_INPUT (1 << 30) -#define NV40_FP_OP_ADDR_INDEX_SHIFT 19 -#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19) - -//== Register selection == -#define NV40_FP_REG_TYPE_SHIFT 0 -#define NV40_FP_REG_TYPE_MASK (3 << 0) -# define NV40_FP_REG_TYPE_TEMP 0 -# define NV40_FP_REG_TYPE_INPUT 1 -# define NV40_FP_REG_TYPE_CONST 2 -#define NV40_FP_REG_SRC_SHIFT 2 -#define NV40_FP_REG_SRC_MASK (63 << 2) -#define NV40_FP_REG_SRC_HALF (1 << 8) -#define NV40_FP_REG_SWZ_ALL_SHIFT 9 -#define NV40_FP_REG_SWZ_ALL_MASK (255 << 9) -#define NV40_FP_REG_SWZ_X_SHIFT 9 -#define NV40_FP_REG_SWZ_X_MASK (3 << 9) -#define NV40_FP_REG_SWZ_Y_SHIFT 11 -#define NV40_FP_REG_SWZ_Y_MASK (3 << 11) -#define NV40_FP_REG_SWZ_Z_SHIFT 13 -#define NV40_FP_REG_SWZ_Z_MASK (3 << 13) -#define NV40_FP_REG_SWZ_W_SHIFT 15 -#define NV40_FP_REG_SWZ_W_MASK (3 << 15) -# define NV40_FP_SWIZZLE_X 0 -# define NV40_FP_SWIZZLE_Y 1 -# define NV40_FP_SWIZZLE_Z 2 -# define NV40_FP_SWIZZLE_W 3 -#define NV40_FP_REG_NEGATE (1 << 17) - -#ifndef NV40_SHADER_NO_FUCKEDNESS -#define NV40SR_NONE 0 -#define NV40SR_OUTPUT 1 -#define NV40SR_INPUT 2 -#define NV40SR_TEMP 3 -#define NV40SR_CONST 4 - -struct nv40_sreg { - int type; - int index; - - int dst_scale; - - int negate; - int abs; - int swz[4]; - - int cc_update; - int cc_update_reg; - int cc_test; - int cc_test_reg; - int cc_swz[4]; -}; - -static INLINE struct nv40_sreg -nv40_sr(int type, int index) -{ - struct nv40_sreg temp = { - .type = type, - .index = index, - .dst_scale = DEF_SCALE, - .abs = 0, - .negate = 0, - .swz = { 0, 1, 2, 3 }, - .cc_update = 0, - .cc_update_reg = 0, - .cc_test = DEF_CTEST, - .cc_test_reg = 0, - .cc_swz = { 0, 1, 2, 3 }, - }; - return temp; -} - -static INLINE struct nv40_sreg -nv40_sr_swz(struct nv40_sreg src, int x, int y, int z, int w) -{ - struct nv40_sreg dst = src; - - dst.swz[SWZ_X] = src.swz[x]; - dst.swz[SWZ_Y] = src.swz[y]; - dst.swz[SWZ_Z] = src.swz[z]; - dst.swz[SWZ_W] = src.swz[w]; - return dst; -} - -static INLINE struct nv40_sreg -nv40_sr_neg(struct nv40_sreg src) -{ - src.negate = !src.negate; - return src; -} - -static INLINE struct nv40_sreg -nv40_sr_abs(struct nv40_sreg src) -{ - src.abs = 1; - return src; -} - -static INLINE struct nv40_sreg -nv40_sr_scale(struct nv40_sreg src, int scale) -{ - src.dst_scale = scale; - return src; -} #endif -#endif diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index a199f0766e4..752cd0d1b3d 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -34,9 +34,9 @@ #define DEF_CTEST 0 #include "nv40_shader.h" -#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv40_sr_neg((s)) -#define abs(s) nv40_sr_abs((s)) +#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nvfx_sr_neg((s)) +#define abs(s) nvfx_sr_abs((s)) #define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) @@ -47,17 +47,17 @@ struct nv40_vpc { unsigned r_temps; unsigned r_temps_discard; - struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; - struct nv40_sreg *r_address; - struct nv40_sreg *r_temp; + struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nvfx_sreg *r_address; + struct nvfx_sreg *r_temp; - struct nv40_sreg *imm; + struct nvfx_sreg *imm; unsigned nr_imm; unsigned hpos_idx; }; -static struct nv40_sreg +static struct nvfx_sreg temp(struct nv40_vpc *vpc) { int idx = ffs(~vpc->r_temps) - 1; @@ -65,12 +65,12 @@ temp(struct nv40_vpc *vpc) if (idx < 0) { NOUVEAU_ERR("out of temps!!\n"); assert(0); - return nv40_sr(NV40SR_TEMP, 0); + return nvfx_sr(NVFXSR_TEMP, 0); } vpc->r_temps |= (1 << idx); vpc->r_temps_discard |= (1 << idx); - return nv40_sr(NV40SR_TEMP, idx); + return nvfx_sr(NVFXSR_TEMP, idx); } static INLINE void @@ -80,7 +80,7 @@ release_temps(struct nv40_vpc *vpc) vpc->r_temps_discard = 0; } -static struct nv40_sreg +static struct nvfx_sreg constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w) { struct nvfx_vertex_program *vp = vpc->vp; @@ -90,7 +90,7 @@ constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w) if (pipe >= 0) { for (idx = 0; idx < vp->nr_consts; idx++) { if (vp->consts[idx].index == pipe) - return nv40_sr(NV40SR_CONST, idx); + return nvfx_sr(NVFXSR_CONST, idx); } } @@ -103,37 +103,37 @@ constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w) vpd->value[1] = y; vpd->value[2] = z; vpd->value[3] = w; - return nv40_sr(NV40SR_CONST, idx); + return nvfx_sr(NVFXSR_CONST, idx); } #define arith(cc,s,o,d,m,s0,s1,s2) \ - nv40_vp_arith((cc), (s), NV40_VP_INST_##o, (d), (m), (s0), (s1), (s2)) + nv40_vp_arith((cc), NVFX_VP_INST_SLOT_##s, NVFX_VP_INST_##s##_OP_##o, (d), (m), (s0), (s1), (s2)) static void -emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src) +emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nvfx_sreg src) { struct nvfx_vertex_program *vp = vpc->vp; uint32_t sr = 0; switch (src.type) { - case NV40SR_TEMP: + case NVFXSR_TEMP: sr |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT); sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT); break; - case NV40SR_INPUT: + case NVFXSR_INPUT: sr |= (NV40_VP_SRC_REG_TYPE_INPUT << NV40_VP_SRC_REG_TYPE_SHIFT); vp->ir |= (1 << src.index); hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT); break; - case NV40SR_CONST: + case NVFXSR_CONST: sr |= (NV40_VP_SRC_REG_TYPE_CONST << NV40_VP_SRC_REG_TYPE_SHIFT); assert(vpc->vpi->const_index == -1 || vpc->vpi->const_index == src.index); vpc->vpi->const_index = src.index; break; - case NV40SR_NONE: + case NVFXSR_NONE: sr |= (NV40_VP_SRC_REG_TYPE_INPUT << NV40_VP_SRC_REG_TYPE_SHIFT); break; @@ -174,12 +174,12 @@ emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src) } static void -emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst) +emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nvfx_sreg dst) { struct nvfx_vertex_program *vp = vpc->vp; switch (dst.type) { - case NV40SR_TEMP: + case NVFXSR_TEMP: hw[3] |= NV40_VP_INST_DEST_MASK; if (slot == 0) { hw[0] |= (dst.index << @@ -189,7 +189,7 @@ emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst) NV40_VP_INST_SCA_DEST_TEMP_SHIFT); } break; - case NV40SR_OUTPUT: + case NVFXSR_OUTPUT: switch (dst.index) { case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; @@ -255,9 +255,9 @@ emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst) static void nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op, - struct nv40_sreg dst, int mask, - struct nv40_sreg s0, struct nv40_sreg s1, - struct nv40_sreg s2) + struct nvfx_sreg dst, int mask, + struct nvfx_sreg s0, struct nvfx_sreg s1, + struct nvfx_sreg s2) { struct nvfx_vertex_program *vp = vpc->vp; uint32_t *hw; @@ -269,7 +269,7 @@ nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op, hw = vpc->vpi->data; - hw[0] |= (NV40_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT); + hw[0] |= (NVFX_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT); hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) | (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) | (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) | @@ -291,13 +291,13 @@ nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op, emit_src(vpc, hw, 2, s2); } -static INLINE struct nv40_sreg +static INLINE struct nvfx_sreg tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { - struct nv40_sreg src; + struct nvfx_sreg src; switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nv40_sr(NV40SR_INPUT, fsrc->Register.Index); + src = nvfx_sr(NVFXSR_INPUT, fsrc->Register.Index); break; case TGSI_FILE_CONSTANT: src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); @@ -322,9 +322,9 @@ tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { return src; } -static INLINE struct nv40_sreg +static INLINE struct nvfx_sreg tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) { - struct nv40_sreg dst; + struct nvfx_sreg dst; switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: @@ -358,10 +358,10 @@ tgsi_mask(uint tgsi) static boolean src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc, - struct nv40_sreg *src) + struct nvfx_sreg *src) { - const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); - struct nv40_sreg tgsi = tgsi_src(vpc, fsrc); + const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); + struct nvfx_sreg tgsi = tgsi_src(vpc, fsrc); uint mask = 0; uint c; @@ -384,7 +384,7 @@ src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc, *src = temp(vpc); if (mask) - arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none); + arith(vpc, VEC, MOV, *src, mask, tgsi, none, none); return FALSE; } @@ -393,8 +393,8 @@ static boolean nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, const struct tgsi_full_instruction *finst) { - struct nv40_sreg src[3], dst, tmp; - struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + struct nvfx_sreg src[3], dst, tmp; + struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); int mask; int ai = -1, ci = -1, ii = -1; int i; @@ -434,7 +434,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + arith(vpc, VEC, MOV, src[i], MASK_ALL, tgsi_src(vpc, fsrc), none, none); } break; @@ -445,7 +445,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + arith(vpc, VEC, MOV, src[i], MASK_ALL, tgsi_src(vpc, fsrc), none, none); } break; @@ -456,7 +456,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + arith(vpc, VEC, MOV, src[i], MASK_ALL, tgsi_src(vpc, fsrc), none, none); } break; @@ -474,93 +474,93 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: - arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); + arith(vpc, VEC, MOV, dst, mask, abs(src[0]), none, none); break; case TGSI_OPCODE_ADD: - arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); + arith(vpc, VEC, ADD, dst, mask, src[0], none, src[1]); break; case TGSI_OPCODE_ARL: - arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); + arith(vpc, VEC, ARL, dst, mask, src[0], none, none); break; case TGSI_OPCODE_DP3: - arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); + arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_DP4: - arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); + arith(vpc, VEC, DP4, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_DPH: - arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); + arith(vpc, VEC, DPH, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_DST: - arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); + arith(vpc, VEC, DST, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_EX2: - arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); + arith(vpc, SCA, EX2, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_EXP: - arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); + arith(vpc, SCA, EXP, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_FLR: - arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); + arith(vpc, VEC, FLR, dst, mask, src[0], none, none); break; case TGSI_OPCODE_FRC: - arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); + arith(vpc, VEC, FRC, dst, mask, src[0], none, none); break; case TGSI_OPCODE_LG2: - arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); + arith(vpc, SCA, LG2, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_LIT: - arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); + arith(vpc, SCA, LIT, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_LOG: - arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); + arith(vpc, SCA, LOG, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_MAD: - arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); + arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]); break; case TGSI_OPCODE_MAX: - arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); + arith(vpc, VEC, MAX, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_MIN: - arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); + arith(vpc, VEC, MIN, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_MOV: - arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); + arith(vpc, VEC, MOV, dst, mask, src[0], none, none); break; case TGSI_OPCODE_MUL: - arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); + arith(vpc, VEC, MUL, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_POW: tmp = temp(vpc); - arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, + arith(vpc, SCA, LG2, tmp, MASK_X, none, none, swz(src[0], X, X, X, X)); - arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + arith(vpc, VEC, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none); - arith(vpc, 1, OP_EX2, dst, mask, none, none, + arith(vpc, SCA, EX2, dst, mask, none, none, swz(tmp, X, X, X, X)); break; case TGSI_OPCODE_RCP: - arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); + arith(vpc, SCA, RCP, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_RET: break; case TGSI_OPCODE_RSQ: - arith(vpc, 1, OP_RSQ, dst, mask, none, none, abs(src[0])); + arith(vpc, SCA, RSQ, dst, mask, none, none, abs(src[0])); break; case TGSI_OPCODE_SGE: - arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); + arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_SLT: - arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); + arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_SUB: - arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); + arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1])); break; case TGSI_OPCODE_XPD: tmp = temp(vpc); - arith(vpc, 0, OP_MUL, tmp, mask, + arith(vpc, VEC, MUL, tmp, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), + arith(vpc, VEC, MAD, dst, (mask & ~MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)); break; @@ -630,7 +630,7 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, return FALSE; } - vpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); + vpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw); return TRUE; } @@ -702,18 +702,18 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) tgsi_parse_free(&p); if (nr_imm) { - vpc->imm = CALLOC(nr_imm, sizeof(struct nv40_sreg)); + vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_sreg)); assert(vpc->imm); } if (++high_temp) { - vpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg)); + vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg)); for (i = 0; i < high_temp; i++) vpc->r_temp[i] = temp(vpc); } if (++high_addr) { - vpc->r_address = CALLOC(high_addr, sizeof(struct nv40_sreg)); + vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_sreg)); for (i = 0; i < high_addr; i++) vpc->r_address[i] = temp(vpc); } @@ -728,7 +728,7 @@ nv40_vertprog_translate(struct nvfx_context *nvfx, { struct tgsi_parse_context parse; struct nv40_vpc *vpc = NULL; - struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); int i; vpc = CALLOC(1, sizeof(struct nv40_vpc)); @@ -785,24 +785,24 @@ nv40_vertprog_translate(struct nvfx_context *nvfx, } /* Write out HPOS if it was redirected to a temp earlier */ - if (vpc->r_result[vpc->hpos_idx].type != NV40SR_OUTPUT) { - struct nv40_sreg hpos = nv40_sr(NV40SR_OUTPUT, + if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) { + struct nvfx_sreg hpos = nvfx_sr(NVFXSR_OUTPUT, NV40_VP_INST_DEST_POS); - struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; + struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx]; - arith(vpc, 0, OP_MOV, hpos, MASK_ALL, htmp, none, none); + arith(vpc, VEC, MOV, hpos, MASK_ALL, htmp, none, none); } /* Insert code to handle user clip planes */ for (i = 0; i < vp->ucp.nr; i++) { - struct nv40_sreg cdst = nv40_sr(NV40SR_OUTPUT, + struct nvfx_sreg cdst = nvfx_sr(NVFXSR_OUTPUT, NV40_VP_INST_DEST_CLIP(i)); - struct nv40_sreg ceqn = constant(vpc, -1, + struct nvfx_sreg ceqn = constant(vpc, -1, nvfx->clip.ucp[i][0], nvfx->clip.ucp[i][1], nvfx->clip.ucp[i][2], nvfx->clip.ucp[i][3]); - struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; + struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx]; unsigned mask; switch (i) { @@ -814,10 +814,10 @@ nv40_vertprog_translate(struct nvfx_context *nvfx, goto out_err; } - arith(vpc, 0, OP_DP4, cdst, mask, htmp, ceqn, none); + arith(vpc, VEC, DP4, cdst, mask, htmp, ceqn, none); } - vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST; + vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; vp->translated = TRUE; out_err: tgsi_parse_free(&parse); diff --git a/src/gallium/drivers/nvfx/nvfx_shader.h b/src/gallium/drivers/nvfx/nvfx_shader.h new file mode 100644 index 00000000000..191131a40a1 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_shader.h @@ -0,0 +1,407 @@ +#ifndef __NVFX_SHADER_H__ +#define __NVFX_SHADER_H__ + +/* this will resolve to either the NV30 or the NV40 version + * depending on the current hardware */ +/* unusual, but very fast and compact method */ +#define NVFX_VP(c) ((NV30_VP_##c) + (nvfx->is_nv4x & ((NV40_VP_##c) - (NV30_VP_##c)))) + +#define NVFX_VP_INST_SLOT_VEC 0 +#define NVFX_VP_INST_SLOT_SCA 1 + +#define NVFX_VP_INST_COND_FL 0 /* guess */ +#define NVFX_VP_INST_COND_LT 1 +#define NVFX_VP_INST_COND_EQ 2 +#define NVFX_VP_INST_COND_LE 3 +#define NVFX_VP_INST_COND_GT 4 +#define NVFX_VP_INST_COND_NE 5 +#define NVFX_VP_INST_COND_GE 6 +#define NVFX_VP_INST_COND_TR 7 /* guess */ + +#define NVFX_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */ +#define NVFX_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */ +#define NVFX_VP_INST_IN_NORMAL 2 +#define NVFX_VP_INST_IN_COL0 3 /* Should probably confirm them all though */ +#define NVFX_VP_INST_IN_COL1 4 +#define NVFX_VP_INST_IN_FOGC 5 +#define NVFX_VP_INST_IN_TC0 8 +#define NVFX_VP_INST_IN_TC(n) (8+n) + +#define NVFX_VP_INST_SCA_OP_NOP 0x00 +#define NVFX_VP_INST_SCA_OP_MOV 0x01 +#define NVFX_VP_INST_SCA_OP_RCP 0x02 +#define NVFX_VP_INST_SCA_OP_RCC 0x03 +#define NVFX_VP_INST_SCA_OP_RSQ 0x04 +#define NVFX_VP_INST_SCA_OP_EXP 0x05 +#define NVFX_VP_INST_SCA_OP_LOG 0x06 +#define NVFX_VP_INST_SCA_OP_LIT 0x07 +#define NVFX_VP_INST_SCA_OP_BRA 0x09 +#define NVFX_VP_INST_SCA_OP_CAL 0x0B +#define NVFX_VP_INST_SCA_OP_RET 0x0C +#define NVFX_VP_INST_SCA_OP_LG2 0x0D +#define NVFX_VP_INST_SCA_OP_EX2 0x0E +#define NVFX_VP_INST_SCA_OP_SIN 0x0F +#define NVFX_VP_INST_SCA_OP_COS 0x10 + +#define NV40_VP_INST_SCA_OP_PUSHA 0x13 +#define NV40_VP_INST_SCA_OP_POPA 0x14 + +#define NVFX_VP_INST_VEC_OP_NOP 0x00 +#define NVFX_VP_INST_VEC_OP_MOV 0x01 +#define NVFX_VP_INST_VEC_OP_MUL 0x02 +#define NVFX_VP_INST_VEC_OP_ADD 0x03 +#define NVFX_VP_INST_VEC_OP_MAD 0x04 +#define NVFX_VP_INST_VEC_OP_DP3 0x05 +#define NVFX_VP_INST_VEC_OP_DPH 0x06 +#define NVFX_VP_INST_VEC_OP_DP4 0x07 +#define NVFX_VP_INST_VEC_OP_DST 0x08 +#define NVFX_VP_INST_VEC_OP_MIN 0x09 +#define NVFX_VP_INST_VEC_OP_MAX 0x0A +#define NVFX_VP_INST_VEC_OP_SLT 0x0B +#define NVFX_VP_INST_VEC_OP_SGE 0x0C +#define NVFX_VP_INST_VEC_OP_ARL 0x0D +#define NVFX_VP_INST_VEC_OP_FRC 0x0E +#define NVFX_VP_INST_VEC_OP_FLR 0x0F +#define NVFX_VP_INST_VEC_OP_SEQ 0x10 +#define NVFX_VP_INST_VEC_OP_SFL 0x11 +#define NVFX_VP_INST_VEC_OP_SGT 0x12 +#define NVFX_VP_INST_VEC_OP_SLE 0x13 +#define NVFX_VP_INST_VEC_OP_SNE 0x14 +#define NVFX_VP_INST_VEC_OP_STR 0x15 +#define NVFX_VP_INST_VEC_OP_SSG 0x16 +#define NVFX_VP_INST_VEC_OP_ARR 0x17 +#define NVFX_VP_INST_VEC_OP_ARA 0x18 + +#define NV40_VP_INST_VEC_OP_TXL 0x19 + +/* DWORD 3 */ +#define NVFX_VP_INST_LAST (1 << 0) + +/* + * Each fragment program opcode appears to be comprised of 4 32-bit values. + * + * 0 - Opcode, output reg/mask, ATTRIB source + * 1 - Source 0 + * 2 - Source 1 + * 3 - Source 2 + * + * There appears to be no special difference between result regs and temp regs. + * result.color == R0.xyzw + * result.depth == R1.z + * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0 + * otherwise it is set to 1. + * + * Constants are inserted directly after the instruction that uses them. + * + * It appears that it's not possible to use two input registers in one + * instruction as the input sourcing is done in the instruction dword + * and not the source selection dwords. As such instructions such as: + * + * ADD result.color, fragment.color, fragment.texcoord[0]; + * + * must be split into two MOV's and then an ADD (nvidia does this) but + * I'm not sure why it's not just one MOV and then source the second input + * in the ADD instruction.. + * + * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary + * negation requires multiplication with a const. + * + * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE + * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO + * is implemented simply by not writing to the relevant components of the destination. + * + * Conditional execution + * TODO + * + * Non-native instructions: + * LIT + * LRP - MAD+MAD + * SUB - ADD, negate second source + * RSQ - LG2 + EX2 + * POW - LG2 + MUL + EX2 + * SCS - COS + SIN + * XPD + * + * NV40 Looping + * Loops appear to be fairly expensive on NV40 at least, the proprietary + * driver goes to a lot of effort to avoid using the native looping + * instructions. If the total number of *executed* instructions between + * REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop. + * The maximum loop count is 255. + * + */ + +//== Opcode / Destination selection == +#define NVFX_FP_OP_PROGRAM_END (1 << 0) +#define NVFX_FP_OP_OUT_REG_SHIFT 1 +#define NV30_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */ +#define NV40_FP_OP_OUT_REG_MASK (63 << 1) +/* Needs to be set when writing outputs to get expected result.. */ +#define NVFX_FP_OP_OUT_REG_HALF (1 << 7) +#define NVFX_FP_OP_COND_WRITE_ENABLE (1 << 8) +#define NVFX_FP_OP_OUTMASK_SHIFT 9 +#define NVFX_FP_OP_OUTMASK_MASK (0xF << 9) +# define NVFX_FP_OP_OUT_X (1<<9) +# define NVFX_FP_OP_OUT_Y (1<<10) +# define NVFX_FP_OP_OUT_Z (1<<11) +# define NVFX_FP_OP_OUT_W (1<<12) +/* Uncertain about these, especially the input_src values.. it's possible that + * they can be dynamically changed. + */ +#define NVFX_FP_OP_INPUT_SRC_SHIFT 13 +#define NVFX_FP_OP_INPUT_SRC_MASK (15 << 13) +# define NVFX_FP_OP_INPUT_SRC_POSITION 0x0 +# define NVFX_FP_OP_INPUT_SRC_COL0 0x1 +# define NVFX_FP_OP_INPUT_SRC_COL1 0x2 +# define NVFX_FP_OP_INPUT_SRC_FOGC 0x3 +# define NVFX_FP_OP_INPUT_SRC_TC0 0x4 +# define NVFX_FP_OP_INPUT_SRC_TC(n) (0x4 + n) +# define NV40_FP_OP_INPUT_SRC_FACING 0xE +#define NVFX_FP_OP_TEX_UNIT_SHIFT 17 +#define NVFX_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */ +#define NVFX_FP_OP_PRECISION_SHIFT 22 +#define NVFX_FP_OP_PRECISION_MASK (3 << 22) +# define NVFX_FP_PRECISION_FP32 0 +# define NVFX_FP_PRECISION_FP16 1 +# define NVFX_FP_PRECISION_FX12 2 +#define NVFX_FP_OP_OPCODE_SHIFT 24 +#define NVFX_FP_OP_OPCODE_MASK (0x3F << 24) +/* NV30/NV40 fragment program opcodes */ +#define NVFX_FP_OP_OPCODE_NOP 0x00 +#define NVFX_FP_OP_OPCODE_MOV 0x01 +#define NVFX_FP_OP_OPCODE_MUL 0x02 +#define NVFX_FP_OP_OPCODE_ADD 0x03 +#define NVFX_FP_OP_OPCODE_MAD 0x04 +#define NVFX_FP_OP_OPCODE_DP3 0x05 +#define NVFX_FP_OP_OPCODE_DP4 0x06 +#define NVFX_FP_OP_OPCODE_DST 0x07 +#define NVFX_FP_OP_OPCODE_MIN 0x08 +#define NVFX_FP_OP_OPCODE_MAX 0x09 +#define NVFX_FP_OP_OPCODE_SLT 0x0A +#define NVFX_FP_OP_OPCODE_SGE 0x0B +#define NVFX_FP_OP_OPCODE_SLE 0x0C +#define NVFX_FP_OP_OPCODE_SGT 0x0D +#define NVFX_FP_OP_OPCODE_SNE 0x0E +#define NVFX_FP_OP_OPCODE_SEQ 0x0F +#define NVFX_FP_OP_OPCODE_FRC 0x10 +#define NVFX_FP_OP_OPCODE_FLR 0x11 +#define NVFX_FP_OP_OPCODE_KIL 0x12 +#define NVFX_FP_OP_OPCODE_PK4B 0x13 +#define NVFX_FP_OP_OPCODE_UP4B 0x14 +#define NVFX_FP_OP_OPCODE_DDX 0x15 /* can only write XY */ +#define NVFX_FP_OP_OPCODE_DDY 0x16 /* can only write XY */ +#define NVFX_FP_OP_OPCODE_TEX 0x17 +#define NVFX_FP_OP_OPCODE_TXP 0x18 +#define NVFX_FP_OP_OPCODE_TXD 0x19 +#define NVFX_FP_OP_OPCODE_RCP 0x1A +#define NVFX_FP_OP_OPCODE_EX2 0x1C +#define NVFX_FP_OP_OPCODE_LG2 0x1D +#define NVFX_FP_OP_OPCODE_STR 0x20 +#define NVFX_FP_OP_OPCODE_SFL 0x21 +#define NVFX_FP_OP_OPCODE_COS 0x22 +#define NVFX_FP_OP_OPCODE_SIN 0x23 +#define NVFX_FP_OP_OPCODE_PK2H 0x24 +#define NVFX_FP_OP_OPCODE_UP2H 0x25 +#define NVFX_FP_OP_OPCODE_PK4UB 0x27 +#define NVFX_FP_OP_OPCODE_UP4UB 0x28 +#define NVFX_FP_OP_OPCODE_PK2US 0x29 +#define NVFX_FP_OP_OPCODE_UP2US 0x2A +#define NVFX_FP_OP_OPCODE_DP2A 0x2E +#define NVFX_FP_OP_OPCODE_TXB 0x31 +#define NVFX_FP_OP_OPCODE_DIV 0x3A + +/* NV30 only fragment program opcodes */ +#define NVFX_FP_OP_OPCODE_RSQ_NV30 0x1B +#define NVFX_FP_OP_OPCODE_LIT_NV30 0x1E +#define NVFX_FP_OP_OPCODE_LRP_NV30 0x1F +#define NVFX_FP_OP_OPCODE_POW_NV30 0x26 +#define NVFX_FP_OP_OPCODE_RFL_NV30 0x36 + +/* NV40 only fragment program opcodes */ +#define NVFX_FP_OP_OPCODE_TXL_NV40 0x31 +/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ +#define NV40_FP_OP_BRA_OPCODE_BRK 0x0 +#define NV40_FP_OP_BRA_OPCODE_CAL 0x1 +#define NV40_FP_OP_BRA_OPCODE_IF 0x2 +#define NV40_FP_OP_BRA_OPCODE_LOOP 0x3 +#define NV40_FP_OP_BRA_OPCODE_REP 0x4 +#define NV40_FP_OP_BRA_OPCODE_RET 0x5 + +#define NVFX_FP_OP_OUT_SAT (1 << 31) + +/* high order bits of SRC0 */ +#define NVFX_FP_OP_OUT_ABS (1 << 29) +#define NVFX_FP_OP_COND_SWZ_W_SHIFT 27 +#define NVFX_FP_OP_COND_SWZ_W_MASK (3 << 27) +#define NVFX_FP_OP_COND_SWZ_Z_SHIFT 25 +#define NVFX_FP_OP_COND_SWZ_Z_MASK (3 << 25) +#define NVFX_FP_OP_COND_SWZ_Y_SHIFT 23 +#define NVFX_FP_OP_COND_SWZ_Y_MASK (3 << 23) +#define NVFX_FP_OP_COND_SWZ_X_SHIFT 21 +#define NVFX_FP_OP_COND_SWZ_X_MASK (3 << 21) +#define NVFX_FP_OP_COND_SWZ_ALL_SHIFT 21 +#define NVFX_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) +#define NVFX_FP_OP_COND_SHIFT 18 +#define NVFX_FP_OP_COND_MASK (0x07 << 18) +# define NVFX_FP_OP_COND_FL 0 +# define NVFX_FP_OP_COND_LT 1 +# define NVFX_FP_OP_COND_EQ 2 +# define NVFX_FP_OP_COND_LE 3 +# define NVFX_FP_OP_COND_GT 4 +# define NVFX_FP_OP_COND_NE 5 +# define NVFX_FP_OP_COND_GE 6 +# define NVFX_FP_OP_COND_TR 7 + +/* high order bits of SRC1 */ +#define NV40_FP_OP_OPCODE_IS_BRANCH (1<<31) +#define NVFX_FP_OP_DST_SCALE_SHIFT 28 +#define NVFX_FP_OP_DST_SCALE_MASK (3 << 28) +#define NVFX_FP_OP_DST_SCALE_1X 0 +#define NVFX_FP_OP_DST_SCALE_2X 1 +#define NVFX_FP_OP_DST_SCALE_4X 2 +#define NVFX_FP_OP_DST_SCALE_8X 3 +#define NVFX_FP_OP_DST_SCALE_INV_2X 5 +#define NVFX_FP_OP_DST_SCALE_INV_4X 6 +#define NVFX_FP_OP_DST_SCALE_INV_8X 7 + +/* SRC1 LOOP */ +#define NV40_FP_OP_LOOP_INCR_SHIFT 19 +#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19) +#define NV40_FP_OP_LOOP_INDEX_SHIFT 10 +#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10) +#define NV40_FP_OP_LOOP_COUNT_SHIFT 2 +#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2) + +/* SRC1 IF */ +#define NV40_FP_OP_ELSE_ID_SHIFT 2 +#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2) + +/* SRC1 CAL */ +#define NV40_FP_OP_IADDR_SHIFT 2 +#define NV40_FP_OP_IADDR_MASK (0xFF << 2) + +/* SRC1 REP + * I have no idea why there are 3 count values here.. but they + * have always been filled with the same value in my tests so + * far.. + */ +#define NV40_FP_OP_REP_COUNT1_SHIFT 2 +#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2) +#define NV40_FP_OP_REP_COUNT2_SHIFT 10 +#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10) +#define NV40_FP_OP_REP_COUNT3_SHIFT 19 +#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19) + +/* SRC2 REP/IF */ +#define NV40_FP_OP_END_ID_SHIFT 2 +#define NV40_FP_OP_END_ID_MASK (0xFF << 2) + +/* high order bits of SRC2 */ +#define NVFX_FP_OP_INDEX_INPUT (1 << 30) +#define NV40_FP_OP_ADDR_INDEX_SHIFT 19 +#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19) + +//== Register selection == +#define NVFX_FP_REG_TYPE_SHIFT 0 +#define NVFX_FP_REG_TYPE_MASK (3 << 0) +# define NVFX_FP_REG_TYPE_TEMP 0 +# define NVFX_FP_REG_TYPE_INPUT 1 +# define NVFX_FP_REG_TYPE_CONST 2 +#define NVFX_FP_REG_SRC_SHIFT 2 +#define NV30_FP_REG_SRC_MASK (31 << 2) +#define NV40_FP_REG_SRC_MASK (63 << 2) +#define NVFX_FP_REG_SRC_HALF (1 << 8) +#define NVFX_FP_REG_SWZ_ALL_SHIFT 9 +#define NVFX_FP_REG_SWZ_ALL_MASK (255 << 9) +#define NVFX_FP_REG_SWZ_X_SHIFT 9 +#define NVFX_FP_REG_SWZ_X_MASK (3 << 9) +#define NVFX_FP_REG_SWZ_Y_SHIFT 11 +#define NVFX_FP_REG_SWZ_Y_MASK (3 << 11) +#define NVFX_FP_REG_SWZ_Z_SHIFT 13 +#define NVFX_FP_REG_SWZ_Z_MASK (3 << 13) +#define NVFX_FP_REG_SWZ_W_SHIFT 15 +#define NVFX_FP_REG_SWZ_W_MASK (3 << 15) +# define NVFX_FP_SWIZZLE_X 0 +# define NVFX_FP_SWIZZLE_Y 1 +# define NVFX_FP_SWIZZLE_Z 2 +# define NVFX_FP_SWIZZLE_W 3 +#define NVFX_FP_REG_NEGATE (1 << 17) + +#ifndef NVFX_SHADER_NO_FUCKEDNESS +#define NVFXSR_NONE 0 +#define NVFXSR_OUTPUT 1 +#define NVFXSR_INPUT 2 +#define NVFXSR_TEMP 3 +#define NVFXSR_CONST 4 + +struct nvfx_sreg { + int type; + int index; + + int dst_scale; + + int negate; + int abs; + int swz[4]; + + int cc_update; + int cc_update_reg; + int cc_test; + int cc_test_reg; + int cc_swz[4]; +}; + +static INLINE struct nvfx_sreg +nvfx_sr(int type, int index) +{ + struct nvfx_sreg temp = { + .type = type, + .index = index, + .dst_scale = DEF_SCALE, + .abs = 0, + .negate = 0, + .swz = { 0, 1, 2, 3 }, + .cc_update = 0, + .cc_update_reg = 0, + .cc_test = DEF_CTEST, + .cc_test_reg = 0, + .cc_swz = { 0, 1, 2, 3 }, + }; + return temp; +} + +static INLINE struct nvfx_sreg +nvfx_sr_swz(struct nvfx_sreg src, int x, int y, int z, int w) +{ + struct nvfx_sreg dst = src; + + dst.swz[SWZ_X] = src.swz[x]; + dst.swz[SWZ_Y] = src.swz[y]; + dst.swz[SWZ_Z] = src.swz[z]; + dst.swz[SWZ_W] = src.swz[w]; + return dst; +} + +static INLINE struct nvfx_sreg +nvfx_sr_neg(struct nvfx_sreg src) +{ + src.negate = !src.negate; + return src; +} + +static INLINE struct nvfx_sreg +nvfx_sr_abs(struct nvfx_sreg src) +{ + src.abs = 1; + return src; +} + +static INLINE struct nvfx_sreg +nvfx_sr_scale(struct nvfx_sreg src, int scale) +{ + src.dst_scale = scale; + return src; +} +#endif + +#endif -- cgit v1.2.3 From bcb37411fc9159a5c1af50b7defbf1f526b50793 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 21 Feb 2010 10:55:41 +0100 Subject: nv30, nv40: non-trivially unify nv[34]0_fragprog.c The files are mostly the same except: 1. On NV40, some TGSI instructions are emulated with several hardware ones 2. Some instructions such as DDX/DDY, and STR were missing from nv30 3. NV40 has more sophisticated register management nv30 now supports all instructions and uses the nv40 register management. --- src/gallium/drivers/nv30/Makefile | 1 - src/gallium/drivers/nv30/nv30_context.h | 5 - src/gallium/drivers/nv30/nv30_fragprog.c | 903 ------------------------- src/gallium/drivers/nv30/nv30_state.c | 2 +- src/gallium/drivers/nv40/Makefile | 1 - src/gallium/drivers/nv40/nv40_context.h | 5 - src/gallium/drivers/nv40/nv40_fragprog.c | 984 --------------------------- src/gallium/drivers/nv40/nv40_state.c | 2 +- src/gallium/drivers/nvfx/Makefile | 3 +- src/gallium/drivers/nvfx/nvfx_context.h | 5 + src/gallium/drivers/nvfx/nvfx_fragprog.c | 1011 ++++++++++++++++++++++++++++ src/gallium/drivers/nvfx/nvfx_state_emit.c | 2 +- 12 files changed, 1021 insertions(+), 1903 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_fragprog.c delete mode 100644 src/gallium/drivers/nv40/nv40_fragprog.c create mode 100644 src/gallium/drivers/nvfx/nvfx_fragprog.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index b5728a34f83..196cc9a1ef7 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -6,7 +6,6 @@ LIBNAME = nv30 C_SOURCES = \ nv30_context.c \ nv30_draw.c \ - nv30_fragprog.c \ nv30_fragtex.c \ nv30_screen.c \ nv30_state.c \ diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index c203425cc91..7840318363a 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -12,15 +12,10 @@ extern struct draw_stage *nv30_draw_render_stage(struct nvfx_context *nvfx); extern void nv30_vertprog_destroy(struct nvfx_context *, struct nvfx_vertex_program *); -/* nv30_fragprog.c */ -extern void nv30_fragprog_destroy(struct nvfx_context *, - struct nvfx_fragment_program *); - /* nv30_fragtex.c */ extern void nv30_fragtex_bind(struct nvfx_context *); /* nv30_state.c and friends */ -extern struct nvfx_state_entry nv30_state_fragprog; extern struct nvfx_state_entry nv30_state_vertprog; extern struct nvfx_state_entry nv30_state_fragtex; extern struct nvfx_state_entry nv30_state_vbo; diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c deleted file mode 100644 index 4ce16b8f0e3..00000000000 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ /dev/null @@ -1,903 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "util/u_inlines.h" - -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" - -#include "nv30_context.h" - -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 1 -#define MASK_Y 2 -#define MASK_Z 4 -#define MASK_W 8 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE NVFX_FP_OP_DST_SCALE_1X -#define DEF_CTEST NVFX_FP_OP_COND_TR -#include "nvfx_shader.h" - -#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nvfx_sr_neg((s)) -#define abs(s) nvfx_sr_abs((s)) -#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v) - -#define MAX_CONSTS 128 -#define MAX_IMM 32 -struct nv30_fpc { - struct nvfx_fragment_program *fp; - - uint attrib_map[PIPE_MAX_SHADER_INPUTS]; - - int high_temp; - int temp_temp_count; - int num_regs; - - uint depth_id; - uint colour_id; - - unsigned inst_offset; - - struct { - int pipe; - float vals[4]; - } consts[MAX_CONSTS]; - int nr_consts; - - struct nvfx_sreg imm[MAX_IMM]; - unsigned nr_imm; -}; - -static INLINE struct nvfx_sreg -temp(struct nv30_fpc *fpc) -{ - int idx; - - idx = fpc->temp_temp_count++; - idx += fpc->high_temp + 1; - return nvfx_sr(NVFXSR_TEMP, idx); -} - -static INLINE struct nvfx_sreg -constant(struct nv30_fpc *fpc, int pipe, float vals[4]) -{ - int idx; - - if (fpc->nr_consts == MAX_CONSTS) - assert(0); - idx = fpc->nr_consts++; - - fpc->consts[idx].pipe = pipe; - if (pipe == -1) - memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); - return nvfx_sr(NVFXSR_CONST, idx); -} - -#define arith(cc,s,o,d,m,s0,s1,s2) \ - nv30_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \ - (d), (m), (s0), (s1), (s2)) -#define tex(cc,s,o,u,d,m,s0,s1,s2) \ - nv30_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \ - (d), (m), (s0), none, none) - -static void -grow_insns(struct nv30_fpc *fpc, int size) -{ - struct nvfx_fragment_program *fp = fpc->fp; - - fp->insn_len += size; - fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); -} - -static void -emit_src(struct nv30_fpc *fpc, int pos, struct nvfx_sreg src) -{ - struct nvfx_fragment_program *fp = fpc->fp; - uint32_t *hw = &fp->insn[fpc->inst_offset]; - uint32_t sr = 0; - - switch (src.type) { - case NVFXSR_INPUT: - sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); - hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT); - break; - case NVFXSR_OUTPUT: - sr |= NVFX_FP_REG_SRC_HALF; - /* fall-through */ - case NVFXSR_TEMP: - sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT); - sr |= (src.index << NVFX_FP_REG_SRC_SHIFT); - break; - case NVFXSR_CONST: - grow_insns(fpc, 4); - hw = &fp->insn[fpc->inst_offset]; - if (fpc->consts[src.index].pipe >= 0) { - struct nvfx_fragment_program_data *fpd; - - fp->consts = realloc(fp->consts, ++fp->nr_consts * - sizeof(*fpd)); - fpd = &fp->consts[fp->nr_consts - 1]; - fpd->offset = fpc->inst_offset + 4; - fpd->index = fpc->consts[src.index].pipe; - memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); - } else { - memcpy(&fp->insn[fpc->inst_offset + 4], - fpc->consts[src.index].vals, - sizeof(uint32_t) * 4); - } - - sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT); - break; - case NVFXSR_NONE: - sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); - break; - default: - assert(0); - } - - if (src.negate) - sr |= NVFX_FP_REG_NEGATE; - - if (src.abs) - hw[1] |= (1 << (29 + pos)); - - sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) | - (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) | - (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) | - (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT)); - - hw[pos + 1] |= sr; -} - -static void -emit_dst(struct nv30_fpc *fpc, struct nvfx_sreg dst) -{ - struct nvfx_fragment_program *fp = fpc->fp; - uint32_t *hw = &fp->insn[fpc->inst_offset]; - - switch (dst.type) { - case NVFXSR_TEMP: - if (fpc->num_regs < (dst.index + 1)) - fpc->num_regs = dst.index + 1; - break; - case NVFXSR_OUTPUT: - if (dst.index == 1) { - fp->fp_control |= 0xe; - } else { - hw[0] |= NVFX_FP_OP_OUT_REG_HALF; - } - break; - case NVFXSR_NONE: - hw[0] |= (1 << 30); - break; - default: - assert(0); - } - - hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT); -} - -static void -nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op, - struct nvfx_sreg dst, int mask, - struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2) -{ - struct nvfx_fragment_program *fp = fpc->fp; - uint32_t *hw; - - fpc->inst_offset = fp->insn_len; - grow_insns(fpc, 4); - hw = &fp->insn[fpc->inst_offset]; - memset(hw, 0, sizeof(uint32_t) * 4); - - if (op == NVFX_FP_OP_OPCODE_KIL) - fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL; - hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT); - hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT); - hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT); - - if (sat) - hw[0] |= NVFX_FP_OP_OUT_SAT; - - if (dst.cc_update) - hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE; - hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT); - hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) | - (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) | - (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) | - (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT)); - - emit_dst(fpc, dst); - emit_src(fpc, 0, s0); - emit_src(fpc, 1, s1); - emit_src(fpc, 2, s2); -} - -static void -nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit, - struct nvfx_sreg dst, int mask, - struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2) -{ - struct nvfx_fragment_program *fp = fpc->fp; - - nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); - - fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT); - fp->samplers |= (1 << unit); -} - -static INLINE struct nvfx_sreg -tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) -{ - struct nvfx_sreg src; - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - src = nvfx_sr(NVFXSR_INPUT, - fpc->attrib_map[fsrc->Register.Index]); - break; - case TGSI_FILE_CONSTANT: - src = constant(fpc, fsrc->Register.Index, NULL); - break; - case TGSI_FILE_IMMEDIATE: - assert(fsrc->Register.Index < fpc->nr_imm); - src = fpc->imm[fsrc->Register.Index]; - break; - case TGSI_FILE_TEMPORARY: - src = nvfx_sr(NVFXSR_TEMP, fsrc->Register.Index + 1); - if (fpc->high_temp < src.index) - fpc->high_temp = src.index; - break; - /* This is clearly insane, but gallium hands us shaders like this. - * Luckily fragprog results are just temp regs.. - */ - case TGSI_FILE_OUTPUT: - if (fsrc->Register.Index == fpc->colour_id) - return nvfx_sr(NVFXSR_OUTPUT, 0); - else - return nvfx_sr(NVFXSR_OUTPUT, 1); - break; - default: - NOUVEAU_ERR("bad src file\n"); - break; - } - - src.abs = fsrc->Register.Absolute; - src.negate = fsrc->Register.Negate; - src.swz[0] = fsrc->Register.SwizzleX; - src.swz[1] = fsrc->Register.SwizzleY; - src.swz[2] = fsrc->Register.SwizzleZ; - src.swz[3] = fsrc->Register.SwizzleW; - return src; -} - -static INLINE struct nvfx_sreg -tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) { - int idx; - - switch (fdst->Register.File) { - case TGSI_FILE_OUTPUT: - if (fdst->Register.Index == fpc->colour_id) - return nvfx_sr(NVFXSR_OUTPUT, 0); - else - return nvfx_sr(NVFXSR_OUTPUT, 1); - break; - case TGSI_FILE_TEMPORARY: - idx = fdst->Register.Index + 1; - if (fpc->high_temp < idx) - fpc->high_temp = idx; - return nvfx_sr(NVFXSR_TEMP, idx); - case TGSI_FILE_NULL: - return nvfx_sr(NVFXSR_NONE, 0); - default: - NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); - return nvfx_sr(NVFXSR_NONE, 0); - } -} - -static INLINE int -tgsi_mask(uint tgsi) -{ - int mask = 0; - - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; - return mask; -} - -static boolean -src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, - struct nvfx_sreg *src) -{ - const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); - struct nvfx_sreg tgsi = tgsi_src(fpc, fsrc); - uint mask = 0; - uint c; - - for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - mask |= (1 << c); - break; - default: - assert(0); - } - } - - if (mask == MASK_ALL) - return TRUE; - - *src = temp(fpc); - - if (mask) - arith(fpc, 0, MOV, *src, mask, tgsi, none, none); - - return FALSE; -} - -static boolean -nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, - const struct tgsi_full_instruction *finst) -{ - const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); - struct nvfx_sreg src[3], dst, tmp; - int mask, sat, unit = 0; - int ai = -1, ci = -1; - int i; - - if (finst->Instruction.Opcode == TGSI_OPCODE_END) - return TRUE; - - fpc->temp_temp_count = 0; - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { - src[i] = tgsi_src(fpc, fsrc); - } - } - - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - case TGSI_FILE_CONSTANT: - case TGSI_FILE_TEMPORARY: - if (!src_native_swz(fpc, fsrc, &src[i])) - continue; - break; - default: - break; - } - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->Register.Index) { - ai = fsrc->Register.Index; - src[i] = tgsi_src(fpc, fsrc); - } else { - NOUVEAU_MSG("extra src attr %d\n", - fsrc->Register.Index); - src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, - tgsi_src(fpc, fsrc), none, none); - } - break; - case TGSI_FILE_CONSTANT: - case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->Register.Index) { - ci = fsrc->Register.Index; - src[i] = tgsi_src(fpc, fsrc); - } else { - src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, - tgsi_src(fpc, fsrc), none, none); - } - break; - case TGSI_FILE_TEMPORARY: - /* handled above */ - break; - case TGSI_FILE_SAMPLER: - unit = fsrc->Register.Index; - break; - case TGSI_FILE_OUTPUT: - break; - default: - NOUVEAU_ERR("bad src file\n"); - return FALSE; - } - } - - dst = tgsi_dst(fpc, &finst->Dst[0]); - mask = tgsi_mask(finst->Dst[0].Register.WriteMask); - sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); - - switch (finst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); - break; - case TGSI_OPCODE_ADD: - arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_CMP: - tmp = nvfx_sr(NVFXSR_NONE, 0); - tmp.cc_update = 1; - arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); - dst.cc_test = NVFX_VP_INST_COND_GE; - arith(fpc, sat, MOV, dst, mask, src[2], none, none); - dst.cc_test = NVFX_VP_INST_COND_LT; - arith(fpc, sat, MOV, dst, mask, src[1], none, none); - break; - case TGSI_OPCODE_COS: - arith(fpc, sat, COS, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_DP3: - arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DP4: - arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DPH: - tmp = temp(fpc); - arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); - arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), - swz(src[1], W, W, W, W), none); - break; - case TGSI_OPCODE_DST: - arith(fpc, sat, DST, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_EX2: - arith(fpc, sat, EX2, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FLR: - arith(fpc, sat, FLR, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FRC: - arith(fpc, sat, FRC, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_KILP: - arith(fpc, 0, KIL, none, 0, none, none, none); - break; - case TGSI_OPCODE_KIL: - dst = nvfx_sr(NVFXSR_NONE, 0); - dst.cc_update = 1; - arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); - dst.cc_update = 0; dst.cc_test = NVFX_FP_OP_COND_LT; - arith(fpc, 0, KIL, dst, 0, none, none, none); - break; - case TGSI_OPCODE_LG2: - arith(fpc, sat, LG2, dst, mask, src[0], none, none); - break; -// case TGSI_OPCODE_LIT: - case TGSI_OPCODE_LRP: - arith(fpc, sat, LRP_NV30, dst, mask, src[0], src[1], src[2]); - break; - case TGSI_OPCODE_MAD: - arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); - break; - case TGSI_OPCODE_MAX: - arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MIN: - arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MOV: - arith(fpc, sat, MOV, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_MUL: - arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_POW: - arith(fpc, sat, POW_NV30, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_RCP: - arith(fpc, sat, RCP, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_RET: - assert(0); - break; - case TGSI_OPCODE_RFL: - arith(fpc, 0, RFL_NV30, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_RSQ: - arith(fpc, sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); - break; - case TGSI_OPCODE_SCS: - /* avoid overwriting the source */ - if(src[0].swz[SWZ_X] != SWZ_X) - { - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); - } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); - } - } - else - { - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); - } - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); - } - } - break; - case TGSI_OPCODE_SIN: - arith(fpc, sat, SIN, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_SGE: - arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SGT: - arith(fpc, sat, SGT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SLT: - arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SUB: - arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); - break; - case TGSI_OPCODE_TEX: - tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_TXB: - tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_TXP: - tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_XPD: - tmp = temp(fpc); - arith(fpc, 0, MUL, tmp, mask, - swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(fpc, sat, MAD, dst, (mask & ~MASK_W), - swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), - neg(tmp)); - break; - default: - NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); - return FALSE; - } - - return TRUE; -} - -static boolean -nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, - const struct tgsi_full_declaration *fdec) -{ - int hw; - - switch (fdec->Semantic.Name) { - case TGSI_SEMANTIC_POSITION: - hw = NVFX_FP_OP_INPUT_SRC_POSITION; - break; - case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.Index == 0) { - hw = NVFX_FP_OP_INPUT_SRC_COL0; - } else - if (fdec->Semantic.Index == 1) { - hw = NVFX_FP_OP_INPUT_SRC_COL1; - } else { - NOUVEAU_ERR("bad colour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_FOG: - hw = NVFX_FP_OP_INPUT_SRC_FOGC; - break; - case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.Index <= 7) { - hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic. - Index); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; - } - break; - default: - NOUVEAU_ERR("bad input semantic\n"); - return FALSE; - } - - fpc->attrib_map[fdec->Range.First] = hw; - return TRUE; -} - -static boolean -nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc, - const struct tgsi_full_declaration *fdec) -{ - switch (fdec->Semantic.Name) { - case TGSI_SEMANTIC_POSITION: - fpc->depth_id = fdec->Range.First; - break; - case TGSI_SEMANTIC_COLOR: - fpc->colour_id = fdec->Range.First; - break; - default: - NOUVEAU_ERR("bad output semantic\n"); - return FALSE; - } - - return TRUE; -} - -static boolean -nv30_fragprog_prepare(struct nv30_fpc *fpc) -{ - struct tgsi_parse_context p; - /*int high_temp = -1, i;*/ - - tgsi_parse_init(&p, fpc->fp->pipe.tokens); - while (!tgsi_parse_end_of_tokens(&p)) { - const union tgsi_full_token *tok = &p.FullToken; - - tgsi_parse_token(&p); - switch(tok->Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *fdec; - fdec = &p.FullToken.FullDeclaration; - switch (fdec->Declaration.File) { - case TGSI_FILE_INPUT: - if (!nv30_fragprog_parse_decl_attrib(fpc, fdec)) - goto out_err; - break; - case TGSI_FILE_OUTPUT: - if (!nv30_fragprog_parse_decl_output(fpc, fdec)) - goto out_err; - break; - /*case TGSI_FILE_TEMPORARY: - if (fdec->Range.Last > high_temp) { - high_temp = - fdec->Range.Last; - } - break;*/ - default: - break; - } - } - break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - struct tgsi_full_immediate *imm; - float vals[4]; - - imm = &p.FullToken.FullImmediate; - assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); - assert(fpc->nr_imm < MAX_IMM); - - vals[0] = imm->u[0].Float; - vals[1] = imm->u[1].Float; - vals[2] = imm->u[2].Float; - vals[3] = imm->u[3].Float; - fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); - } - break; - default: - break; - } - } - tgsi_parse_free(&p); - - /*if (++high_temp) { - fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg)); - for (i = 0; i < high_temp; i++) - fpc->r_temp[i] = temp(fpc); - fpc->r_temps_discard = 0; - }*/ - - return TRUE; - -out_err: - /*if (fpc->r_temp) - FREE(fpc->r_temp);*/ - tgsi_parse_free(&p); - return FALSE; -} - -static void -nv30_fragprog_translate(struct nvfx_context *nvfx, - struct nvfx_fragment_program *fp) -{ - struct tgsi_parse_context parse; - struct nv30_fpc *fpc = NULL; - - tgsi_dump(fp->pipe.tokens,0); - - fpc = CALLOC(1, sizeof(struct nv30_fpc)); - if (!fpc) - return; - fpc->fp = fp; - fpc->high_temp = -1; - fpc->num_regs = 2; - - if (!nv30_fragprog_prepare(fpc)) { - FREE(fpc); - return; - } - - tgsi_parse_init(&parse, fp->pipe.tokens); - - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *finst; - - finst = &parse.FullToken.FullInstruction; - if (!nv30_fragprog_parse_instruction(fpc, finst)) - goto out_err; - } - break; - default: - break; - } - } - - fp->fp_control |= (fpc->num_regs-1)/2; - - /* Terminate final instruction */ - fp->insn[fpc->inst_offset] |= 0x00000001; - - /* Append NOP + END instruction, may or may not be necessary. */ - fpc->inst_offset = fp->insn_len; - grow_insns(fpc, 4); - fp->insn[fpc->inst_offset + 0] = 0x00000001; - fp->insn[fpc->inst_offset + 1] = 0x00000000; - fp->insn[fpc->inst_offset + 2] = 0x00000000; - fp->insn[fpc->inst_offset + 3] = 0x00000000; - - fp->translated = TRUE; -out_err: - tgsi_parse_free(&parse); - FREE(fpc); -} - -static void -nv30_fragprog_upload(struct nvfx_context *nvfx, - struct nvfx_fragment_program *fp) -{ - struct pipe_screen *pscreen = nvfx->pipe.screen; - const uint32_t le = 1; - uint32_t *map; - int i; - - map = pipe_buffer_map(pscreen, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); - -#if 0 - for (i = 0; i < fp->insn_len; i++) { - fflush(stdout); fflush(stderr); - NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); - fflush(stdout); fflush(stderr); - } -#endif - - if ((*(const uint8_t *)&le)) { - for (i = 0; i < fp->insn_len; i++) { - map[i] = fp->insn[i]; - } - } else { - /* Weird swapping for big-endian chips */ - for (i = 0; i < fp->insn_len; i++) { - map[i] = ((fp->insn[i] & 0xffff) << 16) | - ((fp->insn[i] >> 16) & 0xffff); - } - } - - pipe_buffer_unmap(pscreen, fp->buffer); -} - -static boolean -nv30_fragprog_validate(struct nvfx_context *nvfx) -{ - struct nvfx_fragment_program *fp = nvfx->fragprog; - struct pipe_buffer *constbuf = - nvfx->constbuf[PIPE_SHADER_FRAGMENT]; - struct pipe_screen *pscreen = nvfx->pipe.screen; - struct nouveau_stateobj *so; - boolean new_consts = FALSE; - int i; - - if (fp->translated) - goto update_constants; - - /*nvfx->fallback_swrast &= ~NVFX_NEW_FRAGPROG;*/ - nv30_fragprog_translate(nvfx, fp); - if (!fp->translated) { - /*nvfx->fallback_swrast |= NVFX_NEW_FRAGPROG;*/ - return FALSE; - } - - fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); - nv30_fragprog_upload(nvfx, fp); - - so = so_new(4, 4, 1); - so_method(so, nvfx->screen->eng3d, NV34TCL_FP_ACTIVE_PROGRAM, 1); - so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | - NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0, - NV34TCL_FP_ACTIVE_PROGRAM_DMA1); - so_method(so, nvfx->screen->eng3d, NV34TCL_FP_CONTROL, 1); - so_data (so, fp->fp_control); - so_method(so, nvfx->screen->eng3d, NV34TCL_FP_REG_CONTROL, 1); - so_data (so, (1<<16)|0x4); - so_method(so, nvfx->screen->eng3d, NV34TCL_TX_UNITS_ENABLE, 1); - so_data (so, fp->samplers); - so_ref(so, &fp->so); - so_ref(NULL, &so); - -update_constants: - if (fp->nr_consts) { - float *map; - - map = pipe_buffer_map(pscreen, constbuf, - PIPE_BUFFER_USAGE_CPU_READ); - for (i = 0; i < fp->nr_consts; i++) { - struct nvfx_fragment_program_data *fpd = &fp->consts[i]; - uint32_t *p = &fp->insn[fpd->offset]; - uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; - - if (!memcmp(p, cb, 4 * sizeof(float))) - continue; - memcpy(p, cb, 4 * sizeof(float)); - new_consts = TRUE; - } - pipe_buffer_unmap(pscreen, constbuf); - - if (new_consts) - nv30_fragprog_upload(nvfx, fp); - } - - if (new_consts || fp->so != nvfx->state.hw[NVFX_STATE_FRAGPROG]) { - so_ref(fp->so, &nvfx->state.hw[NVFX_STATE_FRAGPROG]); - return TRUE; - } - - return FALSE; -} - -void -nv30_fragprog_destroy(struct nvfx_context *nvfx, - struct nvfx_fragment_program *fp) -{ - if (fp->buffer) - pipe_buffer_reference(&fp->buffer, NULL); - - if (fp->so) - so_ref(NULL, &fp->so); - - if (fp->insn_len) - FREE(fp->insn); -} - -struct nvfx_state_entry nv30_state_fragprog = { - .validate = nv30_fragprog_validate, - .dirty = { - .pipe = NVFX_NEW_FRAGPROG, - .hw = NVFX_STATE_FRAGPROG - } -}; diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 5263f894f2c..92a4089b316 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -567,7 +567,7 @@ nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso) struct nvfx_context *nvfx = nvfx_context(pipe); struct nvfx_fragment_program *fp = hwcso; - nv30_fragprog_destroy(nvfx, fp); + nvfx_fragprog_destroy(nvfx, fp); FREE((void*)fp->pipe.tokens); FREE(fp); } diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index f78e81ac4b3..f3ff376dfde 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -6,7 +6,6 @@ LIBNAME = nv40 C_SOURCES = \ nv40_context.c \ nv40_draw.c \ - nv40_fragprog.c \ nv40_fragtex.c \ nv40_screen.c \ nv40_state.c \ diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 3840134ce69..4cd56af8f7b 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -16,15 +16,10 @@ extern void nv40_draw_elements_swtnl(struct pipe_context *pipe, extern void nv40_vertprog_destroy(struct nvfx_context *, struct nvfx_vertex_program *); -/* nv40_fragprog.c */ -extern void nv40_fragprog_destroy(struct nvfx_context *, - struct nvfx_fragment_program *); - /* nv40_fragtex.c */ extern void nv40_fragtex_bind(struct nvfx_context *); /* nv40_state.c and friends */ -extern struct nvfx_state_entry nv40_state_fragprog; extern struct nvfx_state_entry nv40_state_vertprog; extern struct nvfx_state_entry nv40_state_fragtex; extern struct nvfx_state_entry nv40_state_vbo; diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c deleted file mode 100644 index e044f367a0b..00000000000 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ /dev/null @@ -1,984 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "util/u_inlines.h" - -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" - -#include "nv40_context.h" - -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 1 -#define MASK_Y 2 -#define MASK_Z 4 -#define MASK_W 8 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE NVFX_FP_OP_DST_SCALE_1X -#define DEF_CTEST NVFX_FP_OP_COND_TR -#include "nvfx_shader.h" - -#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nvfx_sr_neg((s)) -#define abs(s) nvfx_sr_abs((s)) -#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v) - -#define MAX_CONSTS 128 -#define MAX_IMM 32 -struct nv40_fpc { - struct nvfx_fragment_program *fp; - - uint attrib_map[PIPE_MAX_SHADER_INPUTS]; - - unsigned r_temps; - unsigned r_temps_discard; - struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; - struct nvfx_sreg *r_temp; - - int num_regs; - - unsigned inst_offset; - unsigned have_const; - - struct { - int pipe; - float vals[4]; - } consts[MAX_CONSTS]; - int nr_consts; - - struct nvfx_sreg imm[MAX_IMM]; - unsigned nr_imm; -}; - -static INLINE struct nvfx_sreg -temp(struct nv40_fpc *fpc) -{ - int idx = ffs(~fpc->r_temps) - 1; - - if (idx < 0) { - NOUVEAU_ERR("out of temps!!\n"); - assert(0); - return nvfx_sr(NVFXSR_TEMP, 0); - } - - fpc->r_temps |= (1 << idx); - fpc->r_temps_discard |= (1 << idx); - return nvfx_sr(NVFXSR_TEMP, idx); -} - -static INLINE void -release_temps(struct nv40_fpc *fpc) -{ - fpc->r_temps &= ~fpc->r_temps_discard; - fpc->r_temps_discard = 0; -} - -static INLINE struct nvfx_sreg -constant(struct nv40_fpc *fpc, int pipe, float vals[4]) -{ - int idx; - - if (fpc->nr_consts == MAX_CONSTS) - assert(0); - idx = fpc->nr_consts++; - - fpc->consts[idx].pipe = pipe; - if (pipe == -1) - memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); - return nvfx_sr(NVFXSR_CONST, idx); -} - -#define arith(cc,s,o,d,m,s0,s1,s2) \ - nv40_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \ - (d), (m), (s0), (s1), (s2)) -#define tex(cc,s,o,u,d,m,s0,s1,s2) \ - nv40_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \ - (d), (m), (s0), none, none) - -static void -grow_insns(struct nv40_fpc *fpc, int size) -{ - struct nvfx_fragment_program *fp = fpc->fp; - - fp->insn_len += size; - fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); -} - -static void -emit_src(struct nv40_fpc *fpc, int pos, struct nvfx_sreg src) -{ - struct nvfx_fragment_program *fp = fpc->fp; - uint32_t *hw = &fp->insn[fpc->inst_offset]; - uint32_t sr = 0; - - switch (src.type) { - case NVFXSR_INPUT: - sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); - hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT); - break; - case NVFXSR_OUTPUT: - sr |= NVFX_FP_REG_SRC_HALF; - /* fall-through */ - case NVFXSR_TEMP: - sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT); - sr |= (src.index << NVFX_FP_REG_SRC_SHIFT); - break; - case NVFXSR_CONST: - if (!fpc->have_const) { - grow_insns(fpc, 4); - fpc->have_const = 1; - } - - hw = &fp->insn[fpc->inst_offset]; - if (fpc->consts[src.index].pipe >= 0) { - struct nvfx_fragment_program_data *fpd; - - fp->consts = realloc(fp->consts, ++fp->nr_consts * - sizeof(*fpd)); - fpd = &fp->consts[fp->nr_consts - 1]; - fpd->offset = fpc->inst_offset + 4; - fpd->index = fpc->consts[src.index].pipe; - memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); - } else { - memcpy(&fp->insn[fpc->inst_offset + 4], - fpc->consts[src.index].vals, - sizeof(uint32_t) * 4); - } - - sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT); - break; - case NVFXSR_NONE: - sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); - break; - default: - assert(0); - } - - if (src.negate) - sr |= NVFX_FP_REG_NEGATE; - - if (src.abs) - hw[1] |= (1 << (29 + pos)); - - sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) | - (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) | - (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) | - (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT)); - - hw[pos + 1] |= sr; -} - -static void -emit_dst(struct nv40_fpc *fpc, struct nvfx_sreg dst) -{ - struct nvfx_fragment_program *fp = fpc->fp; - uint32_t *hw = &fp->insn[fpc->inst_offset]; - - switch (dst.type) { - case NVFXSR_TEMP: - if (fpc->num_regs < (dst.index + 1)) - fpc->num_regs = dst.index + 1; - break; - case NVFXSR_OUTPUT: - if (dst.index == 1) { - fp->fp_control |= 0xe; - } else { - hw[0] |= NVFX_FP_OP_OUT_REG_HALF; - } - break; - case NVFXSR_NONE: - hw[0] |= (1 << 30); - break; - default: - assert(0); - } - - hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT); -} - -static void -nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, - struct nvfx_sreg dst, int mask, - struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2) -{ - struct nvfx_fragment_program *fp = fpc->fp; - uint32_t *hw; - - fpc->inst_offset = fp->insn_len; - fpc->have_const = 0; - grow_insns(fpc, 4); - hw = &fp->insn[fpc->inst_offset]; - memset(hw, 0, sizeof(uint32_t) * 4); - - if (op == NVFX_FP_OP_OPCODE_KIL) - fp->fp_control |= NV40TCL_FP_CONTROL_KIL; - hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT); - hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT); - hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT); - - if (sat) - hw[0] |= NVFX_FP_OP_OUT_SAT; - - if (dst.cc_update) - hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE; - hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT); - hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) | - (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) | - (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) | - (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT)); - - emit_dst(fpc, dst); - emit_src(fpc, 0, s0); - emit_src(fpc, 1, s1); - emit_src(fpc, 2, s2); -} - -static void -nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit, - struct nvfx_sreg dst, int mask, - struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2) -{ - struct nvfx_fragment_program *fp = fpc->fp; - - nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); - - fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT); - fp->samplers |= (1 << unit); -} - -static INLINE struct nvfx_sreg -tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) -{ - struct nvfx_sreg src; - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - src = nvfx_sr(NVFXSR_INPUT, - fpc->attrib_map[fsrc->Register.Index]); - break; - case TGSI_FILE_CONSTANT: - src = constant(fpc, fsrc->Register.Index, NULL); - break; - case TGSI_FILE_IMMEDIATE: - assert(fsrc->Register.Index < fpc->nr_imm); - src = fpc->imm[fsrc->Register.Index]; - break; - case TGSI_FILE_TEMPORARY: - src = fpc->r_temp[fsrc->Register.Index]; - break; - /* NV40 fragprog result regs are just temps, so this is simple */ - case TGSI_FILE_OUTPUT: - src = fpc->r_result[fsrc->Register.Index]; - break; - default: - NOUVEAU_ERR("bad src file\n"); - break; - } - - src.abs = fsrc->Register.Absolute; - src.negate = fsrc->Register.Negate; - src.swz[0] = fsrc->Register.SwizzleX; - src.swz[1] = fsrc->Register.SwizzleY; - src.swz[2] = fsrc->Register.SwizzleZ; - src.swz[3] = fsrc->Register.SwizzleW; - return src; -} - -static INLINE struct nvfx_sreg -tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) { - switch (fdst->Register.File) { - case TGSI_FILE_OUTPUT: - return fpc->r_result[fdst->Register.Index]; - case TGSI_FILE_TEMPORARY: - return fpc->r_temp[fdst->Register.Index]; - case TGSI_FILE_NULL: - return nvfx_sr(NVFXSR_NONE, 0); - default: - NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); - return nvfx_sr(NVFXSR_NONE, 0); - } -} - -static INLINE int -tgsi_mask(uint tgsi) -{ - int mask = 0; - - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; - return mask; -} - -static boolean -src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc, - struct nvfx_sreg *src) -{ - const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); - struct nvfx_sreg tgsi = tgsi_src(fpc, fsrc); - uint mask = 0; - uint c; - - for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - mask |= (1 << c); - break; - default: - assert(0); - } - } - - if (mask == MASK_ALL) - return TRUE; - - *src = temp(fpc); - - if (mask) - arith(fpc, 0, MOV, *src, mask, tgsi, none, none); - - return FALSE; -} - -static boolean -nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, - const struct tgsi_full_instruction *finst) -{ - const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); - struct nvfx_sreg src[3], dst, tmp; - int mask, sat, unit; - int ai = -1, ci = -1, ii = -1; - int i; - - if (finst->Instruction.Opcode == TGSI_OPCODE_END) - return TRUE; - - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { - src[i] = tgsi_src(fpc, fsrc); - } - } - - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - case TGSI_FILE_CONSTANT: - case TGSI_FILE_TEMPORARY: - if (!src_native_swz(fpc, fsrc, &src[i])) - continue; - break; - default: - break; - } - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->Register.Index) { - ai = fsrc->Register.Index; - src[i] = tgsi_src(fpc, fsrc); - } else { - src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, - tgsi_src(fpc, fsrc), none, none); - } - break; - case TGSI_FILE_CONSTANT: - if ((ci == -1 && ii == -1) || - ci == fsrc->Register.Index) { - ci = fsrc->Register.Index; - src[i] = tgsi_src(fpc, fsrc); - } else { - src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, - tgsi_src(fpc, fsrc), none, none); - } - break; - case TGSI_FILE_IMMEDIATE: - if ((ci == -1 && ii == -1) || - ii == fsrc->Register.Index) { - ii = fsrc->Register.Index; - src[i] = tgsi_src(fpc, fsrc); - } else { - src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, - tgsi_src(fpc, fsrc), none, none); - } - break; - case TGSI_FILE_TEMPORARY: - /* handled above */ - break; - case TGSI_FILE_SAMPLER: - unit = fsrc->Register.Index; - break; - case TGSI_FILE_OUTPUT: - break; - default: - NOUVEAU_ERR("bad src file\n"); - return FALSE; - } - } - - dst = tgsi_dst(fpc, &finst->Dst[0]); - mask = tgsi_mask(finst->Dst[0].Register.WriteMask); - sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); - - switch (finst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); - break; - case TGSI_OPCODE_ADD: - arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_CMP: - tmp = nvfx_sr(NVFXSR_NONE, 0); - tmp.cc_update = 1; - arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); - dst.cc_test = NVFX_VP_INST_COND_GE; - arith(fpc, sat, MOV, dst, mask, src[2], none, none); - dst.cc_test = NVFX_VP_INST_COND_LT; - arith(fpc, sat, MOV, dst, mask, src[1], none, none); - break; - case TGSI_OPCODE_COS: - arith(fpc, sat, COS, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_DDX: - if (mask & (MASK_Z | MASK_W)) { - tmp = temp(fpc); - arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, - swz(src[0], Z, W, Z, W), none, none); - arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, - swz(tmp, X, Y, X, Y), none, none); - arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, src[0], - none, none); - arith(fpc, 0, MOV, dst, mask, tmp, none, none); - } else { - arith(fpc, sat, DDX, dst, mask, src[0], none, none); - } - break; - case TGSI_OPCODE_DDY: - if (mask & (MASK_Z | MASK_W)) { - tmp = temp(fpc); - arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, - swz(src[0], Z, W, Z, W), none, none); - arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, - swz(tmp, X, Y, X, Y), none, none); - arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, src[0], - none, none); - arith(fpc, 0, MOV, dst, mask, tmp, none, none); - } else { - arith(fpc, sat, DDY, dst, mask, src[0], none, none); - } - break; - case TGSI_OPCODE_DP3: - arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DP4: - arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DPH: - tmp = temp(fpc); - arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); - arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), - swz(src[1], W, W, W, W), none); - break; - case TGSI_OPCODE_DST: - arith(fpc, sat, DST, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_EX2: - arith(fpc, sat, EX2, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FLR: - arith(fpc, sat, FLR, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FRC: - arith(fpc, sat, FRC, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_KILP: - arith(fpc, 0, KIL, none, 0, none, none, none); - break; - case TGSI_OPCODE_KIL: - dst = nvfx_sr(NVFXSR_NONE, 0); - dst.cc_update = 1; - arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); - dst.cc_update = 0; dst.cc_test = NVFX_FP_OP_COND_LT; - arith(fpc, 0, KIL, dst, 0, none, none, none); - break; - case TGSI_OPCODE_LG2: - arith(fpc, sat, LG2, dst, mask, src[0], none, none); - break; -// case TGSI_OPCODE_LIT: - case TGSI_OPCODE_LRP: - tmp = temp(fpc); - arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]); - arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp); - break; - case TGSI_OPCODE_MAD: - arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); - break; - case TGSI_OPCODE_MAX: - arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MIN: - arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MOV: - arith(fpc, sat, MOV, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_MUL: - arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_POW: - tmp = temp(fpc); - arith(fpc, 0, LG2, tmp, MASK_X, - swz(src[0], X, X, X, X), none, none); - arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), - swz(src[1], X, X, X, X), none); - arith(fpc, sat, EX2, dst, mask, - swz(tmp, X, X, X, X), none, none); - break; - case TGSI_OPCODE_RCP: - arith(fpc, sat, RCP, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_RET: - assert(0); - break; - case TGSI_OPCODE_RFL: - tmp = temp(fpc); - arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none); - arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none); - arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z, - swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); - arith(fpc, sat, MAD, dst, mask, - swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])); - break; - case TGSI_OPCODE_RSQ: - tmp = temp(fpc); - arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X, - abs(swz(src[0], X, X, X, X)), none, none); - arith(fpc, sat, EX2, dst, mask, - neg(swz(tmp, X, X, X, X)), none, none); - break; - case TGSI_OPCODE_SCS: - /* avoid overwriting the source */ - if(src[0].swz[SWZ_X] != SWZ_X) - { - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); - } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); - } - } - else - { - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); - } - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); - } - } - break; - case TGSI_OPCODE_SEQ: - arith(fpc, sat, SEQ, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SFL: - arith(fpc, sat, SFL, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SGE: - arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SGT: - arith(fpc, sat, SGT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SIN: - arith(fpc, sat, SIN, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_SLE: - arith(fpc, sat, SLE, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SLT: - arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SNE: - arith(fpc, sat, SNE, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_STR: - arith(fpc, sat, STR, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SUB: - arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); - break; - case TGSI_OPCODE_TEX: - tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_TXB: - tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_TXP: - tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_XPD: - tmp = temp(fpc); - arith(fpc, 0, MUL, tmp, mask, - swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(fpc, sat, MAD, dst, (mask & ~MASK_W), - swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), - neg(tmp)); - break; - default: - NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); - return FALSE; - } - - release_temps(fpc); - return TRUE; -} - -static boolean -nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, - const struct tgsi_full_declaration *fdec) -{ - int hw; - - switch (fdec->Semantic.Name) { - case TGSI_SEMANTIC_POSITION: - hw = NVFX_FP_OP_INPUT_SRC_POSITION; - break; - case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.Index == 0) { - hw = NVFX_FP_OP_INPUT_SRC_COL0; - } else - if (fdec->Semantic.Index == 1) { - hw = NVFX_FP_OP_INPUT_SRC_COL1; - } else { - NOUVEAU_ERR("bad colour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_FOG: - hw = NVFX_FP_OP_INPUT_SRC_FOGC; - break; - case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.Index <= 7) { - hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic. - Index); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; - } - break; - default: - NOUVEAU_ERR("bad input semantic\n"); - return FALSE; - } - - fpc->attrib_map[fdec->Range.First] = hw; - return TRUE; -} - -static boolean -nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, - const struct tgsi_full_declaration *fdec) -{ - unsigned idx = fdec->Range.First; - unsigned hw; - - switch (fdec->Semantic.Name) { - case TGSI_SEMANTIC_POSITION: - hw = 1; - break; - case TGSI_SEMANTIC_COLOR: - switch (fdec->Semantic.Index) { - case 0: hw = 0; break; - case 1: hw = 2; break; - case 2: hw = 3; break; - case 3: hw = 4; break; - default: - NOUVEAU_ERR("bad rcol index\n"); - return FALSE; - } - break; - default: - NOUVEAU_ERR("bad output semantic\n"); - return FALSE; - } - - fpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw); - fpc->r_temps |= (1 << hw); - return TRUE; -} - -static boolean -nv40_fragprog_prepare(struct nv40_fpc *fpc) -{ - struct tgsi_parse_context p; - int high_temp = -1, i; - - tgsi_parse_init(&p, fpc->fp->pipe.tokens); - while (!tgsi_parse_end_of_tokens(&p)) { - const union tgsi_full_token *tok = &p.FullToken; - - tgsi_parse_token(&p); - switch(tok->Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *fdec; - fdec = &p.FullToken.FullDeclaration; - switch (fdec->Declaration.File) { - case TGSI_FILE_INPUT: - if (!nv40_fragprog_parse_decl_attrib(fpc, fdec)) - goto out_err; - break; - case TGSI_FILE_OUTPUT: - if (!nv40_fragprog_parse_decl_output(fpc, fdec)) - goto out_err; - break; - case TGSI_FILE_TEMPORARY: - if (fdec->Range.Last > high_temp) { - high_temp = - fdec->Range.Last; - } - break; - default: - break; - } - } - break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - struct tgsi_full_immediate *imm; - float vals[4]; - - imm = &p.FullToken.FullImmediate; - assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); - assert(fpc->nr_imm < MAX_IMM); - - vals[0] = imm->u[0].Float; - vals[1] = imm->u[1].Float; - vals[2] = imm->u[2].Float; - vals[3] = imm->u[3].Float; - fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); - } - break; - default: - break; - } - } - tgsi_parse_free(&p); - - if (++high_temp) { - fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg)); - for (i = 0; i < high_temp; i++) - fpc->r_temp[i] = temp(fpc); - fpc->r_temps_discard = 0; - } - - return TRUE; - -out_err: - if (fpc->r_temp) - FREE(fpc->r_temp); - tgsi_parse_free(&p); - return FALSE; -} - -static void -nv40_fragprog_translate(struct nvfx_context *nvfx, - struct nvfx_fragment_program *fp) -{ - struct tgsi_parse_context parse; - struct nv40_fpc *fpc = NULL; - - fpc = CALLOC(1, sizeof(struct nv40_fpc)); - if (!fpc) - return; - fpc->fp = fp; - fpc->num_regs = 2; - - if (!nv40_fragprog_prepare(fpc)) { - FREE(fpc); - return; - } - - tgsi_parse_init(&parse, fp->pipe.tokens); - - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *finst; - - finst = &parse.FullToken.FullInstruction; - if (!nv40_fragprog_parse_instruction(fpc, finst)) - goto out_err; - } - break; - default: - break; - } - } - - fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT; - - /* Terminate final instruction */ - fp->insn[fpc->inst_offset] |= 0x00000001; - - /* Append NOP + END instruction, may or may not be necessary. */ - fpc->inst_offset = fp->insn_len; - grow_insns(fpc, 4); - fp->insn[fpc->inst_offset + 0] = 0x00000001; - fp->insn[fpc->inst_offset + 1] = 0x00000000; - fp->insn[fpc->inst_offset + 2] = 0x00000000; - fp->insn[fpc->inst_offset + 3] = 0x00000000; - - fp->translated = TRUE; -out_err: - tgsi_parse_free(&parse); - if (fpc->r_temp) - FREE(fpc->r_temp); - FREE(fpc); -} - -static void -nv40_fragprog_upload(struct nvfx_context *nvfx, - struct nvfx_fragment_program *fp) -{ - struct pipe_screen *pscreen = nvfx->pipe.screen; - const uint32_t le = 1; - uint32_t *map; - int i; - - map = pipe_buffer_map(pscreen, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); - -#if 0 - for (i = 0; i < fp->insn_len; i++) { - fflush(stdout); fflush(stderr); - NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); - fflush(stdout); fflush(stderr); - } -#endif - - if ((*(const uint8_t *)&le)) { - for (i = 0; i < fp->insn_len; i++) { - map[i] = fp->insn[i]; - } - } else { - /* Weird swapping for big-endian chips */ - for (i = 0; i < fp->insn_len; i++) { - map[i] = ((fp->insn[i] & 0xffff) << 16) | - ((fp->insn[i] >> 16) & 0xffff); - } - } - - pipe_buffer_unmap(pscreen, fp->buffer); -} - -static boolean -nv40_fragprog_validate(struct nvfx_context *nvfx) -{ - struct nvfx_fragment_program *fp = nvfx->fragprog; - struct pipe_buffer *constbuf = - nvfx->constbuf[PIPE_SHADER_FRAGMENT]; - struct pipe_screen *pscreen = nvfx->pipe.screen; - struct nouveau_stateobj *so; - boolean new_consts = FALSE; - int i; - - if (fp->translated) - goto update_constants; - - nvfx->fallback_swrast &= ~NVFX_NEW_FRAGPROG; - nv40_fragprog_translate(nvfx, fp); - if (!fp->translated) { - nvfx->fallback_swrast |= NVFX_NEW_FRAGPROG; - return FALSE; - } - - fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); - nv40_fragprog_upload(nvfx, fp); - - so = so_new(2, 2, 1); - so_method(so, nvfx->screen->eng3d, NV34TCL_FP_ACTIVE_PROGRAM, 1); - so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | - NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0, - NV34TCL_FP_ACTIVE_PROGRAM_DMA1); - so_method(so, nvfx->screen->eng3d, NV34TCL_FP_CONTROL, 1); - so_data (so, fp->fp_control); - so_ref(so, &fp->so); - so_ref(NULL, &so); - -update_constants: - if (fp->nr_consts) { - float *map; - - map = pipe_buffer_map(pscreen, constbuf, - PIPE_BUFFER_USAGE_CPU_READ); - for (i = 0; i < fp->nr_consts; i++) { - struct nvfx_fragment_program_data *fpd = &fp->consts[i]; - uint32_t *p = &fp->insn[fpd->offset]; - uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; - - if (!memcmp(p, cb, 4 * sizeof(float))) - continue; - memcpy(p, cb, 4 * sizeof(float)); - new_consts = TRUE; - } - pipe_buffer_unmap(pscreen, constbuf); - - if (new_consts) - nv40_fragprog_upload(nvfx, fp); - } - - if (new_consts || fp->so != nvfx->state.hw[NVFX_STATE_FRAGPROG]) { - so_ref(fp->so, &nvfx->state.hw[NVFX_STATE_FRAGPROG]); - return TRUE; - } - - return FALSE; -} - -void -nv40_fragprog_destroy(struct nvfx_context *nvfx, - struct nvfx_fragment_program *fp) -{ - if (fp->buffer) - pipe_buffer_reference(&fp->buffer, NULL); - - if (fp->so) - so_ref(NULL, &fp->so); - - if (fp->insn_len) - FREE(fp->insn); -} - -struct nvfx_state_entry nv40_state_fragprog = { - .validate = nv40_fragprog_validate, - .dirty = { - .pipe = NVFX_NEW_FRAGPROG, - .hw = NVFX_STATE_FRAGPROG - } -}; - diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index a205342ac5d..1f4b9777d48 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -577,7 +577,7 @@ nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso) struct nvfx_context *nvfx = nvfx_context(pipe); struct nvfx_fragment_program *fp = hwcso; - nv40_fragprog_destroy(nvfx, fp); + nvfx_fragprog_destroy(nvfx, fp); FREE((void*)fp->pipe.tokens); FREE(fp); } diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index 2f80681e5cf..f8c10a249b5 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -5,10 +5,11 @@ LIBNAME = nvfx C_SOURCES = \ nvfx_clear.c \ - nvfx_state_emit.c \ + nvfx_fragprog.c \ nvfx_miptree.c \ nvfx_query.c \ nvfx_state_blend.c \ + nvfx_state_emit.c \ nvfx_state_fb.c \ nvfx_state_rasterizer.c \ nvfx_state_scissor.c \ diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 28daa1d2e7a..0a7a0f12527 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -183,6 +183,7 @@ struct nvfx_state_entry { extern struct nvfx_state_entry nvfx_state_blend; extern struct nvfx_state_entry nvfx_state_blend_colour; +extern struct nvfx_state_entry nvfx_state_fragprog; extern struct nvfx_state_entry nvfx_state_framebuffer; extern struct nvfx_state_entry nvfx_state_rasterizer; extern struct nvfx_state_entry nvfx_state_scissor; @@ -198,6 +199,10 @@ extern void nvfx_init_surface_functions(struct nvfx_context *nvfx); extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, double depth, unsigned stencil); +/* nvfx_fragprog.c */ +extern void nvfx_fragprog_destroy(struct nvfx_context *, + struct nvfx_fragment_program *); + /* nvfx_state_emit.c */ extern void nvfx_state_flush_notify(struct nouveau_channel *chan); extern boolean nvfx_state_validate(struct nvfx_context *nvfx); diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c new file mode 100644 index 00000000000..ecc193877b8 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -0,0 +1,1011 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nvfx_context.h" + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 1 +#define MASK_Y 2 +#define MASK_Z 4 +#define MASK_W 8 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE NVFX_FP_OP_DST_SCALE_1X +#define DEF_CTEST NVFX_FP_OP_COND_TR +#include "nvfx_shader.h" + +#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nvfx_sr_neg((s)) +#define abs(s) nvfx_sr_abs((s)) +#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v) + +#define MAX_CONSTS 128 +#define MAX_IMM 32 +struct nvfx_fpc { + struct nvfx_fragment_program *fp; + + uint attrib_map[PIPE_MAX_SHADER_INPUTS]; + + unsigned r_temps; + unsigned r_temps_discard; + struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nvfx_sreg *r_temp; + + int num_regs; + + unsigned inst_offset; + unsigned have_const; + + struct { + int pipe; + float vals[4]; + } consts[MAX_CONSTS]; + int nr_consts; + + struct nvfx_sreg imm[MAX_IMM]; + unsigned nr_imm; +}; + +static INLINE struct nvfx_sreg +temp(struct nvfx_fpc *fpc) +{ + int idx = ffs(~fpc->r_temps) - 1; + + if (idx < 0) { + NOUVEAU_ERR("out of temps!!\n"); + assert(0); + return nvfx_sr(NVFXSR_TEMP, 0); + } + + fpc->r_temps |= (1 << idx); + fpc->r_temps_discard |= (1 << idx); + return nvfx_sr(NVFXSR_TEMP, idx); +} + +static INLINE void +release_temps(struct nvfx_fpc *fpc) +{ + fpc->r_temps &= ~fpc->r_temps_discard; + fpc->r_temps_discard = 0; +} + +static INLINE struct nvfx_sreg +constant(struct nvfx_fpc *fpc, int pipe, float vals[4]) +{ + int idx; + + if (fpc->nr_consts == MAX_CONSTS) + assert(0); + idx = fpc->nr_consts++; + + fpc->consts[idx].pipe = pipe; + if (pipe == -1) + memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); + return nvfx_sr(NVFXSR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nvfx_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \ + (d), (m), (s0), (s1), (s2)) +#define tex(cc,s,o,u,d,m,s0,s1,s2) \ + nvfx_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \ + (d), (m), (s0), none, none) + +static void +grow_insns(struct nvfx_fpc *fpc, int size) +{ + struct nvfx_fragment_program *fp = fpc->fp; + + fp->insn_len += size; + fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); +} + +static void +emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_sreg src) +{ + struct nvfx_fragment_program *fp = fpc->fp; + uint32_t *hw = &fp->insn[fpc->inst_offset]; + uint32_t sr = 0; + + switch (src.type) { + case NVFXSR_INPUT: + sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); + hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT); + break; + case NVFXSR_OUTPUT: + sr |= NVFX_FP_REG_SRC_HALF; + /* fall-through */ + case NVFXSR_TEMP: + sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT); + sr |= (src.index << NVFX_FP_REG_SRC_SHIFT); + break; + case NVFXSR_CONST: + if (!fpc->have_const) { + grow_insns(fpc, 4); + fpc->have_const = 1; + } + + hw = &fp->insn[fpc->inst_offset]; + if (fpc->consts[src.index].pipe >= 0) { + struct nvfx_fragment_program_data *fpd; + + fp->consts = realloc(fp->consts, ++fp->nr_consts * + sizeof(*fpd)); + fpd = &fp->consts[fp->nr_consts - 1]; + fpd->offset = fpc->inst_offset + 4; + fpd->index = fpc->consts[src.index].pipe; + memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); + } else { + memcpy(&fp->insn[fpc->inst_offset + 4], + fpc->consts[src.index].vals, + sizeof(uint32_t) * 4); + } + + sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT); + break; + case NVFXSR_NONE: + sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NVFX_FP_REG_NEGATE; + + if (src.abs) + hw[1] |= (1 << (29 + pos)); + + sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) | + (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) | + (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) | + (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT)); + + hw[pos + 1] |= sr; +} + +static void +emit_dst(struct nvfx_fpc *fpc, struct nvfx_sreg dst) +{ + struct nvfx_fragment_program *fp = fpc->fp; + uint32_t *hw = &fp->insn[fpc->inst_offset]; + + switch (dst.type) { + case NVFXSR_TEMP: + if (fpc->num_regs < (dst.index + 1)) + fpc->num_regs = dst.index + 1; + break; + case NVFXSR_OUTPUT: + if (dst.index == 1) { + fp->fp_control |= 0xe; + } else { + hw[0] |= NVFX_FP_OP_OUT_REG_HALF; + } + break; + case NVFXSR_NONE: + hw[0] |= (1 << 30); + break; + default: + assert(0); + } + + hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT); +} + +static void +nvfx_fp_arith(struct nvfx_fpc *fpc, int sat, int op, + struct nvfx_sreg dst, int mask, + struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2) +{ + struct nvfx_fragment_program *fp = fpc->fp; + uint32_t *hw; + + fpc->inst_offset = fp->insn_len; + fpc->have_const = 0; + grow_insns(fpc, 4); + hw = &fp->insn[fpc->inst_offset]; + memset(hw, 0, sizeof(uint32_t) * 4); + + if (op == NVFX_FP_OP_OPCODE_KIL) + fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL; + hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT); + hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT); + hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT); + + if (sat) + hw[0] |= NVFX_FP_OP_OUT_SAT; + + if (dst.cc_update) + hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE; + hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT); + hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) | + (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) | + (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) | + (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT)); + + emit_dst(fpc, dst); + emit_src(fpc, 0, s0); + emit_src(fpc, 1, s1); + emit_src(fpc, 2, s2); +} + +static void +nvfx_fp_tex(struct nvfx_fpc *fpc, int sat, int op, int unit, + struct nvfx_sreg dst, int mask, + struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2) +{ + struct nvfx_fragment_program *fp = fpc->fp; + + nvfx_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); + + fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT); + fp->samplers |= (1 << unit); +} + +static INLINE struct nvfx_sreg +tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc) +{ + struct nvfx_sreg src; + + switch (fsrc->Register.File) { + case TGSI_FILE_INPUT: + src = nvfx_sr(NVFXSR_INPUT, + fpc->attrib_map[fsrc->Register.Index]); + break; + case TGSI_FILE_CONSTANT: + src = constant(fpc, fsrc->Register.Index, NULL); + break; + case TGSI_FILE_IMMEDIATE: + assert(fsrc->Register.Index < fpc->nr_imm); + src = fpc->imm[fsrc->Register.Index]; + break; + case TGSI_FILE_TEMPORARY: + src = fpc->r_temp[fsrc->Register.Index]; + break; + /* NV40 fragprog result regs are just temps, so this is simple */ + case TGSI_FILE_OUTPUT: + src = fpc->r_result[fsrc->Register.Index]; + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; + return src; +} + +static INLINE struct nvfx_sreg +tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) { + switch (fdst->Register.File) { + case TGSI_FILE_OUTPUT: + return fpc->r_result[fdst->Register.Index]; + case TGSI_FILE_TEMPORARY: + return fpc->r_temp[fdst->Register.Index]; + case TGSI_FILE_NULL: + return nvfx_sr(NVFXSR_NONE, 0); + default: + NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); + return nvfx_sr(NVFXSR_NONE, 0); + } +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +src_native_swz(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc, + struct nvfx_sreg *src) +{ + const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); + struct nvfx_sreg tgsi = tgsi_src(fpc, fsrc); + uint mask = 0; + uint c; + + for (c = 0; c < 4; c++) { + switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: + mask |= (1 << c); + break; + default: + assert(0); + } + } + + if (mask == MASK_ALL) + return TRUE; + + *src = temp(fpc); + + if (mask) + arith(fpc, 0, MOV, *src, mask, tgsi, none, none); + + return FALSE; +} + +static boolean +nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, + const struct tgsi_full_instruction *finst) +{ + const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); + struct nvfx_sreg src[3], dst, tmp; + int mask, sat, unit; + int ai = -1, ci = -1, ii = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->Src[i]; + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(fpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->Src[i]; + + switch (fsrc->Register.File) { + case TGSI_FILE_INPUT: + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + if (!src_native_swz(fpc, fsrc, &src[i])) + continue; + break; + default: + break; + } + + switch (fsrc->Register.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_CONSTANT: + if ((ci == -1 && ii == -1) || + ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_IMMEDIATE: + if ((ci == -1 && ii == -1) || + ii == fsrc->Register.Index) { + ii = fsrc->Register.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + case TGSI_FILE_SAMPLER: + unit = fsrc->Register.Index; + break; + case TGSI_FILE_OUTPUT: + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(fpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); + sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_CMP: + tmp = nvfx_sr(NVFXSR_NONE, 0); + tmp.cc_update = 1; + arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); + dst.cc_test = NVFX_VP_INST_COND_GE; + arith(fpc, sat, MOV, dst, mask, src[2], none, none); + dst.cc_test = NVFX_VP_INST_COND_LT; + arith(fpc, sat, MOV, dst, mask, src[1], none, none); + break; + case TGSI_OPCODE_COS: + arith(fpc, sat, COS, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DDX: + if (mask & (MASK_Z | MASK_W)) { + tmp = temp(fpc); + arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, + swz(src[0], Z, W, Z, W), none, none); + arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, + swz(tmp, X, Y, X, Y), none, none); + arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, src[0], + none, none); + arith(fpc, 0, MOV, dst, mask, tmp, none, none); + } else { + arith(fpc, sat, DDX, dst, mask, src[0], none, none); + } + break; + case TGSI_OPCODE_DDY: + if (mask & (MASK_Z | MASK_W)) { + tmp = temp(fpc); + arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, + swz(src[0], Z, W, Z, W), none, none); + arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, + swz(tmp, X, Y, X, Y), none, none); + arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, src[0], + none, none); + arith(fpc, 0, MOV, dst, mask, tmp, none, none); + } else { + arith(fpc, sat, DDY, dst, mask, src[0], none, none); + } + break; + case TGSI_OPCODE_DP3: + arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DP4: + arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + tmp = temp(fpc); + arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); + arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), + swz(src[1], W, W, W, W), none); + break; + case TGSI_OPCODE_DST: + arith(fpc, sat, DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(fpc, sat, EX2, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FLR: + arith(fpc, sat, FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(fpc, sat, FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_KILP: + arith(fpc, 0, KIL, none, 0, none, none, none); + break; + case TGSI_OPCODE_KIL: + dst = nvfx_sr(NVFXSR_NONE, 0); + dst.cc_update = 1; + arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); + dst.cc_update = 0; dst.cc_test = NVFX_FP_OP_COND_LT; + arith(fpc, 0, KIL, dst, 0, none, none, none); + break; + case TGSI_OPCODE_LG2: + arith(fpc, sat, LG2, dst, mask, src[0], none, none); + break; +// case TGSI_OPCODE_LIT: + case TGSI_OPCODE_LRP: + if(!nvfx->is_nv4x) + arith(fpc, sat, LRP_NV30, dst, mask, src[0], src[1], src[2]); + else { + tmp = temp(fpc); + arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]); + arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp); + } + break; + case TGSI_OPCODE_MAD: + arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(fpc, sat, MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_POW: + if(!nvfx->is_nv4x) + arith(fpc, sat, POW_NV30, dst, mask, src[0], src[1], none); + else { + tmp = temp(fpc); + arith(fpc, 0, LG2, tmp, MASK_X, + swz(src[0], X, X, X, X), none, none); + arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(fpc, sat, EX2, dst, mask, + swz(tmp, X, X, X, X), none, none); + } + break; + case TGSI_OPCODE_RCP: + arith(fpc, sat, RCP, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_RET: + assert(0); + break; + case TGSI_OPCODE_RFL: + if(!nvfx->is_nv4x) + arith(fpc, 0, RFL_NV30, dst, mask, src[0], src[1], none); + else { + tmp = temp(fpc); + arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none); + arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none); + arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z, + swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); + arith(fpc, sat, MAD, dst, mask, + swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])); + } + break; + case TGSI_OPCODE_RSQ: + if(!nvfx->is_nv4x) + arith(fpc, sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); + else { + tmp = temp(fpc); + arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X, + abs(swz(src[0], X, X, X, X)), none, none); + arith(fpc, sat, EX2, dst, mask, + neg(swz(tmp, X, X, X, X)), none, none); + } + break; + case TGSI_OPCODE_SCS: + /* avoid overwriting the source */ + if(src[0].swz[SWZ_X] != SWZ_X) + { + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + } + else + { + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + } + break; + case TGSI_OPCODE_SEQ: + arith(fpc, sat, SEQ, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SFL: + arith(fpc, sat, SFL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SGE: + arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SGT: + arith(fpc, sat, SGT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SIN: + arith(fpc, sat, SIN, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_SLE: + arith(fpc, sat, SLE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SNE: + arith(fpc, sat, SNE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_STR: + arith(fpc, sat, STR, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); + break; + case TGSI_OPCODE_TEX: + tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_TXB: + tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_TXP: + tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_XPD: + tmp = temp(fpc); + arith(fpc, 0, MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(fpc, sat, MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + release_temps(fpc); + return TRUE; +} + +static boolean +nvfx_fragprog_parse_decl_attrib(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + int hw; + + switch (fdec->Semantic.Name) { + case TGSI_SEMANTIC_POSITION: + hw = NVFX_FP_OP_INPUT_SRC_POSITION; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.Index == 0) { + hw = NVFX_FP_OP_INPUT_SRC_COL0; + } else + if (fdec->Semantic.Index == 1) { + hw = NVFX_FP_OP_INPUT_SRC_COL1; + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NVFX_FP_OP_INPUT_SRC_FOGC; + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.Index <= 7) { + hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic. + Index); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad input semantic\n"); + return FALSE; + } + + fpc->attrib_map[fdec->Range.First] = hw; + return TRUE; +} + +static boolean +nvfx_fragprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + unsigned idx = fdec->Range.First; + unsigned hw; + + switch (fdec->Semantic.Name) { + case TGSI_SEMANTIC_POSITION: + hw = 1; + break; + case TGSI_SEMANTIC_COLOR: + hw = ~0; + switch (fdec->Semantic.Index) { + case 0: hw = 0; break; + case 1: hw = 2; break; + case 2: hw = 3; break; + case 3: hw = 4; break; + } + if(hw > ((nvfx->is_nv4x) ? 4 : 2)) { + NOUVEAU_ERR("bad rcol index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + fpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw); + fpc->r_temps |= (1 << hw); + return TRUE; +} + +static boolean +nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc) +{ + struct tgsi_parse_context p; + int high_temp = -1, i; + + tgsi_parse_init(&p, fpc->fp->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch(tok->Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + fdec = &p.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_INPUT: + if (!nvfx_fragprog_parse_decl_attrib(nvfx, fpc, fdec)) + goto out_err; + break; + case TGSI_FILE_OUTPUT: + if (!nvfx_fragprog_parse_decl_output(nvfx, fpc, fdec)) + goto out_err; + break; + case TGSI_FILE_TEMPORARY: + if (fdec->Range.Last > high_temp) { + high_temp = + fdec->Range.Last; + } + break; + default: + break; + } + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + struct tgsi_full_immediate *imm; + float vals[4]; + + imm = &p.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); + assert(fpc->nr_imm < MAX_IMM); + + vals[0] = imm->u[0].Float; + vals[1] = imm->u[1].Float; + vals[2] = imm->u[2].Float; + vals[3] = imm->u[3].Float; + fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); + } + break; + default: + break; + } + } + tgsi_parse_free(&p); + + if (++high_temp) { + fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg)); + for (i = 0; i < high_temp; i++) + fpc->r_temp[i] = temp(fpc); + fpc->r_temps_discard = 0; + } + + return TRUE; + +out_err: + if (fpc->r_temp) + FREE(fpc->r_temp); + tgsi_parse_free(&p); + return FALSE; +} + +static void +nvfx_fragprog_translate(struct nvfx_context *nvfx, + struct nvfx_fragment_program *fp) +{ + struct tgsi_parse_context parse; + struct nvfx_fpc *fpc = NULL; + + fpc = CALLOC(1, sizeof(struct nvfx_fpc)); + if (!fpc) + return; + fpc->fp = fp; + fpc->num_regs = 2; + + if (!nvfx_fragprog_prepare(nvfx, fpc)) { + FREE(fpc); + return; + } + + tgsi_parse_init(&parse, fp->pipe.tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + + finst = &parse.FullToken.FullInstruction; + if (!nvfx_fragprog_parse_instruction(nvfx, fpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + if(!nvfx->is_nv4x) + fp->fp_control |= (fpc->num_regs-1)/2; + else + fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT; + + /* Terminate final instruction */ + fp->insn[fpc->inst_offset] |= 0x00000001; + + /* Append NOP + END instruction, may or may not be necessary. */ + fpc->inst_offset = fp->insn_len; + grow_insns(fpc, 4); + fp->insn[fpc->inst_offset + 0] = 0x00000001; + fp->insn[fpc->inst_offset + 1] = 0x00000000; + fp->insn[fpc->inst_offset + 2] = 0x00000000; + fp->insn[fpc->inst_offset + 3] = 0x00000000; + + fp->translated = TRUE; +out_err: + tgsi_parse_free(&parse); + if (fpc->r_temp) + FREE(fpc->r_temp); + FREE(fpc); +} + +static void +nvfx_fragprog_upload(struct nvfx_context *nvfx, + struct nvfx_fragment_program *fp) +{ + struct pipe_screen *pscreen = nvfx->pipe.screen; + const uint32_t le = 1; + uint32_t *map; + int i; + + map = pipe_buffer_map(pscreen, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + +#if 0 + for (i = 0; i < fp->insn_len; i++) { + fflush(stdout); fflush(stderr); + NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); + fflush(stdout); fflush(stderr); + } +#endif + + if ((*(const uint8_t *)&le)) { + for (i = 0; i < fp->insn_len; i++) { + map[i] = fp->insn[i]; + } + } else { + /* Weird swapping for big-endian chips */ + for (i = 0; i < fp->insn_len; i++) { + map[i] = ((fp->insn[i] & 0xffff) << 16) | + ((fp->insn[i] >> 16) & 0xffff); + } + } + + pipe_buffer_unmap(pscreen, fp->buffer); +} + +static boolean +nvfx_fragprog_validate(struct nvfx_context *nvfx) +{ + struct nvfx_fragment_program *fp = nvfx->fragprog; + struct pipe_buffer *constbuf = + nvfx->constbuf[PIPE_SHADER_FRAGMENT]; + struct pipe_screen *pscreen = nvfx->pipe.screen; + struct nouveau_stateobj *so; + boolean new_consts = FALSE; + int i; + + if (fp->translated) + goto update_constants; + + nvfx->fallback_swrast &= ~NVFX_NEW_FRAGPROG; + nvfx_fragprog_translate(nvfx, fp); + if (!fp->translated) { + nvfx->fallback_swrast |= NVFX_NEW_FRAGPROG; + return FALSE; + } + + fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); + nvfx_fragprog_upload(nvfx, fp); + + so = so_new(4, 4, 1); + so_method(so, nvfx->screen->eng3d, NV34TCL_FP_ACTIVE_PROGRAM, 1); + so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | + NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0, + NV34TCL_FP_ACTIVE_PROGRAM_DMA1); + so_method(so, nvfx->screen->eng3d, NV34TCL_FP_CONTROL, 1); + so_data (so, fp->fp_control); + if(!nvfx->is_nv4x) { + so_method(so, nvfx->screen->eng3d, NV34TCL_FP_REG_CONTROL, 1); + so_data (so, (1<<16)|0x4); + so_method(so, nvfx->screen->eng3d, NV34TCL_TX_UNITS_ENABLE, 1); + so_data (so, fp->samplers); + } + + so_ref(so, &fp->so); + so_ref(NULL, &so); + +update_constants: + if (fp->nr_consts) { + float *map; + + map = pipe_buffer_map(pscreen, constbuf, + PIPE_BUFFER_USAGE_CPU_READ); + for (i = 0; i < fp->nr_consts; i++) { + struct nvfx_fragment_program_data *fpd = &fp->consts[i]; + uint32_t *p = &fp->insn[fpd->offset]; + uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; + + if (!memcmp(p, cb, 4 * sizeof(float))) + continue; + memcpy(p, cb, 4 * sizeof(float)); + new_consts = TRUE; + } + pipe_buffer_unmap(pscreen, constbuf); + + if (new_consts) + nvfx_fragprog_upload(nvfx, fp); + } + + if (new_consts || fp->so != nvfx->state.hw[NVFX_STATE_FRAGPROG]) { + so_ref(fp->so, &nvfx->state.hw[NVFX_STATE_FRAGPROG]); + return TRUE; + } + + return FALSE; +} + +void +nvfx_fragprog_destroy(struct nvfx_context *nvfx, + struct nvfx_fragment_program *fp) +{ + if (fp->buffer) + pipe_buffer_reference(&fp->buffer, NULL); + + if (fp->so) + so_ref(NULL, &fp->so); + + if (fp->insn_len) + FREE(fp->insn); +} + +struct nvfx_state_entry nvfx_state_fragprog = { + .validate = nvfx_fragprog_validate, + .dirty = { + .pipe = NVFX_NEW_FRAGPROG, + .hw = NVFX_STATE_FRAGPROG + } +}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index d3088e42112..d6fbc5d8dcb 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -9,7 +9,7 @@ static struct nvfx_state_entry *name##_render_states[] = { \ &nvfx_state_rasterizer, \ &nvfx_state_scissor, \ &nvfx_state_stipple, \ - &nvxx##_state_fragprog, \ + &nvfx_state_fragprog, \ &nvxx##_state_fragtex, \ &nvxx##_state_vertprog, \ &nvfx_state_blend, \ -- cgit v1.2.3 From f9eafeca297497a94c438ea28ed59f3a45ed2566 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 21 Feb 2010 11:17:55 +0100 Subject: nv30, nv40: non-trivially unify nv[34]0_draw.c nv30_draw.c is a stub. This patch makes both nv30 and nv40 use the nv40 swtnl path. Note that this doesn't actually work on nv30 because the vertex program is encoded in the nv40-only layout. However, swtnl was unimplemented before on nv30, so this is not a regression. Furthermore, a patch to fix this is available near the end of the patchset. --- src/gallium/drivers/nv30/Makefile | 1 - src/gallium/drivers/nv30/nv30_context.c | 2 +- src/gallium/drivers/nv30/nv30_context.h | 3 - src/gallium/drivers/nv30/nv30_draw.c | 61 ----- src/gallium/drivers/nv40/Makefile | 1 - src/gallium/drivers/nv40/nv40_context.c | 2 +- src/gallium/drivers/nv40/nv40_context.h | 8 - src/gallium/drivers/nv40/nv40_draw.c | 360 ---------------------------- src/gallium/drivers/nv40/nv40_vbo.c | 4 +- src/gallium/drivers/nvfx/Makefile | 1 + src/gallium/drivers/nvfx/nvfx_context.h | 8 + src/gallium/drivers/nvfx/nvfx_draw.c | 363 +++++++++++++++++++++++++++++ src/gallium/drivers/nvfx/nvfx_state_emit.c | 6 +- 13 files changed, 381 insertions(+), 439 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_draw.c delete mode 100644 src/gallium/drivers/nv40/nv40_draw.c create mode 100644 src/gallium/drivers/nvfx/nvfx_draw.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index 196cc9a1ef7..791b0040bb8 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -5,7 +5,6 @@ LIBNAME = nv30 C_SOURCES = \ nv30_context.c \ - nv30_draw.c \ nv30_fragtex.c \ nv30_screen.c \ nv30_state.c \ diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index f13458d50a3..671a1939e8d 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -84,7 +84,7 @@ nv30_create(struct pipe_screen *pscreen, void *priv) draw_wide_line_threshold(nvfx->draw, 9999999.0); draw_enable_line_stipple(nvfx->draw, FALSE); draw_enable_point_sprites(nvfx->draw, FALSE); - draw_set_rasterize_stage(nvfx->draw, nv30_draw_render_stage(nvfx)); + draw_set_rasterize_stage(nvfx->draw, nvfx_draw_render_stage(nvfx)); return &nvfx->pipe; } diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 7840318363a..ebdd5455ca8 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -5,9 +5,6 @@ extern void nv30_init_state_functions(struct nvfx_context *nvfx); -/* nv30_draw.c */ -extern struct draw_stage *nv30_draw_render_stage(struct nvfx_context *nvfx); - /* nv30_vertprog.c */ extern void nv30_vertprog_destroy(struct nvfx_context *, struct nvfx_vertex_program *); diff --git a/src/gallium/drivers/nv30/nv30_draw.c b/src/gallium/drivers/nv30/nv30_draw.c deleted file mode 100644 index 5d22e78abf7..00000000000 --- a/src/gallium/drivers/nv30/nv30_draw.c +++ /dev/null @@ -1,61 +0,0 @@ -#include "draw/draw_pipe.h" - -#include "nv30_context.h" - -struct nv30_draw_stage { - struct draw_stage draw; - struct nvfx_context *nvfx; -}; - -static void -nv30_draw_point(struct draw_stage *draw, struct prim_header *prim) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv30_draw_line(struct draw_stage *draw, struct prim_header *prim) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv30_draw_tri(struct draw_stage *draw, struct prim_header *prim) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv30_draw_flush(struct draw_stage *draw, unsigned flags) -{ -} - -static void -nv30_draw_reset_stipple_counter(struct draw_stage *draw) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv30_draw_destroy(struct draw_stage *draw) -{ - FREE(draw); -} - -struct draw_stage * -nv30_draw_render_stage(struct nvfx_context *nvfx) -{ - struct nv30_draw_stage *nv30draw = CALLOC_STRUCT(nv30_draw_stage); - - nv30draw->nvfx = nvfx; - nv30draw->draw.draw = nvfx->draw; - nv30draw->draw.point = nv30_draw_point; - nv30draw->draw.line = nv30_draw_line; - nv30draw->draw.tri = nv30_draw_tri; - nv30draw->draw.flush = nv30_draw_flush; - nv30draw->draw.reset_stipple_counter = nv30_draw_reset_stipple_counter; - nv30draw->draw.destroy = nv30_draw_destroy; - - return &nv30draw->draw; -} - diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index f3ff376dfde..7529e33a741 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -5,7 +5,6 @@ LIBNAME = nv40 C_SOURCES = \ nv40_context.c \ - nv40_draw.c \ nv40_fragtex.c \ nv40_screen.c \ nv40_state.c \ diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index 441b038b052..eb831317720 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -84,7 +84,7 @@ nv40_create(struct pipe_screen *pscreen, void *priv) draw_wide_line_threshold(nvfx->draw, 9999999.0); draw_enable_line_stipple(nvfx->draw, FALSE); draw_enable_point_sprites(nvfx->draw, FALSE); - draw_set_rasterize_stage(nvfx->draw, nv40_draw_render_stage(nvfx)); + draw_set_rasterize_stage(nvfx->draw, nvfx_draw_render_stage(nvfx)); return &nvfx->pipe; } diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 4cd56af8f7b..a8aba0c0cab 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -5,13 +5,6 @@ extern void nv40_init_state_functions(struct nvfx_context *nvfx); -/* nv40_draw.c */ -extern struct draw_stage *nv40_draw_render_stage(struct nvfx_context *nvfx); -extern void nv40_draw_elements_swtnl(struct pipe_context *pipe, - struct pipe_buffer *idxbuf, - unsigned ib_size, unsigned mode, - unsigned start, unsigned count); - /* nv40_vertprog.c */ extern void nv40_vertprog_destroy(struct nvfx_context *, struct nvfx_vertex_program *); @@ -23,7 +16,6 @@ extern void nv40_fragtex_bind(struct nvfx_context *); extern struct nvfx_state_entry nv40_state_vertprog; extern struct nvfx_state_entry nv40_state_fragtex; extern struct nvfx_state_entry nv40_state_vbo; -extern struct nvfx_state_entry nv40_state_vtxfmt; /* nv40_vbo.c */ extern void nv40_draw_arrays(struct pipe_context *, unsigned mode, diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c deleted file mode 100644 index 4ed87779fd6..00000000000 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ /dev/null @@ -1,360 +0,0 @@ -#include "pipe/p_shader_tokens.h" -#include "util/u_inlines.h" - -#include "util/u_pack_color.h" - -#include "draw/draw_context.h" -#include "draw/draw_vertex.h" -#include "draw/draw_pipe.h" - -#include "nv40_context.h" -#define NVFX_SHADER_NO_FUCKEDNESS -#include "nv40_shader.h" - -/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very - * often at all. Uses "quadro style" vertex submission + a fixed vertex - * layout to avoid the need to generate a vertex program or vtxfmt. - */ - -struct nv40_render_stage { - struct draw_stage stage; - struct nvfx_context *nvfx; - unsigned prim; -}; - -static INLINE struct nv40_render_stage * -nv40_render_stage(struct draw_stage *stage) -{ - return (struct nv40_render_stage *)stage; -} - -static INLINE void -nv40_render_vertex(struct nvfx_context *nvfx, const struct vertex_header *v) -{ - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - unsigned i; - - for (i = 0; i < nvfx->swtnl.nr_attribs; i++) { - unsigned idx = nvfx->swtnl.draw[i]; - unsigned hw = nvfx->swtnl.hw[i]; - - switch (nvfx->swtnl.emit[i]) { - case EMIT_OMIT: - break; - case EMIT_1F: - BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_1F(hw), 1); - OUT_RING (chan, fui(v->data[idx][0])); - break; - case EMIT_2F: - BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_2F_X(hw), 2); - OUT_RING (chan, fui(v->data[idx][0])); - OUT_RING (chan, fui(v->data[idx][1])); - break; - case EMIT_3F: - BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_3F_X(hw), 3); - OUT_RING (chan, fui(v->data[idx][0])); - OUT_RING (chan, fui(v->data[idx][1])); - OUT_RING (chan, fui(v->data[idx][2])); - break; - case EMIT_4F: - BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4); - OUT_RING (chan, fui(v->data[idx][0])); - OUT_RING (chan, fui(v->data[idx][1])); - OUT_RING (chan, fui(v->data[idx][2])); - OUT_RING (chan, fui(v->data[idx][3])); - break; - case EMIT_4UB: - BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4UB(hw), 1); - OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][0]), - float_to_ubyte(v->data[idx][1]), - float_to_ubyte(v->data[idx][2]), - float_to_ubyte(v->data[idx][3]))); - break; - default: - assert(0); - break; - } - } -} - -static INLINE void -nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, - unsigned mode, unsigned count) -{ - struct nv40_render_stage *rs = nv40_render_stage(stage); - struct nvfx_context *nvfx = rs->nvfx; - - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - unsigned i; - - /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ - if (AVAIL_RING(chan) < ((count * 20) + 6)) { - if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { - NOUVEAU_ERR("AIII, missed flush\n"); - assert(0); - } - FIRE_RING(chan); - nvfx_state_emit(nvfx); - } - - /* Switch primitive modes if necessary */ - if (rs->prim != mode) { - if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); - } - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, mode); - rs->prim = mode; - } - - /* Emit vertex data */ - for (i = 0; i < count; i++) - nv40_render_vertex(nvfx, prim->v[i]); - - /* If it's likely we'll need to empty the push buffer soon, finish - * off the primitive now. - */ - if (AVAIL_RING(chan) < ((count * 20) + 6)) { - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); - rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP; - } -} - -static void -nv40_render_point(struct draw_stage *draw, struct prim_header *prim) -{ - nv40_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_POINTS, 1); -} - -static void -nv40_render_line(struct draw_stage *draw, struct prim_header *prim) -{ - nv40_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_LINES, 2); -} - -static void -nv40_render_tri(struct draw_stage *draw, struct prim_header *prim) -{ - nv40_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_TRIANGLES, 3); -} - -static void -nv40_render_flush(struct draw_stage *draw, unsigned flags) -{ - struct nv40_render_stage *rs = nv40_render_stage(draw); - struct nvfx_context *nvfx = rs->nvfx; - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - - if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); - rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP; - } -} - -static void -nv40_render_reset_stipple_counter(struct draw_stage *draw) -{ -} - -static void -nv40_render_destroy(struct draw_stage *draw) -{ - FREE(draw); -} - -static INLINE void -emit_mov(struct nvfx_vertex_program *vp, - unsigned dst, unsigned src, unsigned vor, unsigned mask) -{ - struct nvfx_vertex_program_exec *inst; - - vp->insns = realloc(vp->insns, - sizeof(struct nvfx_vertex_program_exec) * - ++vp->nr_insns); - inst = &vp->insns[vp->nr_insns - 1]; - - inst->data[0] = 0x401f9c6c; - inst->data[1] = 0x0040000d | (src << 8); - inst->data[2] = 0x8106c083; - inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13); - inst->const_index = -1; - inst->has_branch_offset = FALSE; - - vp->ir |= (1 << src); - if (vor != ~0) - vp->or |= (1 << vor); -} - -static struct nvfx_vertex_program * -create_drawvp(struct nvfx_context *nvfx) -{ - struct nvfx_vertex_program *vp = CALLOC_STRUCT(nvfx_vertex_program); - unsigned i; - - emit_mov(vp, NV40_VP_INST_DEST_POS, 0, ~0, 0xf); - emit_mov(vp, NV40_VP_INST_DEST_COL0, 3, 0, 0xf); - emit_mov(vp, NV40_VP_INST_DEST_COL1, 4, 1, 0xf); - emit_mov(vp, NV40_VP_INST_DEST_BFC0, 3, 2, 0xf); - emit_mov(vp, NV40_VP_INST_DEST_BFC1, 4, 3, 0xf); - emit_mov(vp, NV40_VP_INST_DEST_FOGC, 5, 4, 0x8); - for (i = 0; i < 8; i++) - emit_mov(vp, NV40_VP_INST_DEST_TC(i), 8 + i, 14 + i, 0xf); - - vp->insns[vp->nr_insns - 1].data[3] |= 1; - vp->translated = TRUE; - return vp; -} - -struct draw_stage * -nv40_draw_render_stage(struct nvfx_context *nvfx) -{ - struct nv40_render_stage *render = CALLOC_STRUCT(nv40_render_stage); - - if (!nvfx->swtnl.vertprog) - nvfx->swtnl.vertprog = create_drawvp(nvfx); - - render->nvfx = nvfx; - render->stage.draw = nvfx->draw; - render->stage.point = nv40_render_point; - render->stage.line = nv40_render_line; - render->stage.tri = nv40_render_tri; - render->stage.flush = nv40_render_flush; - render->stage.reset_stipple_counter = nv40_render_reset_stipple_counter; - render->stage.destroy = nv40_render_destroy; - - return &render->stage; -} - -void -nv40_draw_elements_swtnl(struct pipe_context *pipe, - struct pipe_buffer *idxbuf, unsigned idxbuf_size, - unsigned mode, unsigned start, unsigned count) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct pipe_screen *pscreen = pipe->screen; - unsigned i; - void *map; - - if (!nvfx_state_validate_swtnl(nvfx)) - return; - nvfx->state.dirty &= ~(1ULL << NVFX_STATE_VTXBUF); - nvfx_state_emit(nvfx); - - for (i = 0; i < nvfx->vtxbuf_nr; i++) { - map = pipe_buffer_map(pscreen, nvfx->vtxbuf[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_vertex_buffer(nvfx->draw, i, map); - } - - if (idxbuf) { - map = pipe_buffer_map(pscreen, idxbuf, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer(nvfx->draw, idxbuf_size, map); - } else { - draw_set_mapped_element_buffer(nvfx->draw, 0, NULL); - } - - if (nvfx->constbuf[PIPE_SHADER_VERTEX]) { - const unsigned nr = nvfx->constbuf_nr[PIPE_SHADER_VERTEX]; - - map = pipe_buffer_map(pscreen, - nvfx->constbuf[PIPE_SHADER_VERTEX], - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_constant_buffer(nvfx->draw, PIPE_SHADER_VERTEX, 0, - map, nr); - } - - draw_arrays(nvfx->draw, mode, start, count); - - for (i = 0; i < nvfx->vtxbuf_nr; i++) - pipe_buffer_unmap(pscreen, nvfx->vtxbuf[i].buffer); - - if (idxbuf) - pipe_buffer_unmap(pscreen, idxbuf); - - if (nvfx->constbuf[PIPE_SHADER_VERTEX]) - pipe_buffer_unmap(pscreen, nvfx->constbuf[PIPE_SHADER_VERTEX]); - - draw_flush(nvfx->draw); - pipe->flush(pipe, 0, NULL); -} - -static INLINE void -emit_attrib(struct nvfx_context *nvfx, unsigned hw, unsigned emit, - unsigned semantic, unsigned index) -{ - unsigned draw_out = draw_find_shader_output(nvfx->draw, semantic, index); - unsigned a = nvfx->swtnl.nr_attribs++; - - nvfx->swtnl.hw[a] = hw; - nvfx->swtnl.emit[a] = emit; - nvfx->swtnl.draw[a] = draw_out; -} - -static boolean -nv40_state_vtxfmt_validate(struct nvfx_context *nvfx) -{ - struct nvfx_fragment_program *fp = nvfx->fragprog; - unsigned colour = 0, texcoords = 0, fog = 0, i; - - /* Determine needed fragprog inputs */ - for (i = 0; i < fp->info.num_inputs; i++) { - switch (fp->info.input_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - break; - case TGSI_SEMANTIC_COLOR: - colour |= (1 << fp->info.input_semantic_index[i]); - break; - case TGSI_SEMANTIC_GENERIC: - texcoords |= (1 << fp->info.input_semantic_index[i]); - break; - case TGSI_SEMANTIC_FOG: - fog = 1; - break; - default: - assert(0); - } - } - - nvfx->swtnl.nr_attribs = 0; - - /* Map draw vtxprog output to hw attribute IDs */ - for (i = 0; i < 2; i++) { - if (!(colour & (1 << i))) - continue; - emit_attrib(nvfx, 3 + i, EMIT_4UB, TGSI_SEMANTIC_COLOR, i); - } - - for (i = 0; i < 8; i++) { - if (!(texcoords & (1 << i))) - continue; - emit_attrib(nvfx, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i); - } - - if (fog) { - emit_attrib(nvfx, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0); - } - - emit_attrib(nvfx, 0, EMIT_3F, TGSI_SEMANTIC_POSITION, 0); - - return FALSE; -} - -struct nvfx_state_entry nv40_state_vtxfmt = { - .validate = nv40_state_vtxfmt_validate, - .dirty = { - .pipe = NVFX_NEW_ARRAYS | NVFX_NEW_FRAGPROG, - .hw = 0 - } -}; - diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index 456b508d438..196a12b1f6b 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -177,7 +177,7 @@ nv40_draw_arrays(struct pipe_context *pipe, nv40_vbo_set_idxbuf(nvfx, NULL, 0); if (FORCE_SWTNL || !nvfx_state_validate(nvfx)) { - nv40_draw_elements_swtnl(pipe, NULL, 0, + nvfx_draw_elements_swtnl(pipe, NULL, 0, mode, start, count); return; } @@ -467,7 +467,7 @@ nv40_draw_elements(struct pipe_context *pipe, idxbuf = nv40_vbo_set_idxbuf(nvfx, indexBuffer, indexSize); if (FORCE_SWTNL || !nvfx_state_validate(nvfx)) { - nv40_draw_elements_swtnl(pipe, NULL, 0, + nvfx_draw_elements_swtnl(pipe, NULL, 0, mode, start, count); return; } diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index f8c10a249b5..a23d024f118 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -5,6 +5,7 @@ LIBNAME = nvfx C_SOURCES = \ nvfx_clear.c \ + nvfx_draw.c \ nvfx_fragprog.c \ nvfx_miptree.c \ nvfx_query.c \ diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 0a7a0f12527..75008f8dddc 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -190,6 +190,7 @@ extern struct nvfx_state_entry nvfx_state_scissor; extern struct nvfx_state_entry nvfx_state_sr; extern struct nvfx_state_entry nvfx_state_stipple; extern struct nvfx_state_entry nvfx_state_viewport; +extern struct nvfx_state_entry nvfx_state_vtxfmt; extern struct nvfx_state_entry nvfx_state_zsa; extern void nvfx_init_query_functions(struct nvfx_context *nvfx); @@ -199,6 +200,13 @@ extern void nvfx_init_surface_functions(struct nvfx_context *nvfx); extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, double depth, unsigned stencil); +/* nvfx_draw.c */ +extern struct draw_stage *nvfx_draw_render_stage(struct nvfx_context *nvfx); +extern void nvfx_draw_elements_swtnl(struct pipe_context *pipe, + struct pipe_buffer *idxbuf, + unsigned ib_size, unsigned mode, + unsigned start, unsigned count); + /* nvfx_fragprog.c */ extern void nvfx_fragprog_destroy(struct nvfx_context *, struct nvfx_fragment_program *); diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c new file mode 100644 index 00000000000..8700e143297 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_draw.c @@ -0,0 +1,363 @@ +#include "pipe/p_shader_tokens.h" +#include "util/u_inlines.h" + +#include "util/u_pack_color.h" + +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pipe.h" + +#include "nvfx_context.h" +#define NVFX_SHADER_NO_FUCKEDNESS +#include "nv30/nv30_shader.h" +#include "nv40/nv40_shader.h" + +/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very + * often at all. Uses "quadro style" vertex submission + a fixed vertex + * layout to avoid the need to generate a vertex program or vtxfmt. + */ + +struct nvfx_render_stage { + struct draw_stage stage; + struct nvfx_context *nvfx; + unsigned prim; +}; + +static INLINE struct nvfx_render_stage * +nvfx_render_stage(struct draw_stage *stage) +{ + return (struct nvfx_render_stage *)stage; +} + +static INLINE void +nvfx_render_vertex(struct nvfx_context *nvfx, const struct vertex_header *v) +{ + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + unsigned i; + + for (i = 0; i < nvfx->swtnl.nr_attribs; i++) { + unsigned idx = nvfx->swtnl.draw[i]; + unsigned hw = nvfx->swtnl.hw[i]; + + switch (nvfx->swtnl.emit[i]) { + case EMIT_OMIT: + break; + case EMIT_1F: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_1F(hw), 1); + OUT_RING (chan, fui(v->data[idx][0])); + break; + case EMIT_2F: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_2F_X(hw), 2); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + break; + case EMIT_3F: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_3F_X(hw), 3); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + OUT_RING (chan, fui(v->data[idx][2])); + break; + case EMIT_4F: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + OUT_RING (chan, fui(v->data[idx][2])); + OUT_RING (chan, fui(v->data[idx][3])); + break; + case EMIT_4UB: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4UB(hw), 1); + OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][0]), + float_to_ubyte(v->data[idx][1]), + float_to_ubyte(v->data[idx][2]), + float_to_ubyte(v->data[idx][3]))); + break; + default: + assert(0); + break; + } + } +} + +static INLINE void +nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim, + unsigned mode, unsigned count) +{ + struct nvfx_render_stage *rs = nvfx_render_stage(stage); + struct nvfx_context *nvfx = rs->nvfx; + + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + unsigned i; + + /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ + if (AVAIL_RING(chan) < ((count * 20) + 6)) { + if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { + NOUVEAU_ERR("AIII, missed flush\n"); + assert(0); + } + FIRE_RING(chan); + nvfx_state_emit(nvfx); + } + + /* Switch primitive modes if necessary */ + if (rs->prim != mode) { + if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); + } + + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, mode); + rs->prim = mode; + } + + /* Emit vertex data */ + for (i = 0; i < count; i++) + nvfx_render_vertex(nvfx, prim->v[i]); + + /* If it's likely we'll need to empty the push buffer soon, finish + * off the primitive now. + */ + if (AVAIL_RING(chan) < ((count * 20) + 6)) { + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); + rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP; + } +} + +static void +nvfx_render_point(struct draw_stage *draw, struct prim_header *prim) +{ + nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_POINTS, 1); +} + +static void +nvfx_render_line(struct draw_stage *draw, struct prim_header *prim) +{ + nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_LINES, 2); +} + +static void +nvfx_render_tri(struct draw_stage *draw, struct prim_header *prim) +{ + nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_TRIANGLES, 3); +} + +static void +nvfx_render_flush(struct draw_stage *draw, unsigned flags) +{ + struct nvfx_render_stage *rs = nvfx_render_stage(draw); + struct nvfx_context *nvfx = rs->nvfx; + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + + if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); + rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP; + } +} + +static void +nvfx_render_reset_stipple_counter(struct draw_stage *draw) +{ +} + +static void +nvfx_render_destroy(struct draw_stage *draw) +{ + FREE(draw); +} + +static INLINE void +emit_mov(struct nvfx_vertex_program *vp, + unsigned dst, unsigned src, unsigned vor, unsigned mask) +{ + struct nvfx_vertex_program_exec *inst; + + vp->insns = realloc(vp->insns, + sizeof(struct nvfx_vertex_program_exec) * + ++vp->nr_insns); + inst = &vp->insns[vp->nr_insns - 1]; + + inst->data[0] = 0x401f9c6c; + inst->data[1] = 0x0040000d | (src << 8); + inst->data[2] = 0x8106c083; + inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13); + inst->const_index = -1; + inst->has_branch_offset = FALSE; + + vp->ir |= (1 << src); + if (vor != ~0) + vp->or |= (1 << vor); +} + +static struct nvfx_vertex_program * +create_drawvp(struct nvfx_context *nvfx) +{ + struct nvfx_vertex_program *vp = CALLOC_STRUCT(nvfx_vertex_program); + unsigned i; + + // nv30 support comes in a later patch + assert(nvfx->is_nv4x); + + emit_mov(vp, NVFX_VP(INST_DEST_POS), 0, ~0, 0xf); + emit_mov(vp, NVFX_VP(INST_DEST_COL0), 3, 0, 0xf); + emit_mov(vp, NVFX_VP(INST_DEST_COL1), 4, 1, 0xf); + emit_mov(vp, NVFX_VP(INST_DEST_BFC0), 3, 2, 0xf); + emit_mov(vp, NVFX_VP(INST_DEST_BFC1), 4, 3, 0xf); + emit_mov(vp, NVFX_VP(INST_DEST_FOGC), 5, 4, 0x8); + for (i = 0; i < 8; i++) + emit_mov(vp, NVFX_VP(INST_DEST_TC(i)), 8 + i, 14 + i, 0xf); + + vp->insns[vp->nr_insns - 1].data[3] |= 1; + vp->translated = TRUE; + return vp; +} + +struct draw_stage * +nvfx_draw_render_stage(struct nvfx_context *nvfx) +{ + struct nvfx_render_stage *render = CALLOC_STRUCT(nvfx_render_stage); + + if (!nvfx->swtnl.vertprog) + nvfx->swtnl.vertprog = create_drawvp(nvfx); + + render->nvfx = nvfx; + render->stage.draw = nvfx->draw; + render->stage.point = nvfx_render_point; + render->stage.line = nvfx_render_line; + render->stage.tri = nvfx_render_tri; + render->stage.flush = nvfx_render_flush; + render->stage.reset_stipple_counter = nvfx_render_reset_stipple_counter; + render->stage.destroy = nvfx_render_destroy; + + return &render->stage; +} + +void +nvfx_draw_elements_swtnl(struct pipe_context *pipe, + struct pipe_buffer *idxbuf, unsigned idxbuf_size, + unsigned mode, unsigned start, unsigned count) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct pipe_screen *pscreen = pipe->screen; + unsigned i; + void *map; + + if (!nvfx_state_validate_swtnl(nvfx)) + return; + nvfx->state.dirty &= ~(1ULL << NVFX_STATE_VTXBUF); + nvfx_state_emit(nvfx); + + for (i = 0; i < nvfx->vtxbuf_nr; i++) { + map = pipe_buffer_map(pscreen, nvfx->vtxbuf[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(nvfx->draw, i, map); + } + + if (idxbuf) { + map = pipe_buffer_map(pscreen, idxbuf, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(nvfx->draw, idxbuf_size, map); + } else { + draw_set_mapped_element_buffer(nvfx->draw, 0, NULL); + } + + if (nvfx->constbuf[PIPE_SHADER_VERTEX]) { + const unsigned nr = nvfx->constbuf_nr[PIPE_SHADER_VERTEX]; + + map = pipe_buffer_map(pscreen, + nvfx->constbuf[PIPE_SHADER_VERTEX], + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_constant_buffer(nvfx->draw, PIPE_SHADER_VERTEX, 0, + map, nr); + } + + draw_arrays(nvfx->draw, mode, start, count); + + for (i = 0; i < nvfx->vtxbuf_nr; i++) + pipe_buffer_unmap(pscreen, nvfx->vtxbuf[i].buffer); + + if (idxbuf) + pipe_buffer_unmap(pscreen, idxbuf); + + if (nvfx->constbuf[PIPE_SHADER_VERTEX]) + pipe_buffer_unmap(pscreen, nvfx->constbuf[PIPE_SHADER_VERTEX]); + + draw_flush(nvfx->draw); + pipe->flush(pipe, 0, NULL); +} + +static INLINE void +emit_attrib(struct nvfx_context *nvfx, unsigned hw, unsigned emit, + unsigned semantic, unsigned index) +{ + unsigned draw_out = draw_find_shader_output(nvfx->draw, semantic, index); + unsigned a = nvfx->swtnl.nr_attribs++; + + nvfx->swtnl.hw[a] = hw; + nvfx->swtnl.emit[a] = emit; + nvfx->swtnl.draw[a] = draw_out; +} + +static boolean +nvfx_state_vtxfmt_validate(struct nvfx_context *nvfx) +{ + struct nvfx_fragment_program *fp = nvfx->fragprog; + unsigned colour = 0, texcoords = 0, fog = 0, i; + + /* Determine needed fragprog inputs */ + for (i = 0; i < fp->info.num_inputs; i++) { + switch (fp->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + break; + case TGSI_SEMANTIC_COLOR: + colour |= (1 << fp->info.input_semantic_index[i]); + break; + case TGSI_SEMANTIC_GENERIC: + texcoords |= (1 << fp->info.input_semantic_index[i]); + break; + case TGSI_SEMANTIC_FOG: + fog = 1; + break; + default: + assert(0); + } + } + + nvfx->swtnl.nr_attribs = 0; + + /* Map draw vtxprog output to hw attribute IDs */ + for (i = 0; i < 2; i++) { + if (!(colour & (1 << i))) + continue; + emit_attrib(nvfx, 3 + i, EMIT_4UB, TGSI_SEMANTIC_COLOR, i); + } + + for (i = 0; i < 8; i++) { + if (!(texcoords & (1 << i))) + continue; + emit_attrib(nvfx, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i); + } + + if (fog) { + emit_attrib(nvfx, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0); + } + + emit_attrib(nvfx, 0, EMIT_3F, TGSI_SEMANTIC_POSITION, 0); + + return FALSE; +} + +struct nvfx_state_entry nvfx_state_vtxfmt = { + .validate = nvfx_state_vtxfmt_validate, + .dirty = { + .pipe = NVFX_NEW_ARRAYS | NVFX_NEW_FRAGPROG, + .hw = 0 + } +}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index d6fbc5d8dcb..9961dacce4c 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -3,6 +3,10 @@ #include "nvfx_state.h" #include "draw/draw_context.h" +/* temporary, will be removed in next patch */ +#define nv30_state_vtxfmt nvfx_state_vtxfmt +#define nv40_state_vtxfmt nvfx_state_vtxfmt + #define RENDER_STATES(name, nvxx, vbo) \ static struct nvfx_state_entry *name##_render_states[] = { \ &nvfx_state_framebuffer, \ @@ -22,7 +26,7 @@ static struct nvfx_state_entry *name##_render_states[] = { \ } RENDER_STATES(nv30, nv30, vbo); -RENDER_STATES(nv30_swtnl, nv30, vbo); /* TODO: replace with vtxfmt once draw is unified */ +RENDER_STATES(nv30_swtnl, nv30, vtxfmt); RENDER_STATES(nv40, nv40, vbo); RENDER_STATES(nv40_swtnl, nv40, vtxfmt); -- cgit v1.2.3 From 6e1d0fc5eba8cda4f4c8f3188f53fec6d2d2e9c3 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 21 Feb 2010 11:12:08 +0100 Subject: nv30, nv40: unify nv[34]0_vbo.c The files are identical, except for swtnl support which is commented out on nv30 and restart being initialized on nv30 to avoid a compiler warning. --- src/gallium/drivers/nv30/Makefile | 1 - src/gallium/drivers/nv30/nv30_context.c | 4 +- src/gallium/drivers/nv30/nv30_context.h | 10 - src/gallium/drivers/nv30/nv30_vbo.c | 562 ---------------------------- src/gallium/drivers/nv40/Makefile | 1 - src/gallium/drivers/nv40/nv40_context.c | 4 +- src/gallium/drivers/nv40/nv40_context.h | 10 - src/gallium/drivers/nv40/nv40_vbo.c | 565 ----------------------------- src/gallium/drivers/nvfx/Makefile | 3 +- src/gallium/drivers/nvfx/nvfx_context.h | 10 + src/gallium/drivers/nvfx/nvfx_state_emit.c | 6 +- src/gallium/drivers/nvfx/nvfx_vbo.c | 563 ++++++++++++++++++++++++++++ 12 files changed, 580 insertions(+), 1159 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_vbo.c delete mode 100644 src/gallium/drivers/nv40/nv40_vbo.c create mode 100644 src/gallium/drivers/nvfx/nvfx_vbo.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index 791b0040bb8..6ec93ee3465 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -8,7 +8,6 @@ C_SOURCES = \ nv30_fragtex.c \ nv30_screen.c \ nv30_state.c \ - nv30_vbo.c \ nv30_vertprog.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/nvfx diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 671a1939e8d..730f4588785 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -60,8 +60,8 @@ nv30_create(struct pipe_screen *pscreen, void *priv) nvfx->pipe.screen = pscreen; nvfx->pipe.priv = priv; nvfx->pipe.destroy = nv30_destroy; - nvfx->pipe.draw_arrays = nv30_draw_arrays; - nvfx->pipe.draw_elements = nv30_draw_elements; + nvfx->pipe.draw_arrays = nvfx_draw_arrays; + nvfx->pipe.draw_elements = nvfx_draw_elements; nvfx->pipe.clear = nvfx_clear; nvfx->pipe.flush = nv30_flush; diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index ebdd5455ca8..de879d504fc 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -15,16 +15,6 @@ extern void nv30_fragtex_bind(struct nvfx_context *); /* nv30_state.c and friends */ extern struct nvfx_state_entry nv30_state_vertprog; extern struct nvfx_state_entry nv30_state_fragtex; -extern struct nvfx_state_entry nv30_state_vbo; - -/* nv30_vbo.c */ -extern void nv30_draw_arrays(struct pipe_context *, unsigned mode, - unsigned start, unsigned count); -extern void nv30_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, - unsigned count); /* nvfx_context.c */ struct pipe_context * diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c deleted file mode 100644 index 2b4401e5330..00000000000 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ /dev/null @@ -1,562 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_state.h" -#include "util/u_inlines.h" -#include "util/u_format.h" - -#include "nv30_context.h" -#include "nvfx_state.h" - -#include "nouveau/nouveau_channel.h" -#include "nouveau/nouveau_pushbuf.h" -#include "nouveau/nouveau_util.h" - -#define FORCE_SWTNL 0 - -static INLINE int -nv30_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) -{ - switch (pipe) { - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R32G32_FLOAT: - case PIPE_FORMAT_R32G32B32_FLOAT: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - *fmt = NV34TCL_VTXFMT_TYPE_FLOAT; - break; - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - *fmt = NV34TCL_VTXFMT_TYPE_UBYTE; - break; - case PIPE_FORMAT_R16_SSCALED: - case PIPE_FORMAT_R16G16_SSCALED: - case PIPE_FORMAT_R16G16B16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - *fmt = NV34TCL_VTXFMT_TYPE_USHORT; - break; - default: - NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe)); - return 1; - } - - switch (pipe) { - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R16_SSCALED: - *ncomp = 1; - break; - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R32G32_FLOAT: - case PIPE_FORMAT_R16G16_SSCALED: - *ncomp = 2; - break; - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R32G32B32_FLOAT: - case PIPE_FORMAT_R16G16B16_SSCALED: - *ncomp = 3; - break; - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - *ncomp = 4; - break; - default: - NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe)); - return 1; - } - - return 0; -} - -static boolean -nv30_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_buffer *ib, - unsigned ib_size) -{ - struct pipe_screen *pscreen = &nvfx->screen->base.base; - unsigned type; - - if (!ib) { - nvfx->idxbuf = NULL; - nvfx->idxbuf_format = 0xdeadbeef; - return FALSE; - } - - if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1) - return FALSE; - - switch (ib_size) { - case 2: - type = NV34TCL_IDXBUF_FORMAT_TYPE_U16; - break; - case 4: - type = NV34TCL_IDXBUF_FORMAT_TYPE_U32; - break; - default: - return FALSE; - } - - if (ib != nvfx->idxbuf || - type != nvfx->idxbuf_format) { - nvfx->dirty |= NVFX_NEW_ARRAYS; - nvfx->idxbuf = ib; - nvfx->idxbuf_format = type; - } - - return TRUE; -} - -static boolean -nv30_vbo_static_attrib(struct nvfx_context *nvfx, struct nouveau_stateobj *so, - int attrib, struct pipe_vertex_element *ve, - struct pipe_vertex_buffer *vb) -{ - struct pipe_screen *pscreen = nvfx->pipe.screen; - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - unsigned type, ncomp; - void *map; - - if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) - return FALSE; - - map = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); - map += vb->buffer_offset + ve->src_offset; - - switch (type) { - case NV34TCL_VTXFMT_TYPE_FLOAT: - { - float *v = map; - - switch (ncomp) { - case 4: - so_method(so, eng3d, NV34TCL_VTX_ATTR_4F_X(attrib), 4); - so_data (so, fui(v[0])); - so_data (so, fui(v[1])); - so_data (so, fui(v[2])); - so_data (so, fui(v[3])); - break; - case 3: - so_method(so, eng3d, NV34TCL_VTX_ATTR_3F_X(attrib), 3); - so_data (so, fui(v[0])); - so_data (so, fui(v[1])); - so_data (so, fui(v[2])); - break; - case 2: - so_method(so, eng3d, NV34TCL_VTX_ATTR_2F_X(attrib), 2); - so_data (so, fui(v[0])); - so_data (so, fui(v[1])); - break; - case 1: - so_method(so, eng3d, NV34TCL_VTX_ATTR_1F(attrib), 1); - so_data (so, fui(v[0])); - break; - default: - pipe_buffer_unmap(pscreen, vb->buffer); - return FALSE; - } - } - break; - default: - pipe_buffer_unmap(pscreen, vb->buffer); - return FALSE; - } - - pipe_buffer_unmap(pscreen, vb->buffer); - return TRUE; -} - -void -nv30_draw_arrays(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - unsigned restart = 0; - - nv30_vbo_set_idxbuf(nvfx, NULL, 0); - if (FORCE_SWTNL || !nvfx_state_validate(nvfx)) { - /*return nv30_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count);*/ - return; - } - - while (count) { - unsigned vc, nr; - - nvfx_state_emit(nvfx); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, - mode, start, count, &restart); - if (!vc) { - FIRE_RING(chan); - continue; - } - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - nr = (vc & 0xff); - if (nr) { - BEGIN_RING(chan, eng3d, NV34TCL_VB_VERTEX_BATCH, 1); - OUT_RING (chan, ((nr - 1) << 24) | start); - start += nr; - } - - nr = vc >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; - - nr -= push; - - BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_VERTEX_BATCH, push); - while (push--) { - OUT_RING(chan, ((0x100 - 1) << 24) | start); - start += 0x100; - } - } - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, 0); - - count -= vc; - start = restart; - } - - pipe->flush(pipe, 0, NULL); -} - -static INLINE void -nv30_draw_elements_u08(struct nvfx_context *nvfx, void *ib, - unsigned mode, unsigned start, unsigned count) -{ - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - - while (count) { - uint8_t *elts = (uint8_t *)ib + start; - unsigned vc, push, restart = 0; - - nvfx_state_emit(nvfx); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - if (vc & 1) { - BEGIN_RING(chan, eng3d, NV34TCL_VB_ELEMENT_U32, 1); - OUT_RING (chan, elts[0]); - elts++; vc--; - } - - while (vc) { - unsigned i; - - push = MIN2(vc, 2047 * 2); - - BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U16, push >> 1); - for (i = 0; i < push; i+=2) - OUT_RING(chan, (elts[i+1] << 16) | elts[i]); - - vc -= push; - elts += push; - } - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, 0); - - start = restart; - } -} - -static INLINE void -nv30_draw_elements_u16(struct nvfx_context *nvfx, void *ib, - unsigned mode, unsigned start, unsigned count) -{ - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - - while (count) { - uint16_t *elts = (uint16_t *)ib + start; - unsigned vc, push, restart = 0; - - nvfx_state_emit(nvfx); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - if (vc & 1) { - BEGIN_RING(chan, eng3d, NV34TCL_VB_ELEMENT_U32, 1); - OUT_RING (chan, elts[0]); - elts++; vc--; - } - - while (vc) { - unsigned i; - - push = MIN2(vc, 2047 * 2); - - BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U16, push >> 1); - for (i = 0; i < push; i+=2) - OUT_RING(chan, (elts[i+1] << 16) | elts[i]); - - vc -= push; - elts += push; - } - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, 0); - - start = restart; - } -} - -static INLINE void -nv30_draw_elements_u32(struct nvfx_context *nvfx, void *ib, - unsigned mode, unsigned start, unsigned count) -{ - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - - while (count) { - uint32_t *elts = (uint32_t *)ib + start; - unsigned vc, push, restart = 0; - - nvfx_state_emit(nvfx); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 5, 1, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - while (vc) { - push = MIN2(vc, 2047); - - BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U32, push); - OUT_RINGp (chan, elts, push); - - vc -= push; - elts += push; - } - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, 0); - - start = restart; - } -} - -static void -nv30_draw_elements_inline(struct pipe_context *pipe, - struct pipe_buffer *ib, unsigned ib_size, - unsigned mode, unsigned start, unsigned count) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct pipe_screen *pscreen = pipe->screen; - void *map; - - map = pipe_buffer_map(pscreen, ib, PIPE_BUFFER_USAGE_CPU_READ); - if (!ib) { - NOUVEAU_ERR("failed mapping ib\n"); - return; - } - - switch (ib_size) { - case 1: - nv30_draw_elements_u08(nvfx, map, mode, start, count); - break; - case 2: - nv30_draw_elements_u16(nvfx, map, mode, start, count); - break; - case 4: - nv30_draw_elements_u32(nvfx, map, mode, start, count); - break; - default: - NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); - break; - } - - pipe_buffer_unmap(pscreen, ib); -} - -static void -nv30_draw_elements_vbo(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - unsigned restart = 0; - - while (count) { - unsigned nr, vc; - - nvfx_state_emit(nvfx); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, - mode, start, count, &restart); - if (!vc) { - FIRE_RING(chan); - continue; - } - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - nr = (vc & 0xff); - if (nr) { - BEGIN_RING(chan, eng3d, NV34TCL_VB_INDEX_BATCH, 1); - OUT_RING (chan, ((nr - 1) << 24) | start); - start += nr; - } - - nr = vc >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; - - nr -= push; - - BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_INDEX_BATCH, push); - while (push--) { - OUT_RING(chan, ((0x100 - 1) << 24) | start); - start += 0x100; - } - } - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, 0); - - count -= vc; - start = restart; - } -} - -void -nv30_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, unsigned indexSize, - unsigned mode, unsigned start, unsigned count) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - boolean idxbuf; - - idxbuf = nv30_vbo_set_idxbuf(nvfx, indexBuffer, indexSize); - if (FORCE_SWTNL || !nvfx_state_validate(nvfx)) { - /*return nv30_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count);*/ - return; - } - - if (idxbuf) { - nv30_draw_elements_vbo(pipe, mode, start, count); - } else { - nv30_draw_elements_inline(pipe, indexBuffer, indexSize, - mode, start, count); - } - - pipe->flush(pipe, 0, NULL); -} - -static boolean -nv30_vbo_validate(struct nvfx_context *nvfx) -{ - struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL; - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - struct pipe_buffer *ib = nvfx->idxbuf; - unsigned ib_format = nvfx->idxbuf_format; - unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; - int hw; - - vtxbuf = so_new(3, 17, 18); - so_method(vtxbuf, eng3d, NV34TCL_VTXBUF_ADDRESS(0), nvfx->vtxelt->num_elements); - vtxfmt = so_new(1, 16, 0); - so_method(vtxfmt, eng3d, NV34TCL_VTXFMT(0), nvfx->vtxelt->num_elements); - - for (hw = 0; hw < nvfx->vtxelt->num_elements; hw++) { - struct pipe_vertex_element *ve; - struct pipe_vertex_buffer *vb; - unsigned type, ncomp; - - ve = &nvfx->vtxelt->pipe[hw]; - vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; - - if (!vb->stride) { - if (!sattr) - sattr = so_new(16, 16 * 4, 0); - - if (nv30_vbo_static_attrib(nvfx, sattr, hw, ve, vb)) { - so_data(vtxbuf, 0); - so_data(vtxfmt, NV34TCL_VTXFMT_TYPE_FLOAT); - continue; - } - } - - if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { - /*nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS;*/ - so_ref(NULL, &vtxbuf); - so_ref(NULL, &vtxfmt); - return FALSE; - } - - so_reloc(vtxbuf, nouveau_bo(vb->buffer), vb->buffer_offset + - ve->src_offset, vb_flags | NOUVEAU_BO_LOW | - NOUVEAU_BO_OR, 0, NV34TCL_VTXBUF_ADDRESS_DMA1); - so_data (vtxfmt, ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) | - (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type)); - } - - if (ib) { - struct nouveau_bo *bo = nouveau_bo(ib); - - so_method(vtxbuf, eng3d, NV34TCL_IDXBUF_ADDRESS, 2); - so_reloc (vtxbuf, bo, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); - so_reloc (vtxbuf, bo, ib_format, vb_flags | NOUVEAU_BO_OR, - 0, NV34TCL_IDXBUF_FORMAT_DMA1); - } - - so_method(vtxbuf, eng3d, 0x1710, 1); - so_data (vtxbuf, 0); - - so_ref(vtxbuf, &nvfx->state.hw[NVFX_STATE_VTXBUF]); - so_ref(NULL, &vtxbuf); - nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXBUF); - so_ref(vtxfmt, &nvfx->state.hw[NVFX_STATE_VTXFMT]); - so_ref(NULL, &vtxfmt); - nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXFMT); - so_ref(sattr, &nvfx->state.hw[NVFX_STATE_VTXATTR]); - so_ref(NULL, &sattr); - nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXATTR); - return FALSE; -} - -struct nvfx_state_entry nv30_state_vbo = { - .validate = nv30_vbo_validate, - .dirty = { - .pipe = NVFX_NEW_ARRAYS, - .hw = 0, - } -}; diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 7529e33a741..69085069060 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -8,7 +8,6 @@ C_SOURCES = \ nv40_fragtex.c \ nv40_screen.c \ nv40_state.c \ - nv40_vbo.c \ nv40_vertprog.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/nvfx diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index eb831317720..85c1355b4fa 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -60,8 +60,8 @@ nv40_create(struct pipe_screen *pscreen, void *priv) nvfx->pipe.priv = priv; nvfx->pipe.screen = pscreen; nvfx->pipe.destroy = nv40_destroy; - nvfx->pipe.draw_arrays = nv40_draw_arrays; - nvfx->pipe.draw_elements = nv40_draw_elements; + nvfx->pipe.draw_arrays = nvfx_draw_arrays; + nvfx->pipe.draw_elements = nvfx_draw_elements; nvfx->pipe.clear = nvfx_clear; nvfx->pipe.flush = nv40_flush; diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index a8aba0c0cab..cebf21d00c0 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -15,16 +15,6 @@ extern void nv40_fragtex_bind(struct nvfx_context *); /* nv40_state.c and friends */ extern struct nvfx_state_entry nv40_state_vertprog; extern struct nvfx_state_entry nv40_state_fragtex; -extern struct nvfx_state_entry nv40_state_vbo; - -/* nv40_vbo.c */ -extern void nv40_draw_arrays(struct pipe_context *, unsigned mode, - unsigned start, unsigned count); -extern void nv40_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, - unsigned count); /* nvfx_context.c */ struct pipe_context * diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c deleted file mode 100644 index 196a12b1f6b..00000000000 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ /dev/null @@ -1,565 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_state.h" -#include "util/u_inlines.h" -#include "util/u_format.h" - -#include "nv40_context.h" -#include "nvfx_state.h" - -#include "nouveau/nouveau_channel.h" -#include "nouveau/nouveau_pushbuf.h" -#include "nouveau/nouveau_util.h" - -#define FORCE_SWTNL 0 - -static INLINE int -nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) -{ - switch (pipe) { - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R32G32_FLOAT: - case PIPE_FORMAT_R32G32B32_FLOAT: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - *fmt = NV34TCL_VTXFMT_TYPE_FLOAT; - break; - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - *fmt = NV34TCL_VTXFMT_TYPE_UBYTE; - break; - case PIPE_FORMAT_R16_SSCALED: - case PIPE_FORMAT_R16G16_SSCALED: - case PIPE_FORMAT_R16G16B16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - *fmt = NV34TCL_VTXFMT_TYPE_USHORT; - break; - default: - NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe)); - return 1; - } - - switch (pipe) { - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R16_SSCALED: - *ncomp = 1; - break; - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R32G32_FLOAT: - case PIPE_FORMAT_R16G16_SSCALED: - *ncomp = 2; - break; - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R32G32B32_FLOAT: - case PIPE_FORMAT_R16G16B16_SSCALED: - *ncomp = 3; - break; - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - *ncomp = 4; - break; - default: - NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe)); - return 1; - } - - return 0; -} - -static boolean -nv40_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_buffer *ib, - unsigned ib_size) -{ - struct pipe_screen *pscreen = &nvfx->screen->base.base; - unsigned type; - - if (!ib) { - nvfx->idxbuf = NULL; - nvfx->idxbuf_format = 0xdeadbeef; - return FALSE; - } - - if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1) - return FALSE; - - switch (ib_size) { - case 2: - type = NV34TCL_IDXBUF_FORMAT_TYPE_U16; - break; - case 4: - type = NV34TCL_IDXBUF_FORMAT_TYPE_U32; - break; - default: - return FALSE; - } - - if (ib != nvfx->idxbuf || - type != nvfx->idxbuf_format) { - nvfx->dirty |= NVFX_NEW_ARRAYS; - nvfx->idxbuf = ib; - nvfx->idxbuf_format = type; - } - - return TRUE; -} - -static boolean -nv40_vbo_static_attrib(struct nvfx_context *nvfx, struct nouveau_stateobj *so, - int attrib, struct pipe_vertex_element *ve, - struct pipe_vertex_buffer *vb) -{ - struct pipe_screen *pscreen = nvfx->pipe.screen; - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - unsigned type, ncomp; - void *map; - - if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) - return FALSE; - - map = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); - map += vb->buffer_offset + ve->src_offset; - - switch (type) { - case NV34TCL_VTXFMT_TYPE_FLOAT: - { - float *v = map; - - switch (ncomp) { - case 4: - so_method(so, eng3d, NV34TCL_VTX_ATTR_4F_X(attrib), 4); - so_data (so, fui(v[0])); - so_data (so, fui(v[1])); - so_data (so, fui(v[2])); - so_data (so, fui(v[3])); - break; - case 3: - so_method(so, eng3d, NV34TCL_VTX_ATTR_3F_X(attrib), 3); - so_data (so, fui(v[0])); - so_data (so, fui(v[1])); - so_data (so, fui(v[2])); - break; - case 2: - so_method(so, eng3d, NV34TCL_VTX_ATTR_2F_X(attrib), 2); - so_data (so, fui(v[0])); - so_data (so, fui(v[1])); - break; - case 1: - so_method(so, eng3d, NV34TCL_VTX_ATTR_1F(attrib), 1); - so_data (so, fui(v[0])); - break; - default: - pipe_buffer_unmap(pscreen, vb->buffer); - return FALSE; - } - } - break; - default: - pipe_buffer_unmap(pscreen, vb->buffer); - return FALSE; - } - - pipe_buffer_unmap(pscreen, vb->buffer); - - return TRUE; -} - -void -nv40_draw_arrays(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - unsigned restart; - - nv40_vbo_set_idxbuf(nvfx, NULL, 0); - if (FORCE_SWTNL || !nvfx_state_validate(nvfx)) { - nvfx_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count); - return; - } - - while (count) { - unsigned vc, nr; - - nvfx_state_emit(nvfx); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, - mode, start, count, &restart); - if (!vc) { - FIRE_RING(chan); - continue; - } - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - nr = (vc & 0xff); - if (nr) { - BEGIN_RING(chan, eng3d, NV34TCL_VB_VERTEX_BATCH, 1); - OUT_RING (chan, ((nr - 1) << 24) | start); - start += nr; - } - - nr = vc >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; - - nr -= push; - - BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_VERTEX_BATCH, push); - while (push--) { - OUT_RING(chan, ((0x100 - 1) << 24) | start); - start += 0x100; - } - } - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, 0); - - count -= vc; - start = restart; - } - - pipe->flush(pipe, 0, NULL); -} - -static INLINE void -nv40_draw_elements_u08(struct nvfx_context *nvfx, void *ib, - unsigned mode, unsigned start, unsigned count) -{ - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - - while (count) { - uint8_t *elts = (uint8_t *)ib + start; - unsigned vc, push, restart; - - nvfx_state_emit(nvfx); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - if (vc & 1) { - BEGIN_RING(chan, eng3d, NV34TCL_VB_ELEMENT_U32, 1); - OUT_RING (chan, elts[0]); - elts++; vc--; - } - - while (vc) { - unsigned i; - - push = MIN2(vc, 2047 * 2); - - BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U16, push >> 1); - for (i = 0; i < push; i+=2) - OUT_RING(chan, (elts[i+1] << 16) | elts[i]); - - vc -= push; - elts += push; - } - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, 0); - - start = restart; - } -} - -static INLINE void -nv40_draw_elements_u16(struct nvfx_context *nvfx, void *ib, - unsigned mode, unsigned start, unsigned count) -{ - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - - while (count) { - uint16_t *elts = (uint16_t *)ib + start; - unsigned vc, push, restart; - - nvfx_state_emit(nvfx); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - if (vc & 1) { - BEGIN_RING(chan, eng3d, NV34TCL_VB_ELEMENT_U32, 1); - OUT_RING (chan, elts[0]); - elts++; vc--; - } - - while (vc) { - unsigned i; - - push = MIN2(vc, 2047 * 2); - - BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U16, push >> 1); - for (i = 0; i < push; i+=2) - OUT_RING(chan, (elts[i+1] << 16) | elts[i]); - - vc -= push; - elts += push; - } - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, 0); - - start = restart; - } -} - -static INLINE void -nv40_draw_elements_u32(struct nvfx_context *nvfx, void *ib, - unsigned mode, unsigned start, unsigned count) -{ - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - - while (count) { - uint32_t *elts = (uint32_t *)ib + start; - unsigned vc, push, restart; - - nvfx_state_emit(nvfx); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 5, 1, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - while (vc) { - push = MIN2(vc, 2047); - - BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U32, push); - OUT_RINGp (chan, elts, push); - - vc -= push; - elts += push; - } - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, 0); - - start = restart; - } -} - -static void -nv40_draw_elements_inline(struct pipe_context *pipe, - struct pipe_buffer *ib, unsigned ib_size, - unsigned mode, unsigned start, unsigned count) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct pipe_screen *pscreen = pipe->screen; - void *map; - - map = pipe_buffer_map(pscreen, ib, PIPE_BUFFER_USAGE_CPU_READ); - if (!ib) { - NOUVEAU_ERR("failed mapping ib\n"); - return; - } - - switch (ib_size) { - case 1: - nv40_draw_elements_u08(nvfx, map, mode, start, count); - break; - case 2: - nv40_draw_elements_u16(nvfx, map, mode, start, count); - break; - case 4: - nv40_draw_elements_u32(nvfx, map, mode, start, count); - break; - default: - NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); - break; - } - - pipe_buffer_unmap(pscreen, ib); -} - -static void -nv40_draw_elements_vbo(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - unsigned restart; - - while (count) { - unsigned nr, vc; - - nvfx_state_emit(nvfx); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, - mode, start, count, &restart); - if (!vc) { - FIRE_RING(chan); - continue; - } - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - nr = (vc & 0xff); - if (nr) { - BEGIN_RING(chan, eng3d, NV34TCL_VB_INDEX_BATCH, 1); - OUT_RING (chan, ((nr - 1) << 24) | start); - start += nr; - } - - nr = vc >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; - - nr -= push; - - BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_INDEX_BATCH, push); - while (push--) { - OUT_RING(chan, ((0x100 - 1) << 24) | start); - start += 0x100; - } - } - - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, 0); - - count -= vc; - start = restart; - } -} - -void -nv40_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, unsigned indexSize, - unsigned mode, unsigned start, unsigned count) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - boolean idxbuf; - - idxbuf = nv40_vbo_set_idxbuf(nvfx, indexBuffer, indexSize); - if (FORCE_SWTNL || !nvfx_state_validate(nvfx)) { - nvfx_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count); - return; - } - - if (idxbuf) { - nv40_draw_elements_vbo(pipe, mode, start, count); - } else { - nv40_draw_elements_inline(pipe, indexBuffer, indexSize, - mode, start, count); - } - - pipe->flush(pipe, 0, NULL); -} - -static boolean -nv40_vbo_validate(struct nvfx_context *nvfx) -{ - struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL; - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - struct pipe_buffer *ib = nvfx->idxbuf; - unsigned ib_format = nvfx->idxbuf_format; - unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; - int hw; - - vtxbuf = so_new(3, 17, 18); - so_method(vtxbuf, eng3d, NV34TCL_VTXBUF_ADDRESS(0), nvfx->vtxelt->num_elements); - vtxfmt = so_new(1, 16, 0); - so_method(vtxfmt, eng3d, NV34TCL_VTXFMT(0), nvfx->vtxelt->num_elements); - - for (hw = 0; hw < nvfx->vtxelt->num_elements; hw++) { - struct pipe_vertex_element *ve; - struct pipe_vertex_buffer *vb; - unsigned type, ncomp; - - ve = &nvfx->vtxelt->pipe[hw]; - vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; - - if (!vb->stride) { - if (!sattr) - sattr = so_new(16, 16 * 4, 0); - - if (nv40_vbo_static_attrib(nvfx, sattr, hw, ve, vb)) { - so_data(vtxbuf, 0); - so_data(vtxfmt, NV34TCL_VTXFMT_TYPE_FLOAT); - continue; - } - } - - if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { - nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS; - so_ref(NULL, &vtxbuf); - so_ref(NULL, &vtxfmt); - return FALSE; - } - - so_reloc(vtxbuf, nouveau_bo(vb->buffer), - vb->buffer_offset + ve->src_offset, - vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, - 0, NV34TCL_VTXBUF_ADDRESS_DMA1); - so_data (vtxfmt, ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) | - (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type)); - } - - if (ib) { - struct nouveau_bo *bo = nouveau_bo(ib); - - so_method(vtxbuf, eng3d, NV34TCL_IDXBUF_ADDRESS, 2); - so_reloc (vtxbuf, bo, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); - so_reloc (vtxbuf, bo, ib_format, vb_flags | NOUVEAU_BO_OR, - 0, NV34TCL_IDXBUF_FORMAT_DMA1); - } - - so_method(vtxbuf, eng3d, 0x1710, 1); - so_data (vtxbuf, 0); - - so_ref(vtxbuf, &nvfx->state.hw[NVFX_STATE_VTXBUF]); - so_ref(NULL, &vtxbuf); - nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXBUF); - so_ref(vtxfmt, &nvfx->state.hw[NVFX_STATE_VTXFMT]); - so_ref(NULL, &vtxfmt); - nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXFMT); - so_ref(sattr, &nvfx->state.hw[NVFX_STATE_VTXATTR]); - so_ref(NULL, &sattr); - nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXATTR); - return FALSE; -} - -struct nvfx_state_entry nv40_state_vbo = { - .validate = nv40_vbo_validate, - .dirty = { - .pipe = NVFX_NEW_ARRAYS, - .hw = 0, - } -}; - diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index a23d024f118..d1304ccea0d 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -18,6 +18,7 @@ C_SOURCES = \ nvfx_state_viewport.c \ nvfx_state_zsa.c \ nvfx_surface.c \ - nvfx_transfer.c + nvfx_transfer.c \ + nvfx_vbo.c include ../../Makefile.template diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 75008f8dddc..8f121fe3af2 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -189,6 +189,7 @@ extern struct nvfx_state_entry nvfx_state_rasterizer; extern struct nvfx_state_entry nvfx_state_scissor; extern struct nvfx_state_entry nvfx_state_sr; extern struct nvfx_state_entry nvfx_state_stipple; +extern struct nvfx_state_entry nvfx_state_vbo; extern struct nvfx_state_entry nvfx_state_viewport; extern struct nvfx_state_entry nvfx_state_vtxfmt; extern struct nvfx_state_entry nvfx_state_zsa; @@ -220,4 +221,13 @@ extern void nvfx_state_emit(struct nvfx_context *nvfx); /* nvfx_transfer.c */ extern void nvfx_init_transfer_functions(struct nvfx_context *nvfx); +/* nvfx_vbo.c */ +extern void nvfx_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern void nvfx_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, + unsigned count); + #endif diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index 9961dacce4c..88b86f9fb50 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -3,10 +3,6 @@ #include "nvfx_state.h" #include "draw/draw_context.h" -/* temporary, will be removed in next patch */ -#define nv30_state_vtxfmt nvfx_state_vtxfmt -#define nv40_state_vtxfmt nvfx_state_vtxfmt - #define RENDER_STATES(name, nvxx, vbo) \ static struct nvfx_state_entry *name##_render_states[] = { \ &nvfx_state_framebuffer, \ @@ -21,7 +17,7 @@ static struct nvfx_state_entry *name##_render_states[] = { \ &nvfx_state_zsa, \ &nvfx_state_sr, \ &nvfx_state_viewport, \ - &nvxx##_state_##vbo, \ + &nvfx_state_##vbo, \ NULL \ } diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c new file mode 100644 index 00000000000..9d501ef1da9 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -0,0 +1,563 @@ +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +#include "nvfx_context.h" +#include "nvfx_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" +#include "nouveau/nouveau_util.h" + +#define FORCE_SWTNL 0 + +static INLINE int +nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) +{ + switch (pipe) { + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + *fmt = NV34TCL_VTXFMT_TYPE_FLOAT; + break; + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + *fmt = NV34TCL_VTXFMT_TYPE_UBYTE; + break; + case PIPE_FORMAT_R16_SSCALED: + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + *fmt = NV34TCL_VTXFMT_TYPE_USHORT; + break; + default: + NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe)); + return 1; + } + + switch (pipe) { + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R16_SSCALED: + *ncomp = 1; + break; + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R16G16_SSCALED: + *ncomp = 2; + break; + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R16G16B16_SSCALED: + *ncomp = 3; + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + *ncomp = 4; + break; + default: + NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe)); + return 1; + } + + return 0; +} + +static boolean +nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_buffer *ib, + unsigned ib_size) +{ + struct pipe_screen *pscreen = &nvfx->screen->base.base; + unsigned type; + + if (!ib) { + nvfx->idxbuf = NULL; + nvfx->idxbuf_format = 0xdeadbeef; + return FALSE; + } + + if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1) + return FALSE; + + switch (ib_size) { + case 2: + type = NV34TCL_IDXBUF_FORMAT_TYPE_U16; + break; + case 4: + type = NV34TCL_IDXBUF_FORMAT_TYPE_U32; + break; + default: + return FALSE; + } + + if (ib != nvfx->idxbuf || + type != nvfx->idxbuf_format) { + nvfx->dirty |= NVFX_NEW_ARRAYS; + nvfx->idxbuf = ib; + nvfx->idxbuf_format = type; + } + + return TRUE; +} + +static boolean +nvfx_vbo_static_attrib(struct nvfx_context *nvfx, struct nouveau_stateobj *so, + int attrib, struct pipe_vertex_element *ve, + struct pipe_vertex_buffer *vb) +{ + struct pipe_screen *pscreen = nvfx->pipe.screen; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + unsigned type, ncomp; + void *map; + + if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) + return FALSE; + + map = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map += vb->buffer_offset + ve->src_offset; + + switch (type) { + case NV34TCL_VTXFMT_TYPE_FLOAT: + { + float *v = map; + + switch (ncomp) { + case 4: + so_method(so, eng3d, NV34TCL_VTX_ATTR_4F_X(attrib), 4); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + so_data (so, fui(v[2])); + so_data (so, fui(v[3])); + break; + case 3: + so_method(so, eng3d, NV34TCL_VTX_ATTR_3F_X(attrib), 3); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + so_data (so, fui(v[2])); + break; + case 2: + so_method(so, eng3d, NV34TCL_VTX_ATTR_2F_X(attrib), 2); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + break; + case 1: + so_method(so, eng3d, NV34TCL_VTX_ATTR_1F(attrib), 1); + so_data (so, fui(v[0])); + break; + default: + pipe_buffer_unmap(pscreen, vb->buffer); + return FALSE; + } + } + break; + default: + pipe_buffer_unmap(pscreen, vb->buffer); + return FALSE; + } + + pipe_buffer_unmap(pscreen, vb->buffer); + return TRUE; +} + +void +nvfx_draw_arrays(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + unsigned restart = 0; + + nvfx_vbo_set_idxbuf(nvfx, NULL, 0); + if (FORCE_SWTNL || !nvfx_state_validate(nvfx)) { + nvfx_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + return; + } + + while (count) { + unsigned vc, nr; + + nvfx_state_emit(nvfx); + + vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, + mode, start, count, &restart); + if (!vc) { + FIRE_RING(chan); + continue; + } + + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); + + nr = (vc & 0xff); + if (nr) { + BEGIN_RING(chan, eng3d, NV34TCL_VB_VERTEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); + start += nr; + } + + nr = vc >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; + + nr -= push; + + BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_VERTEX_BATCH, push); + while (push--) { + OUT_RING(chan, ((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); + + count -= vc; + start = restart; + } + + pipe->flush(pipe, 0, NULL); +} + +static INLINE void +nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + + while (count) { + uint8_t *elts = (uint8_t *)ib + start; + unsigned vc, push, restart = 0; + + nvfx_state_emit(nvfx); + + vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(chan); + continue; + } + count -= vc; + + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); + + if (vc & 1) { + BEGIN_RING(chan, eng3d, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); + elts++; vc--; + } + + while (vc) { + unsigned i; + + push = MIN2(vc, 2047 * 2); + + BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); + + vc -= push; + elts += push; + } + + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); + + start = restart; + } +} + +static INLINE void +nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + + while (count) { + uint16_t *elts = (uint16_t *)ib + start; + unsigned vc, push, restart = 0; + + nvfx_state_emit(nvfx); + + vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(chan); + continue; + } + count -= vc; + + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); + + if (vc & 1) { + BEGIN_RING(chan, eng3d, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING (chan, elts[0]); + elts++; vc--; + } + + while (vc) { + unsigned i; + + push = MIN2(vc, 2047 * 2); + + BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING(chan, (elts[i+1] << 16) | elts[i]); + + vc -= push; + elts += push; + } + + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); + + start = restart; + } +} + +static INLINE void +nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + + while (count) { + uint32_t *elts = (uint32_t *)ib + start; + unsigned vc, push, restart = 0; + + nvfx_state_emit(nvfx); + + vc = nouveau_vbuf_split(AVAIL_RING(chan), 5, 1, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(chan); + continue; + } + count -= vc; + + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); + + while (vc) { + push = MIN2(vc, 2047); + + BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U32, push); + OUT_RINGp (chan, elts, push); + + vc -= push; + elts += push; + } + + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); + + start = restart; + } +} + +static void +nvfx_draw_elements_inline(struct pipe_context *pipe, + struct pipe_buffer *ib, unsigned ib_size, + unsigned mode, unsigned start, unsigned count) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct pipe_screen *pscreen = pipe->screen; + void *map; + + map = pipe_buffer_map(pscreen, ib, PIPE_BUFFER_USAGE_CPU_READ); + if (!ib) { + NOUVEAU_ERR("failed mapping ib\n"); + return; + } + + switch (ib_size) { + case 1: + nvfx_draw_elements_u08(nvfx, map, mode, start, count); + break; + case 2: + nvfx_draw_elements_u16(nvfx, map, mode, start, count); + break; + case 4: + nvfx_draw_elements_u32(nvfx, map, mode, start, count); + break; + default: + NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); + break; + } + + pipe_buffer_unmap(pscreen, ib); +} + +static void +nvfx_draw_elements_vbo(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + unsigned restart = 0; + + while (count) { + unsigned nr, vc; + + nvfx_state_emit(nvfx); + + vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, + mode, start, count, &restart); + if (!vc) { + FIRE_RING(chan); + continue; + } + + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, nvgl_primitive(mode)); + + nr = (vc & 0xff); + if (nr) { + BEGIN_RING(chan, eng3d, NV34TCL_VB_INDEX_BATCH, 1); + OUT_RING (chan, ((nr - 1) << 24) | start); + start += nr; + } + + nr = vc >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; + + nr -= push; + + BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_INDEX_BATCH, push); + while (push--) { + OUT_RING(chan, ((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, 0); + + count -= vc; + start = restart; + } +} + +void +nvfx_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + boolean idxbuf; + + idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize); + if (FORCE_SWTNL || !nvfx_state_validate(nvfx)) { + nvfx_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + return; + } + + if (idxbuf) { + nvfx_draw_elements_vbo(pipe, mode, start, count); + } else { + nvfx_draw_elements_inline(pipe, indexBuffer, indexSize, + mode, start, count); + } + + pipe->flush(pipe, 0, NULL); +} + +static boolean +nvfx_vbo_validate(struct nvfx_context *nvfx) +{ + struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct pipe_buffer *ib = nvfx->idxbuf; + unsigned ib_format = nvfx->idxbuf_format; + unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + int hw; + + vtxbuf = so_new(3, 17, 18); + so_method(vtxbuf, eng3d, NV34TCL_VTXBUF_ADDRESS(0), nvfx->vtxelt->num_elements); + vtxfmt = so_new(1, 16, 0); + so_method(vtxfmt, eng3d, NV34TCL_VTXFMT(0), nvfx->vtxelt->num_elements); + + for (hw = 0; hw < nvfx->vtxelt->num_elements; hw++) { + struct pipe_vertex_element *ve; + struct pipe_vertex_buffer *vb; + unsigned type, ncomp; + + ve = &nvfx->vtxelt->pipe[hw]; + vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; + + if (!vb->stride) { + if (!sattr) + sattr = so_new(16, 16 * 4, 0); + + if (nvfx_vbo_static_attrib(nvfx, sattr, hw, ve, vb)) { + so_data(vtxbuf, 0); + so_data(vtxfmt, NV34TCL_VTXFMT_TYPE_FLOAT); + continue; + } + } + + if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { + nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS; + so_ref(NULL, &vtxbuf); + so_ref(NULL, &vtxfmt); + return FALSE; + } + + so_reloc(vtxbuf, nouveau_bo(vb->buffer), + vb->buffer_offset + ve->src_offset, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV34TCL_VTXBUF_ADDRESS_DMA1); + so_data (vtxfmt, ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) | + (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type)); + } + + if (ib) { + struct nouveau_bo *bo = nouveau_bo(ib); + + so_method(vtxbuf, eng3d, NV34TCL_IDXBUF_ADDRESS, 2); + so_reloc (vtxbuf, bo, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); + so_reloc (vtxbuf, bo, ib_format, vb_flags | NOUVEAU_BO_OR, + 0, NV34TCL_IDXBUF_FORMAT_DMA1); + } + + so_method(vtxbuf, eng3d, 0x1710, 1); + so_data (vtxbuf, 0); + + so_ref(vtxbuf, &nvfx->state.hw[NVFX_STATE_VTXBUF]); + so_ref(NULL, &vtxbuf); + nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXBUF); + so_ref(vtxfmt, &nvfx->state.hw[NVFX_STATE_VTXFMT]); + so_ref(NULL, &vtxfmt); + nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXFMT); + so_ref(sattr, &nvfx->state.hw[NVFX_STATE_VTXATTR]); + so_ref(NULL, &sattr); + nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXATTR); + return FALSE; +} + +struct nvfx_state_entry nvfx_state_vbo = { + .validate = nvfx_vbo_validate, + .dirty = { + .pipe = NVFX_NEW_ARRAYS, + .hw = 0, + } +}; -- cgit v1.2.3 From c65a8f3ed2ab1650df38a3ed32d1e91e84b50520 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 21 Feb 2010 12:39:21 +0100 Subject: nv30, nv40: partially unify nv[34]0_state.c state.c is identical except for: 1. Sampler state creation is different 2. nv40 swtnl support 3. Separate blend equations on nv40 This patch unifies nv[34]0_state.c, except the sampler state creation code. --- src/gallium/drivers/nv30/nv30_context.c | 2 +- src/gallium/drivers/nv30/nv30_context.h | 6 - src/gallium/drivers/nv30/nv30_state.c | 573 +----------------------------- src/gallium/drivers/nv40/nv40_context.c | 2 +- src/gallium/drivers/nv40/nv40_context.h | 6 - src/gallium/drivers/nv40/nv40_state.c | 577 +----------------------------- src/gallium/drivers/nvfx/Makefile | 1 + src/gallium/drivers/nvfx/nvfx_context.h | 8 + src/gallium/drivers/nvfx/nvfx_state.c | 605 ++++++++++++++++++++++++++++++++ 9 files changed, 618 insertions(+), 1162 deletions(-) create mode 100644 src/gallium/drivers/nvfx/nvfx_state.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 730f4588785..9ddb331e749 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -75,7 +75,7 @@ nv30_create(struct pipe_screen *pscreen, void *priv) nvfx_init_query_functions(nvfx); nvfx_init_surface_functions(nvfx); - nv30_init_state_functions(nvfx); + nvfx_init_state_functions(nvfx); nvfx_init_transfer_functions(nvfx); /* Create, configure, and install fallback swtnl path */ diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index de879d504fc..16cf3cbaa45 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -3,12 +3,6 @@ #include "nvfx_context.h" -extern void nv30_init_state_functions(struct nvfx_context *nvfx); - -/* nv30_vertprog.c */ -extern void nv30_vertprog_destroy(struct nvfx_context *, - struct nvfx_vertex_program *); - /* nv30_fragtex.c */ extern void nv30_fragtex_bind(struct nvfx_context *); diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c index 92a4089b316..bf96f7cf353 100644 --- a/src/gallium/drivers/nv30/nv30_state.c +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -7,74 +7,6 @@ #include "nv30_context.h" #include "nvfx_state.h" -static void * -nv30_blend_state_create(struct pipe_context *pipe, - const struct pipe_blend_state *cso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - struct nvfx_blend_state *bso = CALLOC(1, sizeof(*bso)); - struct nouveau_stateobj *so = so_new(5, 8, 0); - - if (cso->rt[0].blend_enable) { - so_method(so, eng3d, NV34TCL_BLEND_FUNC_ENABLE, 3); - so_data (so, 1); - so_data (so, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) | - nvgl_blend_func(cso->rt[0].rgb_src_factor)); - so_data (so, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 | - nvgl_blend_func(cso->rt[0].rgb_dst_factor)); - /* FIXME: Gallium assumes GL_EXT_blend_func_separate. - It is not the case for NV30 */ - so_method(so, eng3d, NV34TCL_BLEND_EQUATION, 1); - so_data (so, nvgl_blend_eqn(cso->rt[0].rgb_func)); - } else { - so_method(so, eng3d, NV34TCL_BLEND_FUNC_ENABLE, 1); - so_data (so, 0); - } - - so_method(so, eng3d, NV34TCL_COLOR_MASK, 1); - so_data (so, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | - ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | - ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | - ((cso->rt[0].colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); - - if (cso->logicop_enable) { - so_method(so, eng3d, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); - so_data (so, 1); - so_data (so, nvgl_logicop_func(cso->logicop_func)); - } else { - so_method(so, eng3d, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1); - so_data (so, 0); - } - - so_method(so, eng3d, NV34TCL_DITHER_ENABLE, 1); - so_data (so, cso->dither ? 1 : 0); - - so_ref(so, &bso->so); - so_ref(NULL, &so); - bso->pipe = *cso; - return (void *)bso; -} - -static void -nv30_blend_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->blend = hwcso; - nvfx->dirty |= NVFX_NEW_BLEND; -} - -static void -nv30_blend_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_blend_state *bso = hwcso; - - so_ref(NULL, &bso->so); - FREE(bso); -} - - static INLINE unsigned wrap_mode(unsigned wrap) { unsigned ret; @@ -113,7 +45,7 @@ wrap_mode(unsigned wrap) { return ret >> NV34TCL_TX_WRAP_S_SHIFT; } -static void * +void * nv30_sampler_state_create(struct pipe_context *pipe, const struct pipe_sampler_state *cso) { @@ -245,506 +177,3 @@ nv30_sampler_state_create(struct pipe_context *pipe, return (void *)ps; } -static void -nv30_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - unsigned unit; - - for (unit = 0; unit < nr; unit++) { - nvfx->tex_sampler[unit] = sampler[unit]; - nvfx->dirty_samplers |= (1 << unit); - } - - for (unit = nr; unit < nvfx->nr_samplers; unit++) { - nvfx->tex_sampler[unit] = NULL; - nvfx->dirty_samplers |= (1 << unit); - } - - nvfx->nr_samplers = nr; - nvfx->dirty |= NVFX_NEW_SAMPLER; -} - -static void -nv30_sampler_state_delete(struct pipe_context *pipe, void *hwcso) -{ - FREE(hwcso); -} - -static void -nv30_set_sampler_texture(struct pipe_context *pipe, unsigned nr, - struct pipe_texture **miptree) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - unsigned unit; - - for (unit = 0; unit < nr; unit++) { - pipe_texture_reference((struct pipe_texture **) - &nvfx->tex_miptree[unit], miptree[unit]); - nvfx->dirty_samplers |= (1 << unit); - } - - for (unit = nr; unit < nvfx->nr_textures; unit++) { - pipe_texture_reference((struct pipe_texture **) - &nvfx->tex_miptree[unit], NULL); - nvfx->dirty_samplers |= (1 << unit); - } - - nvfx->nr_textures = nr; - nvfx->dirty |= NVFX_NEW_SAMPLER; -} - -static void * -nv30_rasterizer_state_create(struct pipe_context *pipe, - const struct pipe_rasterizer_state *cso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); - struct nouveau_stateobj *so = so_new(9, 19, 0); - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - - /*XXX: ignored: - * light_twoside - * point_smooth -nohw - * multisample - */ - - so_method(so, eng3d, NV34TCL_SHADE_MODEL, 1); - so_data (so, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT : - NV34TCL_SHADE_MODEL_SMOOTH); - - so_method(so, eng3d, NV34TCL_LINE_WIDTH, 2); - so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); - so_data (so, cso->line_smooth ? 1 : 0); - so_method(so, eng3d, NV34TCL_LINE_STIPPLE_ENABLE, 2); - so_data (so, cso->line_stipple_enable ? 1 : 0); - so_data (so, (cso->line_stipple_pattern << 16) | - cso->line_stipple_factor); - - so_method(so, eng3d, NV34TCL_POINT_SIZE, 1); - so_data (so, fui(cso->point_size)); - - so_method(so, eng3d, NV34TCL_POLYGON_MODE_FRONT, 6); - if (cso->front_winding == PIPE_WINDING_CCW) { - so_data(so, nvgl_polygon_mode(cso->fill_ccw)); - so_data(so, nvgl_polygon_mode(cso->fill_cw)); - switch (cso->cull_mode) { - case PIPE_WINDING_CCW: - so_data(so, NV34TCL_CULL_FACE_FRONT); - break; - case PIPE_WINDING_CW: - so_data(so, NV34TCL_CULL_FACE_BACK); - break; - case PIPE_WINDING_BOTH: - so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); - break; - default: - so_data(so, NV34TCL_CULL_FACE_BACK); - break; - } - so_data(so, NV34TCL_FRONT_FACE_CCW); - } else { - so_data(so, nvgl_polygon_mode(cso->fill_cw)); - so_data(so, nvgl_polygon_mode(cso->fill_ccw)); - switch (cso->cull_mode) { - case PIPE_WINDING_CCW: - so_data(so, NV34TCL_CULL_FACE_BACK); - break; - case PIPE_WINDING_CW: - so_data(so, NV34TCL_CULL_FACE_FRONT); - break; - case PIPE_WINDING_BOTH: - so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); - break; - default: - so_data(so, NV34TCL_CULL_FACE_BACK); - break; - } - so_data(so, NV34TCL_FRONT_FACE_CW); - } - so_data(so, cso->poly_smooth ? 1 : 0); - so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); - - so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, cso->poly_stipple_enable ? 1 : 0); - - so_method(so, eng3d, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) - so_data(so, 1); - else - so_data(so, 0); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) - so_data(so, 1); - else - so_data(so, 0); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) - so_data(so, 1); - else - so_data(so, 0); - if (cso->offset_cw || cso->offset_ccw) { - so_method(so, eng3d, NV34TCL_POLYGON_OFFSET_FACTOR, 2); - so_data (so, fui(cso->offset_scale)); - so_data (so, fui(cso->offset_units * 2)); - } - - so_method(so, eng3d, NV34TCL_POINT_SPRITE, 1); - if (cso->point_quad_rasterization) { - unsigned psctl = (1 << 0), i; - - for (i = 0; i < 8; i++) { - if ((cso->sprite_coord_enable >> i) & 1) - psctl |= (1 << (8 + i)); - } - - so_data(so, psctl); - } else { - so_data(so, 0); - } - - so_ref(so, &rsso->so); - so_ref(NULL, &so); - rsso->pipe = *cso; - return (void *)rsso; -} - -static void -nv30_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->rasterizer = hwcso; - nvfx->dirty |= NVFX_NEW_RAST; - /*nvfx->draw_dirty |= NVFX_NEW_RAST;*/ -} - -static void -nv30_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_rasterizer_state *rsso = hwcso; - - so_ref(NULL, &rsso->so); - FREE(rsso); -} - -static void * -nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *cso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); - struct nouveau_stateobj *so = so_new(6, 20, 0); - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - - so_method(so, eng3d, NV34TCL_DEPTH_FUNC, 3); - so_data (so, nvgl_comparison_op(cso->depth.func)); - so_data (so, cso->depth.writemask ? 1 : 0); - so_data (so, cso->depth.enabled ? 1 : 0); - - so_method(so, eng3d, NV34TCL_ALPHA_FUNC_ENABLE, 3); - so_data (so, cso->alpha.enabled ? 1 : 0); - so_data (so, nvgl_comparison_op(cso->alpha.func)); - so_data (so, float_to_ubyte(cso->alpha.ref_value)); - - if (cso->stencil[0].enabled) { - so_method(so, eng3d, NV34TCL_STENCIL_FRONT_ENABLE, 3); - so_data (so, cso->stencil[0].enabled ? 1 : 0); - so_data (so, cso->stencil[0].writemask); - so_data (so, nvgl_comparison_op(cso->stencil[0].func)); - so_method(so, eng3d, NV34TCL_STENCIL_FRONT_FUNC_MASK, 4); - so_data (so, cso->stencil[0].valuemask); - so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); - } else { - so_method(so, eng3d, NV34TCL_STENCIL_FRONT_ENABLE, 1); - so_data (so, 0); - } - - if (cso->stencil[1].enabled) { - so_method(so, eng3d, NV34TCL_STENCIL_BACK_ENABLE, 3); - so_data (so, cso->stencil[1].enabled ? 1 : 0); - so_data (so, cso->stencil[1].writemask); - so_data (so, nvgl_comparison_op(cso->stencil[1].func)); - so_method(so, eng3d, NV34TCL_STENCIL_BACK_FUNC_MASK, 4); - so_data (so, cso->stencil[1].valuemask); - so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); - } else { - so_method(so, eng3d, NV34TCL_STENCIL_BACK_ENABLE, 1); - so_data (so, 0); - } - - so_ref(so, &zsaso->so); - so_ref(NULL, &so); - zsaso->pipe = *cso; - return (void *)zsaso; -} - -static void -nv30_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->zsa = hwcso; - nvfx->dirty |= NVFX_NEW_ZSA; -} - -static void -nv30_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_zsa_state *zsaso = hwcso; - - so_ref(NULL, &zsaso->so); - FREE(zsaso); -} - -static void * -nv30_vp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - /*struct nvfx_context *nvfx = nvfx_context(pipe);*/ - struct nvfx_vertex_program *vp; - - vp = CALLOC(1, sizeof(struct nvfx_vertex_program)); - vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); - /*vp->draw = draw_create_vertex_shader(nvfx->draw, &vp->pipe);*/ - - return (void *)vp; -} - -static void -nv30_vp_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->vertprog = hwcso; - nvfx->dirty |= NVFX_NEW_VERTPROG; - /*nvfx->draw_dirty |= NVFX_NEW_VERTPROG;*/ -} - -static void -nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_vertex_program *vp = hwcso; - - /*draw_delete_vertex_shader(nvfx->draw, vp->draw);*/ - nv30_vertprog_destroy(nvfx, vp); - FREE((void*)vp->pipe.tokens); - FREE(vp); -} - -static void * -nv30_fp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - struct nvfx_fragment_program *fp; - - fp = CALLOC(1, sizeof(struct nvfx_fragment_program)); - fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); - - tgsi_scan_shader(fp->pipe.tokens, &fp->info); - - return (void *)fp; -} - -static void -nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->fragprog = hwcso; - nvfx->dirty |= NVFX_NEW_FRAGPROG; -} - -static void -nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_fragment_program *fp = hwcso; - - nvfx_fragprog_destroy(nvfx, fp); - FREE((void*)fp->pipe.tokens); - FREE(fp); -} - -static void -nv30_set_blend_color(struct pipe_context *pipe, - const struct pipe_blend_color *bcol) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->blend_colour = *bcol; - nvfx->dirty |= NVFX_NEW_BCOL; -} - -static void -nv30_set_stencil_ref(struct pipe_context *pipe, - const struct pipe_stencil_ref *sr) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->stencil_ref = *sr; - nvfx->dirty |= NVFX_NEW_SR; -} - -static void -nv30_set_clip_state(struct pipe_context *pipe, - const struct pipe_clip_state *clip) -{ -} - -static void -nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - struct pipe_buffer *buf ) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->constbuf[shader] = buf; - nvfx->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); - - if (shader == PIPE_SHADER_VERTEX) { - nvfx->dirty |= NVFX_NEW_VERTPROG; - } else - if (shader == PIPE_SHADER_FRAGMENT) { - nvfx->dirty |= NVFX_NEW_FRAGPROG; - } -} - -static void -nv30_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->framebuffer = *fb; - nvfx->dirty |= NVFX_NEW_FB; -} - -static void -nv30_set_polygon_stipple(struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - memcpy(nvfx->stipple, stipple->stipple, 4 * 32); - nvfx->dirty |= NVFX_NEW_STIPPLE; -} - -static void -nv30_set_scissor_state(struct pipe_context *pipe, - const struct pipe_scissor_state *s) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->scissor = *s; - nvfx->dirty |= NVFX_NEW_SCISSOR; -} - -static void -nv30_set_viewport_state(struct pipe_context *pipe, - const struct pipe_viewport_state *vpt) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->viewport = *vpt; - nvfx->dirty |= NVFX_NEW_VIEWPORT; - /*nvfx->draw_dirty |= NVFX_NEW_VIEWPORT;*/ -} - -static void -nv30_set_vertex_buffers(struct pipe_context *pipe, unsigned count, - const struct pipe_vertex_buffer *vb) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - memcpy(nvfx->vtxbuf, vb, sizeof(*vb) * count); - nvfx->vtxbuf_nr = count; - - nvfx->dirty |= NVFX_NEW_ARRAYS; - /*nvfx->draw_dirty |= NVFX_NEW_ARRAYS;*/ -} - -static void * -nv30_vtxelts_state_create(struct pipe_context *pipe, - unsigned num_elements, - const struct pipe_vertex_element *elements) -{ - struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state); - - assert(num_elements < 16); /* not doing fallbacks yet */ - cso->num_elements = num_elements; - memcpy(cso->pipe, elements, num_elements * sizeof(*elements)); - -/* nv30_vtxelt_construct(cso);*/ - - return (void *)cso; -} - -static void -nv30_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso) -{ - FREE(hwcso); -} - -static void -nv30_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->vtxelt = hwcso; - nvfx->dirty |= NVFX_NEW_ARRAYS; - /*nvfx->draw_dirty |= NVFX_NEW_ARRAYS;*/ -} - -void -nv30_init_state_functions(struct nvfx_context *nvfx) -{ - nvfx->pipe.create_blend_state = nv30_blend_state_create; - nvfx->pipe.bind_blend_state = nv30_blend_state_bind; - nvfx->pipe.delete_blend_state = nv30_blend_state_delete; - - nvfx->pipe.create_sampler_state = nv30_sampler_state_create; - nvfx->pipe.bind_fragment_sampler_states = nv30_sampler_state_bind; - nvfx->pipe.delete_sampler_state = nv30_sampler_state_delete; - nvfx->pipe.set_fragment_sampler_textures = nv30_set_sampler_texture; - - nvfx->pipe.create_rasterizer_state = nv30_rasterizer_state_create; - nvfx->pipe.bind_rasterizer_state = nv30_rasterizer_state_bind; - nvfx->pipe.delete_rasterizer_state = nv30_rasterizer_state_delete; - - nvfx->pipe.create_depth_stencil_alpha_state = - nv30_depth_stencil_alpha_state_create; - nvfx->pipe.bind_depth_stencil_alpha_state = - nv30_depth_stencil_alpha_state_bind; - nvfx->pipe.delete_depth_stencil_alpha_state = - nv30_depth_stencil_alpha_state_delete; - - nvfx->pipe.create_vs_state = nv30_vp_state_create; - nvfx->pipe.bind_vs_state = nv30_vp_state_bind; - nvfx->pipe.delete_vs_state = nv30_vp_state_delete; - - nvfx->pipe.create_fs_state = nv30_fp_state_create; - nvfx->pipe.bind_fs_state = nv30_fp_state_bind; - nvfx->pipe.delete_fs_state = nv30_fp_state_delete; - - nvfx->pipe.set_blend_color = nv30_set_blend_color; - nvfx->pipe.set_stencil_ref = nv30_set_stencil_ref; - nvfx->pipe.set_clip_state = nv30_set_clip_state; - nvfx->pipe.set_constant_buffer = nv30_set_constant_buffer; - nvfx->pipe.set_framebuffer_state = nv30_set_framebuffer_state; - nvfx->pipe.set_polygon_stipple = nv30_set_polygon_stipple; - nvfx->pipe.set_scissor_state = nv30_set_scissor_state; - nvfx->pipe.set_viewport_state = nv30_set_viewport_state; - - nvfx->pipe.create_vertex_elements_state = nv30_vtxelts_state_create; - nvfx->pipe.delete_vertex_elements_state = nv30_vtxelts_state_delete; - nvfx->pipe.bind_vertex_elements_state = nv30_vtxelts_state_bind; - - nvfx->pipe.set_vertex_buffers = nv30_set_vertex_buffers; -} - diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index 85c1355b4fa..13bd50dd1dc 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -75,7 +75,7 @@ nv40_create(struct pipe_screen *pscreen, void *priv) nvfx_init_query_functions(nvfx); nvfx_init_surface_functions(nvfx); - nv40_init_state_functions(nvfx); + nvfx_init_state_functions(nvfx); nvfx_init_transfer_functions(nvfx); /* Create, configure, and install fallback swtnl path */ diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index cebf21d00c0..5ea2229facd 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -3,12 +3,6 @@ #include "nvfx_context.h" -extern void nv40_init_state_functions(struct nvfx_context *nvfx); - -/* nv40_vertprog.c */ -extern void nv40_vertprog_destroy(struct nvfx_context *, - struct nvfx_vertex_program *); - /* nv40_fragtex.c */ extern void nv40_fragtex_bind(struct nvfx_context *); diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 1f4b9777d48..4d4098bc42c 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -9,73 +9,6 @@ #include "nv40_context.h" #include "nvfx_state.h" -static void * -nv40_blend_state_create(struct pipe_context *pipe, - const struct pipe_blend_state *cso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - struct nvfx_blend_state *bso = CALLOC(1, sizeof(*bso)); - struct nouveau_stateobj *so = so_new(5, 8, 0); - - if (cso->rt[0].blend_enable) { - so_method(so, eng3d, NV34TCL_BLEND_FUNC_ENABLE, 3); - so_data (so, 1); - so_data (so, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) | - nvgl_blend_func(cso->rt[0].rgb_src_factor)); - so_data (so, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 | - nvgl_blend_func(cso->rt[0].rgb_dst_factor)); - so_method(so, eng3d, NV40TCL_BLEND_EQUATION, 1); - so_data (so, nvgl_blend_eqn(cso->rt[0].alpha_func) << 16 | - nvgl_blend_eqn(cso->rt[0].rgb_func)); - } else { - so_method(so, eng3d, NV34TCL_BLEND_FUNC_ENABLE, 1); - so_data (so, 0); - } - - so_method(so, eng3d, NV34TCL_COLOR_MASK, 1); - so_data (so, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | - ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | - ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | - ((cso->rt[0].colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); - - if (cso->logicop_enable) { - so_method(so, eng3d, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); - so_data (so, 1); - so_data (so, nvgl_logicop_func(cso->logicop_func)); - } else { - so_method(so, eng3d, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1); - so_data (so, 0); - } - - so_method(so, eng3d, NV34TCL_DITHER_ENABLE, 1); - so_data (so, cso->dither ? 1 : 0); - - so_ref(so, &bso->so); - so_ref(NULL, &so); - bso->pipe = *cso; - return (void *)bso; -} - -static void -nv40_blend_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->blend = hwcso; - nvfx->dirty |= NVFX_NEW_BLEND; -} - -static void -nv40_blend_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_blend_state *bso = hwcso; - - so_ref(NULL, &bso->so); - FREE(bso); -} - - static INLINE unsigned wrap_mode(unsigned wrap) { unsigned ret; @@ -114,7 +47,7 @@ wrap_mode(unsigned wrap) { return ret >> NV34TCL_TX_WRAP_S_SHIFT; } -static void * +void * nv40_sampler_state_create(struct pipe_context *pipe, const struct pipe_sampler_state *cso) { @@ -255,511 +188,3 @@ nv40_sampler_state_create(struct pipe_context *pipe, return (void *)ps; } -static void -nv40_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - unsigned unit; - - for (unit = 0; unit < nr; unit++) { - nvfx->tex_sampler[unit] = sampler[unit]; - nvfx->dirty_samplers |= (1 << unit); - } - - for (unit = nr; unit < nvfx->nr_samplers; unit++) { - nvfx->tex_sampler[unit] = NULL; - nvfx->dirty_samplers |= (1 << unit); - } - - nvfx->nr_samplers = nr; - nvfx->dirty |= NVFX_NEW_SAMPLER; -} - -static void -nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso) -{ - FREE(hwcso); -} - -static void -nv40_set_sampler_texture(struct pipe_context *pipe, unsigned nr, - struct pipe_texture **miptree) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - unsigned unit; - - for (unit = 0; unit < nr; unit++) { - pipe_texture_reference((struct pipe_texture **) - &nvfx->tex_miptree[unit], miptree[unit]); - nvfx->dirty_samplers |= (1 << unit); - } - - for (unit = nr; unit < nvfx->nr_textures; unit++) { - pipe_texture_reference((struct pipe_texture **) - &nvfx->tex_miptree[unit], NULL); - nvfx->dirty_samplers |= (1 << unit); - } - - nvfx->nr_textures = nr; - nvfx->dirty |= NVFX_NEW_SAMPLER; -} - -static void * -nv40_rasterizer_state_create(struct pipe_context *pipe, - const struct pipe_rasterizer_state *cso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); - struct nouveau_stateobj *so = so_new(9, 19, 0); - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - - /*XXX: ignored: - * light_twoside - * point_smooth -nohw - * multisample - */ - - so_method(so, eng3d, NV34TCL_SHADE_MODEL, 1); - so_data (so, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT : - NV34TCL_SHADE_MODEL_SMOOTH); - - so_method(so, eng3d, NV34TCL_LINE_WIDTH, 2); - so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); - so_data (so, cso->line_smooth ? 1 : 0); - so_method(so, eng3d, NV34TCL_LINE_STIPPLE_ENABLE, 2); - so_data (so, cso->line_stipple_enable ? 1 : 0); - so_data (so, (cso->line_stipple_pattern << 16) | - cso->line_stipple_factor); - - so_method(so, eng3d, NV34TCL_POINT_SIZE, 1); - so_data (so, fui(cso->point_size)); - - so_method(so, eng3d, NV34TCL_POLYGON_MODE_FRONT, 6); - if (cso->front_winding == PIPE_WINDING_CCW) { - so_data(so, nvgl_polygon_mode(cso->fill_ccw)); - so_data(so, nvgl_polygon_mode(cso->fill_cw)); - switch (cso->cull_mode) { - case PIPE_WINDING_CCW: - so_data(so, NV34TCL_CULL_FACE_FRONT); - break; - case PIPE_WINDING_CW: - so_data(so, NV34TCL_CULL_FACE_BACK); - break; - case PIPE_WINDING_BOTH: - so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); - break; - default: - so_data(so, NV34TCL_CULL_FACE_BACK); - break; - } - so_data(so, NV34TCL_FRONT_FACE_CCW); - } else { - so_data(so, nvgl_polygon_mode(cso->fill_cw)); - so_data(so, nvgl_polygon_mode(cso->fill_ccw)); - switch (cso->cull_mode) { - case PIPE_WINDING_CCW: - so_data(so, NV34TCL_CULL_FACE_BACK); - break; - case PIPE_WINDING_CW: - so_data(so, NV34TCL_CULL_FACE_FRONT); - break; - case PIPE_WINDING_BOTH: - so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); - break; - default: - so_data(so, NV34TCL_CULL_FACE_BACK); - break; - } - so_data(so, NV34TCL_FRONT_FACE_CW); - } - so_data(so, cso->poly_smooth ? 1 : 0); - so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); - - so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, cso->poly_stipple_enable ? 1 : 0); - - so_method(so, eng3d, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) - so_data(so, 1); - else - so_data(so, 0); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) - so_data(so, 1); - else - so_data(so, 0); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) - so_data(so, 1); - else - so_data(so, 0); - if (cso->offset_cw || cso->offset_ccw) { - so_method(so, eng3d, NV34TCL_POLYGON_OFFSET_FACTOR, 2); - so_data (so, fui(cso->offset_scale)); - so_data (so, fui(cso->offset_units * 2)); - } - - so_method(so, eng3d, NV34TCL_POINT_SPRITE, 1); - if (cso->point_quad_rasterization) { - unsigned psctl = (1 << 0), i; - - for (i = 0; i < 8; i++) { - if ((cso->sprite_coord_enable >> i) & 1) - psctl |= (1 << (8 + i)); - } - - so_data(so, psctl); - } else { - so_data(so, 0); - } - - so_ref(so, &rsso->so); - so_ref(NULL, &so); - rsso->pipe = *cso; - return (void *)rsso; -} - -static void -nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->rasterizer = hwcso; - nvfx->dirty |= NVFX_NEW_RAST; - nvfx->draw_dirty |= NVFX_NEW_RAST; -} - -static void -nv40_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_rasterizer_state *rsso = hwcso; - - so_ref(NULL, &rsso->so); - FREE(rsso); -} - -static void * -nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *cso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); - struct nouveau_stateobj *so = so_new(6, 20, 0); - struct nouveau_grobj *eng3d = nvfx->screen->eng3d; - - so_method(so, eng3d, NV34TCL_DEPTH_FUNC, 3); - so_data (so, nvgl_comparison_op(cso->depth.func)); - so_data (so, cso->depth.writemask ? 1 : 0); - so_data (so, cso->depth.enabled ? 1 : 0); - - so_method(so, eng3d, NV34TCL_ALPHA_FUNC_ENABLE, 3); - so_data (so, cso->alpha.enabled ? 1 : 0); - so_data (so, nvgl_comparison_op(cso->alpha.func)); - so_data (so, float_to_ubyte(cso->alpha.ref_value)); - - if (cso->stencil[0].enabled) { - so_method(so, eng3d, NV34TCL_STENCIL_FRONT_ENABLE, 3); - so_data (so, cso->stencil[0].enabled ? 1 : 0); - so_data (so, cso->stencil[0].writemask); - so_data (so, nvgl_comparison_op(cso->stencil[0].func)); - so_method(so, eng3d, NV34TCL_STENCIL_FRONT_FUNC_MASK, 4); - so_data (so, cso->stencil[0].valuemask); - so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); - } else { - so_method(so, eng3d, NV34TCL_STENCIL_FRONT_ENABLE, 1); - so_data (so, 0); - } - - if (cso->stencil[1].enabled) { - so_method(so, eng3d, NV34TCL_STENCIL_BACK_ENABLE, 3); - so_data (so, cso->stencil[1].enabled ? 1 : 0); - so_data (so, cso->stencil[1].writemask); - so_data (so, nvgl_comparison_op(cso->stencil[1].func)); - so_method(so, eng3d, NV34TCL_STENCIL_BACK_FUNC_MASK, 4); - so_data (so, cso->stencil[1].valuemask); - so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); - } else { - so_method(so, eng3d, NV34TCL_STENCIL_BACK_ENABLE, 1); - so_data (so, 0); - } - - so_ref(so, &zsaso->so); - so_ref(NULL, &so); - zsaso->pipe = *cso; - return (void *)zsaso; -} - -static void -nv40_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->zsa = hwcso; - nvfx->dirty |= NVFX_NEW_ZSA; -} - -static void -nv40_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_zsa_state *zsaso = hwcso; - - so_ref(NULL, &zsaso->so); - FREE(zsaso); -} - -static void * -nv40_vp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_vertex_program *vp; - - vp = CALLOC(1, sizeof(struct nvfx_vertex_program)); - vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); - vp->draw = draw_create_vertex_shader(nvfx->draw, &vp->pipe); - - return (void *)vp; -} - -static void -nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->vertprog = hwcso; - nvfx->dirty |= NVFX_NEW_VERTPROG; - nvfx->draw_dirty |= NVFX_NEW_VERTPROG; -} - -static void -nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_vertex_program *vp = hwcso; - - draw_delete_vertex_shader(nvfx->draw, vp->draw); - nv40_vertprog_destroy(nvfx, vp); - FREE((void*)vp->pipe.tokens); - FREE(vp); -} - -static void * -nv40_fp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - struct nvfx_fragment_program *fp; - - fp = CALLOC(1, sizeof(struct nvfx_fragment_program)); - fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); - - tgsi_scan_shader(fp->pipe.tokens, &fp->info); - - return (void *)fp; -} - -static void -nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->fragprog = hwcso; - nvfx->dirty |= NVFX_NEW_FRAGPROG; -} - -static void -nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_fragment_program *fp = hwcso; - - nvfx_fragprog_destroy(nvfx, fp); - FREE((void*)fp->pipe.tokens); - FREE(fp); -} - -static void -nv40_set_blend_color(struct pipe_context *pipe, - const struct pipe_blend_color *bcol) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->blend_colour = *bcol; - nvfx->dirty |= NVFX_NEW_BCOL; -} - - static void -nv40_set_stencil_ref(struct pipe_context *pipe, - const struct pipe_stencil_ref *sr) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->stencil_ref = *sr; - nvfx->dirty |= NVFX_NEW_SR; -} - -static void -nv40_set_clip_state(struct pipe_context *pipe, - const struct pipe_clip_state *clip) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->clip = *clip; - nvfx->dirty |= NVFX_NEW_UCP; - nvfx->draw_dirty |= NVFX_NEW_UCP; -} - -static void -nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - struct pipe_buffer *buf ) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->constbuf[shader] = buf; - nvfx->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); - - if (shader == PIPE_SHADER_VERTEX) { - nvfx->dirty |= NVFX_NEW_VERTPROG; - } else - if (shader == PIPE_SHADER_FRAGMENT) { - nvfx->dirty |= NVFX_NEW_FRAGPROG; - } -} - -static void -nv40_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->framebuffer = *fb; - nvfx->dirty |= NVFX_NEW_FB; -} - -static void -nv40_set_polygon_stipple(struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - memcpy(nvfx->stipple, stipple->stipple, 4 * 32); - nvfx->dirty |= NVFX_NEW_STIPPLE; -} - -static void -nv40_set_scissor_state(struct pipe_context *pipe, - const struct pipe_scissor_state *s) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->scissor = *s; - nvfx->dirty |= NVFX_NEW_SCISSOR; -} - -static void -nv40_set_viewport_state(struct pipe_context *pipe, - const struct pipe_viewport_state *vpt) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->viewport = *vpt; - nvfx->dirty |= NVFX_NEW_VIEWPORT; - nvfx->draw_dirty |= NVFX_NEW_VIEWPORT; -} - -static void -nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count, - const struct pipe_vertex_buffer *vb) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - memcpy(nvfx->vtxbuf, vb, sizeof(*vb) * count); - nvfx->vtxbuf_nr = count; - - nvfx->dirty |= NVFX_NEW_ARRAYS; - nvfx->draw_dirty |= NVFX_NEW_ARRAYS; -} - -static void * -nv40_vtxelts_state_create(struct pipe_context *pipe, - unsigned num_elements, - const struct pipe_vertex_element *elements) -{ - struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state); - - assert(num_elements < 16); /* not doing fallbacks yet */ - cso->num_elements = num_elements; - memcpy(cso->pipe, elements, num_elements * sizeof(*elements)); - -/* nv40_vtxelt_construct(cso);*/ - - return (void *)cso; -} - -static void -nv40_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso) -{ - FREE(hwcso); -} - -static void -nv40_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - - nvfx->vtxelt = hwcso; - nvfx->dirty |= NVFX_NEW_ARRAYS; - nvfx->draw_dirty |= NVFX_NEW_ARRAYS; -} - -void -nv40_init_state_functions(struct nvfx_context *nvfx) -{ - nvfx->pipe.create_blend_state = nv40_blend_state_create; - nvfx->pipe.bind_blend_state = nv40_blend_state_bind; - nvfx->pipe.delete_blend_state = nv40_blend_state_delete; - - nvfx->pipe.create_sampler_state = nv40_sampler_state_create; - nvfx->pipe.bind_fragment_sampler_states = nv40_sampler_state_bind; - nvfx->pipe.delete_sampler_state = nv40_sampler_state_delete; - nvfx->pipe.set_fragment_sampler_textures = nv40_set_sampler_texture; - - nvfx->pipe.create_rasterizer_state = nv40_rasterizer_state_create; - nvfx->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind; - nvfx->pipe.delete_rasterizer_state = nv40_rasterizer_state_delete; - - nvfx->pipe.create_depth_stencil_alpha_state = - nv40_depth_stencil_alpha_state_create; - nvfx->pipe.bind_depth_stencil_alpha_state = - nv40_depth_stencil_alpha_state_bind; - nvfx->pipe.delete_depth_stencil_alpha_state = - nv40_depth_stencil_alpha_state_delete; - - nvfx->pipe.create_vs_state = nv40_vp_state_create; - nvfx->pipe.bind_vs_state = nv40_vp_state_bind; - nvfx->pipe.delete_vs_state = nv40_vp_state_delete; - - nvfx->pipe.create_fs_state = nv40_fp_state_create; - nvfx->pipe.bind_fs_state = nv40_fp_state_bind; - nvfx->pipe.delete_fs_state = nv40_fp_state_delete; - - nvfx->pipe.set_blend_color = nv40_set_blend_color; - nvfx->pipe.set_stencil_ref = nv40_set_stencil_ref; - nvfx->pipe.set_clip_state = nv40_set_clip_state; - nvfx->pipe.set_constant_buffer = nv40_set_constant_buffer; - nvfx->pipe.set_framebuffer_state = nv40_set_framebuffer_state; - nvfx->pipe.set_polygon_stipple = nv40_set_polygon_stipple; - nvfx->pipe.set_scissor_state = nv40_set_scissor_state; - nvfx->pipe.set_viewport_state = nv40_set_viewport_state; - - nvfx->pipe.create_vertex_elements_state = nv40_vtxelts_state_create; - nvfx->pipe.delete_vertex_elements_state = nv40_vtxelts_state_delete; - nvfx->pipe.bind_vertex_elements_state = nv40_vtxelts_state_bind; - - nvfx->pipe.set_vertex_buffers = nv40_set_vertex_buffers; -} - diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index d1304ccea0d..6c4ee17493d 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -9,6 +9,7 @@ C_SOURCES = \ nvfx_fragprog.c \ nvfx_miptree.c \ nvfx_query.c \ + nvfx_state.c \ nvfx_state_blend.c \ nvfx_state_emit.c \ nvfx_state_fb.c \ diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 8f121fe3af2..ab1a1fbbe9d 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -212,6 +212,9 @@ extern void nvfx_draw_elements_swtnl(struct pipe_context *pipe, extern void nvfx_fragprog_destroy(struct nvfx_context *, struct nvfx_fragment_program *); +/* nvfx_state.c */ +extern void nvfx_init_state_functions(struct nvfx_context *nvfx); + /* nvfx_state_emit.c */ extern void nvfx_state_flush_notify(struct nouveau_channel *chan); extern boolean nvfx_state_validate(struct nvfx_context *nvfx); @@ -230,4 +233,9 @@ extern void nvfx_draw_elements(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count); +/* nvfx_vertprog.c */ +extern void nv30_vertprog_destroy(struct nvfx_context *, + struct nvfx_vertex_program *); +extern void nv40_vertprog_destroy(struct nvfx_context *, + struct nvfx_vertex_program *); #endif diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c new file mode 100644 index 00000000000..76780dc4f5d --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state.c @@ -0,0 +1,605 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" + +#include "draw/draw_context.h" + +#include "tgsi/tgsi_parse.h" + +#include "nvfx_context.h" +#include "nvfx_state.h" + +static void * +nvfx_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct nvfx_blend_state *bso = CALLOC(1, sizeof(*bso)); + struct nouveau_stateobj *so = so_new(5, 8, 0); + + if (cso->rt[0].blend_enable) { + so_method(so, eng3d, NV34TCL_BLEND_FUNC_ENABLE, 3); + so_data (so, 1); + so_data (so, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) | + nvgl_blend_func(cso->rt[0].rgb_src_factor)); + so_data (so, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 | + nvgl_blend_func(cso->rt[0].rgb_dst_factor)); + if(nvfx->screen->base.device->chipset < 0x40) { + so_method(so, eng3d, NV34TCL_BLEND_EQUATION, 1); + so_data (so, nvgl_blend_eqn(cso->rt[0].rgb_func)); + } else { + so_method(so, eng3d, NV40TCL_BLEND_EQUATION, 1); + so_data (so, nvgl_blend_eqn(cso->rt[0].alpha_func) << 16 | + nvgl_blend_eqn(cso->rt[0].rgb_func)); + } + } else { + so_method(so, eng3d, NV34TCL_BLEND_FUNC_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, eng3d, NV34TCL_COLOR_MASK, 1); + so_data (so, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | + ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | + ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | + ((cso->rt[0].colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); + + /* TODO: add NV40 MRT color mask */ + + if (cso->logicop_enable) { + so_method(so, eng3d, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); + so_data (so, 1); + so_data (so, nvgl_logicop_func(cso->logicop_func)); + } else { + so_method(so, eng3d, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, eng3d, NV34TCL_DITHER_ENABLE, 1); + so_data (so, cso->dither ? 1 : 0); + + so_ref(so, &bso->so); + so_ref(NULL, &so); + bso->pipe = *cso; + return (void *)bso; +} + +static void +nvfx_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->blend = hwcso; + nvfx->dirty |= NVFX_NEW_BLEND; +} + +static void +nvfx_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_blend_state *bso = hwcso; + + so_ref(NULL, &bso->so); + FREE(bso); +} + +static void +nv40_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nvfx->tex_sampler[unit] = sampler[unit]; + nvfx->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nvfx->nr_samplers; unit++) { + nvfx->tex_sampler[unit] = NULL; + nvfx->dirty_samplers |= (1 << unit); + } + + nvfx->nr_samplers = nr; + nvfx->dirty |= NVFX_NEW_SAMPLER; +} + +static void +nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nvfx_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **miptree) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nvfx->tex_miptree[unit], miptree[unit]); + nvfx->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nvfx->nr_textures; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nvfx->tex_miptree[unit], NULL); + nvfx->dirty_samplers |= (1 << unit); + } + + nvfx->nr_textures = nr; + nvfx->dirty |= NVFX_NEW_SAMPLER; +} + +static void * +nvfx_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); + struct nouveau_stateobj *so = so_new(9, 19, 0); + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + + /*XXX: ignored: + * light_twoside + * point_smooth -nohw + * multisample + */ + + so_method(so, eng3d, NV34TCL_SHADE_MODEL, 1); + so_data (so, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT : + NV34TCL_SHADE_MODEL_SMOOTH); + + so_method(so, eng3d, NV34TCL_LINE_WIDTH, 2); + so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); + so_data (so, cso->line_smooth ? 1 : 0); + so_method(so, eng3d, NV34TCL_LINE_STIPPLE_ENABLE, 2); + so_data (so, cso->line_stipple_enable ? 1 : 0); + so_data (so, (cso->line_stipple_pattern << 16) | + cso->line_stipple_factor); + + so_method(so, eng3d, NV34TCL_POINT_SIZE, 1); + so_data (so, fui(cso->point_size)); + + so_method(so, eng3d, NV34TCL_POLYGON_MODE_FRONT, 6); + if (cso->front_winding == PIPE_WINDING_CCW) { + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV34TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_CW: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV34TCL_FRONT_FACE_CCW); + } else { + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_CW: + so_data(so, NV34TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV34TCL_FRONT_FACE_CW); + } + so_data(so, cso->poly_smooth ? 1 : 0); + so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); + + so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, cso->poly_stipple_enable ? 1 : 0); + + so_method(so, eng3d, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) + so_data(so, 1); + else + so_data(so, 0); + if (cso->offset_cw || cso->offset_ccw) { + so_method(so, eng3d, NV34TCL_POLYGON_OFFSET_FACTOR, 2); + so_data (so, fui(cso->offset_scale)); + so_data (so, fui(cso->offset_units * 2)); + } + + so_method(so, eng3d, NV34TCL_POINT_SPRITE, 1); + if (cso->point_quad_rasterization) { + unsigned psctl = (1 << 0), i; + + for (i = 0; i < 8; i++) { + if ((cso->sprite_coord_enable >> i) & 1) + psctl |= (1 << (8 + i)); + } + + so_data(so, psctl); + } else { + so_data(so, 0); + } + + so_ref(so, &rsso->so); + so_ref(NULL, &so); + rsso->pipe = *cso; + return (void *)rsso; +} + +static void +nvfx_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->rasterizer = hwcso; + nvfx->dirty |= NVFX_NEW_RAST; + nvfx->draw_dirty |= NVFX_NEW_RAST; +} + +static void +nvfx_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_rasterizer_state *rsso = hwcso; + + so_ref(NULL, &rsso->so); + FREE(rsso); +} + +static void * +nvfx_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); + struct nouveau_stateobj *so = so_new(6, 20, 0); + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + + so_method(so, eng3d, NV34TCL_DEPTH_FUNC, 3); + so_data (so, nvgl_comparison_op(cso->depth.func)); + so_data (so, cso->depth.writemask ? 1 : 0); + so_data (so, cso->depth.enabled ? 1 : 0); + + so_method(so, eng3d, NV34TCL_ALPHA_FUNC_ENABLE, 3); + so_data (so, cso->alpha.enabled ? 1 : 0); + so_data (so, nvgl_comparison_op(cso->alpha.func)); + so_data (so, float_to_ubyte(cso->alpha.ref_value)); + + if (cso->stencil[0].enabled) { + so_method(so, eng3d, NV34TCL_STENCIL_FRONT_ENABLE, 3); + so_data (so, cso->stencil[0].enabled ? 1 : 0); + so_data (so, cso->stencil[0].writemask); + so_data (so, nvgl_comparison_op(cso->stencil[0].func)); + so_method(so, eng3d, NV34TCL_STENCIL_FRONT_FUNC_MASK, 4); + so_data (so, cso->stencil[0].valuemask); + so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + } else { + so_method(so, eng3d, NV34TCL_STENCIL_FRONT_ENABLE, 1); + so_data (so, 0); + } + + if (cso->stencil[1].enabled) { + so_method(so, eng3d, NV34TCL_STENCIL_BACK_ENABLE, 3); + so_data (so, cso->stencil[1].enabled ? 1 : 0); + so_data (so, cso->stencil[1].writemask); + so_data (so, nvgl_comparison_op(cso->stencil[1].func)); + so_method(so, eng3d, NV34TCL_STENCIL_BACK_FUNC_MASK, 4); + so_data (so, cso->stencil[1].valuemask); + so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + } else { + so_method(so, eng3d, NV34TCL_STENCIL_BACK_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &zsaso->so); + so_ref(NULL, &so); + zsaso->pipe = *cso; + return (void *)zsaso; +} + +static void +nvfx_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->zsa = hwcso; + nvfx->dirty |= NVFX_NEW_ZSA; +} + +static void +nvfx_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_zsa_state *zsaso = hwcso; + + so_ref(NULL, &zsaso->so); + FREE(zsaso); +} + +static void * +nvfx_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_vertex_program *vp; + + vp = CALLOC(1, sizeof(struct nvfx_vertex_program)); + vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + vp->draw = draw_create_vertex_shader(nvfx->draw, &vp->pipe); + + return (void *)vp; +} + +static void +nvfx_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->vertprog = hwcso; + nvfx->dirty |= NVFX_NEW_VERTPROG; + nvfx->draw_dirty |= NVFX_NEW_VERTPROG; +} + +static void +nvfx_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_vertex_program *vp = hwcso; + + draw_delete_vertex_shader(nvfx->draw, vp->draw); + if(!nvfx->is_nv4x) + nv30_vertprog_destroy(nvfx, vp); + else + nv40_vertprog_destroy(nvfx, vp); + FREE((void*)vp->pipe.tokens); + FREE(vp); +} + +static void * +nvfx_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nvfx_fragment_program *fp; + + fp = CALLOC(1, sizeof(struct nvfx_fragment_program)); + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + tgsi_scan_shader(fp->pipe.tokens, &fp->info); + + return (void *)fp; +} + +static void +nvfx_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->fragprog = hwcso; + nvfx->dirty |= NVFX_NEW_FRAGPROG; +} + +static void +nvfx_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_fragment_program *fp = hwcso; + + nvfx_fragprog_destroy(nvfx, fp); + FREE((void*)fp->pipe.tokens); + FREE(fp); +} + +static void +nvfx_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->blend_colour = *bcol; + nvfx->dirty |= NVFX_NEW_BCOL; +} + +static void +nvfx_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *sr) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->stencil_ref = *sr; + nvfx->dirty |= NVFX_NEW_SR; +} + +static void +nvfx_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->clip = *clip; + nvfx->dirty |= NVFX_NEW_UCP; + nvfx->draw_dirty |= NVFX_NEW_UCP; +} + +static void +nvfx_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + struct pipe_buffer *buf ) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->constbuf[shader] = buf; + nvfx->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); + + if (shader == PIPE_SHADER_VERTEX) { + nvfx->dirty |= NVFX_NEW_VERTPROG; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + nvfx->dirty |= NVFX_NEW_FRAGPROG; + } +} + +static void +nvfx_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->framebuffer = *fb; + nvfx->dirty |= NVFX_NEW_FB; +} + +static void +nvfx_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + memcpy(nvfx->stipple, stipple->stipple, 4 * 32); + nvfx->dirty |= NVFX_NEW_STIPPLE; +} + +static void +nvfx_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->scissor = *s; + nvfx->dirty |= NVFX_NEW_SCISSOR; +} + +static void +nvfx_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->viewport = *vpt; + nvfx->dirty |= NVFX_NEW_VIEWPORT; + nvfx->draw_dirty |= NVFX_NEW_VIEWPORT; +} + +static void +nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + memcpy(nvfx->vtxbuf, vb, sizeof(*vb) * count); + nvfx->vtxbuf_nr = count; + + nvfx->dirty |= NVFX_NEW_ARRAYS; + nvfx->draw_dirty |= NVFX_NEW_ARRAYS; +} + +static void * +nvfx_vtxelts_state_create(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state); + + assert(num_elements < 16); /* not doing fallbacks yet */ + cso->num_elements = num_elements; + memcpy(cso->pipe, elements, num_elements * sizeof(*elements)); + +/* nvfx_vtxelt_construct(cso);*/ + + return (void *)cso; +} + +static void +nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->vtxelt = hwcso; + nvfx->dirty |= NVFX_NEW_ARRAYS; + /*nvfx->draw_dirty |= NVFX_NEW_ARRAYS;*/ +} + +void * +nv30_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso); + +void * +nv40_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso); + +void +nvfx_init_state_functions(struct nvfx_context *nvfx) +{ + nvfx->pipe.create_blend_state = nvfx_blend_state_create; + nvfx->pipe.bind_blend_state = nvfx_blend_state_bind; + nvfx->pipe.delete_blend_state = nvfx_blend_state_delete; + + if(nvfx->is_nv4x) + nvfx->pipe.create_sampler_state = nv40_sampler_state_create; + else + nvfx->pipe.create_sampler_state = nv30_sampler_state_create; + nvfx->pipe.bind_fragment_sampler_states = nv40_sampler_state_bind; + nvfx->pipe.delete_sampler_state = nv40_sampler_state_delete; + nvfx->pipe.set_fragment_sampler_textures = nvfx_set_sampler_texture; + + nvfx->pipe.create_rasterizer_state = nvfx_rasterizer_state_create; + nvfx->pipe.bind_rasterizer_state = nvfx_rasterizer_state_bind; + nvfx->pipe.delete_rasterizer_state = nvfx_rasterizer_state_delete; + + nvfx->pipe.create_depth_stencil_alpha_state = + nvfx_depth_stencil_alpha_state_create; + nvfx->pipe.bind_depth_stencil_alpha_state = + nvfx_depth_stencil_alpha_state_bind; + nvfx->pipe.delete_depth_stencil_alpha_state = + nvfx_depth_stencil_alpha_state_delete; + + nvfx->pipe.create_vs_state = nvfx_vp_state_create; + nvfx->pipe.bind_vs_state = nvfx_vp_state_bind; + nvfx->pipe.delete_vs_state = nvfx_vp_state_delete; + + nvfx->pipe.create_fs_state = nvfx_fp_state_create; + nvfx->pipe.bind_fs_state = nvfx_fp_state_bind; + nvfx->pipe.delete_fs_state = nvfx_fp_state_delete; + + nvfx->pipe.set_blend_color = nvfx_set_blend_color; + nvfx->pipe.set_stencil_ref = nvfx_set_stencil_ref; + nvfx->pipe.set_clip_state = nvfx_set_clip_state; + nvfx->pipe.set_constant_buffer = nvfx_set_constant_buffer; + nvfx->pipe.set_framebuffer_state = nvfx_set_framebuffer_state; + nvfx->pipe.set_polygon_stipple = nvfx_set_polygon_stipple; + nvfx->pipe.set_scissor_state = nvfx_set_scissor_state; + nvfx->pipe.set_viewport_state = nvfx_set_viewport_state; + + nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create; + nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete; + nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind; + + nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers; +} -- cgit v1.2.3 From 840c36f5e6d940343a3154af7e76fec341ca46e6 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 21 Feb 2010 13:33:48 +0100 Subject: nv30, nv40: non-trivially unify nv[34]0_vertprog.c vertprog.c is similar but has substantial differences: 1. nv40 supports clip planes 2. nv40 uses a more advanced register allocator 3. Some register setup is different 4. Constants with the same name have different values This patch unifies the two files. nv30 gains clip plane support and the nv40 register allocator. A new NVFX_VP(x) macro is introduced that at runtime resolved to either the nv30 or the nv40 constant value. nv30 clip planes are not tested and might not work --- src/gallium/drivers/nv30/Makefile | 3 +- src/gallium/drivers/nv30/nv30_context.h | 3 - src/gallium/drivers/nv30/nv30_vertprog.c | 842 --------------------- src/gallium/drivers/nv40/Makefile | 3 +- src/gallium/drivers/nv40/nv40_context.h | 3 - src/gallium/drivers/nv40/nv40_vertprog.c | 1048 -------------------------- src/gallium/drivers/nvfx/Makefile | 3 +- src/gallium/drivers/nvfx/nvfx_context.h | 6 +- src/gallium/drivers/nvfx/nvfx_state.c | 5 +- src/gallium/drivers/nvfx/nvfx_state_emit.c | 2 +- src/gallium/drivers/nvfx/nvfx_vertprog.c | 1108 ++++++++++++++++++++++++++++ 11 files changed, 1117 insertions(+), 1909 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_vertprog.c delete mode 100644 src/gallium/drivers/nv40/nv40_vertprog.c create mode 100644 src/gallium/drivers/nvfx/nvfx_vertprog.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index 6ec93ee3465..b71afbbb9aa 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -7,8 +7,7 @@ C_SOURCES = \ nv30_context.c \ nv30_fragtex.c \ nv30_screen.c \ - nv30_state.c \ - nv30_vertprog.c + nv30_state.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/nvfx diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 16cf3cbaa45..eacbb1753d5 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -5,9 +5,6 @@ /* nv30_fragtex.c */ extern void nv30_fragtex_bind(struct nvfx_context *); - -/* nv30_state.c and friends */ -extern struct nvfx_state_entry nv30_state_vertprog; extern struct nvfx_state_entry nv30_state_fragtex; /* nvfx_context.c */ diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c deleted file mode 100644 index ec6d63889bc..00000000000 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ /dev/null @@ -1,842 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "util/u_inlines.h" - -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_dump.h" - -#include "nv30_context.h" -#include "nvfx_state.h" - -/* TODO (at least...): - * 1. Indexed consts + ARL - * 2. Arb. swz/negation - * 3. NV_vp11, NV_vp2, NV_vp3 features - * - extra arith opcodes - * - branching - * - texture sampling - * - indexed attribs - * - indexed results - * 4. bugs - */ - -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 8 -#define MASK_Y 4 -#define MASK_Z 2 -#define MASK_W 1 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE 0 -#define DEF_CTEST 0 -#include "nv30_shader.h" - -#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nvfx_sr_neg((s)) -#define abs(s) nvfx_sr_abs((s)) - -struct nv30_vpc { - struct nvfx_vertex_program *vp; - - struct nvfx_vertex_program_exec *vpi; - - unsigned output_map[PIPE_MAX_SHADER_OUTPUTS]; - - int high_temp; - int temp_temp_count; - - struct nvfx_sreg *imm; - unsigned nr_imm; -}; - -static struct nvfx_sreg -temp(struct nv30_vpc *vpc) -{ - int idx; - - idx = vpc->temp_temp_count++; - idx += vpc->high_temp + 1; - return nvfx_sr(NVFXSR_TEMP, idx); -} - -static struct nvfx_sreg -constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w) -{ - struct nvfx_vertex_program *vp = vpc->vp; - struct nvfx_vertex_program_data *vpd; - int idx; - - if (pipe >= 0) { - for (idx = 0; idx < vp->nr_consts; idx++) { - if (vp->consts[idx].index == pipe) - return nvfx_sr(NVFXSR_CONST, idx); - } - } - - idx = vp->nr_consts++; - vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); - vpd = &vp->consts[idx]; - - vpd->index = pipe; - vpd->value[0] = x; - vpd->value[1] = y; - vpd->value[2] = z; - vpd->value[3] = w; - return nvfx_sr(NVFXSR_CONST, idx); -} - -#define arith(cc,s,o,d,m,s0,s1,s2) \ - nv30_vp_arith((cc), NVFX_VP_INST_SLOT_##s, NVFX_VP_INST_##s##_OP_##o, (d), (m), (s0), (s1), (s2)) - -static void -emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nvfx_sreg src) -{ - struct nvfx_vertex_program *vp = vpc->vp; - uint32_t sr = 0; - - switch (src.type) { - case NVFXSR_TEMP: - sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT); - sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT); - break; - case NVFXSR_INPUT: - sr |= (NV30_VP_SRC_REG_TYPE_INPUT << - NV30_VP_SRC_REG_TYPE_SHIFT); - vp->ir |= (1 << src.index); - hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT); - break; - case NVFXSR_CONST: - sr |= (NV30_VP_SRC_REG_TYPE_CONST << - NV30_VP_SRC_REG_TYPE_SHIFT); - assert(vpc->vpi->const_index == -1 || - vpc->vpi->const_index == src.index); - vpc->vpi->const_index = src.index; - break; - case NVFXSR_NONE: - sr |= (NV30_VP_SRC_REG_TYPE_INPUT << - NV30_VP_SRC_REG_TYPE_SHIFT); - break; - default: - assert(0); - } - - if (src.negate) - sr |= NV30_VP_SRC_NEGATE; - - if (src.abs) - hw[0] |= (1 << (21 + pos)); - - sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) | - (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) | - (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) | - (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT)); - -/* - * |VVV| - * d�.�b - * \u/ - * - */ - - switch (pos) { - case 0: - hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >> - NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT; - hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) << - NV30_VP_INST_SRC0L_SHIFT; - break; - case 1: - hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT; - break; - case 2: - hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >> - NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT; - hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) << - NV30_VP_INST_SRC2L_SHIFT; - break; - default: - assert(0); - } -} - -static void -emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nvfx_sreg dst) -{ - struct nvfx_vertex_program *vp = vpc->vp; - - switch (dst.type) { - case NVFXSR_TEMP: - hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); - break; - case NVFXSR_OUTPUT: - switch (dst.index) { - case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; - case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; - case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; - case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; - case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; - case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; - case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; - case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; - case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; - case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; - case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; - case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; - case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; - case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; - default: - break; - } - - hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); - hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); - - /*XXX: no way this is entirely correct, someone needs to - * figure out what exactly it is. - */ - hw[3] |= 0x800; - break; - default: - assert(0); - } -} - -static void -nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op, - struct nvfx_sreg dst, int mask, - struct nvfx_sreg s0, struct nvfx_sreg s1, - struct nvfx_sreg s2) -{ - struct nvfx_vertex_program *vp = vpc->vp; - uint32_t *hw; - - vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); - vpc->vpi = &vp->insns[vp->nr_insns - 1]; - memset(vpc->vpi, 0, sizeof(*vpc->vpi)); - vpc->vpi->const_index = -1; - - hw = vpc->vpi->data; - - hw[0] |= (NVFX_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT); - hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) | - (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) | - (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) | - (3 << NV30_VP_INST_COND_SWZ_W_SHIFT)); - - hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); -// hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK; -// hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT); - - if (dst.type == NVFXSR_OUTPUT) { - if (slot) - hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); - else - hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); - } else { - if (slot) - hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); - else - hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); - } - - emit_dst(vpc, hw, slot, dst); - emit_src(vpc, hw, 0, s0); - emit_src(vpc, hw, 1, s1); - emit_src(vpc, hw, 2, s2); -} - -static INLINE struct nvfx_sreg -tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) { - struct nvfx_sreg src; - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - src = nvfx_sr(NVFXSR_INPUT, fsrc->Register.Index); - break; - case TGSI_FILE_CONSTANT: - src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); - break; - case TGSI_FILE_IMMEDIATE: - src = vpc->imm[fsrc->Register.Index]; - break; - case TGSI_FILE_TEMPORARY: - if (vpc->high_temp < fsrc->Register.Index) - vpc->high_temp = fsrc->Register.Index; - src = nvfx_sr(NVFXSR_TEMP, fsrc->Register.Index); - break; - default: - NOUVEAU_ERR("bad src file\n"); - break; - } - - src.abs = fsrc->Register.Absolute; - src.negate = fsrc->Register.Negate; - src.swz[0] = fsrc->Register.SwizzleX; - src.swz[1] = fsrc->Register.SwizzleY; - src.swz[2] = fsrc->Register.SwizzleZ; - src.swz[3] = fsrc->Register.SwizzleW; - return src; -} - -static INLINE struct nvfx_sreg -tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) { - struct nvfx_sreg dst; - - switch (fdst->Register.File) { - case TGSI_FILE_OUTPUT: - dst = nvfx_sr(NVFXSR_OUTPUT, - vpc->output_map[fdst->Register.Index]); - - break; - case TGSI_FILE_TEMPORARY: - dst = nvfx_sr(NVFXSR_TEMP, fdst->Register.Index); - if (vpc->high_temp < dst.index) - vpc->high_temp = dst.index; - break; - default: - NOUVEAU_ERR("bad dst file\n"); - break; - } - - return dst; -} - -static INLINE int -tgsi_mask(uint tgsi) -{ - int mask = 0; - - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; - return mask; -} - -static boolean -nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, - const struct tgsi_full_instruction *finst) -{ - struct nvfx_sreg src[3], dst, tmp; - struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); - int mask; - int ai = -1, ci = -1; - int i; - - if (finst->Instruction.Opcode == TGSI_OPCODE_END) - return TRUE; - - vpc->temp_temp_count = 0; - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { - src[i] = tgsi_src(vpc, fsrc); - } - } - - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->Register.Index) { - ai = fsrc->Register.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, VEC, MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - /*XXX: index comparison is broken now that consts come from - * two different register files. - */ - case TGSI_FILE_CONSTANT: - case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->Register.Index) { - ci = fsrc->Register.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, VEC, MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - case TGSI_FILE_TEMPORARY: - /* handled above */ - break; - default: - NOUVEAU_ERR("bad src file\n"); - return FALSE; - } - } - - dst = tgsi_dst(vpc, &finst->Dst[0]); - mask = tgsi_mask(finst->Dst[0].Register.WriteMask); - - switch (finst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - arith(vpc, VEC, MOV, dst, mask, abs(src[0]), none, none); - break; - case TGSI_OPCODE_ADD: - arith(vpc, VEC, ADD, dst, mask, src[0], none, src[1]); - break; - case TGSI_OPCODE_ARL: - arith(vpc, VEC, ARL, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_DP3: - arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DP4: - arith(vpc, VEC, DP4, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DPH: - arith(vpc, VEC, DPH, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DST: - arith(vpc, VEC, DST, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_EX2: - arith(vpc, SCA, EX2, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_EXP: - arith(vpc, SCA, EXP, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_FLR: - arith(vpc, VEC, FLR, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FRC: - arith(vpc, VEC, FRC, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_LG2: - arith(vpc, SCA, LG2, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_LIT: - arith(vpc, SCA, LIT, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_LOG: - arith(vpc, SCA, LOG, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_MAD: - arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]); - break; - case TGSI_OPCODE_MAX: - arith(vpc, VEC, MAX, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MIN: - arith(vpc, VEC, MIN, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MOV: - arith(vpc, VEC, MOV, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_MUL: - arith(vpc, VEC, MUL, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_POW: - tmp = temp(vpc); - arith(vpc, SCA, LG2, tmp, MASK_X, none, none, - swz(src[0], X, X, X, X)); - arith(vpc, VEC, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), - swz(src[1], X, X, X, X), none); - arith(vpc, SCA, EX2, dst, mask, none, none, - swz(tmp, X, X, X, X)); - break; - case TGSI_OPCODE_RCP: - arith(vpc, SCA, RCP, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_RET: - break; - case TGSI_OPCODE_RSQ: - arith(vpc, SCA, RSQ, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_SGE: - arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SGT: - arith(vpc, VEC, SGT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SLT: - arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SUB: - arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1])); - break; - case TGSI_OPCODE_XPD: - tmp = temp(vpc); - arith(vpc, VEC, MUL, tmp, mask, - swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(vpc, VEC, MAD, dst, (mask & ~MASK_W), - swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), - neg(tmp)); - break; - default: - NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); - return FALSE; - } - - return TRUE; -} - -static boolean -nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, - const struct tgsi_full_declaration *fdec) -{ - int hw; - - switch (fdec->Semantic.Name) { - case TGSI_SEMANTIC_POSITION: - hw = NV30_VP_INST_DEST_POS; - break; - case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.Index == 0) { - hw = NV30_VP_INST_DEST_COL0; - } else - if (fdec->Semantic.Index == 1) { - hw = NV30_VP_INST_DEST_COL1; - } else { - NOUVEAU_ERR("bad colour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_BCOLOR: - if (fdec->Semantic.Index == 0) { - hw = NV30_VP_INST_DEST_BFC0; - } else - if (fdec->Semantic.Index == 1) { - hw = NV30_VP_INST_DEST_BFC1; - } else { - NOUVEAU_ERR("bad bcolour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_FOG: - hw = NV30_VP_INST_DEST_FOGC; - break; - case TGSI_SEMANTIC_PSIZE: - hw = NV30_VP_INST_DEST_PSZ; - break; - case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.Index <= 7) { - hw = NV30_VP_INST_DEST_TC(fdec->Semantic.Index); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_EDGEFLAG: - NOUVEAU_ERR("cannot handle edgeflag output\n"); - return FALSE; - default: - NOUVEAU_ERR("bad output semantic\n"); - return FALSE; - } - - vpc->output_map[fdec->Range.First] = hw; - return TRUE; -} - -static boolean -nv30_vertprog_prepare(struct nv30_vpc *vpc) -{ - struct tgsi_parse_context p; - int nr_imm = 0; - - tgsi_parse_init(&p, vpc->vp->pipe.tokens); - while (!tgsi_parse_end_of_tokens(&p)) { - const union tgsi_full_token *tok = &p.FullToken; - - tgsi_parse_token(&p); - switch(tok->Token.Type) { - case TGSI_TOKEN_TYPE_IMMEDIATE: - nr_imm++; - break; - default: - break; - } - } - tgsi_parse_free(&p); - - if (nr_imm) { - vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_sreg)); - assert(vpc->imm); - } - - return TRUE; -} - -static void -nv30_vertprog_translate(struct nvfx_context *nvfx, - struct nvfx_vertex_program *vp) -{ - struct tgsi_parse_context parse; - struct nv30_vpc *vpc = NULL; - - tgsi_dump(vp->pipe.tokens,0); - - vpc = CALLOC(1, sizeof(struct nv30_vpc)); - if (!vpc) - return; - vpc->vp = vp; - vpc->high_temp = -1; - - if (!nv30_vertprog_prepare(vpc)) { - FREE(vpc); - return; - } - - tgsi_parse_init(&parse, vp->pipe.tokens); - - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *fdec; - fdec = &parse.FullToken.FullDeclaration; - switch (fdec->Declaration.File) { - case TGSI_FILE_OUTPUT: - if (!nv30_vertprog_parse_decl_output(vpc, fdec)) - goto out_err; - break; - default: - break; - } - } - break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - const struct tgsi_full_immediate *imm; - - imm = &parse.FullToken.FullImmediate; - assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); - assert(imm->Immediate.NrTokens == 4 + 1); - vpc->imm[vpc->nr_imm++] = - constant(vpc, -1, - imm->u[0].Float, - imm->u[1].Float, - imm->u[2].Float, - imm->u[3].Float); - } - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *finst; - finst = &parse.FullToken.FullInstruction; - if (!nv30_vertprog_parse_instruction(vpc, finst)) - goto out_err; - } - break; - default: - break; - } - } - - vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; - vp->translated = TRUE; -out_err: - tgsi_parse_free(&parse); - FREE(vpc); -} - -static boolean -nv30_vertprog_validate(struct nvfx_context *nvfx) -{ - struct pipe_screen *pscreen = nvfx->pipe.screen; - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - struct nvfx_vertex_program *vp; - struct pipe_buffer *constbuf; - boolean upload_code = FALSE, upload_data = FALSE; - int i; - - vp = nvfx->vertprog; - constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX]; - - /* Translate TGSI shader into hw bytecode */ - if (!vp->translated) { - nv30_vertprog_translate(nvfx, vp); - if (!vp->translated) - return FALSE; - } - - /* Allocate hw vtxprog exec slots */ - if (!vp->exec) { - struct nouveau_resource *heap = nvfx->screen->vp_exec_heap; - struct nouveau_stateobj *so; - uint vplen = vp->nr_insns; - - if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) { - while (heap->next && heap->size < vplen) { - struct nvfx_vertex_program *evict; - - evict = heap->next->priv; - nouveau_resource_free(&evict->exec); - } - - if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) - assert(0); - } - - so = so_new(1, 1, 0); - so_method(so, eng3d, NV34TCL_VP_START_FROM_ID, 1); - so_data (so, vp->exec->start); - so_ref(so, &vp->so); - so_ref(NULL, &so); - - upload_code = TRUE; - } - - /* Allocate hw vtxprog const slots */ - if (vp->nr_consts && !vp->data) { - struct nouveau_resource *heap = nvfx->screen->vp_data_heap; - - if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) { - while (heap->next && heap->size < vp->nr_consts) { - struct nvfx_vertex_program *evict; - - evict = heap->next->priv; - nouveau_resource_free(&evict->data); - } - - if (nouveau_resource_alloc(heap, vp->nr_consts, vp, - &vp->data)) - assert(0); - } - - /*XXX: handle this some day */ - assert(vp->data->start >= vp->data_start_min); - - upload_data = TRUE; - if (vp->data_start != vp->data->start) - upload_code = TRUE; - } - - /* If exec or data segments moved we need to patch the program to - * fixup offsets and register IDs. - */ - if (vp->exec_start != vp->exec->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nvfx_vertex_program_exec *vpi = &vp->insns[i]; - - if (vpi->has_branch_offset) { - assert(0); - } - } - - vp->exec_start = vp->exec->start; - } - - if (vp->nr_consts && vp->data_start != vp->data->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nvfx_vertex_program_exec *vpi = &vp->insns[i]; - - if (vpi->const_index >= 0) { - vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK; - vpi->data[1] |= - (vpi->const_index + vp->data->start) << - NV30_VP_INST_CONST_SRC_SHIFT; - - } - } - - vp->data_start = vp->data->start; - } - - /* Update + Upload constant values */ - if (vp->nr_consts) { - float *map = NULL; - - if (constbuf) { - map = pipe_buffer_map(pscreen, constbuf, - PIPE_BUFFER_USAGE_CPU_READ); - } - - for (i = 0; i < vp->nr_consts; i++) { - struct nvfx_vertex_program_data *vpd = &vp->consts[i]; - - if (vpd->index >= 0) { - if (!upload_data && - !memcmp(vpd->value, &map[vpd->index * 4], - 4 * sizeof(float))) - continue; - memcpy(vpd->value, &map[vpd->index * 4], - 4 * sizeof(float)); - } - - BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (chan, i + vp->data->start); - OUT_RINGp (chan, (uint32_t *)vpd->value, 4); - } - - if (constbuf) - pipe_buffer_unmap(pscreen, constbuf); - } - - /* Upload vtxprog */ - if (upload_code) { -#if 0 - for (i = 0; i < vp->nr_insns; i++) { - NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n", - i, vp->insns[i].data[0], vp->insns[i].data[1], - vp->insns[i].data[2], vp->insns[i].data[3]); - } -#endif - BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (chan, vp->exec->start); - for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (chan, vp->insns[i].data, 4); - } - } - - if (vp->so != nvfx->state.hw[NVFX_STATE_VERTPROG]) { - so_ref(vp->so, &nvfx->state.hw[NVFX_STATE_VERTPROG]); - return TRUE; - } - - return FALSE; -} - -void -nv30_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp) -{ - vp->translated = FALSE; - - if (vp->nr_insns) { - FREE(vp->insns); - vp->insns = NULL; - vp->nr_insns = 0; - } - - if (vp->nr_consts) { - FREE(vp->consts); - vp->consts = NULL; - vp->nr_consts = 0; - } - - nouveau_resource_free(&vp->exec); - vp->exec_start = 0; - nouveau_resource_free(&vp->data); - vp->data_start = 0; - vp->data_start_min = 0; - - vp->ir = vp->or = 0; - so_ref(NULL, &vp->so); -} - -struct nvfx_state_entry nv30_state_vertprog = { - .validate = nv30_vertprog_validate, - .dirty = { - .pipe = NVFX_NEW_VERTPROG /*| NVFX_NEW_UCP*/, - .hw = NVFX_STATE_VERTPROG, - } -}; diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 69085069060..c8f3858c36f 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -7,8 +7,7 @@ C_SOURCES = \ nv40_context.c \ nv40_fragtex.c \ nv40_screen.c \ - nv40_state.c \ - nv40_vertprog.c + nv40_state.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/nvfx diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 5ea2229facd..8dc87e426f9 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -5,9 +5,6 @@ /* nv40_fragtex.c */ extern void nv40_fragtex_bind(struct nvfx_context *); - -/* nv40_state.c and friends */ -extern struct nvfx_state_entry nv40_state_vertprog; extern struct nvfx_state_entry nv40_state_fragtex; /* nvfx_context.c */ diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c deleted file mode 100644 index 752cd0d1b3d..00000000000 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ /dev/null @@ -1,1048 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "util/u_inlines.h" - -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" - -#include "nv40_context.h" -#include "nvfx_state.h" - -/* TODO (at least...): - * 1. Indexed consts + ARL - * 3. NV_vp11, NV_vp2, NV_vp3 features - * - extra arith opcodes - * - branching - * - texture sampling - * - indexed attribs - * - indexed results - * 4. bugs - */ - -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 8 -#define MASK_Y 4 -#define MASK_Z 2 -#define MASK_W 1 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE 0 -#define DEF_CTEST 0 -#include "nv40_shader.h" - -#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nvfx_sr_neg((s)) -#define abs(s) nvfx_sr_abs((s)) - -#define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) - -struct nv40_vpc { - struct nvfx_vertex_program *vp; - - struct nvfx_vertex_program_exec *vpi; - - unsigned r_temps; - unsigned r_temps_discard; - struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; - struct nvfx_sreg *r_address; - struct nvfx_sreg *r_temp; - - struct nvfx_sreg *imm; - unsigned nr_imm; - - unsigned hpos_idx; -}; - -static struct nvfx_sreg -temp(struct nv40_vpc *vpc) -{ - int idx = ffs(~vpc->r_temps) - 1; - - if (idx < 0) { - NOUVEAU_ERR("out of temps!!\n"); - assert(0); - return nvfx_sr(NVFXSR_TEMP, 0); - } - - vpc->r_temps |= (1 << idx); - vpc->r_temps_discard |= (1 << idx); - return nvfx_sr(NVFXSR_TEMP, idx); -} - -static INLINE void -release_temps(struct nv40_vpc *vpc) -{ - vpc->r_temps &= ~vpc->r_temps_discard; - vpc->r_temps_discard = 0; -} - -static struct nvfx_sreg -constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w) -{ - struct nvfx_vertex_program *vp = vpc->vp; - struct nvfx_vertex_program_data *vpd; - int idx; - - if (pipe >= 0) { - for (idx = 0; idx < vp->nr_consts; idx++) { - if (vp->consts[idx].index == pipe) - return nvfx_sr(NVFXSR_CONST, idx); - } - } - - idx = vp->nr_consts++; - vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); - vpd = &vp->consts[idx]; - - vpd->index = pipe; - vpd->value[0] = x; - vpd->value[1] = y; - vpd->value[2] = z; - vpd->value[3] = w; - return nvfx_sr(NVFXSR_CONST, idx); -} - -#define arith(cc,s,o,d,m,s0,s1,s2) \ - nv40_vp_arith((cc), NVFX_VP_INST_SLOT_##s, NVFX_VP_INST_##s##_OP_##o, (d), (m), (s0), (s1), (s2)) - -static void -emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nvfx_sreg src) -{ - struct nvfx_vertex_program *vp = vpc->vp; - uint32_t sr = 0; - - switch (src.type) { - case NVFXSR_TEMP: - sr |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT); - sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT); - break; - case NVFXSR_INPUT: - sr |= (NV40_VP_SRC_REG_TYPE_INPUT << - NV40_VP_SRC_REG_TYPE_SHIFT); - vp->ir |= (1 << src.index); - hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT); - break; - case NVFXSR_CONST: - sr |= (NV40_VP_SRC_REG_TYPE_CONST << - NV40_VP_SRC_REG_TYPE_SHIFT); - assert(vpc->vpi->const_index == -1 || - vpc->vpi->const_index == src.index); - vpc->vpi->const_index = src.index; - break; - case NVFXSR_NONE: - sr |= (NV40_VP_SRC_REG_TYPE_INPUT << - NV40_VP_SRC_REG_TYPE_SHIFT); - break; - default: - assert(0); - } - - if (src.negate) - sr |= NV40_VP_SRC_NEGATE; - - if (src.abs) - hw[0] |= (1 << (21 + pos)); - - sr |= ((src.swz[0] << NV40_VP_SRC_SWZ_X_SHIFT) | - (src.swz[1] << NV40_VP_SRC_SWZ_Y_SHIFT) | - (src.swz[2] << NV40_VP_SRC_SWZ_Z_SHIFT) | - (src.swz[3] << NV40_VP_SRC_SWZ_W_SHIFT)); - - switch (pos) { - case 0: - hw[1] |= ((sr & NV40_VP_SRC0_HIGH_MASK) >> - NV40_VP_SRC0_HIGH_SHIFT) << NV40_VP_INST_SRC0H_SHIFT; - hw[2] |= (sr & NV40_VP_SRC0_LOW_MASK) << - NV40_VP_INST_SRC0L_SHIFT; - break; - case 1: - hw[2] |= sr << NV40_VP_INST_SRC1_SHIFT; - break; - case 2: - hw[2] |= ((sr & NV40_VP_SRC2_HIGH_MASK) >> - NV40_VP_SRC2_HIGH_SHIFT) << NV40_VP_INST_SRC2H_SHIFT; - hw[3] |= (sr & NV40_VP_SRC2_LOW_MASK) << - NV40_VP_INST_SRC2L_SHIFT; - break; - default: - assert(0); - } -} - -static void -emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nvfx_sreg dst) -{ - struct nvfx_vertex_program *vp = vpc->vp; - - switch (dst.type) { - case NVFXSR_TEMP: - hw[3] |= NV40_VP_INST_DEST_MASK; - if (slot == 0) { - hw[0] |= (dst.index << - NV40_VP_INST_VEC_DEST_TEMP_SHIFT); - } else { - hw[3] |= (dst.index << - NV40_VP_INST_SCA_DEST_TEMP_SHIFT); - } - break; - case NVFXSR_OUTPUT: - switch (dst.index) { - case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; - case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; - case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; - case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; - case NV40_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; - case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; - case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; - case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; - case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; - case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; - case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; - case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; - case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; - case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; - case NV40_VP_INST_DEST_CLIP(0): - vp->or |= (1 << 6); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0; - dst.index = NV40_VP_INST_DEST_FOGC; - break; - case NV40_VP_INST_DEST_CLIP(1): - vp->or |= (1 << 7); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1; - dst.index = NV40_VP_INST_DEST_FOGC; - break; - case NV40_VP_INST_DEST_CLIP(2): - vp->or |= (1 << 8); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2; - dst.index = NV40_VP_INST_DEST_FOGC; - break; - case NV40_VP_INST_DEST_CLIP(3): - vp->or |= (1 << 9); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3; - dst.index = NV40_VP_INST_DEST_PSZ; - break; - case NV40_VP_INST_DEST_CLIP(4): - vp->or |= (1 << 10); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4; - dst.index = NV40_VP_INST_DEST_PSZ; - break; - case NV40_VP_INST_DEST_CLIP(5): - vp->or |= (1 << 11); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5; - dst.index = NV40_VP_INST_DEST_PSZ; - break; - default: - break; - } - - hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); - if (slot == 0) { - hw[0] |= NV40_VP_INST_VEC_RESULT; - hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); - } else { - hw[3] |= NV40_VP_INST_SCA_RESULT; - hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; - } - break; - default: - assert(0); - } -} - -static void -nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op, - struct nvfx_sreg dst, int mask, - struct nvfx_sreg s0, struct nvfx_sreg s1, - struct nvfx_sreg s2) -{ - struct nvfx_vertex_program *vp = vpc->vp; - uint32_t *hw; - - vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); - vpc->vpi = &vp->insns[vp->nr_insns - 1]; - memset(vpc->vpi, 0, sizeof(*vpc->vpi)); - vpc->vpi->const_index = -1; - - hw = vpc->vpi->data; - - hw[0] |= (NVFX_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT); - hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) | - (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) | - (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) | - (3 << NV40_VP_INST_COND_SWZ_W_SHIFT)); - - if (slot == 0) { - hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); - hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; - hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); - } else { - hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); - hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20)); - hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); - } - - emit_dst(vpc, hw, slot, dst); - emit_src(vpc, hw, 0, s0); - emit_src(vpc, hw, 1, s1); - emit_src(vpc, hw, 2, s2); -} - -static INLINE struct nvfx_sreg -tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { - struct nvfx_sreg src; - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - src = nvfx_sr(NVFXSR_INPUT, fsrc->Register.Index); - break; - case TGSI_FILE_CONSTANT: - src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); - break; - case TGSI_FILE_IMMEDIATE: - src = vpc->imm[fsrc->Register.Index]; - break; - case TGSI_FILE_TEMPORARY: - src = vpc->r_temp[fsrc->Register.Index]; - break; - default: - NOUVEAU_ERR("bad src file\n"); - break; - } - - src.abs = fsrc->Register.Absolute; - src.negate = fsrc->Register.Negate; - src.swz[0] = fsrc->Register.SwizzleX; - src.swz[1] = fsrc->Register.SwizzleY; - src.swz[2] = fsrc->Register.SwizzleZ; - src.swz[3] = fsrc->Register.SwizzleW; - return src; -} - -static INLINE struct nvfx_sreg -tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) { - struct nvfx_sreg dst; - - switch (fdst->Register.File) { - case TGSI_FILE_OUTPUT: - dst = vpc->r_result[fdst->Register.Index]; - break; - case TGSI_FILE_TEMPORARY: - dst = vpc->r_temp[fdst->Register.Index]; - break; - case TGSI_FILE_ADDRESS: - dst = vpc->r_address[fdst->Register.Index]; - break; - default: - NOUVEAU_ERR("bad dst file\n"); - break; - } - - return dst; -} - -static INLINE int -tgsi_mask(uint tgsi) -{ - int mask = 0; - - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; - return mask; -} - -static boolean -src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc, - struct nvfx_sreg *src) -{ - const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); - struct nvfx_sreg tgsi = tgsi_src(vpc, fsrc); - uint mask = 0; - uint c; - - for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - mask |= tgsi_mask(1 << c); - break; - default: - assert(0); - } - } - - if (mask == MASK_ALL) - return TRUE; - - *src = temp(vpc); - - if (mask) - arith(vpc, VEC, MOV, *src, mask, tgsi, none, none); - - return FALSE; -} - -static boolean -nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, - const struct tgsi_full_instruction *finst) -{ - struct nvfx_sreg src[3], dst, tmp; - struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); - int mask; - int ai = -1, ci = -1, ii = -1; - int i; - - if (finst->Instruction.Opcode == TGSI_OPCODE_END) - return TRUE; - - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { - src[i] = tgsi_src(vpc, fsrc); - } - } - - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - case TGSI_FILE_CONSTANT: - case TGSI_FILE_TEMPORARY: - if (!src_native_swz(vpc, fsrc, &src[i])) - continue; - break; - default: - break; - } - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->Register.Index) { - ai = fsrc->Register.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, VEC, MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - case TGSI_FILE_CONSTANT: - if ((ci == -1 && ii == -1) || - ci == fsrc->Register.Index) { - ci = fsrc->Register.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, VEC, MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - case TGSI_FILE_IMMEDIATE: - if ((ci == -1 && ii == -1) || - ii == fsrc->Register.Index) { - ii = fsrc->Register.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, VEC, MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - case TGSI_FILE_TEMPORARY: - /* handled above */ - break; - default: - NOUVEAU_ERR("bad src file\n"); - return FALSE; - } - } - - dst = tgsi_dst(vpc, &finst->Dst[0]); - mask = tgsi_mask(finst->Dst[0].Register.WriteMask); - - switch (finst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - arith(vpc, VEC, MOV, dst, mask, abs(src[0]), none, none); - break; - case TGSI_OPCODE_ADD: - arith(vpc, VEC, ADD, dst, mask, src[0], none, src[1]); - break; - case TGSI_OPCODE_ARL: - arith(vpc, VEC, ARL, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_DP3: - arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DP4: - arith(vpc, VEC, DP4, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DPH: - arith(vpc, VEC, DPH, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DST: - arith(vpc, VEC, DST, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_EX2: - arith(vpc, SCA, EX2, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_EXP: - arith(vpc, SCA, EXP, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_FLR: - arith(vpc, VEC, FLR, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FRC: - arith(vpc, VEC, FRC, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_LG2: - arith(vpc, SCA, LG2, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_LIT: - arith(vpc, SCA, LIT, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_LOG: - arith(vpc, SCA, LOG, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_MAD: - arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]); - break; - case TGSI_OPCODE_MAX: - arith(vpc, VEC, MAX, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MIN: - arith(vpc, VEC, MIN, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MOV: - arith(vpc, VEC, MOV, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_MUL: - arith(vpc, VEC, MUL, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_POW: - tmp = temp(vpc); - arith(vpc, SCA, LG2, tmp, MASK_X, none, none, - swz(src[0], X, X, X, X)); - arith(vpc, VEC, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), - swz(src[1], X, X, X, X), none); - arith(vpc, SCA, EX2, dst, mask, none, none, - swz(tmp, X, X, X, X)); - break; - case TGSI_OPCODE_RCP: - arith(vpc, SCA, RCP, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_RET: - break; - case TGSI_OPCODE_RSQ: - arith(vpc, SCA, RSQ, dst, mask, none, none, abs(src[0])); - break; - case TGSI_OPCODE_SGE: - arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SLT: - arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SUB: - arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1])); - break; - case TGSI_OPCODE_XPD: - tmp = temp(vpc); - arith(vpc, VEC, MUL, tmp, mask, - swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(vpc, VEC, MAD, dst, (mask & ~MASK_W), - swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), - neg(tmp)); - break; - default: - NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); - return FALSE; - } - - release_temps(vpc); - return TRUE; -} - -static boolean -nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, - const struct tgsi_full_declaration *fdec) -{ - unsigned idx = fdec->Range.First; - int hw; - - switch (fdec->Semantic.Name) { - case TGSI_SEMANTIC_POSITION: - hw = NV40_VP_INST_DEST_POS; - vpc->hpos_idx = idx; - break; - case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.Index == 0) { - hw = NV40_VP_INST_DEST_COL0; - } else - if (fdec->Semantic.Index == 1) { - hw = NV40_VP_INST_DEST_COL1; - } else { - NOUVEAU_ERR("bad colour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_BCOLOR: - if (fdec->Semantic.Index == 0) { - hw = NV40_VP_INST_DEST_BFC0; - } else - if (fdec->Semantic.Index == 1) { - hw = NV40_VP_INST_DEST_BFC1; - } else { - NOUVEAU_ERR("bad bcolour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_FOG: - hw = NV40_VP_INST_DEST_FOGC; - break; - case TGSI_SEMANTIC_PSIZE: - hw = NV40_VP_INST_DEST_PSZ; - break; - case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.Index <= 7) { - hw = NV40_VP_INST_DEST_TC(fdec->Semantic.Index); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_EDGEFLAG: - /* not really an error just a fallback */ - NOUVEAU_ERR("cannot handle edgeflag output\n"); - return FALSE; - default: - NOUVEAU_ERR("bad output semantic\n"); - return FALSE; - } - - vpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw); - return TRUE; -} - -static boolean -nv40_vertprog_prepare(struct nv40_vpc *vpc) -{ - struct tgsi_parse_context p; - int high_temp = -1, high_addr = -1, nr_imm = 0, i; - - tgsi_parse_init(&p, vpc->vp->pipe.tokens); - while (!tgsi_parse_end_of_tokens(&p)) { - const union tgsi_full_token *tok = &p.FullToken; - - tgsi_parse_token(&p); - switch(tok->Token.Type) { - case TGSI_TOKEN_TYPE_IMMEDIATE: - nr_imm++; - break; - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *fdec; - - fdec = &p.FullToken.FullDeclaration; - switch (fdec->Declaration.File) { - case TGSI_FILE_TEMPORARY: - if (fdec->Range.Last > high_temp) { - high_temp = - fdec->Range.Last; - } - break; -#if 0 /* this would be nice.. except gallium doesn't track it */ - case TGSI_FILE_ADDRESS: - if (fdec->Range.Last > high_addr) { - high_addr = - fdec->Range.Last; - } - break; -#endif - case TGSI_FILE_OUTPUT: - if (!nv40_vertprog_parse_decl_output(vpc, fdec)) - return FALSE; - break; - default: - break; - } - } - break; -#if 1 /* yay, parse instructions looking for address regs instead */ - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *finst; - const struct tgsi_full_dst_register *fdst; - - finst = &p.FullToken.FullInstruction; - fdst = &finst->Dst[0]; - - if (fdst->Register.File == TGSI_FILE_ADDRESS) { - if (fdst->Register.Index > high_addr) - high_addr = fdst->Register.Index; - } - - } - break; -#endif - default: - break; - } - } - tgsi_parse_free(&p); - - if (nr_imm) { - vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_sreg)); - assert(vpc->imm); - } - - if (++high_temp) { - vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg)); - for (i = 0; i < high_temp; i++) - vpc->r_temp[i] = temp(vpc); - } - - if (++high_addr) { - vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_sreg)); - for (i = 0; i < high_addr; i++) - vpc->r_address[i] = temp(vpc); - } - - vpc->r_temps_discard = 0; - return TRUE; -} - -static void -nv40_vertprog_translate(struct nvfx_context *nvfx, - struct nvfx_vertex_program *vp) -{ - struct tgsi_parse_context parse; - struct nv40_vpc *vpc = NULL; - struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); - int i; - - vpc = CALLOC(1, sizeof(struct nv40_vpc)); - if (!vpc) - return; - vpc->vp = vp; - - if (!nv40_vertprog_prepare(vpc)) { - FREE(vpc); - return; - } - - /* Redirect post-transform vertex position to a temp if user clip - * planes are enabled. We need to append code to the vtxprog - * to handle clip planes later. - */ - if (vp->ucp.nr) { - vpc->r_result[vpc->hpos_idx] = temp(vpc); - vpc->r_temps_discard = 0; - } - - tgsi_parse_init(&parse, vp->pipe.tokens); - - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - const struct tgsi_full_immediate *imm; - - imm = &parse.FullToken.FullImmediate; - assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); - assert(imm->Immediate.NrTokens == 4 + 1); - vpc->imm[vpc->nr_imm++] = - constant(vpc, -1, - imm->u[0].Float, - imm->u[1].Float, - imm->u[2].Float, - imm->u[3].Float); - } - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *finst; - finst = &parse.FullToken.FullInstruction; - if (!nv40_vertprog_parse_instruction(vpc, finst)) - goto out_err; - } - break; - default: - break; - } - } - - /* Write out HPOS if it was redirected to a temp earlier */ - if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) { - struct nvfx_sreg hpos = nvfx_sr(NVFXSR_OUTPUT, - NV40_VP_INST_DEST_POS); - struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx]; - - arith(vpc, VEC, MOV, hpos, MASK_ALL, htmp, none, none); - } - - /* Insert code to handle user clip planes */ - for (i = 0; i < vp->ucp.nr; i++) { - struct nvfx_sreg cdst = nvfx_sr(NVFXSR_OUTPUT, - NV40_VP_INST_DEST_CLIP(i)); - struct nvfx_sreg ceqn = constant(vpc, -1, - nvfx->clip.ucp[i][0], - nvfx->clip.ucp[i][1], - nvfx->clip.ucp[i][2], - nvfx->clip.ucp[i][3]); - struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx]; - unsigned mask; - - switch (i) { - case 0: case 3: mask = MASK_Y; break; - case 1: case 4: mask = MASK_Z; break; - case 2: case 5: mask = MASK_W; break; - default: - NOUVEAU_ERR("invalid clip dist #%d\n", i); - goto out_err; - } - - arith(vpc, VEC, DP4, cdst, mask, htmp, ceqn, none); - } - - vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; - vp->translated = TRUE; -out_err: - tgsi_parse_free(&parse); - if (vpc->r_temp) - FREE(vpc->r_temp); - if (vpc->r_address) - FREE(vpc->r_address); - if (vpc->imm) - FREE(vpc->imm); - FREE(vpc); -} - -static boolean -nv40_vertprog_validate(struct nvfx_context *nvfx) -{ - struct pipe_screen *pscreen = nvfx->pipe.screen; - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - struct nvfx_vertex_program *vp; - struct pipe_buffer *constbuf; - boolean upload_code = FALSE, upload_data = FALSE; - int i; - - if (nvfx->render_mode == HW) { - vp = nvfx->vertprog; - constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX]; - - if ((nvfx->dirty & NVFX_NEW_UCP) || - memcmp(&nvfx->clip, &vp->ucp, sizeof(vp->ucp))) { - nv40_vertprog_destroy(nvfx, vp); - memcpy(&vp->ucp, &nvfx->clip, sizeof(vp->ucp)); - } - } else { - vp = nvfx->swtnl.vertprog; - constbuf = NULL; - } - - /* Translate TGSI shader into hw bytecode */ - if (vp->translated) - goto check_gpu_resources; - - nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG; - nv40_vertprog_translate(nvfx, vp); - if (!vp->translated) { - nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG; - return FALSE; - } - -check_gpu_resources: - /* Allocate hw vtxprog exec slots */ - if (!vp->exec) { - struct nouveau_resource *heap = nvfx->screen->vp_exec_heap; - struct nouveau_stateobj *so; - uint vplen = vp->nr_insns; - - if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) { - while (heap->next && heap->size < vplen) { - struct nvfx_vertex_program *evict; - - evict = heap->next->priv; - nouveau_resource_free(&evict->exec); - } - - if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) - assert(0); - } - - so = so_new(3, 4, 0); - so_method(so, eng3d, NV34TCL_VP_START_FROM_ID, 1); - so_data (so, vp->exec->start); - so_method(so, eng3d, NV40TCL_VP_ATTRIB_EN, 2); - so_data (so, vp->ir); - so_data (so, vp->or); - so_method(so, eng3d, NV34TCL_VP_CLIP_PLANES_ENABLE, 1); - so_data (so, vp->clip_ctrl); - so_ref(so, &vp->so); - so_ref(NULL, &so); - - upload_code = TRUE; - } - - /* Allocate hw vtxprog const slots */ - if (vp->nr_consts && !vp->data) { - struct nouveau_resource *heap = nvfx->screen->vp_data_heap; - - if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) { - while (heap->next && heap->size < vp->nr_consts) { - struct nvfx_vertex_program *evict; - - evict = heap->next->priv; - nouveau_resource_free(&evict->data); - } - - if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) - assert(0); - } - - /*XXX: handle this some day */ - assert(vp->data->start >= vp->data_start_min); - - upload_data = TRUE; - if (vp->data_start != vp->data->start) - upload_code = TRUE; - } - - /* If exec or data segments moved we need to patch the program to - * fixup offsets and register IDs. - */ - if (vp->exec_start != vp->exec->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nvfx_vertex_program_exec *vpi = &vp->insns[i]; - - if (vpi->has_branch_offset) { - assert(0); - } - } - - vp->exec_start = vp->exec->start; - } - - if (vp->nr_consts && vp->data_start != vp->data->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nvfx_vertex_program_exec *vpi = &vp->insns[i]; - - if (vpi->const_index >= 0) { - vpi->data[1] &= ~NV40_VP_INST_CONST_SRC_MASK; - vpi->data[1] |= - (vpi->const_index + vp->data->start) << - NV40_VP_INST_CONST_SRC_SHIFT; - - } - } - - vp->data_start = vp->data->start; - } - - /* Update + Upload constant values */ - if (vp->nr_consts) { - float *map = NULL; - - if (constbuf) { - map = pipe_buffer_map(pscreen, constbuf, - PIPE_BUFFER_USAGE_CPU_READ); - } - - for (i = 0; i < vp->nr_consts; i++) { - struct nvfx_vertex_program_data *vpd = &vp->consts[i]; - - if (vpd->index >= 0) { - if (!upload_data && - !memcmp(vpd->value, &map[vpd->index * 4], - 4 * sizeof(float))) - continue; - memcpy(vpd->value, &map[vpd->index * 4], - 4 * sizeof(float)); - } - - BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (chan, i + vp->data->start); - OUT_RINGp (chan, (uint32_t *)vpd->value, 4); - } - - if (constbuf) - pscreen->buffer_unmap(pscreen, constbuf); - } - - /* Upload vtxprog */ - if (upload_code) { -#if 0 - for (i = 0; i < vp->nr_insns; i++) { - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]); - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]); - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]); - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); - } -#endif - BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (chan, vp->exec->start); - for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (chan, vp->insns[i].data, 4); - } - } - - if (vp->so != nvfx->state.hw[NVFX_STATE_VERTPROG]) { - so_ref(vp->so, &nvfx->state.hw[NVFX_STATE_VERTPROG]); - return TRUE; - } - - return FALSE; -} - -void -nv40_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp) -{ - vp->translated = FALSE; - - if (vp->nr_insns) { - FREE(vp->insns); - vp->insns = NULL; - vp->nr_insns = 0; - } - - if (vp->nr_consts) { - FREE(vp->consts); - vp->consts = NULL; - vp->nr_consts = 0; - } - - nouveau_resource_free(&vp->exec); - vp->exec_start = 0; - nouveau_resource_free(&vp->data); - vp->data_start = 0; - vp->data_start_min = 0; - - vp->ir = vp->or = vp->clip_ctrl = 0; - so_ref(NULL, &vp->so); -} - -struct nvfx_state_entry nv40_state_vertprog = { - .validate = nv40_vertprog_validate, - .dirty = { - .pipe = NVFX_NEW_VERTPROG | NVFX_NEW_UCP, - .hw = NVFX_STATE_VERTPROG, - } -}; - diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index 6c4ee17493d..a665c7e2631 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -20,6 +20,7 @@ C_SOURCES = \ nvfx_state_zsa.c \ nvfx_surface.c \ nvfx_transfer.c \ - nvfx_vbo.c + nvfx_vbo.c \ + nvfx_vertprog.c include ../../Makefile.template diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index ab1a1fbbe9d..9a4b4631b54 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -190,6 +190,7 @@ extern struct nvfx_state_entry nvfx_state_scissor; extern struct nvfx_state_entry nvfx_state_sr; extern struct nvfx_state_entry nvfx_state_stipple; extern struct nvfx_state_entry nvfx_state_vbo; +extern struct nvfx_state_entry nvfx_state_vertprog; extern struct nvfx_state_entry nvfx_state_viewport; extern struct nvfx_state_entry nvfx_state_vtxfmt; extern struct nvfx_state_entry nvfx_state_zsa; @@ -234,8 +235,7 @@ extern void nvfx_draw_elements(struct pipe_context *pipe, unsigned count); /* nvfx_vertprog.c */ -extern void nv30_vertprog_destroy(struct nvfx_context *, - struct nvfx_vertex_program *); -extern void nv40_vertprog_destroy(struct nvfx_context *, +extern void nvfx_vertprog_destroy(struct nvfx_context *, struct nvfx_vertex_program *); + #endif diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c index 76780dc4f5d..7e138afc717 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.c +++ b/src/gallium/drivers/nvfx/nvfx_state.c @@ -370,10 +370,7 @@ nvfx_vp_state_delete(struct pipe_context *pipe, void *hwcso) struct nvfx_vertex_program *vp = hwcso; draw_delete_vertex_shader(nvfx->draw, vp->draw); - if(!nvfx->is_nv4x) - nv30_vertprog_destroy(nvfx, vp); - else - nv40_vertprog_destroy(nvfx, vp); + nvfx_vertprog_destroy(nvfx, vp); FREE((void*)vp->pipe.tokens); FREE(vp); } diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index 88b86f9fb50..fcbf8310501 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -11,7 +11,7 @@ static struct nvfx_state_entry *name##_render_states[] = { \ &nvfx_state_stipple, \ &nvfx_state_fragprog, \ &nvxx##_state_fragtex, \ - &nvxx##_state_vertprog, \ + &nvfx_state_vertprog, \ &nvfx_state_blend, \ &nvfx_state_blend_colour, \ &nvfx_state_zsa, \ diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c new file mode 100644 index 00000000000..730361a9827 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -0,0 +1,1108 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_util.h" + +#include "nvfx_context.h" +#include "nvfx_state.h" + +/* TODO (at least...): + * 1. Indexed consts + ARL + * 3. NV_vp11, NV_vp2, NV_vp3 features + * - extra arith opcodes + * - branching + * - texture sampling + * - indexed attribs + * - indexed results + * 4. bugs + */ + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 8 +#define MASK_Y 4 +#define MASK_Z 2 +#define MASK_W 1 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE 0 +#define DEF_CTEST 0 +#include "nv30/nv30_shader.h" +#include "nv40/nv40_shader.h" + +#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nvfx_sr_neg((s)) +#define abs(s) nvfx_sr_abs((s)) + +#define NVFX_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) + +struct nvfx_vpc { + struct nvfx_vertex_program *vp; + + struct nvfx_vertex_program_exec *vpi; + + unsigned r_temps; + unsigned r_temps_discard; + struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nvfx_sreg *r_address; + struct nvfx_sreg *r_temp; + + struct nvfx_sreg *imm; + unsigned nr_imm; + + unsigned hpos_idx; +}; + +static struct nvfx_sreg +temp(struct nvfx_vpc *vpc) +{ + int idx = ffs(~vpc->r_temps) - 1; + + if (idx < 0) { + NOUVEAU_ERR("out of temps!!\n"); + assert(0); + return nvfx_sr(NVFXSR_TEMP, 0); + } + + vpc->r_temps |= (1 << idx); + vpc->r_temps_discard |= (1 << idx); + return nvfx_sr(NVFXSR_TEMP, idx); +} + +static INLINE void +release_temps(struct nvfx_vpc *vpc) +{ + vpc->r_temps &= ~vpc->r_temps_discard; + vpc->r_temps_discard = 0; +} + +static struct nvfx_sreg +constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w) +{ + struct nvfx_vertex_program *vp = vpc->vp; + struct nvfx_vertex_program_data *vpd; + int idx; + + if (pipe >= 0) { + for (idx = 0; idx < vp->nr_consts; idx++) { + if (vp->consts[idx].index == pipe) + return nvfx_sr(NVFXSR_CONST, idx); + } + } + + idx = vp->nr_consts++; + vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); + vpd = &vp->consts[idx]; + + vpd->index = pipe; + vpd->value[0] = x; + vpd->value[1] = y; + vpd->value[2] = z; + vpd->value[3] = w; + return nvfx_sr(NVFXSR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nvfx_vp_arith(nvfx, (cc), NVFX_VP_INST_SLOT_##s, NVFX_VP_INST_##s##_OP_##o, (d), (m), (s0), (s1), (s2)) + +static void +emit_src(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int pos, struct nvfx_sreg src) +{ + struct nvfx_vertex_program *vp = vpc->vp; + uint32_t sr = 0; + + switch (src.type) { + case NVFXSR_TEMP: + sr |= (NVFX_VP(SRC_REG_TYPE_TEMP) << NVFX_VP(SRC_REG_TYPE_SHIFT)); + sr |= (src.index << NVFX_VP(SRC_TEMP_SRC_SHIFT)); + break; + case NVFXSR_INPUT: + sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) << + NVFX_VP(SRC_REG_TYPE_SHIFT)); + vp->ir |= (1 << src.index); + hw[1] |= (src.index << NVFX_VP(INST_INPUT_SRC_SHIFT)); + break; + case NVFXSR_CONST: + sr |= (NVFX_VP(SRC_REG_TYPE_CONST) << + NVFX_VP(SRC_REG_TYPE_SHIFT)); + assert(vpc->vpi->const_index == -1 || + vpc->vpi->const_index == src.index); + vpc->vpi->const_index = src.index; + break; + case NVFXSR_NONE: + sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) << + NVFX_VP(SRC_REG_TYPE_SHIFT)); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NVFX_VP(SRC_NEGATE); + + if (src.abs) + hw[0] |= (1 << (21 + pos)); + + sr |= ((src.swz[0] << NVFX_VP(SRC_SWZ_X_SHIFT)) | + (src.swz[1] << NVFX_VP(SRC_SWZ_Y_SHIFT)) | + (src.swz[2] << NVFX_VP(SRC_SWZ_Z_SHIFT)) | + (src.swz[3] << NVFX_VP(SRC_SWZ_W_SHIFT))); + + switch (pos) { + case 0: + hw[1] |= ((sr & NVFX_VP(SRC0_HIGH_MASK)) >> + NVFX_VP(SRC0_HIGH_SHIFT)) << NVFX_VP(INST_SRC0H_SHIFT); + hw[2] |= (sr & NVFX_VP(SRC0_LOW_MASK)) << + NVFX_VP(INST_SRC0L_SHIFT); + break; + case 1: + hw[2] |= sr << NVFX_VP(INST_SRC1_SHIFT); + break; + case 2: + hw[2] |= ((sr & NVFX_VP(SRC2_HIGH_MASK)) >> + NVFX_VP(SRC2_HIGH_SHIFT)) << NVFX_VP(INST_SRC2H_SHIFT); + hw[3] |= (sr & NVFX_VP(SRC2_LOW_MASK)) << + NVFX_VP(INST_SRC2L_SHIFT); + break; + default: + assert(0); + } +} + +static void +emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot, struct nvfx_sreg dst) +{ + struct nvfx_vertex_program *vp = vpc->vp; + + switch (dst.type) { + case NVFXSR_TEMP: + if(!nvfx->is_nv4x) + hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); + else { + hw[3] |= NV40_VP_INST_DEST_MASK; + if (slot == 0) { + hw[0] |= (dst.index << + NV40_VP_INST_VEC_DEST_TEMP_SHIFT); + } else { + hw[3] |= (dst.index << + NV40_VP_INST_SCA_DEST_TEMP_SHIFT); + } + } + break; + case NVFXSR_OUTPUT: + /* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */ + switch (dst.index) { + case NVFX_VP_INST_DEST_CLIP(0): + vp->or |= (1 << 6); + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0; + dst.index = NVFX_VP(INST_DEST_FOGC); + break; + case NVFX_VP_INST_DEST_CLIP(1): + vp->or |= (1 << 7); + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1; + dst.index = NVFX_VP(INST_DEST_FOGC); + break; + case NVFX_VP_INST_DEST_CLIP(2): + vp->or |= (1 << 8); + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2; + dst.index = NVFX_VP(INST_DEST_FOGC); + break; + case NVFX_VP_INST_DEST_CLIP(3): + vp->or |= (1 << 9); + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3; + dst.index = NVFX_VP(INST_DEST_PSZ); + break; + case NVFX_VP_INST_DEST_CLIP(4): + vp->or |= (1 << 10); + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4; + dst.index = NVFX_VP(INST_DEST_PSZ); + break; + case NVFX_VP_INST_DEST_CLIP(5): + vp->or |= (1 << 11); + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5; + dst.index = NVFX_VP(INST_DEST_PSZ); + break; + default: + if(!nvfx->is_nv4x) { + switch (dst.index) { + case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV30_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break; + case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; + case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; + case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; + case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; + case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; + case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; + case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; + case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; + case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + } + } else { + switch (dst.index) { + case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break; + case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; + case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; + case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; + case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; + case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; + case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; + case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; + case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; + case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + } + } + break; + } + + if(!nvfx->is_nv4x) { + hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); + hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + + /*XXX: no way this is entirely correct, someone needs to + * figure out what exactly it is. + */ + hw[3] |= 0x800; + } else { + hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); + if (slot == 0) { + hw[0] |= NV40_VP_INST_VEC_RESULT; + hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + } else { + hw[3] |= NV40_VP_INST_SCA_RESULT; + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + } + } + break; + default: + assert(0); + } +} + +static void +nvfx_vp_arith(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, int slot, int op, + struct nvfx_sreg dst, int mask, + struct nvfx_sreg s0, struct nvfx_sreg s1, + struct nvfx_sreg s2) +{ + struct nvfx_vertex_program *vp = vpc->vp; + uint32_t *hw; + + vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); + vpc->vpi = &vp->insns[vp->nr_insns - 1]; + memset(vpc->vpi, 0, sizeof(*vpc->vpi)); + vpc->vpi->const_index = -1; + + hw = vpc->vpi->data; + + hw[0] |= (NVFX_VP_INST_COND_TR << NVFX_VP(INST_COND_SHIFT)); + hw[0] |= ((0 << NVFX_VP(INST_COND_SWZ_X_SHIFT)) | + (1 << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) | + (2 << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) | + (3 << NVFX_VP(INST_COND_SWZ_W_SHIFT))); + + if(!nvfx->is_nv4x) { + hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); +// hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK); +// hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT)); + + if (dst.type == NVFXSR_OUTPUT) { + if (slot) + hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); + else + hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); + } else { + if (slot) + hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); + else + hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); + } + } else { + if (slot == 0) { + hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); + } else { + hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); + hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20)); + hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); + } + } + + emit_dst(nvfx, vpc, hw, slot, dst); + emit_src(nvfx, vpc, hw, 0, s0); + emit_src(nvfx, vpc, hw, 1, s1); + emit_src(nvfx, vpc, hw, 2, s2); +} + +static INLINE struct nvfx_sreg +tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) { + struct nvfx_sreg src; + + switch (fsrc->Register.File) { + case TGSI_FILE_INPUT: + src = nvfx_sr(NVFXSR_INPUT, fsrc->Register.Index); + break; + case TGSI_FILE_CONSTANT: + src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); + break; + case TGSI_FILE_IMMEDIATE: + src = vpc->imm[fsrc->Register.Index]; + break; + case TGSI_FILE_TEMPORARY: + src = vpc->r_temp[fsrc->Register.Index]; + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; + return src; +} + +static INLINE struct nvfx_sreg +tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) { + struct nvfx_sreg dst; + + switch (fdst->Register.File) { + case TGSI_FILE_OUTPUT: + dst = vpc->r_result[fdst->Register.Index]; + break; + case TGSI_FILE_TEMPORARY: + dst = vpc->r_temp[fdst->Register.Index]; + break; + case TGSI_FILE_ADDRESS: + dst = vpc->r_address[fdst->Register.Index]; + break; + default: + NOUVEAU_ERR("bad dst file\n"); + break; + } + + return dst; +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +src_native_swz(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc, + struct nvfx_sreg *src) +{ + const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); + struct nvfx_sreg tgsi = tgsi_src(vpc, fsrc); + uint mask = 0; + uint c; + + for (c = 0; c < 4; c++) { + switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: + mask |= tgsi_mask(1 << c); + break; + default: + assert(0); + } + } + + if (mask == MASK_ALL) + return TRUE; + + *src = temp(vpc); + + if (mask) + arith(vpc, VEC, MOV, *src, mask, tgsi, none, none); + + return FALSE; +} + +static boolean +nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, + const struct tgsi_full_instruction *finst) +{ + struct nvfx_sreg src[3], dst, tmp; + struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); + int mask; + int ai = -1, ci = -1, ii = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->Src[i]; + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(vpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->Src[i]; + + switch (fsrc->Register.File) { + case TGSI_FILE_INPUT: + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + if (!src_native_swz(nvfx, vpc, fsrc, &src[i])) + continue; + break; + default: + break; + } + + switch (fsrc->Register.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, VEC, MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_CONSTANT: + if ((ci == -1 && ii == -1) || + ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, VEC, MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_IMMEDIATE: + if ((ci == -1 && ii == -1) || + ii == fsrc->Register.Index) { + ii = fsrc->Register.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, VEC, MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(vpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(vpc, VEC, MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(vpc, VEC, ADD, dst, mask, src[0], none, src[1]); + break; + case TGSI_OPCODE_ARL: + arith(vpc, VEC, ARL, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DP3: + arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DP4: + arith(vpc, VEC, DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + arith(vpc, VEC, DPH, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DST: + arith(vpc, VEC, DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(vpc, SCA, EX2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_EXP: + arith(vpc, SCA, EXP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_FLR: + arith(vpc, VEC, FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(vpc, VEC, FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_LG2: + arith(vpc, SCA, LG2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LIT: + arith(vpc, SCA, LIT, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LOG: + arith(vpc, SCA, LOG, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_MAD: + arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(vpc, VEC, MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(vpc, VEC, MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(vpc, VEC, MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(vpc, VEC, MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_POW: + tmp = temp(vpc); + arith(vpc, SCA, LG2, tmp, MASK_X, none, none, + swz(src[0], X, X, X, X)); + arith(vpc, VEC, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(vpc, SCA, EX2, dst, mask, none, none, + swz(tmp, X, X, X, X)); + break; + case TGSI_OPCODE_RCP: + arith(vpc, SCA, RCP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_RET: + break; + case TGSI_OPCODE_RSQ: + arith(vpc, SCA, RSQ, dst, mask, none, none, abs(src[0])); + break; + case TGSI_OPCODE_SGE: + arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SGT: + arith(vpc, VEC, SGT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1])); + break; + case TGSI_OPCODE_XPD: + tmp = temp(vpc); + arith(vpc, VEC, MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(vpc, VEC, MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + release_temps(vpc); + return TRUE; +} + +static boolean +nvfx_vertprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, + const struct tgsi_full_declaration *fdec) +{ + unsigned idx = fdec->Range.First; + int hw; + + switch (fdec->Semantic.Name) { + case TGSI_SEMANTIC_POSITION: + hw = NVFX_VP(INST_DEST_POS); + vpc->hpos_idx = idx; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.Index == 0) { + hw = NVFX_VP(INST_DEST_COL0); + } else + if (fdec->Semantic.Index == 1) { + hw = NVFX_VP(INST_DEST_COL1); + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_BCOLOR: + if (fdec->Semantic.Index == 0) { + hw = NVFX_VP(INST_DEST_BFC0); + } else + if (fdec->Semantic.Index == 1) { + hw = NVFX_VP(INST_DEST_BFC1); + } else { + NOUVEAU_ERR("bad bcolour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NVFX_VP(INST_DEST_FOGC); + break; + case TGSI_SEMANTIC_PSIZE: + hw = NVFX_VP(INST_DEST_PSZ); + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.Index <= 7) { + hw = NVFX_VP(INST_DEST_TC(fdec->Semantic.Index)); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_EDGEFLAG: + /* not really an error just a fallback */ + NOUVEAU_ERR("cannot handle edgeflag output\n"); + return FALSE; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + vpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw); + return TRUE; +} + +static boolean +nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc) +{ + struct tgsi_parse_context p; + int high_temp = -1, high_addr = -1, nr_imm = 0, i; + + tgsi_parse_init(&p, vpc->vp->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch(tok->Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + nr_imm++; + break; + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + + fdec = &p.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_TEMPORARY: + if (fdec->Range.Last > high_temp) { + high_temp = + fdec->Range.Last; + } + break; +#if 0 /* this would be nice.. except gallium doesn't track it */ + case TGSI_FILE_ADDRESS: + if (fdec->Range.Last > high_addr) { + high_addr = + fdec->Range.Last; + } + break; +#endif + case TGSI_FILE_OUTPUT: + if (!nvfx_vertprog_parse_decl_output(nvfx, vpc, fdec)) + return FALSE; + break; + default: + break; + } + } + break; +#if 1 /* yay, parse instructions looking for address regs instead */ + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + const struct tgsi_full_dst_register *fdst; + + finst = &p.FullToken.FullInstruction; + fdst = &finst->Dst[0]; + + if (fdst->Register.File == TGSI_FILE_ADDRESS) { + if (fdst->Register.Index > high_addr) + high_addr = fdst->Register.Index; + } + + } + break; +#endif + default: + break; + } + } + tgsi_parse_free(&p); + + if (nr_imm) { + vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_sreg)); + assert(vpc->imm); + } + + if (++high_temp) { + vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg)); + for (i = 0; i < high_temp; i++) + vpc->r_temp[i] = temp(vpc); + } + + if (++high_addr) { + vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_sreg)); + for (i = 0; i < high_addr; i++) + vpc->r_address[i] = temp(vpc); + } + + vpc->r_temps_discard = 0; + return TRUE; +} + +static void +nvfx_vertprog_translate(struct nvfx_context *nvfx, + struct nvfx_vertex_program *vp) +{ + struct tgsi_parse_context parse; + struct nvfx_vpc *vpc = NULL; + struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); + int i; + + vpc = CALLOC(1, sizeof(struct nvfx_vpc)); + if (!vpc) + return; + vpc->vp = vp; + + if (!nvfx_vertprog_prepare(nvfx, vpc)) { + FREE(vpc); + return; + } + + /* Redirect post-transform vertex position to a temp if user clip + * planes are enabled. We need to append code to the vtxprog + * to handle clip planes later. + */ + if (vp->ucp.nr) { + vpc->r_result[vpc->hpos_idx] = temp(vpc); + vpc->r_temps_discard = 0; + } + + tgsi_parse_init(&parse, vp->pipe.tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm; + + imm = &parse.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); + assert(imm->Immediate.NrTokens == 4 + 1); + vpc->imm[vpc->nr_imm++] = + constant(vpc, -1, + imm->u[0].Float, + imm->u[1].Float, + imm->u[2].Float, + imm->u[3].Float); + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + finst = &parse.FullToken.FullInstruction; + if (!nvfx_vertprog_parse_instruction(nvfx, vpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + /* Write out HPOS if it was redirected to a temp earlier */ + if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) { + struct nvfx_sreg hpos = nvfx_sr(NVFXSR_OUTPUT, + NVFX_VP(INST_DEST_POS)); + struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx]; + + arith(vpc, VEC, MOV, hpos, MASK_ALL, htmp, none, none); + } + + /* Insert code to handle user clip planes */ + for (i = 0; i < vp->ucp.nr; i++) { + struct nvfx_sreg cdst = nvfx_sr(NVFXSR_OUTPUT, + NVFX_VP_INST_DEST_CLIP(i)); + struct nvfx_sreg ceqn = constant(vpc, -1, + nvfx->clip.ucp[i][0], + nvfx->clip.ucp[i][1], + nvfx->clip.ucp[i][2], + nvfx->clip.ucp[i][3]); + struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx]; + unsigned mask; + + switch (i) { + case 0: case 3: mask = MASK_Y; break; + case 1: case 4: mask = MASK_Z; break; + case 2: case 5: mask = MASK_W; break; + default: + NOUVEAU_ERR("invalid clip dist #%d\n", i); + goto out_err; + } + + arith(vpc, VEC, DP4, cdst, mask, htmp, ceqn, none); + } + + vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; + vp->translated = TRUE; +out_err: + tgsi_parse_free(&parse); + if (vpc->r_temp) + FREE(vpc->r_temp); + if (vpc->r_address) + FREE(vpc->r_address); + if (vpc->imm) + FREE(vpc->imm); + FREE(vpc); +} + +static boolean +nvfx_vertprog_validate(struct nvfx_context *nvfx) +{ + struct pipe_screen *pscreen = nvfx->pipe.screen; + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + struct nvfx_vertex_program *vp; + struct pipe_buffer *constbuf; + boolean upload_code = FALSE, upload_data = FALSE; + int i; + + if (nvfx->render_mode == HW) { + vp = nvfx->vertprog; + constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX]; + + if ((nvfx->dirty & NVFX_NEW_UCP) || + memcmp(&nvfx->clip, &vp->ucp, sizeof(vp->ucp))) { + nvfx_vertprog_destroy(nvfx, vp); + memcpy(&vp->ucp, &nvfx->clip, sizeof(vp->ucp)); + } + } else { + vp = nvfx->swtnl.vertprog; + constbuf = NULL; + } + + /* Translate TGSI shader into hw bytecode */ + if (vp->translated) + goto check_gpu_resources; + + nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG; + nvfx_vertprog_translate(nvfx, vp); + if (!vp->translated) { + nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG; + return FALSE; + } + +check_gpu_resources: + /* Allocate hw vtxprog exec slots */ + if (!vp->exec) { + struct nouveau_resource *heap = nvfx->screen->vp_exec_heap; + struct nouveau_stateobj *so; + uint vplen = vp->nr_insns; + + if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) { + while (heap->next && heap->size < vplen) { + struct nvfx_vertex_program *evict; + + evict = heap->next->priv; + nouveau_resource_free(&evict->exec); + } + + if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) + assert(0); + } + + so = so_new(3, 4, 0); + so_method(so, eng3d, NV34TCL_VP_START_FROM_ID, 1); + so_data (so, vp->exec->start); + if(nvfx->is_nv4x) { + so_method(so, eng3d, NV40TCL_VP_ATTRIB_EN, 2); + so_data (so, vp->ir); + so_data (so, vp->or); + } + so_method(so, eng3d, NV34TCL_VP_CLIP_PLANES_ENABLE, 1); + so_data (so, vp->clip_ctrl); + so_ref(so, &vp->so); + so_ref(NULL, &so); + + upload_code = TRUE; + } + + /* Allocate hw vtxprog const slots */ + if (vp->nr_consts && !vp->data) { + struct nouveau_resource *heap = nvfx->screen->vp_data_heap; + + if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) { + while (heap->next && heap->size < vp->nr_consts) { + struct nvfx_vertex_program *evict; + + evict = heap->next->priv; + nouveau_resource_free(&evict->data); + } + + if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) + assert(0); + } + + /*XXX: handle this some day */ + assert(vp->data->start >= vp->data_start_min); + + upload_data = TRUE; + if (vp->data_start != vp->data->start) + upload_code = TRUE; + } + + /* If exec or data segments moved we need to patch the program to + * fixup offsets and register IDs. + */ + if (vp->exec_start != vp->exec->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nvfx_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->has_branch_offset) { + assert(0); + } + } + + vp->exec_start = vp->exec->start; + } + + if (vp->nr_consts && vp->data_start != vp->data->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nvfx_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->const_index >= 0) { + vpi->data[1] &= ~NVFX_VP(INST_CONST_SRC_MASK); + vpi->data[1] |= + (vpi->const_index + vp->data->start) << + NVFX_VP(INST_CONST_SRC_SHIFT); + + } + } + + vp->data_start = vp->data->start; + } + + /* Update + Upload constant values */ + if (vp->nr_consts) { + float *map = NULL; + + if (constbuf) { + map = pipe_buffer_map(pscreen, constbuf, + PIPE_BUFFER_USAGE_CPU_READ); + } + + for (i = 0; i < vp->nr_consts; i++) { + struct nvfx_vertex_program_data *vpd = &vp->consts[i]; + + if (vpd->index >= 0) { + if (!upload_data && + !memcmp(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float))) + continue; + memcpy(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float)); + } + + BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (chan, i + vp->data->start); + OUT_RINGp (chan, (uint32_t *)vpd->value, 4); + } + + if (constbuf) + pipe_buffer_unmap(pscreen, constbuf); + } + + /* Upload vtxprog */ + if (upload_code) { +#if 0 + for (i = 0; i < vp->nr_insns; i++) { + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); + } +#endif + BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (chan, vp->exec->start); + for (i = 0; i < vp->nr_insns; i++) { + BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (chan, vp->insns[i].data, 4); + } + } + + if (vp->so != nvfx->state.hw[NVFX_STATE_VERTPROG]) { + so_ref(vp->so, &nvfx->state.hw[NVFX_STATE_VERTPROG]); + return TRUE; + } + + return FALSE; +} + +void +nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp) +{ + vp->translated = FALSE; + + if (vp->nr_insns) { + FREE(vp->insns); + vp->insns = NULL; + vp->nr_insns = 0; + } + + if (vp->nr_consts) { + FREE(vp->consts); + vp->consts = NULL; + vp->nr_consts = 0; + } + + nouveau_resource_free(&vp->exec); + vp->exec_start = 0; + nouveau_resource_free(&vp->data); + vp->data_start = 0; + vp->data_start_min = 0; + + vp->ir = vp->or = vp->clip_ctrl = 0; + so_ref(NULL, &vp->so); +} + +struct nvfx_state_entry nvfx_state_vertprog = { + .validate = nvfx_vertprog_validate, + .dirty = { + .pipe = NVFX_NEW_VERTPROG | NVFX_NEW_UCP, + .hw = NVFX_STATE_VERTPROG, + } +}; -- cgit v1.2.3 From 10f464fc1073e8f3b53dbcf2209a2204f4924094 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 21 Feb 2010 14:31:27 +0100 Subject: nv30, nv40: non-trivially unify nv[34]0_screen.c The files have the same structure but are substantially different. They are unified with appropriate conditionals. --- src/gallium/drivers/nouveau/nouveau_winsys.h | 5 +- src/gallium/drivers/nv30/Makefile | 1 - src/gallium/drivers/nv30/nv30_screen.c | 361 ----------------- src/gallium/drivers/nv40/Makefile | 1 - src/gallium/drivers/nv40/nv40_screen.c | 322 --------------- src/gallium/drivers/nvfx/Makefile | 1 + src/gallium/drivers/nvfx/nvfx_screen.c | 436 +++++++++++++++++++++ .../winsys/drm/nouveau/drm/nouveau_drm_api.c | 4 +- 8 files changed, 439 insertions(+), 692 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_screen.c delete mode 100644 src/gallium/drivers/nv40/nv40_screen.c create mode 100644 src/gallium/drivers/nvfx/nvfx_screen.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index af9ddd558c8..bed014b9ce0 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -27,10 +27,7 @@ #define NOUVEAU_BUFFER_USAGE_NO_RENDER (1 << 19) extern struct pipe_screen * -nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *); - -extern struct pipe_screen * -nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *); +nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *); extern struct pipe_screen * nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *); diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index b71afbbb9aa..ef3ce7eafaa 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -6,7 +6,6 @@ LIBNAME = nv30 C_SOURCES = \ nv30_context.c \ nv30_fragtex.c \ - nv30_screen.c \ nv30_state.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/nvfx diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c deleted file mode 100644 index 3a48255c18e..00000000000 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ /dev/null @@ -1,361 +0,0 @@ -#include "pipe/p_screen.h" -#include "pipe/p_state.h" - -#include "nouveau/nouveau_screen.h" - -#include "nv30_context.h" -#include "nvfx_screen.h" - -#define NV30TCL_CHIPSET_3X_MASK 0x00000003 -#define NV34TCL_CHIPSET_3X_MASK 0x00000010 -#define NV35TCL_CHIPSET_3X_MASK 0x000001e0 - -/* FIXME: It seems I should not include directly ../../winsys/drm/nouveau/drm/nouveau_drm_api.h - * to get the pointer to the context front buffer, so I copied nouveau_winsys here. - * nv30_screen_surface_format_supported() can then use it to enforce creating fbo - * with same number of bits everywhere. - */ -struct nouveau_winsys { - struct pipe_winsys base; - - struct pipe_screen *pscreen; - - struct pipe_surface *front; -}; - -static int -nv30_screen_get_param(struct pipe_screen *pscreen, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; - case PIPE_CAP_NPOT_TEXTURES: - return 0; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 1; - case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 2; - case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 13; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 10; - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 13; - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - return 0; - case PIPE_CAP_TEXTURE_MIRROR_REPEAT: - return 1; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 0; - case PIPE_CAP_TGSI_CONT_SUPPORTED: - return 0; - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - return 0; - case NOUVEAU_CAP_HW_VTXBUF: - case NOUVEAU_CAP_HW_IDXBUF: - return 1; - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 16; - case PIPE_CAP_INDEP_BLEND_ENABLE: - return 0; - case PIPE_CAP_INDEP_BLEND_FUNC: - return 0; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - return 0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0; - } -} - -static float -nv30_screen_get_paramf(struct pipe_screen *pscreen, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 10.0; - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 64.0; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 8.0; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 4.0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0.0; - } -} - -static boolean -nv30_screen_surface_format_supported(struct pipe_screen *pscreen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned tex_usage, unsigned geom_flags) -{ - struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front; - - if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B5G6R5_UNORM: - return TRUE; - default: - break; - } - } else - if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { - switch (format) { - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - return TRUE; - case PIPE_FORMAT_Z16_UNORM: - if (front) { - return (front->format == PIPE_FORMAT_B5G6R5_UNORM); - } - return TRUE; - default: - break; - } - } else { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: - return TRUE; - default: - break; - } - } - - return FALSE; -} - -static struct pipe_buffer * -nv30_surface_buffer(struct pipe_surface *surf) -{ - struct nvfx_miptree *mt = (struct nvfx_miptree *)surf->texture; - - return mt->buffer; -} - -static void -nv30_screen_destroy(struct pipe_screen *pscreen) -{ - struct nvfx_screen *screen = nvfx_screen(pscreen); - unsigned i; - - for (i = 0; i < NVFX_STATE_MAX; i++) { - if (screen->state[i]) - so_ref(NULL, &screen->state[i]); - } - - nouveau_resource_destroy(&screen->vp_exec_heap); - nouveau_resource_destroy(&screen->vp_data_heap); - nouveau_resource_destroy(&screen->query_heap); - nouveau_notifier_free(&screen->query); - nouveau_notifier_free(&screen->sync); - nouveau_grobj_free(&screen->eng3d); - nv04_surface_2d_takedown(&screen->eng2d); - - nouveau_screen_fini(&screen->base); - - FREE(pscreen); -} - -struct pipe_screen * -nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) -{ - struct nvfx_screen *screen = CALLOC_STRUCT(nvfx_screen); - struct nouveau_channel *chan; - struct pipe_screen *pscreen; - struct nouveau_stateobj *so; - unsigned eng3d_class = 0; - int ret, i; - - if (!screen) - return NULL; - pscreen = &screen->base.base; - - ret = nouveau_screen_init(&screen->base, dev); - if (ret) { - nv30_screen_destroy(pscreen); - return NULL; - } - chan = screen->base.channel; - - pscreen->winsys = ws; - pscreen->destroy = nv30_screen_destroy; - pscreen->get_param = nv30_screen_get_param; - pscreen->get_paramf = nv30_screen_get_paramf; - pscreen->is_format_supported = nv30_screen_surface_format_supported; - pscreen->context_create = nv30_create; - - nvfx_screen_init_miptree_functions(pscreen); - - /* 3D object */ - switch (dev->chipset & 0xf0) { - case 0x30: - if (NV30TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) - eng3d_class = 0x0397; - else - if (NV34TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) - eng3d_class = 0x0697; - else - if (NV35TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) - eng3d_class = 0x0497; - break; - default: - break; - } - - if (!eng3d_class) { - NOUVEAU_ERR("Unknown nv3x chipset: nv%02x\n", dev->chipset); - return NULL; - } - - ret = nouveau_grobj_alloc(chan, 0xbeef3097, eng3d_class, - &screen->eng3d); - if (ret) { - NOUVEAU_ERR("Error creating 3D object: %d\n", ret); - return FALSE; - } - - /* 2D engine setup */ - screen->eng2d = nv04_surface_2d_init(&screen->base); - screen->eng2d->buf = nv30_surface_buffer; - - /* Notifier for sync purposes */ - ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); - if (ret) { - NOUVEAU_ERR("Error creating notifier object: %d\n", ret); - nv30_screen_destroy(pscreen); - return NULL; - } - - /* Query objects */ - ret = nouveau_notifier_alloc(chan, 0xbeef0302, 32, &screen->query); - if (ret) { - NOUVEAU_ERR("Error initialising query objects: %d\n", ret); - nv30_screen_destroy(pscreen); - return NULL; - } - - ret = nouveau_resource_init(&screen->query_heap, 0, 32); - if (ret) { - NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); - nv30_screen_destroy(pscreen); - return NULL; - } - - /* Vtxprog resources */ - if (nouveau_resource_init(&screen->vp_exec_heap, 0, 256) || - nouveau_resource_init(&screen->vp_data_heap, 0, 256)) { - nv30_screen_destroy(pscreen); - return NULL; - } - - /* Static eng3d initialisation */ - so = so_new(36, 60, 0); - so_method(so, screen->eng3d, NV34TCL_DMA_NOTIFY, 1); - so_data (so, screen->sync->handle); - so_method(so, screen->eng3d, NV34TCL_DMA_TEXTURE0, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->gart->handle); - so_method(so, screen->eng3d, NV34TCL_DMA_COLOR1, 1); - so_data (so, chan->vram->handle); - so_method(so, screen->eng3d, NV34TCL_DMA_COLOR0, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->vram->handle); - so_method(so, screen->eng3d, NV34TCL_DMA_VTXBUF0, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->gart->handle); -/* so_method(so, screen->eng3d, NV34TCL_DMA_FENCE, 2); - so_data (so, 0); - so_data (so, screen->query->handle);*/ - so_method(so, screen->eng3d, NV34TCL_DMA_IN_MEMORY7, 1); - so_data (so, chan->vram->handle); - so_method(so, screen->eng3d, NV34TCL_DMA_IN_MEMORY8, 1); - so_data (so, chan->vram->handle); - - for (i=1; i<8; i++) { - so_method(so, screen->eng3d, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1); - so_data (so, 0); - so_method(so, screen->eng3d, NV34TCL_VIEWPORT_CLIP_VERT(i), 1); - so_data (so, 0); - } - - so_method(so, screen->eng3d, 0x220, 1); - so_data (so, 1); - - so_method(so, screen->eng3d, 0x03b0, 1); - so_data (so, 0x00100000); - so_method(so, screen->eng3d, 0x1454, 1); - so_data (so, 0); - so_method(so, screen->eng3d, 0x1d80, 1); - so_data (so, 3); - so_method(so, screen->eng3d, 0x1450, 1); - so_data (so, 0x00030004); - - /* NEW */ - so_method(so, screen->eng3d, 0x1e98, 1); - so_data (so, 0); - so_method(so, screen->eng3d, 0x17e0, 3); - so_data (so, fui(0.0)); - so_data (so, fui(0.0)); - so_data (so, fui(1.0)); - so_method(so, screen->eng3d, 0x1f80, 16); - for (i=0; i<16; i++) { - so_data (so, (i==8) ? 0x0000ffff : 0); - } - - so_method(so, screen->eng3d, 0x120, 3); - so_data (so, 0); - so_data (so, 1); - so_data (so, 2); - - so_method(so, screen->eng3d, 0x1d88, 1); - so_data (so, 0x00001200); - - so_method(so, screen->eng3d, NV34TCL_RC_ENABLE, 1); - so_data (so, 0); - - so_method(so, screen->eng3d, NV34TCL_DEPTH_RANGE_NEAR, 2); - so_data (so, fui(0.0)); - so_data (so, fui(1.0)); - - so_method(so, screen->eng3d, NV34TCL_MULTISAMPLE_CONTROL, 1); - so_data (so, 0xffff0000); - - /* enables use of vp rather than fixed-function somehow */ - so_method(so, screen->eng3d, 0x1e94, 1); - so_data (so, 0x13); - - so_emit(chan, so); - so_ref(NULL, &so); - nouveau_pushbuf_flush(chan, 0); - - return pscreen; -} diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index c8f3858c36f..a37446d50ce 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -6,7 +6,6 @@ LIBNAME = nv40 C_SOURCES = \ nv40_context.c \ nv40_fragtex.c \ - nv40_screen.c \ nv40_state.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/nvfx diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c deleted file mode 100644 index 0603b7d1c1b..00000000000 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ /dev/null @@ -1,322 +0,0 @@ -#include "pipe/p_screen.h" - -#include "nv40_context.h" -#include "nvfx_screen.h" - -#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf -#define NV4X_GRCLASS4497_CHIPSETS 0x00005450 -#define NV6X_GRCLASS4497_CHIPSETS 0x00000088 - -static int -nv40_screen_get_param(struct pipe_screen *pscreen, int param) -{ - struct nvfx_screen *screen = nvfx_screen(pscreen); - - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 16; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 1; - case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 4; - case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 13; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 10; - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 13; - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_TEXTURE_MIRROR_REPEAT: - return 1; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 0; /* We have 4 - but unsupported currently */ - case PIPE_CAP_TGSI_CONT_SUPPORTED: - return 0; - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - return 1; - case NOUVEAU_CAP_HW_VTXBUF: - return 1; - case NOUVEAU_CAP_HW_IDXBUF: - if (screen->eng3d->grclass == NV40TCL) - return 1; - return 0; - case PIPE_CAP_INDEP_BLEND_ENABLE: - return 0; - case PIPE_CAP_INDEP_BLEND_FUNC: - return 0; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - return 0; - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 16; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0; - } -} - -static float -nv40_screen_get_paramf(struct pipe_screen *pscreen, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 10.0; - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 64.0; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0.0; - } -} - -static boolean -nv40_screen_surface_format_supported(struct pipe_screen *pscreen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned tex_usage, unsigned geom_flags) -{ - if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B5G6R5_UNORM: - return TRUE; - default: - break; - } - } else - if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { - switch (format) { - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_Z16_UNORM: - return TRUE; - default: - break; - } - } else { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B5G5R5A1_UNORM: - case PIPE_FORMAT_B4G4R4A4_UNORM: - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return TRUE; - default: - break; - } - } - - return FALSE; -} - -static struct pipe_buffer * -nv40_surface_buffer(struct pipe_surface *surf) -{ - struct nvfx_miptree *mt = (struct nvfx_miptree *)surf->texture; - - return mt->buffer; -} - -static void -nv40_screen_destroy(struct pipe_screen *pscreen) -{ - struct nvfx_screen *screen = nvfx_screen(pscreen); - unsigned i; - - for (i = 0; i < NVFX_STATE_MAX; i++) { - if (screen->state[i]) - so_ref(NULL, &screen->state[i]); - } - - nouveau_resource_destroy(&screen->vp_exec_heap); - nouveau_resource_destroy(&screen->vp_data_heap); - nouveau_resource_destroy(&screen->query_heap); - nouveau_notifier_free(&screen->query); - nouveau_notifier_free(&screen->sync); - nouveau_grobj_free(&screen->eng3d); - nv04_surface_2d_takedown(&screen->eng2d); - - nouveau_screen_fini(&screen->base); - - FREE(pscreen); -} - -struct pipe_screen * -nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) -{ - struct nvfx_screen *screen = CALLOC_STRUCT(nvfx_screen); - struct nouveau_channel *chan; - struct pipe_screen *pscreen; - struct nouveau_stateobj *so; - unsigned eng3d_class = 0; - int ret; - - if (!screen) - return NULL; - - screen->is_nv4x = ~0; - - pscreen = &screen->base.base; - - ret = nouveau_screen_init(&screen->base, dev); - if (ret) { - nv40_screen_destroy(pscreen); - return NULL; - } - chan = screen->base.channel; - - pscreen->winsys = ws; - pscreen->destroy = nv40_screen_destroy; - pscreen->get_param = nv40_screen_get_param; - pscreen->get_paramf = nv40_screen_get_paramf; - pscreen->is_format_supported = nv40_screen_surface_format_supported; - pscreen->context_create = nv40_create; - - nvfx_screen_init_miptree_functions(pscreen); - - /* 3D object */ - switch (dev->chipset & 0xf0) { - case 0x40: - if (NV4X_GRCLASS4097_CHIPSETS & (1 << (dev->chipset & 0x0f))) - eng3d_class = NV40TCL; - else - if (NV4X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f))) - eng3d_class = NV44TCL; - break; - case 0x60: - if (NV6X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f))) - eng3d_class = NV44TCL; - break; - } - - if (!eng3d_class) { - NOUVEAU_ERR("Unknown nv4x chipset: nv%02x\n", dev->chipset); - return NULL; - } - - ret = nouveau_grobj_alloc(chan, 0xbeef3097, eng3d_class, &screen->eng3d); - if (ret) { - NOUVEAU_ERR("Error creating 3D object: %d\n", ret); - return FALSE; - } - - /* 2D engine setup */ - screen->eng2d = nv04_surface_2d_init(&screen->base); - screen->eng2d->buf = nv40_surface_buffer; - - /* Notifier for sync purposes */ - ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); - if (ret) { - NOUVEAU_ERR("Error creating notifier object: %d\n", ret); - nv40_screen_destroy(pscreen); - return NULL; - } - - /* Query objects */ - ret = nouveau_notifier_alloc(chan, 0xbeef0302, 32, &screen->query); - if (ret) { - NOUVEAU_ERR("Error initialising query objects: %d\n", ret); - nv40_screen_destroy(pscreen); - return NULL; - } - - nouveau_resource_init(&screen->query_heap, 0, 32); - if (ret) { - NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); - nv40_screen_destroy(pscreen); - return NULL; - } - - /* Vtxprog resources */ - if (nouveau_resource_init(&screen->vp_exec_heap, 0, 512) || - nouveau_resource_init(&screen->vp_data_heap, 0, 256)) { - nv40_screen_destroy(pscreen); - return NULL; - } - - /* Static eng3d initialisation */ - so = so_new(16, 25, 0); - so_method(so, screen->eng3d, NV34TCL_DMA_NOTIFY, 1); - so_data (so, screen->sync->handle); - so_method(so, screen->eng3d, NV34TCL_DMA_TEXTURE0, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->gart->handle); - so_method(so, screen->eng3d, NV34TCL_DMA_COLOR1, 1); - so_data (so, chan->vram->handle); - so_method(so, screen->eng3d, NV34TCL_DMA_COLOR0, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->vram->handle); - so_method(so, screen->eng3d, NV34TCL_DMA_VTXBUF0, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->gart->handle); - so_method(so, screen->eng3d, NV34TCL_DMA_FENCE, 2); - so_data (so, 0); - so_data (so, screen->query->handle); - so_method(so, screen->eng3d, NV34TCL_DMA_IN_MEMORY7, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->vram->handle); - so_method(so, screen->eng3d, NV40TCL_DMA_COLOR2, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->vram->handle); - - so_method(so, screen->eng3d, 0x1ea4, 3); - so_data (so, 0x00000010); - so_data (so, 0x01000100); - so_data (so, 0xff800006); - - /* vtxprog output routing */ - so_method(so, screen->eng3d, 0x1fc4, 1); - so_data (so, 0x06144321); - so_method(so, screen->eng3d, 0x1fc8, 2); - so_data (so, 0xedcba987); - so_data (so, 0x00000021); - so_method(so, screen->eng3d, 0x1fd0, 1); - so_data (so, 0x00171615); - so_method(so, screen->eng3d, 0x1fd4, 1); - so_data (so, 0x001b1a19); - - so_method(so, screen->eng3d, 0x1ef8, 1); - so_data (so, 0x0020ffff); - so_method(so, screen->eng3d, 0x1d64, 1); - so_data (so, 0x00d30000); - so_method(so, screen->eng3d, 0x1e94, 1); - so_data (so, 0x00000001); - - so_emit(chan, so); - so_ref(NULL, &so); - nouveau_pushbuf_flush(chan, 0); - - return pscreen; -} - diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index a665c7e2631..e2c24ebc3c4 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -9,6 +9,7 @@ C_SOURCES = \ nvfx_fragprog.c \ nvfx_miptree.c \ nvfx_query.c \ + nvfx_screen.c \ nvfx_state.c \ nvfx_state_blend.c \ nvfx_state_emit.c \ diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c new file mode 100644 index 00000000000..6132cf94e2c --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -0,0 +1,436 @@ +#include "pipe/p_screen.h" +#include "pipe/p_state.h" + +#include "nouveau/nouveau_screen.h" + +#include "nv30/nv30_context.h" +#include "nv40/nv40_context.h" +#include "nvfx_screen.h" + +#define NV30TCL_CHIPSET_3X_MASK 0x00000003 +#define NV34TCL_CHIPSET_3X_MASK 0x00000010 +#define NV35TCL_CHIPSET_3X_MASK 0x000001e0 + +/* FIXME: It seems I should not include directly ../../winsys/drm/nouveau/drm/nouveau_drm_api.h +* to get the pointer to the context front buffer, so I copied nouveau_winsys here. +* nv30_screen_surface_format_supported() can then use it to enforce creating fbo +* with same number of bits everywhere. +*/ +struct nouveau_winsys { + struct pipe_winsys base; + + struct pipe_screen *pscreen; + + struct pipe_surface *front; +}; +#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf +#define NV4X_GRCLASS4497_CHIPSETS 0x00005450 +#define NV6X_GRCLASS4497_CHIPSETS 0x00000088 + +static int +nvfx_screen_get_param(struct pipe_screen *pscreen, int param) +{ + struct nvfx_screen *screen = nvfx_screen(pscreen); + + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + /* TODO: check this */ + return screen->is_nv4x ? 16 : 8; + case PIPE_CAP_NPOT_TEXTURES: + return !!screen->is_nv4x; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return screen->is_nv4x ? 4 : 2; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + return !!screen->is_nv4x; + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 0; /* We have 4 on nv40 - but unsupported currently */ + case PIPE_CAP_TGSI_CONT_SUPPORTED: + return 0; + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + return !!screen->is_nv4x; + case NOUVEAU_CAP_HW_VTXBUF: + /* TODO: this is almost surely wrong */ + return !!screen->is_nv4x; + case NOUVEAU_CAP_HW_IDXBUF: + /* TODO: this is also almost surely wrong */ + return screen->is_nv4x && screen->eng3d->grclass == NV40TCL; + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 16; + case PIPE_CAP_INDEP_BLEND_ENABLE: + /* TODO: on nv40 we have separate color masks */ + /* TODO: nv40 mrt blending is probably broken */ + return 0; + case PIPE_CAP_INDEP_BLEND_FUNC: + return 0; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nvfx_screen_get_paramf(struct pipe_screen *pscreen, int param) +{ + struct nvfx_screen *screen = nvfx_screen(pscreen); + + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return screen->is_nv4x ? 16.0 : 8.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return screen->is_nv4x ? 16.0 : 4.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static boolean +nvfx_screen_surface_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) +{ + struct nvfx_screen *screen = nvfx_screen(pscreen); + struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front; + + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + switch (format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: + return TRUE; + default: + break; + } + } else + if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { + switch (format) { + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + return TRUE; + case PIPE_FORMAT_Z16_UNORM: + /* TODO: this nv30 limitation probably does not exist */ + if (!screen->is_nv4x && front) + return (front->format == PIPE_FORMAT_B5G6R5_UNORM); + return TRUE; + default: + break; + } + } else { + switch (format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + return TRUE; + /* TODO: does nv30 support this? */ + case PIPE_FORMAT_R16_SNORM: + return !!screen->is_nv4x; + default: + break; + } + } + + return FALSE; +} + +static struct pipe_buffer * +nvfx_surface_buffer(struct pipe_surface *surf) +{ + struct nvfx_miptree *mt = (struct nvfx_miptree *)surf->texture; + + return mt->buffer; +} + +static void +nvfx_screen_destroy(struct pipe_screen *pscreen) +{ + struct nvfx_screen *screen = nvfx_screen(pscreen); + unsigned i; + + for (i = 0; i < NVFX_STATE_MAX; i++) { + if (screen->state[i]) + so_ref(NULL, &screen->state[i]); + } + + nouveau_resource_destroy(&screen->vp_exec_heap); + nouveau_resource_destroy(&screen->vp_data_heap); + nouveau_resource_destroy(&screen->query_heap); + nouveau_notifier_free(&screen->query); + nouveau_notifier_free(&screen->sync); + nouveau_grobj_free(&screen->eng3d); + nv04_surface_2d_takedown(&screen->eng2d); + + nouveau_screen_fini(&screen->base); + + FREE(pscreen); +} + +static void nv30_screen_init(struct nvfx_screen *screen, struct nouveau_stateobj* so) +{ + screen->base.base.context_create = nv30_create; + int i; + + /* TODO: perhaps we should do some of this on nv40 too? */ + for (i=1; i<8; i++) { + so_method(so, screen->eng3d, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1); + so_data (so, 0); + so_method(so, screen->eng3d, NV34TCL_VIEWPORT_CLIP_VERT(i), 1); + so_data (so, 0); + } + + so_method(so, screen->eng3d, 0x220, 1); + so_data (so, 1); + + so_method(so, screen->eng3d, 0x03b0, 1); + so_data (so, 0x00100000); + so_method(so, screen->eng3d, 0x1454, 1); + so_data (so, 0); + so_method(so, screen->eng3d, 0x1d80, 1); + so_data (so, 3); + so_method(so, screen->eng3d, 0x1450, 1); + so_data (so, 0x00030004); + + /* NEW */ + so_method(so, screen->eng3d, 0x1e98, 1); + so_data (so, 0); + so_method(so, screen->eng3d, 0x17e0, 3); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + so_method(so, screen->eng3d, 0x1f80, 16); + for (i=0; i<16; i++) { + so_data (so, (i==8) ? 0x0000ffff : 0); + } + + so_method(so, screen->eng3d, 0x120, 3); + so_data (so, 0); + so_data (so, 1); + so_data (so, 2); + + so_method(so, screen->eng3d, 0x1d88, 1); + so_data (so, 0x00001200); + + so_method(so, screen->eng3d, NV34TCL_RC_ENABLE, 1); + so_data (so, 0); + + so_method(so, screen->eng3d, NV34TCL_DEPTH_RANGE_NEAR, 2); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + + so_method(so, screen->eng3d, NV34TCL_MULTISAMPLE_CONTROL, 1); + so_data (so, 0xffff0000); + + /* enables use of vp rather than fixed-function somehow */ + so_method(so, screen->eng3d, 0x1e94, 1); + so_data (so, 0x13); +} + +static void nv40_screen_init(struct nvfx_screen *screen, struct nouveau_stateobj* so) +{ + screen->base.base.context_create = nv40_create; + + so_method(so, screen->eng3d, NV40TCL_DMA_COLOR2, 2); + so_data (so, screen->base.channel->vram->handle); + so_data (so, screen->base.channel->vram->handle); + + so_method(so, screen->eng3d, 0x1ea4, 3); + so_data (so, 0x00000010); + so_data (so, 0x01000100); + so_data (so, 0xff800006); + + /* vtxprog output routing */ + so_method(so, screen->eng3d, 0x1fc4, 1); + so_data (so, 0x06144321); + so_method(so, screen->eng3d, 0x1fc8, 2); + so_data (so, 0xedcba987); + so_data (so, 0x00000021); + so_method(so, screen->eng3d, 0x1fd0, 1); + so_data (so, 0x00171615); + so_method(so, screen->eng3d, 0x1fd4, 1); + so_data (so, 0x001b1a19); + + so_method(so, screen->eng3d, 0x1ef8, 1); + so_data (so, 0x0020ffff); + so_method(so, screen->eng3d, 0x1d64, 1); + so_data (so, 0x00d30000); + so_method(so, screen->eng3d, 0x1e94, 1); + so_data (so, 0x00000001); +} + +struct pipe_screen * +nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) +{ + struct nvfx_screen *screen = CALLOC_STRUCT(nvfx_screen); + struct nouveau_channel *chan; + struct pipe_screen *pscreen; + struct nouveau_stateobj *so; + unsigned eng3d_class = 0; + int ret; + + if (!screen) + return NULL; + + pscreen = &screen->base.base; + + ret = nouveau_screen_init(&screen->base, dev); + if (ret) { + nvfx_screen_destroy(pscreen); + return NULL; + } + chan = screen->base.channel; + + pscreen->winsys = ws; + pscreen->destroy = nvfx_screen_destroy; + pscreen->get_param = nvfx_screen_get_param; + pscreen->get_paramf = nvfx_screen_get_paramf; + pscreen->is_format_supported = nvfx_screen_surface_format_supported; + + switch (dev->chipset & 0xf0) { + case 0x30: + if (NV30TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) + eng3d_class = 0x0397; + else if (NV34TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) + eng3d_class = 0x0697; + else if (NV35TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) + eng3d_class = 0x0497; + break; + case 0x40: + if (NV4X_GRCLASS4097_CHIPSETS & (1 << (dev->chipset & 0x0f))) + eng3d_class = NV40TCL; + else if (NV4X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f))) + eng3d_class = NV44TCL; + screen->is_nv4x = ~0; + break; + case 0x60: + if (NV6X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f))) + eng3d_class = NV44TCL; + screen->is_nv4x = ~0; + break; + } + + if (!eng3d_class) { + NOUVEAU_ERR("Unknown nv3x/nv4x chipset: nv%02x\n", dev->chipset); + return NULL; + } + + nvfx_screen_init_miptree_functions(pscreen); + + ret = nouveau_grobj_alloc(chan, 0xbeef3097, eng3d_class, &screen->eng3d); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return FALSE; + } + + /* 2D engine setup */ + screen->eng2d = nv04_surface_2d_init(&screen->base); + screen->eng2d->buf = nvfx_surface_buffer; + + /* Notifier for sync purposes */ + ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nvfx_screen_destroy(pscreen); + return NULL; + } + + /* Query objects */ + ret = nouveau_notifier_alloc(chan, 0xbeef0302, 32, &screen->query); + if (ret) { + NOUVEAU_ERR("Error initialising query objects: %d\n", ret); + nvfx_screen_destroy(pscreen); + return NULL; + } + + ret = nouveau_resource_init(&screen->query_heap, 0, 32); + if (ret) { + NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); + nvfx_screen_destroy(pscreen); + return NULL; + } + + /* Vtxprog resources */ + if (nouveau_resource_init(&screen->vp_exec_heap, 0, screen->is_nv4x ? 512 : 256) || + nouveau_resource_init(&screen->vp_data_heap, 0, 256)) { + nvfx_screen_destroy(pscreen); + return NULL; + } + + /* Static eng3d initialisation */ + /* make the so big and don't worry about exact values + since we it will be thrown away immediately after use */ + so = so_new(256, 256, 0); + so_method(so, screen->eng3d, NV34TCL_DMA_NOTIFY, 1); + so_data (so, screen->sync->handle); + so_method(so, screen->eng3d, NV34TCL_DMA_TEXTURE0, 2); + so_data (so, chan->vram->handle); + so_data (so, chan->gart->handle); + so_method(so, screen->eng3d, NV34TCL_DMA_COLOR1, 1); + so_data (so, chan->vram->handle); + so_method(so, screen->eng3d, NV34TCL_DMA_COLOR0, 2); + so_data (so, chan->vram->handle); + so_data (so, chan->vram->handle); + so_method(so, screen->eng3d, NV34TCL_DMA_VTXBUF0, 2); + so_data (so, chan->vram->handle); + so_data (so, chan->gart->handle); + + so_method(so, screen->eng3d, NV34TCL_DMA_FENCE, 2); + so_data (so, 0); + so_data (so, screen->query->handle); + + so_method(so, screen->eng3d, NV34TCL_DMA_IN_MEMORY7, 2); + so_data (so, chan->vram->handle); + so_data (so, chan->vram->handle); + + if(!screen->is_nv4x) + nv30_screen_init(screen, so); + else + nv40_screen_init(screen, so); + + so_emit(chan, so); + so_ref(NULL, &so); + nouveau_pushbuf_flush(chan, 0); + + return pscreen; +} diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c index 21517b4bb54..716d4bacd3b 100644 --- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c +++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c @@ -86,11 +86,9 @@ nouveau_drm_create_screen(struct drm_api *api, int fd, switch (dev->chipset & 0xf0) { case 0x30: - init = nv30_screen_create; - break; case 0x40: case 0x60: - init = nv40_screen_create; + init = nvfx_screen_create; break; case 0x50: case 0x80: -- cgit v1.2.3 From 6992be543383ba0850bd813153def24ab4e28911 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 21 Feb 2010 14:38:04 +0100 Subject: nv30, nv40: fully unify nv[34]0_context.h Move the remaining content to the common header. --- src/gallium/drivers/nv30/nv30_context.c | 2 +- src/gallium/drivers/nv30/nv30_context.h | 14 -------------- src/gallium/drivers/nv30/nv30_fragtex.c | 2 +- src/gallium/drivers/nv40/nv40_context.c | 2 +- src/gallium/drivers/nv40/nv40_context.h | 14 -------------- src/gallium/drivers/nv40/nv40_fragtex.c | 3 +-- src/gallium/drivers/nvfx/nvfx_context.h | 18 ++++++++++++++++++ src/gallium/drivers/nvfx/nvfx_screen.c | 3 +-- src/gallium/drivers/nvfx/nvfx_state_emit.c | 3 +-- 9 files changed, 24 insertions(+), 37 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_context.h delete mode 100644 src/gallium/drivers/nv40/nv40_context.h (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 9ddb331e749..7e3fd83ee0a 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -1,7 +1,7 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" -#include "nv30_context.h" +#include "nvfx_context.h" #include "nvfx_screen.h" static void diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h deleted file mode 100644 index eacbb1753d5..00000000000 --- a/src/gallium/drivers/nv30/nv30_context.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef __NV30_CONTEXT_H__ -#define __NV30_CONTEXT_H__ - -#include "nvfx_context.h" - -/* nv30_fragtex.c */ -extern void nv30_fragtex_bind(struct nvfx_context *); -extern struct nvfx_state_entry nv30_state_fragtex; - -/* nvfx_context.c */ -struct pipe_context * -nv30_create(struct pipe_screen *pscreen, void *priv); - -#endif diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index 34e7dd54445..ab39dedae6b 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -1,6 +1,6 @@ #include "util/u_format.h" -#include "nv30_context.h" +#include "nvfx_context.h" #include "nouveau/nouveau_util.h" #define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \ diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index 13bd50dd1dc..5a526423ac4 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -1,7 +1,7 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" -#include "nv40_context.h" +#include "nvfx_context.h" #include "nvfx_screen.h" static void diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h deleted file mode 100644 index 8dc87e426f9..00000000000 --- a/src/gallium/drivers/nv40/nv40_context.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef __NV40_CONTEXT_H__ -#define __NV40_CONTEXT_H__ - -#include "nvfx_context.h" - -/* nv40_fragtex.c */ -extern void nv40_fragtex_bind(struct nvfx_context *); -extern struct nvfx_state_entry nv40_state_fragtex; - -/* nvfx_context.c */ -struct pipe_context * -nv40_create(struct pipe_screen *pscreen, void *priv); - -#endif diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index 0b46a5313bd..29257173b82 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -1,6 +1,5 @@ #include "util/u_format.h" - -#include "nv40_context.h" +#include "nvfx_context.h" #define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw) \ { \ diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 9a4b4631b54..46cc7362eab 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -198,6 +198,14 @@ extern struct nvfx_state_entry nvfx_state_zsa; extern void nvfx_init_query_functions(struct nvfx_context *nvfx); extern void nvfx_init_surface_functions(struct nvfx_context *nvfx); +/* nv30_context.c */ +struct pipe_context * +nv30_create(struct pipe_screen *pscreen, void *priv); + +/* nv40_context.c */ +struct pipe_context * +nv40_create(struct pipe_screen *pscreen, void *priv); + /* nvfx_clear.c */ extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, double depth, unsigned stencil); @@ -213,6 +221,16 @@ extern void nvfx_draw_elements_swtnl(struct pipe_context *pipe, extern void nvfx_fragprog_destroy(struct nvfx_context *, struct nvfx_fragment_program *); +/* nv30_fragtex.c */ +extern void nv30_init_sampler_functions(struct nvfx_context *nvfx); +extern void nv30_fragtex_bind(struct nvfx_context *); +extern struct nvfx_state_entry nv30_state_fragtex; + +/* nv40_fragtex.c */ +extern void nv40_init_sampler_functions(struct nvfx_context *nvfx); +extern void nv40_fragtex_bind(struct nvfx_context *); +extern struct nvfx_state_entry nv40_state_fragtex; + /* nvfx_state.c */ extern void nvfx_init_state_functions(struct nvfx_context *nvfx); diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 6132cf94e2c..0c906ecb1d1 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -3,8 +3,7 @@ #include "nouveau/nouveau_screen.h" -#include "nv30/nv30_context.h" -#include "nv40/nv40_context.h" +#include "nvfx_context.h" #include "nvfx_screen.h" #define NV30TCL_CHIPSET_3X_MASK 0x00000003 diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index fcbf8310501..9d28b590746 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -1,5 +1,4 @@ -#include "nv30/nv30_context.h" -#include "nv40/nv40_context.h" +#include "nvfx_context.h" #include "nvfx_state.h" #include "draw/draw_context.h" -- cgit v1.2.3 From da5103c3b382ca08368a19a195a24278596db4cb Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 21 Feb 2010 14:40:32 +0100 Subject: nv30, nv40: unify nv[34]0_context.c They are now almost identical, except for nv30 vs nv40 fragtex initialization. --- src/gallium/drivers/nv30/Makefile | 1 - src/gallium/drivers/nv30/nv30_context.c | 90 --------------------------------- src/gallium/drivers/nv40/Makefile | 1 - src/gallium/drivers/nv40/nv40_context.c | 90 --------------------------------- src/gallium/drivers/nvfx/Makefile | 1 + src/gallium/drivers/nvfx/nvfx_context.c | 90 +++++++++++++++++++++++++++++++++ src/gallium/drivers/nvfx/nvfx_context.h | 8 +-- src/gallium/drivers/nvfx/nvfx_screen.c | 4 +- 8 files changed, 94 insertions(+), 191 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_context.c delete mode 100644 src/gallium/drivers/nv40/nv40_context.c create mode 100644 src/gallium/drivers/nvfx/nvfx_context.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index ef3ce7eafaa..0a20de9e7a7 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -4,7 +4,6 @@ include $(TOP)/configs/current LIBNAME = nv30 C_SOURCES = \ - nv30_context.c \ nv30_fragtex.c \ nv30_state.c diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c deleted file mode 100644 index 7e3fd83ee0a..00000000000 --- a/src/gallium/drivers/nv30/nv30_context.c +++ /dev/null @@ -1,90 +0,0 @@ -#include "draw/draw_context.h" -#include "pipe/p_defines.h" - -#include "nvfx_context.h" -#include "nvfx_screen.h" - -static void -nv30_flush(struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - - if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(chan, eng3d, 0x1fd8, 1); - OUT_RING (chan, 2); - BEGIN_RING(chan, eng3d, 0x1fd8, 1); - OUT_RING (chan, 1); - } - - FIRE_RING(chan); - if (fence) - *fence = NULL; -} - -static void -nv30_destroy(struct pipe_context *pipe) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - unsigned i; - - for (i = 0; i < NVFX_STATE_MAX; i++) { - if (nvfx->state.hw[i]) - so_ref(NULL, &nvfx->state.hw[i]); - } - - if (nvfx->draw) - draw_destroy(nvfx->draw); - FREE(nvfx); -} - -struct pipe_context * -nv30_create(struct pipe_screen *pscreen, void *priv) -{ - struct nvfx_screen *screen = nvfx_screen(pscreen); - struct pipe_winsys *ws = pscreen->winsys; - struct nvfx_context *nvfx; - struct nouveau_winsys *nvws = screen->nvws; - - nvfx = CALLOC(1, sizeof(struct nvfx_context)); - if (!nvfx) - return NULL; - nvfx->screen = screen; - - nvfx->nvws = nvws; - - nvfx->pipe.winsys = ws; - nvfx->pipe.screen = pscreen; - nvfx->pipe.priv = priv; - nvfx->pipe.destroy = nv30_destroy; - nvfx->pipe.draw_arrays = nvfx_draw_arrays; - nvfx->pipe.draw_elements = nvfx_draw_elements; - nvfx->pipe.clear = nvfx_clear; - nvfx->pipe.flush = nv30_flush; - - nvfx->pipe.is_texture_referenced = nouveau_is_texture_referenced; - nvfx->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; - - screen->base.channel->user_private = nvfx; - screen->base.channel->flush_notify = nvfx_state_flush_notify; - - nvfx->is_nv4x = screen->is_nv4x; - - nvfx_init_query_functions(nvfx); - nvfx_init_surface_functions(nvfx); - nvfx_init_state_functions(nvfx); - nvfx_init_transfer_functions(nvfx); - - /* Create, configure, and install fallback swtnl path */ - nvfx->draw = draw_create(); - draw_wide_point_threshold(nvfx->draw, 9999999.0); - draw_wide_line_threshold(nvfx->draw, 9999999.0); - draw_enable_line_stipple(nvfx->draw, FALSE); - draw_enable_point_sprites(nvfx->draw, FALSE); - draw_set_rasterize_stage(nvfx->draw, nvfx_draw_render_stage(nvfx)); - - return &nvfx->pipe; -} diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index a37446d50ce..6e764512cf2 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -4,7 +4,6 @@ include $(TOP)/configs/current LIBNAME = nv40 C_SOURCES = \ - nv40_context.c \ nv40_fragtex.c \ nv40_state.c diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c deleted file mode 100644 index 5a526423ac4..00000000000 --- a/src/gallium/drivers/nv40/nv40_context.c +++ /dev/null @@ -1,90 +0,0 @@ -#include "draw/draw_context.h" -#include "pipe/p_defines.h" - -#include "nvfx_context.h" -#include "nvfx_screen.h" - -static void -nv40_flush(struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - - if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(chan, eng3d, 0x1fd8, 1); - OUT_RING (chan, 2); - BEGIN_RING(chan, eng3d, 0x1fd8, 1); - OUT_RING (chan, 1); - } - - FIRE_RING(chan); - if (fence) - *fence = NULL; -} - -static void -nv40_destroy(struct pipe_context *pipe) -{ - struct nvfx_context *nvfx = nvfx_context(pipe); - unsigned i; - - for (i = 0; i < NVFX_STATE_MAX; i++) { - if (nvfx->state.hw[i]) - so_ref(NULL, &nvfx->state.hw[i]); - } - - if (nvfx->draw) - draw_destroy(nvfx->draw); - FREE(nvfx); -} - -struct pipe_context * -nv40_create(struct pipe_screen *pscreen, void *priv) -{ - struct nvfx_screen *screen = nvfx_screen(pscreen); - struct pipe_winsys *ws = pscreen->winsys; - struct nvfx_context *nvfx; - struct nouveau_winsys *nvws = screen->nvws; - - nvfx = CALLOC(1, sizeof(struct nvfx_context)); - if (!nvfx) - return NULL; - nvfx->screen = screen; - - nvfx->nvws = nvws; - - nvfx->pipe.winsys = ws; - nvfx->pipe.priv = priv; - nvfx->pipe.screen = pscreen; - nvfx->pipe.destroy = nv40_destroy; - nvfx->pipe.draw_arrays = nvfx_draw_arrays; - nvfx->pipe.draw_elements = nvfx_draw_elements; - nvfx->pipe.clear = nvfx_clear; - nvfx->pipe.flush = nv40_flush; - - nvfx->pipe.is_texture_referenced = nouveau_is_texture_referenced; - nvfx->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; - - screen->base.channel->user_private = nvfx; - screen->base.channel->flush_notify = nvfx_state_flush_notify; - - nvfx->is_nv4x = screen->is_nv4x; - - nvfx_init_query_functions(nvfx); - nvfx_init_surface_functions(nvfx); - nvfx_init_state_functions(nvfx); - nvfx_init_transfer_functions(nvfx); - - /* Create, configure, and install fallback swtnl path */ - nvfx->draw = draw_create(); - draw_wide_point_threshold(nvfx->draw, 9999999.0); - draw_wide_line_threshold(nvfx->draw, 9999999.0); - draw_enable_line_stipple(nvfx->draw, FALSE); - draw_enable_point_sprites(nvfx->draw, FALSE); - draw_set_rasterize_stage(nvfx->draw, nvfx_draw_render_stage(nvfx)); - - return &nvfx->pipe; -} diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index e2c24ebc3c4..aa03a155e32 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -4,6 +4,7 @@ include $(TOP)/configs/current LIBNAME = nvfx C_SOURCES = \ + nvfx_context.c \ nvfx_clear.c \ nvfx_draw.c \ nvfx_fragprog.c \ diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c new file mode 100644 index 00000000000..fc3cbdb558f --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -0,0 +1,90 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" + +#include "nvfx_context.h" +#include "nvfx_screen.h" + +static void +nvfx_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + BEGIN_RING(chan, eng3d, 0x1fd8, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, eng3d, 0x1fd8, 1); + OUT_RING (chan, 1); + } + + FIRE_RING(chan); + if (fence) + *fence = NULL; +} + +static void +nvfx_destroy(struct pipe_context *pipe) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + unsigned i; + + for (i = 0; i < NVFX_STATE_MAX; i++) { + if (nvfx->state.hw[i]) + so_ref(NULL, &nvfx->state.hw[i]); + } + + if (nvfx->draw) + draw_destroy(nvfx->draw); + FREE(nvfx); +} + +struct pipe_context * +nvfx_create(struct pipe_screen *pscreen, void *priv) +{ + struct nvfx_screen *screen = nvfx_screen(pscreen); + struct pipe_winsys *ws = pscreen->winsys; + struct nvfx_context *nvfx; + struct nouveau_winsys *nvws = screen->nvws; + + nvfx = CALLOC(1, sizeof(struct nvfx_context)); + if (!nvfx) + return NULL; + nvfx->screen = screen; + + nvfx->nvws = nvws; + + nvfx->pipe.winsys = ws; + nvfx->pipe.screen = pscreen; + nvfx->pipe.priv = priv; + nvfx->pipe.destroy = nvfx_destroy; + nvfx->pipe.draw_arrays = nvfx_draw_arrays; + nvfx->pipe.draw_elements = nvfx_draw_elements; + nvfx->pipe.clear = nvfx_clear; + nvfx->pipe.flush = nvfx_flush; + + nvfx->pipe.is_texture_referenced = nouveau_is_texture_referenced; + nvfx->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; + + screen->base.channel->user_private = nvfx; + screen->base.channel->flush_notify = nvfx_state_flush_notify; + + nvfx->is_nv4x = screen->is_nv4x; + + nvfx_init_query_functions(nvfx); + nvfx_init_surface_functions(nvfx); + nvfx_init_state_functions(nvfx); + nvfx_init_transfer_functions(nvfx); + + /* Create, configure, and install fallback swtnl path */ + nvfx->draw = draw_create(); + draw_wide_point_threshold(nvfx->draw, 9999999.0); + draw_wide_line_threshold(nvfx->draw, 9999999.0); + draw_enable_line_stipple(nvfx->draw, FALSE); + draw_enable_point_sprites(nvfx->draw, FALSE); + draw_set_rasterize_stage(nvfx->draw, nvfx_draw_render_stage(nvfx)); + + return &nvfx->pipe; +} diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 46cc7362eab..0bd37a7ae83 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -198,13 +198,9 @@ extern struct nvfx_state_entry nvfx_state_zsa; extern void nvfx_init_query_functions(struct nvfx_context *nvfx); extern void nvfx_init_surface_functions(struct nvfx_context *nvfx); -/* nv30_context.c */ +/* nvfx_context.c */ struct pipe_context * -nv30_create(struct pipe_screen *pscreen, void *priv); - -/* nv40_context.c */ -struct pipe_context * -nv40_create(struct pipe_screen *pscreen, void *priv); +nvfx_create(struct pipe_screen *pscreen, void *priv); /* nvfx_clear.c */ extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers, diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index 0c906ecb1d1..8138715cc7d 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -208,7 +208,6 @@ nvfx_screen_destroy(struct pipe_screen *pscreen) static void nv30_screen_init(struct nvfx_screen *screen, struct nouveau_stateobj* so) { - screen->base.base.context_create = nv30_create; int i; /* TODO: perhaps we should do some of this on nv40 too? */ @@ -268,8 +267,6 @@ static void nv30_screen_init(struct nvfx_screen *screen, struct nouveau_stateobj static void nv40_screen_init(struct nvfx_screen *screen, struct nouveau_stateobj* so) { - screen->base.base.context_create = nv40_create; - so_method(so, screen->eng3d, NV40TCL_DMA_COLOR2, 2); so_data (so, screen->base.channel->vram->handle); so_data (so, screen->base.channel->vram->handle); @@ -325,6 +322,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) pscreen->get_param = nvfx_screen_get_param; pscreen->get_paramf = nvfx_screen_get_paramf; pscreen->is_format_supported = nvfx_screen_surface_format_supported; + pscreen->context_create = nvfx_create; switch (dev->chipset & 0xf0) { case 0x30: -- cgit v1.2.3 From 5bb68e5d174afa7a177c5e972fa80bf66e37f6ab Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 21 Feb 2010 15:07:17 +0100 Subject: nv30, nv40: partially non-trivially unify nv[34]0_fragtex.c The bulk files cannot be unified, but the frontend can and allows to share some code and simplify state_emit.c --- src/gallium/drivers/nv30/nv30_fragtex.c | 45 +-------------------------- src/gallium/drivers/nv40/nv40_fragtex.c | 46 +--------------------------- src/gallium/drivers/nvfx/Makefile | 1 + src/gallium/drivers/nvfx/nvfx_context.h | 7 +++-- src/gallium/drivers/nvfx/nvfx_fragtex.c | 49 ++++++++++++++++++++++++++++++ src/gallium/drivers/nvfx/nvfx_state_emit.c | 22 +++++--------- 6 files changed, 64 insertions(+), 106 deletions(-) create mode 100644 src/gallium/drivers/nvfx/nvfx_fragtex.c (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index ab39dedae6b..52f7c52a9b1 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -57,7 +57,7 @@ nv30_fragtex_format(uint pipe_format) } -static struct nouveau_stateobj * +struct nouveau_stateobj * nv30_fragtex_build(struct nvfx_context *nvfx, int unit) { struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit]; @@ -116,46 +116,3 @@ nv30_fragtex_build(struct nvfx_context *nvfx, int unit) return so; } -static boolean -nv30_fragtex_validate(struct nvfx_context *nvfx) -{ - struct nvfx_fragment_program *fp = nvfx->fragprog; - struct nvfx_state *state = &nvfx->state; - struct nouveau_stateobj *so; - unsigned samplers, unit; - - samplers = state->fp_samplers & ~fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - so = so_new(1, 1, 0); - so_method(so, nvfx->screen->eng3d, NV34TCL_TX_ENABLE(unit), 1); - so_data (so, 0); - so_ref(so, &nvfx->state.hw[NVFX_STATE_FRAGTEX0 + unit]); - so_ref(NULL, &so); - state->dirty |= (1ULL << (NVFX_STATE_FRAGTEX0 + unit)); - } - - samplers = nvfx->dirty_samplers & fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - so = nv30_fragtex_build(nvfx, unit); - so_ref(so, &nvfx->state.hw[NVFX_STATE_FRAGTEX0 + unit]); - so_ref(NULL, &so); - state->dirty |= (1ULL << (NVFX_STATE_FRAGTEX0 + unit)); - } - - nvfx->state.fp_samplers = fp->samplers; - return FALSE; -} - -struct nvfx_state_entry nv30_state_fragtex = { - .validate = nv30_fragtex_validate, - .dirty = { - .pipe = NVFX_NEW_SAMPLER | NVFX_NEW_FRAGPROG, - .hw = 0 - } -}; diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index 29257173b82..76150950259 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -59,7 +59,7 @@ nv40_fragtex_format(uint pipe_format) } -static struct nouveau_stateobj * +struct nouveau_stateobj * nv40_fragtex_build(struct nvfx_context *nvfx, int unit) { struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit]; @@ -126,47 +126,3 @@ nv40_fragtex_build(struct nvfx_context *nvfx, int unit) return so; } - -static boolean -nv40_fragtex_validate(struct nvfx_context *nvfx) -{ - struct nvfx_fragment_program *fp = nvfx->fragprog; - struct nvfx_state *state = &nvfx->state; - struct nouveau_stateobj *so; - unsigned samplers, unit; - - samplers = state->fp_samplers & ~fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - so = so_new(1, 1, 0); - so_method(so, nvfx->screen->eng3d, NV34TCL_TX_ENABLE(unit), 1); - so_data (so, 0); - so_ref(so, &nvfx->state.hw[NVFX_STATE_FRAGTEX0 + unit]); - state->dirty |= (1ULL << (NVFX_STATE_FRAGTEX0 + unit)); - } - - samplers = nvfx->dirty_samplers & fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - so = nv40_fragtex_build(nvfx, unit); - so_ref(so, &nvfx->state.hw[NVFX_STATE_FRAGTEX0 + unit]); - so_ref(NULL, &so); - state->dirty |= (1ULL << (NVFX_STATE_FRAGTEX0 + unit)); - } - - nvfx->state.fp_samplers = fp->samplers; - return FALSE; -} - -struct nvfx_state_entry nv40_state_fragtex = { - .validate = nv40_fragtex_validate, - .dirty = { - .pipe = NVFX_NEW_SAMPLER | NVFX_NEW_FRAGPROG, - .hw = 0 - } -}; - diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index aa03a155e32..51fa34cfad2 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -8,6 +8,7 @@ C_SOURCES = \ nvfx_clear.c \ nvfx_draw.c \ nvfx_fragprog.c \ + nvfx_fragtex.c \ nvfx_miptree.c \ nvfx_query.c \ nvfx_screen.c \ diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 0bd37a7ae83..e5389042981 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -184,6 +184,7 @@ struct nvfx_state_entry { extern struct nvfx_state_entry nvfx_state_blend; extern struct nvfx_state_entry nvfx_state_blend_colour; extern struct nvfx_state_entry nvfx_state_fragprog; +extern struct nvfx_state_entry nvfx_state_fragtex; extern struct nvfx_state_entry nvfx_state_framebuffer; extern struct nvfx_state_entry nvfx_state_rasterizer; extern struct nvfx_state_entry nvfx_state_scissor; @@ -220,12 +221,14 @@ extern void nvfx_fragprog_destroy(struct nvfx_context *, /* nv30_fragtex.c */ extern void nv30_init_sampler_functions(struct nvfx_context *nvfx); extern void nv30_fragtex_bind(struct nvfx_context *); -extern struct nvfx_state_entry nv30_state_fragtex; +extern struct nouveau_stateobj * +nv30_fragtex_build(struct nvfx_context *nvfx, int unit); /* nv40_fragtex.c */ extern void nv40_init_sampler_functions(struct nvfx_context *nvfx); extern void nv40_fragtex_bind(struct nvfx_context *); -extern struct nvfx_state_entry nv40_state_fragtex; +extern struct nouveau_stateobj * +nv40_fragtex_build(struct nvfx_context *nvfx, int unit); /* nvfx_state.c */ extern void nvfx_init_state_functions(struct nvfx_context *nvfx); diff --git a/src/gallium/drivers/nvfx/nvfx_fragtex.c b/src/gallium/drivers/nvfx/nvfx_fragtex.c new file mode 100644 index 00000000000..84e4eb10042 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_fragtex.c @@ -0,0 +1,49 @@ +#include "nvfx_context.h" + +static boolean +nvfx_fragtex_validate(struct nvfx_context *nvfx) +{ + struct nvfx_fragment_program *fp = nvfx->fragprog; + struct nvfx_state *state = &nvfx->state; + struct nouveau_stateobj *so; + unsigned samplers, unit; + + samplers = state->fp_samplers & ~fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + so = so_new(1, 1, 0); + so_method(so, nvfx->screen->eng3d, NV34TCL_TX_ENABLE(unit), 1); + so_data (so, 0); + so_ref(so, &nvfx->state.hw[NVFX_STATE_FRAGTEX0 + unit]); + so_ref(NULL, &so); + state->dirty |= (1ULL << (NVFX_STATE_FRAGTEX0 + unit)); + } + + samplers = nvfx->dirty_samplers & fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + if(!nvfx->is_nv4x) + so = nv30_fragtex_build(nvfx, unit); + else + so = nv40_fragtex_build(nvfx, unit); + + so_ref(so, &nvfx->state.hw[NVFX_STATE_FRAGTEX0 + unit]); + so_ref(NULL, &so); + state->dirty |= (1ULL << (NVFX_STATE_FRAGTEX0 + unit)); + } + + nvfx->state.fp_samplers = fp->samplers; + return FALSE; +} + +struct nvfx_state_entry nvfx_state_fragtex = { + .validate = nvfx_fragtex_validate, + .dirty = { + .pipe = NVFX_NEW_SAMPLER | NVFX_NEW_FRAGPROG, + .hw = 0 + } +}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index 9d28b590746..72537388ea4 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -2,14 +2,14 @@ #include "nvfx_state.h" #include "draw/draw_context.h" -#define RENDER_STATES(name, nvxx, vbo) \ -static struct nvfx_state_entry *name##_render_states[] = { \ +#define RENDER_STATES(name, vbo) \ +static struct nvfx_state_entry *name##render_states[] = { \ &nvfx_state_framebuffer, \ &nvfx_state_rasterizer, \ &nvfx_state_scissor, \ &nvfx_state_stipple, \ &nvfx_state_fragprog, \ - &nvxx##_state_fragtex, \ + &nvfx_state_fragtex, \ &nvfx_state_vertprog, \ &nvfx_state_blend, \ &nvfx_state_blend_colour, \ @@ -20,10 +20,8 @@ static struct nvfx_state_entry *name##_render_states[] = { \ NULL \ } -RENDER_STATES(nv30, nv30, vbo); -RENDER_STATES(nv30_swtnl, nv30, vtxfmt); -RENDER_STATES(nv40, nv40, vbo); -RENDER_STATES(nv40_swtnl, nv40, vtxfmt); +RENDER_STATES(, vbo); +RENDER_STATES(swtnl_, vtxfmt); static void nvfx_state_do_validate(struct nvfx_context *nvfx, @@ -126,10 +124,7 @@ nvfx_state_validate(struct nvfx_context *nvfx) nvfx->render_mode = HW; } - if(!nvfx->is_nv4x) - nvfx_state_do_validate(nvfx, nv30_render_states); - else - nvfx_state_do_validate(nvfx, nv40_render_states); + nvfx_state_do_validate(nvfx, render_states); if (nvfx->fallback_swtnl || nvfx->fallback_swrast) return FALSE; @@ -172,10 +167,7 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx) draw_set_vertex_elements(draw, nvfx->vtxelt->num_elements, nvfx->vtxelt->pipe); } - if(!nvfx->is_nv4x) - nvfx_state_do_validate(nvfx, nv30_swtnl_render_states); - else - nvfx_state_do_validate(nvfx, nv40_swtnl_render_states); + nvfx_state_do_validate(nvfx, swtnl_render_states); if (nvfx->fallback_swrast) { NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nvfx->fallback_swrast); -- cgit v1.2.3 From 7d210fa05f286eb19398ac2f8c8f631f6f83c859 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 24 Feb 2010 15:08:48 +0100 Subject: nv30, nv40: partially non-trivially unify sampler state in nv[34]0_state.c Many things, like texture wrap modes and min/mag filters are common. Some others, like annisotropy and lod settings, are not. --- src/gallium/drivers/nv30/Makefile | 3 +- src/gallium/drivers/nv30/nv30_fragtex.c | 30 +++++ src/gallium/drivers/nv30/nv30_state.c | 179 ------------------------------ src/gallium/drivers/nv40/Makefile | 3 +- src/gallium/drivers/nv40/nv40_fragtex.c | 46 ++++++++ src/gallium/drivers/nv40/nv40_state.c | 190 -------------------------------- src/gallium/drivers/nvfx/nvfx_context.h | 10 +- src/gallium/drivers/nvfx/nvfx_state.c | 49 +++++--- src/gallium/drivers/nvfx/nvfx_state.h | 8 -- src/gallium/drivers/nvfx/nvfx_tex.h | 133 ++++++++++++++++++++++ 10 files changed, 252 insertions(+), 399 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_state.c delete mode 100644 src/gallium/drivers/nv40/nv40_state.c create mode 100644 src/gallium/drivers/nvfx/nvfx_tex.h (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index 0a20de9e7a7..70741101d65 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -4,8 +4,7 @@ include $(TOP)/configs/current LIBNAME = nv30 C_SOURCES = \ - nv30_fragtex.c \ - nv30_state.c + nv30_fragtex.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/nvfx diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index 52f7c52a9b1..081362d1509 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -2,6 +2,36 @@ #include "nvfx_context.h" #include "nouveau/nouveau_util.h" +#include "nvfx_tex.h" + +void +nv30_sampler_state_init(struct pipe_context *pipe, + struct nvfx_sampler_state *ps, + const struct pipe_sampler_state *cso) +{ + if (cso->max_anisotropy >= 8) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 4) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_4X; + } else + if (cso->max_anisotropy >= 2) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_2X; + } + + { + float limit; + + limit = CLAMP(cso->lod_bias, -16.0, 15.0); + ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + + limit = CLAMP(cso->max_lod, 0.0, 15.0); + ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/; + + limit = CLAMP(cso->min_lod, 0.0, 15.0); + ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/; + } +} #define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \ { \ diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c deleted file mode 100644 index bf96f7cf353..00000000000 --- a/src/gallium/drivers/nv30/nv30_state.c +++ /dev/null @@ -1,179 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" - -#include "tgsi/tgsi_parse.h" - -#include "nv30_context.h" -#include "nvfx_state.h" - -static INLINE unsigned -wrap_mode(unsigned wrap) { - unsigned ret; - - switch (wrap) { - case PIPE_TEX_WRAP_REPEAT: - ret = NV34TCL_TX_WRAP_S_REPEAT; - break; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT; - break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_CLAMP: - ret = NV34TCL_TX_WRAP_S_CLAMP; - break; -/* case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP; - break;*/ - default: - NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - ret = NV34TCL_TX_WRAP_S_REPEAT; - break; - } - - return ret >> NV34TCL_TX_WRAP_S_SHIFT; -} - -void * -nv30_sampler_state_create(struct pipe_context *pipe, - const struct pipe_sampler_state *cso) -{ - struct nvfx_sampler_state *ps; - uint32_t filter = 0; - - ps = MALLOC(sizeof(struct nvfx_sampler_state)); - - ps->fmt = 0; - /* TODO: Not all RECTs formats have this bit set, bits 15-8 of format - are the tx format to use. We should store normalized coord flag - in sampler state structure, and set appropriate format in - nvxx_fragtex_build() - */ - /*NV34TCL_TX_FORMAT_RECT*/ - /*if (!cso->normalized_coords) { - ps->fmt |= (1<<14) ; - }*/ - - ps->wrap = ((wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) | - (wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) | - (wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT)); - - ps->en = 0; - - if (cso->max_anisotropy >= 8) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_8X; - } else - if (cso->max_anisotropy >= 4) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_4X; - } else - if (cso->max_anisotropy >= 2) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_2X; - } - - switch (cso->mag_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR; - break; - case PIPE_TEX_FILTER_NEAREST: - default: - filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST; - break; - } - - switch (cso->min_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR; - break; - } - break; - case PIPE_TEX_FILTER_NEAREST: - default: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST; - break; - } - break; - } - - ps->filt = filter; - - { - float limit; - - limit = CLAMP(cso->lod_bias, -16.0, 15.0); - ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; - - limit = CLAMP(cso->max_lod, 0.0, 15.0); - ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/; - - limit = CLAMP(cso->min_lod, 0.0, 15.0); - ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/; - } - - if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - switch (cso->compare_func) { - case PIPE_FUNC_NEVER: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NEVER; - break; - case PIPE_FUNC_GREATER: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GREATER; - break; - case PIPE_FUNC_EQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_EQUAL; - break; - case PIPE_FUNC_GEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GEQUAL; - break; - case PIPE_FUNC_LESS: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LESS; - break; - case PIPE_FUNC_NOTEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NOTEQUAL; - break; - case PIPE_FUNC_LEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LEQUAL; - break; - case PIPE_FUNC_ALWAYS: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_ALWAYS; - break; - default: - break; - } - } - - ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | - (float_to_ubyte(cso->border_color[0]) << 16) | - (float_to_ubyte(cso->border_color[1]) << 8) | - (float_to_ubyte(cso->border_color[2]) << 0)); - - return (void *)ps; -} - diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 6e764512cf2..8b69f9432d1 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -4,8 +4,7 @@ include $(TOP)/configs/current LIBNAME = nv40 C_SOURCES = \ - nv40_fragtex.c \ - nv40_state.c + nv40_fragtex.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/nvfx diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index 76150950259..5889b5e40d5 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -1,5 +1,51 @@ #include "util/u_format.h" #include "nvfx_context.h" +#include "nvfx_tex.h" + +void +nv40_sampler_state_init(struct pipe_context *pipe, + struct nvfx_sampler_state *ps, + const struct pipe_sampler_state *cso) +{ + if (cso->max_anisotropy >= 2) { + /* no idea, binary driver sets it, works without it.. meh.. */ + ps->wrap |= (1 << 5); + + if (cso->max_anisotropy >= 16) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X; + } else + if (cso->max_anisotropy >= 12) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X; + } else + if (cso->max_anisotropy >= 10) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X; + } else + if (cso->max_anisotropy >= 8) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 6) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X; + } else + if (cso->max_anisotropy >= 4) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X; + } else { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X; + } + } + + { + float limit; + + limit = CLAMP(cso->lod_bias, -16.0, 15.0); + ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + + limit = CLAMP(cso->max_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 7; + + limit = CLAMP(cso->min_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 19; + } +} #define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw) \ { \ diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c deleted file mode 100644 index 4d4098bc42c..00000000000 --- a/src/gallium/drivers/nv40/nv40_state.c +++ /dev/null @@ -1,190 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" - -#include "draw/draw_context.h" - -#include "tgsi/tgsi_parse.h" - -#include "nv40_context.h" -#include "nvfx_state.h" - -static INLINE unsigned -wrap_mode(unsigned wrap) { - unsigned ret; - - switch (wrap) { - case PIPE_TEX_WRAP_REPEAT: - ret = NV34TCL_TX_WRAP_S_REPEAT; - break; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT; - break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_CLAMP: - ret = NV34TCL_TX_WRAP_S_CLAMP; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP; - break; - default: - NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - ret = NV34TCL_TX_WRAP_S_REPEAT; - break; - } - - return ret >> NV34TCL_TX_WRAP_S_SHIFT; -} - -void * -nv40_sampler_state_create(struct pipe_context *pipe, - const struct pipe_sampler_state *cso) -{ - struct nvfx_sampler_state *ps; - uint32_t filter = 0; - - ps = MALLOC(sizeof(struct nvfx_sampler_state)); - - ps->fmt = 0; - if (!cso->normalized_coords) - ps->fmt |= NV40TCL_TEX_FORMAT_RECT; - - ps->wrap = ((wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) | - (wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) | - (wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT)); - - ps->en = 0; - if (cso->max_anisotropy >= 2) { - /* no idea, binary driver sets it, works without it.. meh.. */ - ps->wrap |= (1 << 5); - - if (cso->max_anisotropy >= 16) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X; - } else - if (cso->max_anisotropy >= 12) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X; - } else - if (cso->max_anisotropy >= 10) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X; - } else - if (cso->max_anisotropy >= 8) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X; - } else - if (cso->max_anisotropy >= 6) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X; - } else - if (cso->max_anisotropy >= 4) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X; - } else { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X; - } - } - - switch (cso->mag_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR; - break; - case PIPE_TEX_FILTER_NEAREST: - default: - filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST; - break; - } - - switch (cso->min_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR; - break; - } - break; - case PIPE_TEX_FILTER_NEAREST: - default: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST; - break; - } - break; - } - - ps->filt = filter; - - { - float limit; - - limit = CLAMP(cso->lod_bias, -16.0, 15.0); - ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; - - limit = CLAMP(cso->max_lod, 0.0, 15.0); - ps->en |= (int)(limit * 256.0) << 7; - - limit = CLAMP(cso->min_lod, 0.0, 15.0); - ps->en |= (int)(limit * 256.0) << 19; - } - - - if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - switch (cso->compare_func) { - case PIPE_FUNC_NEVER: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NEVER; - break; - case PIPE_FUNC_GREATER: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GREATER; - break; - case PIPE_FUNC_EQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_EQUAL; - break; - case PIPE_FUNC_GEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GEQUAL; - break; - case PIPE_FUNC_LESS: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LESS; - break; - case PIPE_FUNC_NOTEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NOTEQUAL; - break; - case PIPE_FUNC_LEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LEQUAL; - break; - case PIPE_FUNC_ALWAYS: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_ALWAYS; - break; - default: - break; - } - } - - ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | - (float_to_ubyte(cso->border_color[0]) << 16) | - (float_to_ubyte(cso->border_color[1]) << 8) | - (float_to_ubyte(cso->border_color[2]) << 0)); - - return (void *)ps; -} - diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index e5389042981..5eed8a560e5 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -219,13 +219,19 @@ extern void nvfx_fragprog_destroy(struct nvfx_context *, struct nvfx_fragment_program *); /* nv30_fragtex.c */ -extern void nv30_init_sampler_functions(struct nvfx_context *nvfx); +extern void +nv30_sampler_state_init(struct pipe_context *pipe, + struct nvfx_sampler_state *ps, + const struct pipe_sampler_state *cso); extern void nv30_fragtex_bind(struct nvfx_context *); extern struct nouveau_stateobj * nv30_fragtex_build(struct nvfx_context *nvfx, int unit); /* nv40_fragtex.c */ -extern void nv40_init_sampler_functions(struct nvfx_context *nvfx); +extern void +nv40_sampler_state_init(struct pipe_context *pipe, + struct nvfx_sampler_state *ps, + const struct pipe_sampler_state *cso); extern void nv40_fragtex_bind(struct nvfx_context *); extern struct nouveau_stateobj * nv40_fragtex_build(struct nvfx_context *nvfx, int unit); diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c index 7e138afc717..88a9d01c509 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.c +++ b/src/gallium/drivers/nvfx/nvfx_state.c @@ -8,6 +8,7 @@ #include "nvfx_context.h" #include "nvfx_state.h" +#include "nvfx_tex.h" static void * nvfx_blend_state_create(struct pipe_context *pipe, @@ -82,8 +83,35 @@ nvfx_blend_state_delete(struct pipe_context *pipe, void *hwcso) FREE(bso); } +static void * +nvfx_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_sampler_state *ps; + + ps = MALLOC(sizeof(struct nvfx_sampler_state)); + + /* on nv30, we use this as an internal flag */ + ps->fmt = cso->normalized_coords ? 0 : NV40TCL_TEX_FORMAT_RECT; + ps->en = 0; + ps->filt = nvfx_tex_filter(cso); + ps->wrap = (nvfx_tex_wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) | + (nvfx_tex_wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) | + (nvfx_tex_wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT) | + nvfx_tex_wrap_compare_mode(cso); + ps->bcol = nvfx_tex_border_color(cso->border_color); + + if(nvfx->is_nv4x) + nv40_sampler_state_init(pipe, ps, cso); + else + nv30_sampler_state_init(pipe, ps, cso); + + return (void *)ps; +} + static void -nv40_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +nvfx_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) { struct nvfx_context *nvfx = nvfx_context(pipe); unsigned unit; @@ -103,7 +131,7 @@ nv40_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) } static void -nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +nvfx_sampler_state_delete(struct pipe_context *pipe, void *hwcso) { FREE(hwcso); } @@ -543,14 +571,6 @@ nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso) /*nvfx->draw_dirty |= NVFX_NEW_ARRAYS;*/ } -void * -nv30_sampler_state_create(struct pipe_context *pipe, - const struct pipe_sampler_state *cso); - -void * -nv40_sampler_state_create(struct pipe_context *pipe, - const struct pipe_sampler_state *cso); - void nvfx_init_state_functions(struct nvfx_context *nvfx) { @@ -558,12 +578,9 @@ nvfx_init_state_functions(struct nvfx_context *nvfx) nvfx->pipe.bind_blend_state = nvfx_blend_state_bind; nvfx->pipe.delete_blend_state = nvfx_blend_state_delete; - if(nvfx->is_nv4x) - nvfx->pipe.create_sampler_state = nv40_sampler_state_create; - else - nvfx->pipe.create_sampler_state = nv30_sampler_state_create; - nvfx->pipe.bind_fragment_sampler_states = nv40_sampler_state_bind; - nvfx->pipe.delete_sampler_state = nv40_sampler_state_delete; + nvfx->pipe.create_sampler_state = nvfx_sampler_state_create; + nvfx->pipe.bind_fragment_sampler_states = nvfx_sampler_state_bind; + nvfx->pipe.delete_sampler_state = nvfx_sampler_state_delete; nvfx->pipe.set_fragment_sampler_textures = nvfx_set_sampler_texture; nvfx->pipe.create_rasterizer_state = nvfx_rasterizer_state_create; diff --git a/src/gallium/drivers/nvfx/nvfx_state.h b/src/gallium/drivers/nvfx/nvfx_state.h index b243b1020fb..e585246879b 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.h +++ b/src/gallium/drivers/nvfx/nvfx_state.h @@ -4,14 +4,6 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" -struct nvfx_sampler_state { - uint32_t fmt; - uint32_t wrap; - uint32_t en; - uint32_t filt; - uint32_t bcol; -}; - struct nvfx_vertex_program_exec { uint32_t data[4]; boolean has_branch_offset; diff --git a/src/gallium/drivers/nvfx/nvfx_tex.h b/src/gallium/drivers/nvfx/nvfx_tex.h new file mode 100644 index 00000000000..69187a79e79 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_tex.h @@ -0,0 +1,133 @@ +#ifndef NVFX_TEX_H_ +#define NVFX_TEX_H_ + +static inline unsigned +nvfx_tex_wrap_mode(unsigned wrap) { + unsigned ret; + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + ret = NV34TCL_TX_WRAP_S_REPEAT; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_CLAMP: + ret = NV34TCL_TX_WRAP_S_CLAMP; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP; + break; + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + ret = NV34TCL_TX_WRAP_S_REPEAT; + break; + } + + return ret >> NV34TCL_TX_WRAP_S_SHIFT; +} + +static inline unsigned +nvfx_tex_wrap_compare_mode(const struct pipe_sampler_state* cso) +{ + if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + switch (cso->compare_func) { + case PIPE_FUNC_NEVER: + return NV34TCL_TX_WRAP_RCOMP_NEVER; + case PIPE_FUNC_GREATER: + return NV34TCL_TX_WRAP_RCOMP_GREATER; + case PIPE_FUNC_EQUAL: + return NV34TCL_TX_WRAP_RCOMP_EQUAL; + case PIPE_FUNC_GEQUAL: + return NV34TCL_TX_WRAP_RCOMP_GEQUAL; + case PIPE_FUNC_LESS: + return NV34TCL_TX_WRAP_RCOMP_LESS; + case PIPE_FUNC_NOTEQUAL: + return NV34TCL_TX_WRAP_RCOMP_NOTEQUAL; + case PIPE_FUNC_LEQUAL: + return NV34TCL_TX_WRAP_RCOMP_LEQUAL; + case PIPE_FUNC_ALWAYS: + return NV34TCL_TX_WRAP_RCOMP_ALWAYS; + default: + break; + } + } + return 0; +} + +static inline unsigned nvfx_tex_filter(const struct pipe_sampler_state* cso) +{ + unsigned filter = 0; + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR; + break; + } + break; + case PIPE_TEX_FILTER_NEAREST: + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST; + break; + } + break; + } + return filter; +} + +static inline unsigned nvfx_tex_border_color(const float* border_color) +{ + return ((float_to_ubyte(border_color[3]) << 24) | + (float_to_ubyte(border_color[0]) << 16) | + (float_to_ubyte(border_color[1]) << 8) | + (float_to_ubyte(border_color[2]) << 0)); +} + +struct nvfx_sampler_state { + uint32_t fmt; + uint32_t wrap; + uint32_t en; + uint32_t filt; + uint32_t bcol; +}; + +#endif /* NVFX_TEX_H_ */ -- cgit v1.2.3 From f9d09a2e7859a2cf025d71b7c3cb189edb6688c4 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 21 Feb 2010 15:13:12 +0100 Subject: nv30, nv40: move last files to nvfx/ and rm -rf nv30 nv40 This is the last nvfx unification patch. nv[34]0_fragtex.c are moved to the common directory nv[34]0_shader.h are renamed to nv[34]0_vertprog.h and moved to the common directory The separate nv30 and nv40 directories are removed from the build system --- configure.ac | 2 +- src/gallium/drivers/nv30/Makefile | 11 -- src/gallium/drivers/nv30/nv30_fragtex.c | 148 ---------------------- src/gallium/drivers/nv30/nv30_shader.h | 169 ------------------------- src/gallium/drivers/nv40/Makefile | 11 -- src/gallium/drivers/nv40/nv40_fragtex.c | 174 -------------------------- src/gallium/drivers/nv40/nv40_shader.h | 178 --------------------------- src/gallium/drivers/nvfx/Makefile | 2 + src/gallium/drivers/nvfx/nv30_fragtex.c | 147 ++++++++++++++++++++++ src/gallium/drivers/nvfx/nv30_vertprog.h | 169 +++++++++++++++++++++++++ src/gallium/drivers/nvfx/nv40_fragtex.c | 174 ++++++++++++++++++++++++++ src/gallium/drivers/nvfx/nv40_vertprog.h | 177 ++++++++++++++++++++++++++ src/gallium/drivers/nvfx/nvfx_draw.c | 4 +- src/gallium/drivers/nvfx/nvfx_vertprog.c | 4 +- src/gallium/winsys/drm/nouveau/dri/Makefile | 8 -- src/gallium/winsys/drm/nouveau/egl/Makefile | 10 +- src/gallium/winsys/drm/nouveau/xorg/Makefile | 10 +- 17 files changed, 676 insertions(+), 722 deletions(-) delete mode 100644 src/gallium/drivers/nv30/Makefile delete mode 100644 src/gallium/drivers/nv30/nv30_fragtex.c delete mode 100644 src/gallium/drivers/nv30/nv30_shader.h delete mode 100644 src/gallium/drivers/nv40/Makefile delete mode 100644 src/gallium/drivers/nv40/nv40_fragtex.c delete mode 100644 src/gallium/drivers/nv40/nv40_shader.h create mode 100644 src/gallium/drivers/nvfx/nv30_fragtex.c create mode 100644 src/gallium/drivers/nvfx/nv30_vertprog.h create mode 100644 src/gallium/drivers/nvfx/nv40_fragtex.c create mode 100644 src/gallium/drivers/nvfx/nv40_vertprog.h (limited to 'src/gallium/drivers/nvfx') diff --git a/configure.ac b/configure.ac index eb271e9d515..0f51097ef62 100644 --- a/configure.ac +++ b/configure.ac @@ -1360,7 +1360,7 @@ AC_ARG_ENABLE([gallium-nouveau], [enable_gallium_nouveau=no]) if test "x$enable_gallium_nouveau" = xyes; then GALLIUM_WINSYS_DRM_DIRS="$GALLIUM_WINSYS_DRM_DIRS nouveau" - GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nvfx nv30 nv40 nv50" + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nvfx nv50" fi dnl diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile deleted file mode 100644 index 70741101d65..00000000000 --- a/src/gallium/drivers/nv30/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = nv30 - -C_SOURCES = \ - nv30_fragtex.c - -LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/nvfx - -include ../../Makefile.template diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c deleted file mode 100644 index 081362d1509..00000000000 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ /dev/null @@ -1,148 +0,0 @@ -#include "util/u_format.h" - -#include "nvfx_context.h" -#include "nouveau/nouveau_util.h" -#include "nvfx_tex.h" - -void -nv30_sampler_state_init(struct pipe_context *pipe, - struct nvfx_sampler_state *ps, - const struct pipe_sampler_state *cso) -{ - if (cso->max_anisotropy >= 8) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_8X; - } else - if (cso->max_anisotropy >= 4) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_4X; - } else - if (cso->max_anisotropy >= 2) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_2X; - } - - { - float limit; - - limit = CLAMP(cso->lod_bias, -16.0, 15.0); - ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; - - limit = CLAMP(cso->max_lod, 0.0, 15.0); - ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/; - - limit = CLAMP(cso->min_lod, 0.0, 15.0); - ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/; - } -} - -#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \ -{ \ - TRUE, \ - PIPE_FORMAT_##m, \ - NV34TCL_TX_FORMAT_FORMAT_##tf, \ - (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \ - NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \ - NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \ - NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w) \ -} - -struct nv30_texture_format { - boolean defined; - uint pipe; - int format; - int swizzle; -}; - -static struct nv30_texture_format -nv30_texture_formats[] = { - _(B8G8R8X8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W), - _(B8G8R8A8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W), - _(B5G5R5A1_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W), - _(B4G4R4A4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W), - _(B5G6R5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W), - _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X), - _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), - _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X), - _(L8A8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y), - _(Z16_UNORM , R5G6B5 , S1, S1, S1, ONE, X, X, X, X), - _(S8Z24_UNORM , A8R8G8B8, S1, S1, S1, ONE, X, X, X, X), - _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W), - _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W), - _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W), - _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W), - {}, -}; - -static struct nv30_texture_format * -nv30_fragtex_format(uint pipe_format) -{ - struct nv30_texture_format *tf = nv30_texture_formats; - - while (tf->defined) { - if (tf->pipe == pipe_format) - return tf; - tf++; - } - - NOUVEAU_ERR("unknown texture format %s\n", util_format_name(pipe_format)); - return NULL; -} - - -struct nouveau_stateobj * -nv30_fragtex_build(struct nvfx_context *nvfx, int unit) -{ - struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit]; - struct nvfx_miptree *nv30mt = nvfx->tex_miptree[unit]; - struct pipe_texture *pt = &nv30mt->base; - struct nouveau_bo *bo = nouveau_bo(nv30mt->buffer); - struct nv30_texture_format *tf; - struct nouveau_stateobj *so; - uint32_t txf, txs; - unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; - - tf = nv30_fragtex_format(pt->format); - if (!tf) - return NULL; - - txf = tf->format; - txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0); - txf |= log2i(pt->width0) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT; - txf |= log2i(pt->height0) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT; - txf |= log2i(pt->depth0) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT; - txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000; - - switch (pt->target) { - case PIPE_TEXTURE_CUBE: - txf |= NV34TCL_TX_FORMAT_CUBIC; - /* fall-through */ - case PIPE_TEXTURE_2D: - txf |= NV34TCL_TX_FORMAT_DIMS_2D; - break; - case PIPE_TEXTURE_3D: - txf |= NV34TCL_TX_FORMAT_DIMS_3D; - break; - case PIPE_TEXTURE_1D: - txf |= NV34TCL_TX_FORMAT_DIMS_1D; - break; - default: - NOUVEAU_ERR("Unknown target %d\n", pt->target); - return NULL; - } - - txs = tf->swizzle; - - so = so_new(1, 8, 2); - so_method(so, nvfx->screen->eng3d, NV34TCL_TX_OFFSET(unit), 8); - so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); - so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, - NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); - so_data (so, ps->wrap); - so_data (so, NV34TCL_TX_ENABLE_ENABLE | ps->en); - so_data (so, txs); - so_data (so, ps->filt | 0x2000 /*voodoo*/); - so_data (so, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | - pt->height0); - so_data (so, ps->bcol); - - return so; -} - diff --git a/src/gallium/drivers/nv30/nv30_shader.h b/src/gallium/drivers/nv30/nv30_shader.h deleted file mode 100644 index f19efb5aa4d..00000000000 --- a/src/gallium/drivers/nv30/nv30_shader.h +++ /dev/null @@ -1,169 +0,0 @@ -#ifndef __NV30_SHADER_H__ -#define __NV30_SHADER_H__ - -/* Vertex programs instruction set - * - * 128bit opcodes, split into 4 32-bit ones for ease of use. - * - * Non-native instructions - * ABS - MOV + NV40_VP_INST0_DEST_ABS - * POW - EX2 + MUL + LG2 - * SUB - ADD, second source negated - * SWZ - MOV - * XPD - - * - * Register access - * - Only one INPUT can be accessed per-instruction (move extras into TEMPs) - * - Only one CONST can be accessed per-instruction (move extras into TEMPs) - * - * Relative Addressing - * According to the value returned for - * MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB - * - * there are only two address registers available. The destination in the - * ARL instruction is set to TEMP (The temp isn't actually written). - * - * When using vanilla ARB_v_p, the proprietary driver will squish both the - * available ADDRESS regs into the first hardware reg in the X and Y - * components. - * - * To use an address reg as an index into consts, the CONST_SRC is set to - * (const_base + offset) and INDEX_CONST is set. - * - * To access the second address reg use ADDR_REG_SELECT_1. A particular - * component of the address regs is selected with ADDR_SWZ. - * - * Only one address register can be accessed per instruction. - * - * Conditional execution (see NV_vertex_program{2,3} for details) Conditional - * execution of an instruction is enabled by setting COND_TEST_ENABLE, and - * selecting the condition which will allow the test to pass with - * COND_{FL,LT,...}. It is possible to swizzle the values in the condition - * register, which allows for testing against an individual component. - * - * Branching: - * - * The BRA/CAL instructions seem to follow a slightly different opcode - * layout. The destination instruction ID (IADDR) overlaps a source field. - * Instruction ID's seem to be numbered based on the UPLOAD_FROM_ID FIFO - * command, and is incremented automatically on each UPLOAD_INST FIFO - * command. - * - * Conditional branching is achieved by using the condition tests described - * above. There doesn't appear to be dedicated looping instructions, but - * this can be done using a temp reg + conditional branching. - * - * Subroutines may be uploaded before the main program itself, but the first - * executed instruction is determined by the PROGRAM_START_ID FIFO command. - * - */ - -/* DWORD 0 */ - -#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24) -#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */ -#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */ -#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */ -#define NV30_VP_INST_VEC_RESULT (1 << 20) -#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16 -#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16) -#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15) -#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0xF << 16) -#define NV30_VP_INST_COND_TEST_ENABLE (1<<14) -#define NV30_VP_INST_COND_SHIFT 11 -#define NV30_VP_INST_COND_MASK (0x07 << 11) -#define NV30_VP_INST_COND_SWZ_X_SHIFT 9 -#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9) -#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7 -#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7) -#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5 -#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5) -#define NV30_VP_INST_COND_SWZ_W_SHIFT 3 -#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3) -#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3 -#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3) -#define NV30_VP_INST_ADDR_SWZ_SHIFT 1 -#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1) -#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0 -#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0) - -/* DWORD 1 */ -#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28 -#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28) -#define NV30_VP_INST_VEC_OPCODE_SHIFT 23 -#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23) -#define NV30_VP_INST_CONST_SRC_SHIFT 14 -#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14) -#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/ -#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/ -#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/ -#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/ - -/* Please note: the IADDR fields overlap other fields because they are used - * only for branch instructions. See Branching: label above - * - * DWORD 2 - */ -#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/ -#define NV30_VP_INST_SRC0L_MASK (0x3F <<26) /* NV30_VP_SRC0_LOW_MASK << 26 */ -#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/ -#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/ -#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/ -#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/ -#define NV30_VP_INST_IADDR_SHIFT 2 -#define NV30_VP_INST_IADDR_MASK (0xF << 28) /* NV30_VP_SRC2_LOW_MASK << 28 */ - -/* DWORD 3 */ -#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/ -#define NV30_VP_INST_SRC2L_MASK (0x0F <<28) /*NV20*/ -#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24 -#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24) -#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20 -#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20) -#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16 -#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16) -#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/ -#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/ -#define NV30_VP_INST_DEST_SHIFT 2 -#define NV30_VP_INST_DEST_MASK (0x0F << 2) -# define NV30_VP_INST_DEST_POS 0 -# define NV30_VP_INST_DEST_BFC0 1 -# define NV30_VP_INST_DEST_BFC1 2 -# define NV30_VP_INST_DEST_COL0 3 -# define NV30_VP_INST_DEST_COL1 4 -# define NV30_VP_INST_DEST_FOGC 5 -# define NV30_VP_INST_DEST_PSZ 6 -# define NV30_VP_INST_DEST_TC(n) (8+n) - -/* Useful to split the source selection regs into their pieces */ -#define NV30_VP_SRC0_HIGH_SHIFT 6 -#define NV30_VP_SRC0_HIGH_MASK 0x00007FC0 -#define NV30_VP_SRC0_LOW_MASK 0x0000003F -#define NV30_VP_SRC2_HIGH_SHIFT 4 -#define NV30_VP_SRC2_HIGH_MASK 0x00007FF0 -#define NV30_VP_SRC2_LOW_MASK 0x0000000F - - -/* Source-register definition - matches NV20 exactly */ -#define NV30_VP_SRC_NEGATE (1<<14) -#define NV30_VP_SRC_SWZ_X_SHIFT 12 -#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12) -#define NV30_VP_SRC_SWZ_Y_SHIFT 10 -#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10) -#define NV30_VP_SRC_SWZ_Z_SHIFT 8 -#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8) -#define NV30_VP_SRC_SWZ_W_SHIFT 6 -#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6) -#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6 -#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6) -#define NV30_VP_SRC_TEMP_SRC_SHIFT 2 -#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0) -#define NV30_VP_SRC_REG_TYPE_SHIFT 0 -#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0) -#define NV30_VP_SRC_REG_TYPE_TEMP 1 -#define NV30_VP_SRC_REG_TYPE_INPUT 2 -#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */ - -#include "nvfx_shader.h" - -#endif diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile deleted file mode 100644 index 8b69f9432d1..00000000000 --- a/src/gallium/drivers/nv40/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = nv40 - -C_SOURCES = \ - nv40_fragtex.c - -LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/nvfx - -include ../../Makefile.template diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c deleted file mode 100644 index 5889b5e40d5..00000000000 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ /dev/null @@ -1,174 +0,0 @@ -#include "util/u_format.h" -#include "nvfx_context.h" -#include "nvfx_tex.h" - -void -nv40_sampler_state_init(struct pipe_context *pipe, - struct nvfx_sampler_state *ps, - const struct pipe_sampler_state *cso) -{ - if (cso->max_anisotropy >= 2) { - /* no idea, binary driver sets it, works without it.. meh.. */ - ps->wrap |= (1 << 5); - - if (cso->max_anisotropy >= 16) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X; - } else - if (cso->max_anisotropy >= 12) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X; - } else - if (cso->max_anisotropy >= 10) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X; - } else - if (cso->max_anisotropy >= 8) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X; - } else - if (cso->max_anisotropy >= 6) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X; - } else - if (cso->max_anisotropy >= 4) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X; - } else { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X; - } - } - - { - float limit; - - limit = CLAMP(cso->lod_bias, -16.0, 15.0); - ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; - - limit = CLAMP(cso->max_lod, 0.0, 15.0); - ps->en |= (int)(limit * 256.0) << 7; - - limit = CLAMP(cso->min_lod, 0.0, 15.0); - ps->en |= (int)(limit * 256.0) << 19; - } -} - -#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw) \ -{ \ - TRUE, \ - PIPE_FORMAT_##m, \ - NV40TCL_TEX_FORMAT_FORMAT_##tf, \ - (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \ - NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \ - NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \ - NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w), \ - ((NV34TCL_TX_FILTER_SIGNED_RED*sx) | (NV34TCL_TX_FILTER_SIGNED_GREEN*sy) | \ - (NV34TCL_TX_FILTER_SIGNED_BLUE*sz) | (NV34TCL_TX_FILTER_SIGNED_ALPHA*sw)) \ -} - -struct nv40_texture_format { - boolean defined; - uint pipe; - int format; - int swizzle; - int sign; -}; - -static struct nv40_texture_format -nv40_texture_formats[] = { - _(B8G8R8X8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), - _(B8G8R8A8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), - _(B5G5R5A1_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), - _(B4G4R4A4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), - _(B5G6R5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), - _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), - _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X, 0, 0, 0, 0), - _(R16_SNORM , A16 , ZERO, ZERO, S1, ONE, X, X, X, Y, 1, 1, 1, 1), - _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X, 0, 0, 0, 0), - _(L8A8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y, 0, 0, 0, 0), - _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), - _(S8Z24_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), - _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), - _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), - _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), - _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), - {}, -}; - -static struct nv40_texture_format * -nv40_fragtex_format(uint pipe_format) -{ - struct nv40_texture_format *tf = nv40_texture_formats; - - while (tf->defined) { - if (tf->pipe == pipe_format) - return tf; - tf++; - } - - NOUVEAU_ERR("unknown texture format %s\n", util_format_name(pipe_format)); - return NULL; -} - - -struct nouveau_stateobj * -nv40_fragtex_build(struct nvfx_context *nvfx, int unit) -{ - struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit]; - struct nvfx_miptree *nv40mt = nvfx->tex_miptree[unit]; - struct nouveau_bo *bo = nouveau_bo(nv40mt->buffer); - struct pipe_texture *pt = &nv40mt->base; - struct nv40_texture_format *tf; - struct nouveau_stateobj *so; - uint32_t txf, txs, txp; - unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; - - tf = nv40_fragtex_format(pt->format); - if (!tf) - assert(0); - - txf = ps->fmt; - txf |= tf->format | 0x8000; - txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT); - - if (1) /* XXX */ - txf |= NV34TCL_TX_FORMAT_NO_BORDER; - - switch (pt->target) { - case PIPE_TEXTURE_CUBE: - txf |= NV34TCL_TX_FORMAT_CUBIC; - /* fall-through */ - case PIPE_TEXTURE_2D: - txf |= NV34TCL_TX_FORMAT_DIMS_2D; - break; - case PIPE_TEXTURE_3D: - txf |= NV34TCL_TX_FORMAT_DIMS_3D; - break; - case PIPE_TEXTURE_1D: - txf |= NV34TCL_TX_FORMAT_DIMS_1D; - break; - default: - NOUVEAU_ERR("Unknown target %d\n", pt->target); - return NULL; - } - - if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { - txp = 0; - } else { - txp = nv40mt->level[0].pitch; - txf |= NV40TCL_TEX_FORMAT_LINEAR; - } - - txs = tf->swizzle; - - so = so_new(2, 9, 2); - so_method(so, nvfx->screen->eng3d, NV34TCL_TX_OFFSET(unit), 8); - so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); - so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, - NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); - so_data (so, ps->wrap); - so_data (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en); - so_data (so, txs); - so_data (so, ps->filt | tf->sign | 0x2000 /*voodoo*/); - so_data (so, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | - pt->height0); - so_data (so, ps->bcol); - so_method(so, nvfx->screen->eng3d, NV40TCL_TEX_SIZE1(unit), 1); - so_data (so, (pt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); - - return so; -} diff --git a/src/gallium/drivers/nv40/nv40_shader.h b/src/gallium/drivers/nv40/nv40_shader.h deleted file mode 100644 index 8d28137e9de..00000000000 --- a/src/gallium/drivers/nv40/nv40_shader.h +++ /dev/null @@ -1,178 +0,0 @@ -#ifndef __NV40_SHADER_H__ -#define __NV40_SHADER_H__ - -/* Vertex programs instruction set - * - * The NV40 instruction set is very similar to NV30. Most fields are in - * a slightly different position in the instruction however. - * - * Merged instructions - * In some cases it is possible to put two instructions into one opcode - * slot. The rules for when this is OK is not entirely clear to me yet. - * - * There are separate writemasks and dest temp register fields for each - * grouping of instructions. There is however only one field with the - * ID of a result register. Writing to temp/result regs is selected by - * setting VEC_RESULT/SCA_RESULT. - * - * Temporary registers - * The source/dest temp register fields have been extended by 1 bit, to - * give a total of 32 temporary registers. - * - * Relative Addressing - * NV40 can use an address register to index into vertex attribute regs. - * This is done by putting the offset value into INPUT_SRC and setting - * the INDEX_INPUT flag. - * - * Conditional execution (see NV_vertex_program{2,3} for details) - * There is a second condition code register on NV40, it's use is enabled - * by setting the COND_REG_SELECT_1 flag. - * - * Texture lookup - * TODO - */ - -/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */ -#define NV40_VP_INST_VEC_RESULT (1 << 30) -/* uncertain.. */ -#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29) -/* use address reg as index into attribs */ -#define NV40_VP_INST_INDEX_INPUT (1 << 27) -#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25) -#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24) -#define NV40_VP_INST_SRC2_ABS (1 << 23) -#define NV40_VP_INST_SRC1_ABS (1 << 22) -#define NV40_VP_INST_SRC0_ABS (1 << 21) -#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15 -#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15) -#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13) -#define NV40_VP_INST_COND_SHIFT 10 -#define NV40_VP_INST_COND_MASK (0x7 << 10) -#define NV40_VP_INST_COND_SWZ_X_SHIFT 8 -#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8) -#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6 -#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6) -#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4 -#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4) -#define NV40_VP_INST_COND_SWZ_W_SHIFT 2 -#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2) -#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2 -#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2) -#define NV40_VP_INST_ADDR_SWZ_SHIFT 0 -#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0) -#define NV40_VP_INST0_KNOWN ( \ - NV40_VP_INST_INDEX_INPUT | \ - NV40_VP_INST_COND_REG_SELECT_1 | \ - NV40_VP_INST_ADDR_REG_SELECT_1 | \ - NV40_VP_INST_SRC2_ABS | \ - NV40_VP_INST_SRC1_ABS | \ - NV40_VP_INST_SRC0_ABS | \ - NV40_VP_INST_VEC_DEST_TEMP_MASK | \ - NV40_VP_INST_COND_TEST_ENABLE | \ - NV40_VP_INST_COND_MASK | \ - NV40_VP_INST_COND_SWZ_ALL_MASK | \ - NV40_VP_INST_ADDR_SWZ_MASK) - -/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */ -#define NV40_VP_INST_VEC_OPCODE_SHIFT 22 -#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22) -#define NV40_VP_INST_SCA_OPCODE_SHIFT 27 -#define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27) -#define NV40_VP_INST_CONST_SRC_SHIFT 12 -#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12) -#define NV40_VP_INST_INPUT_SRC_SHIFT 8 -#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8) -#define NV40_VP_INST_SRC0H_SHIFT 0 -#define NV40_VP_INST_SRC0H_MASK (0xFF << 0) -#define NV40_VP_INST1_KNOWN ( \ - NV40_VP_INST_VEC_OPCODE_MASK | \ - NV40_VP_INST_SCA_OPCODE_MASK | \ - NV40_VP_INST_CONST_SRC_MASK | \ - NV40_VP_INST_INPUT_SRC_MASK | \ - NV40_VP_INST_SRC0H_MASK \ - ) - -/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */ -#define NV40_VP_INST_SRC0L_SHIFT 23 -#define NV40_VP_INST_SRC0L_MASK (0x1FF << 23) -#define NV40_VP_INST_SRC1_SHIFT 6 -#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6) -#define NV40_VP_INST_SRC2H_SHIFT 0 -#define NV40_VP_INST_SRC2H_MASK (0x3F << 0) -#define NV40_VP_INST_IADDRH_SHIFT 0 -#define NV40_VP_INST_IADDRH_MASK (0x1F << 0) - -/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */ -#define NV40_VP_INST_IADDRL_SHIFT 29 -#define NV40_VP_INST_IADDRL_MASK (7 << 29) -#define NV40_VP_INST_SRC2L_SHIFT 21 -#define NV40_VP_INST_SRC2L_MASK (0x7FF << 21) -#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17 -#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17) -# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20) -# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19) -# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18) -# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17) -#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13 -#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13) -# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16) -# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15) -# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14) -# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13) -#define NV40_VP_INST_SCA_RESULT (1 << 12) -#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7 -#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7) -#define NV40_VP_INST_DEST_SHIFT 2 -#define NV40_VP_INST_DEST_MASK (31 << 2) -# define NV40_VP_INST_DEST_POS 0 -# define NV40_VP_INST_DEST_COL0 1 -# define NV40_VP_INST_DEST_COL1 2 -# define NV40_VP_INST_DEST_BFC0 3 -# define NV40_VP_INST_DEST_BFC1 4 -# define NV40_VP_INST_DEST_FOGC 5 -# define NV40_VP_INST_DEST_PSZ 6 -# define NV40_VP_INST_DEST_TC0 7 -# define NV40_VP_INST_DEST_TC(n) (7+n) -# define NV40_VP_INST_DEST_TEMP 0x1F -#define NV40_VP_INST_INDEX_CONST (1 << 1) -#define NV40_VP_INST3_KNOWN ( \ - NV40_VP_INST_SRC2L_MASK |\ - NV40_VP_INST_SCA_WRITEMASK_MASK |\ - NV40_VP_INST_VEC_WRITEMASK_MASK |\ - NV40_VP_INST_SCA_DEST_TEMP_MASK |\ - NV40_VP_INST_DEST_MASK |\ - NV40_VP_INST_INDEX_CONST) - -/* Useful to split the source selection regs into their pieces */ -#define NV40_VP_SRC0_HIGH_SHIFT 9 -#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00 -#define NV40_VP_SRC0_LOW_MASK 0x000001FF -#define NV40_VP_SRC2_HIGH_SHIFT 11 -#define NV40_VP_SRC2_HIGH_MASK 0x0001F800 -#define NV40_VP_SRC2_LOW_MASK 0x000007FF - -/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */ -#define NV40_VP_SRC_NEGATE (1 << 16) -#define NV40_VP_SRC_SWZ_X_SHIFT 14 -#define NV40_VP_SRC_SWZ_X_MASK (3 << 14) -#define NV40_VP_SRC_SWZ_Y_SHIFT 12 -#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12) -#define NV40_VP_SRC_SWZ_Z_SHIFT 10 -#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10) -#define NV40_VP_SRC_SWZ_W_SHIFT 8 -#define NV40_VP_SRC_SWZ_W_MASK (3 << 8) -#define NV40_VP_SRC_SWZ_ALL_SHIFT 8 -#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8) -#define NV40_VP_SRC_TEMP_SRC_SHIFT 2 -#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2) -#define NV40_VP_SRC_REG_TYPE_SHIFT 0 -#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0) -# define NV40_VP_SRC_REG_TYPE_UNK0 0 -# define NV40_VP_SRC_REG_TYPE_TEMP 1 -# define NV40_VP_SRC_REG_TYPE_INPUT 2 -# define NV40_VP_SRC_REG_TYPE_CONST 3 - -#include "nvfx_shader.h" - -#endif - diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index 51fa34cfad2..e912177b211 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -9,6 +9,8 @@ C_SOURCES = \ nvfx_draw.c \ nvfx_fragprog.c \ nvfx_fragtex.c \ + nv30_fragtex.c \ + nv40_fragtex.c \ nvfx_miptree.c \ nvfx_query.c \ nvfx_screen.c \ diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c new file mode 100644 index 00000000000..2b56f454921 --- /dev/null +++ b/src/gallium/drivers/nvfx/nv30_fragtex.c @@ -0,0 +1,147 @@ +#include "util/u_format.h" + +#include "nvfx_context.h" +#include "nouveau/nouveau_util.h" +#include "nvfx_tex.h" + +void +nv30_sampler_state_init(struct pipe_context *pipe, + struct nvfx_sampler_state *ps, + const struct pipe_sampler_state *cso) +{ + if (cso->max_anisotropy >= 8) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 4) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_4X; + } else + if (cso->max_anisotropy >= 2) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_2X; + } + + { + float limit; + + limit = CLAMP(cso->lod_bias, -16.0, 15.0); + ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + + limit = CLAMP(cso->max_lod, 0.0, 15.0); + ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/; + + limit = CLAMP(cso->min_lod, 0.0, 15.0); + ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/; + } +} + +#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \ +{ \ + TRUE, \ + PIPE_FORMAT_##m, \ + NV34TCL_TX_FORMAT_FORMAT_##tf, \ + (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \ + NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \ + NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \ + NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w) \ +} + +struct nv30_texture_format { + boolean defined; + uint pipe; + int format; + int swizzle; +}; + +static struct nv30_texture_format +nv30_texture_formats[] = { + _(B8G8R8X8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W), + _(B8G8R8A8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W), + _(B5G5R5A1_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W), + _(B4G4R4A4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W), + _(B5G6R5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W), + _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X), + _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), + _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X), + _(L8A8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y), + _(Z16_UNORM , R5G6B5 , S1, S1, S1, ONE, X, X, X, X), + _(S8Z24_UNORM , A8R8G8B8, S1, S1, S1, ONE, X, X, X, X), + _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W), + _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W), + _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W), + _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W), + {}, +}; + +static struct nv30_texture_format * +nv30_fragtex_format(uint pipe_format) +{ + struct nv30_texture_format *tf = nv30_texture_formats; + + while (tf->defined) { + if (tf->pipe == pipe_format) + return tf; + tf++; + } + + NOUVEAU_ERR("unknown texture format %s\n", util_format_name(pipe_format)); + return NULL; +} + + +struct nouveau_stateobj * +nv30_fragtex_build(struct nvfx_context *nvfx, int unit) +{ + struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit]; + struct nvfx_miptree *nv30mt = nvfx->tex_miptree[unit]; + struct pipe_texture *pt = &nv30mt->base; + struct nouveau_bo *bo = nouveau_bo(nv30mt->buffer); + struct nv30_texture_format *tf; + struct nouveau_stateobj *so; + uint32_t txf, txs; + unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + + tf = nv30_fragtex_format(pt->format); + if (!tf) + return NULL; + + txf = tf->format; + txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0); + txf |= log2i(pt->width0) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT; + txf |= log2i(pt->height0) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT; + txf |= log2i(pt->depth0) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT; + txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: + txf |= NV34TCL_TX_FORMAT_CUBIC; + /* fall-through */ + case PIPE_TEXTURE_2D: + txf |= NV34TCL_TX_FORMAT_DIMS_2D; + break; + case PIPE_TEXTURE_3D: + txf |= NV34TCL_TX_FORMAT_DIMS_3D; + break; + case PIPE_TEXTURE_1D: + txf |= NV34TCL_TX_FORMAT_DIMS_1D; + break; + default: + NOUVEAU_ERR("Unknown target %d\n", pt->target); + return NULL; + } + + txs = tf->swizzle; + + so = so_new(1, 8, 2); + so_method(so, nvfx->screen->eng3d, NV34TCL_TX_OFFSET(unit), 8); + so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); + so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, + NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); + so_data (so, ps->wrap); + so_data (so, NV34TCL_TX_ENABLE_ENABLE | ps->en); + so_data (so, txs); + so_data (so, ps->filt | 0x2000 /*voodoo*/); + so_data (so, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | + pt->height0); + so_data (so, ps->bcol); + + return so; +} diff --git a/src/gallium/drivers/nvfx/nv30_vertprog.h b/src/gallium/drivers/nvfx/nv30_vertprog.h new file mode 100644 index 00000000000..ec0444c07f8 --- /dev/null +++ b/src/gallium/drivers/nvfx/nv30_vertprog.h @@ -0,0 +1,169 @@ +#ifndef __NV30_SHADER_H__ +#define __NV30_SHADER_H__ + +/* Vertex programs instruction set + * + * 128bit opcodes, split into 4 32-bit ones for ease of use. + * + * Non-native instructions + * ABS - MOV + NV40_VP_INST0_DEST_ABS + * POW - EX2 + MUL + LG2 + * SUB - ADD, second source negated + * SWZ - MOV + * XPD - + * + * Register access + * - Only one INPUT can be accessed per-instruction (move extras into TEMPs) + * - Only one CONST can be accessed per-instruction (move extras into TEMPs) + * + * Relative Addressing + * According to the value returned for + * MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB + * + * there are only two address registers available. The destination in the + * ARL instruction is set to TEMP (The temp isn't actually written). + * + * When using vanilla ARB_v_p, the proprietary driver will squish both the + * available ADDRESS regs into the first hardware reg in the X and Y + * components. + * + * To use an address reg as an index into consts, the CONST_SRC is set to + * (const_base + offset) and INDEX_CONST is set. + * + * To access the second address reg use ADDR_REG_SELECT_1. A particular + * component of the address regs is selected with ADDR_SWZ. + * + * Only one address register can be accessed per instruction. + * + * Conditional execution (see NV_vertex_program{2,3} for details) Conditional + * execution of an instruction is enabled by setting COND_TEST_ENABLE, and + * selecting the condition which will allow the test to pass with + * COND_{FL,LT,...}. It is possible to swizzle the values in the condition + * register, which allows for testing against an individual component. + * + * Branching: + * + * The BRA/CAL instructions seem to follow a slightly different opcode + * layout. The destination instruction ID (IADDR) overlaps a source field. + * Instruction ID's seem to be numbered based on the UPLOAD_FROM_ID FIFO + * command, and is incremented automatically on each UPLOAD_INST FIFO + * command. + * + * Conditional branching is achieved by using the condition tests described + * above. There doesn't appear to be dedicated looping instructions, but + * this can be done using a temp reg + conditional branching. + * + * Subroutines may be uploaded before the main program itself, but the first + * executed instruction is determined by the PROGRAM_START_ID FIFO command. + * + */ + +/* DWORD 0 */ + +#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24) +#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */ +#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */ +#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */ +#define NV30_VP_INST_VEC_RESULT (1 << 20) +#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16 +#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16) +#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15) +#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0xF << 16) +#define NV30_VP_INST_COND_TEST_ENABLE (1<<14) +#define NV30_VP_INST_COND_SHIFT 11 +#define NV30_VP_INST_COND_MASK (0x07 << 11) +#define NV30_VP_INST_COND_SWZ_X_SHIFT 9 +#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9) +#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7 +#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7) +#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5 +#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5) +#define NV30_VP_INST_COND_SWZ_W_SHIFT 3 +#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3) +#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3 +#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3) +#define NV30_VP_INST_ADDR_SWZ_SHIFT 1 +#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1) +#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0 +#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0) + +/* DWORD 1 */ +#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28 +#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28) +#define NV30_VP_INST_VEC_OPCODE_SHIFT 23 +#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23) +#define NV30_VP_INST_CONST_SRC_SHIFT 14 +#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14) +#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/ +#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/ +#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/ +#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/ + +/* Please note: the IADDR fields overlap other fields because they are used + * only for branch instructions. See Branching: label above + * + * DWORD 2 + */ +#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/ +#define NV30_VP_INST_SRC0L_MASK (0x3F <<26) /* NV30_VP_SRC0_LOW_MASK << 26 */ +#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/ +#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/ +#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/ +#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/ +#define NV30_VP_INST_IADDR_SHIFT 2 +#define NV30_VP_INST_IADDR_MASK (0xF << 28) /* NV30_VP_SRC2_LOW_MASK << 28 */ + +/* DWORD 3 */ +#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/ +#define NV30_VP_INST_SRC2L_MASK (0x0F <<28) /*NV20*/ +#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24 +#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24) +#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20 +#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20) +#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16 +#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16) +#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/ +#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/ +#define NV30_VP_INST_DEST_SHIFT 2 +#define NV30_VP_INST_DEST_MASK (0x0F << 2) +# define NV30_VP_INST_DEST_POS 0 +# define NV30_VP_INST_DEST_BFC0 1 +# define NV30_VP_INST_DEST_BFC1 2 +# define NV30_VP_INST_DEST_COL0 3 +# define NV30_VP_INST_DEST_COL1 4 +# define NV30_VP_INST_DEST_FOGC 5 +# define NV30_VP_INST_DEST_PSZ 6 +# define NV30_VP_INST_DEST_TC(n) (8+n) + +/* Useful to split the source selection regs into their pieces */ +#define NV30_VP_SRC0_HIGH_SHIFT 6 +#define NV30_VP_SRC0_HIGH_MASK 0x00007FC0 +#define NV30_VP_SRC0_LOW_MASK 0x0000003F +#define NV30_VP_SRC2_HIGH_SHIFT 4 +#define NV30_VP_SRC2_HIGH_MASK 0x00007FF0 +#define NV30_VP_SRC2_LOW_MASK 0x0000000F + + +/* Source-register definition - matches NV20 exactly */ +#define NV30_VP_SRC_NEGATE (1<<14) +#define NV30_VP_SRC_SWZ_X_SHIFT 12 +#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12) +#define NV30_VP_SRC_SWZ_Y_SHIFT 10 +#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10) +#define NV30_VP_SRC_SWZ_Z_SHIFT 8 +#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8) +#define NV30_VP_SRC_SWZ_W_SHIFT 6 +#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6) +#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6 +#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6) +#define NV30_VP_SRC_TEMP_SRC_SHIFT 2 +#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0) +#define NV30_VP_SRC_REG_TYPE_SHIFT 0 +#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0) +#define NV30_VP_SRC_REG_TYPE_TEMP 1 +#define NV30_VP_SRC_REG_TYPE_INPUT 2 +#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */ + +#include "nvfx_shader.h" + +#endif diff --git a/src/gallium/drivers/nvfx/nv40_fragtex.c b/src/gallium/drivers/nvfx/nv40_fragtex.c new file mode 100644 index 00000000000..5889b5e40d5 --- /dev/null +++ b/src/gallium/drivers/nvfx/nv40_fragtex.c @@ -0,0 +1,174 @@ +#include "util/u_format.h" +#include "nvfx_context.h" +#include "nvfx_tex.h" + +void +nv40_sampler_state_init(struct pipe_context *pipe, + struct nvfx_sampler_state *ps, + const struct pipe_sampler_state *cso) +{ + if (cso->max_anisotropy >= 2) { + /* no idea, binary driver sets it, works without it.. meh.. */ + ps->wrap |= (1 << 5); + + if (cso->max_anisotropy >= 16) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X; + } else + if (cso->max_anisotropy >= 12) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X; + } else + if (cso->max_anisotropy >= 10) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X; + } else + if (cso->max_anisotropy >= 8) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 6) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X; + } else + if (cso->max_anisotropy >= 4) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X; + } else { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X; + } + } + + { + float limit; + + limit = CLAMP(cso->lod_bias, -16.0, 15.0); + ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + + limit = CLAMP(cso->max_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 7; + + limit = CLAMP(cso->min_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 19; + } +} + +#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw) \ +{ \ + TRUE, \ + PIPE_FORMAT_##m, \ + NV40TCL_TEX_FORMAT_FORMAT_##tf, \ + (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \ + NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \ + NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \ + NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w), \ + ((NV34TCL_TX_FILTER_SIGNED_RED*sx) | (NV34TCL_TX_FILTER_SIGNED_GREEN*sy) | \ + (NV34TCL_TX_FILTER_SIGNED_BLUE*sz) | (NV34TCL_TX_FILTER_SIGNED_ALPHA*sw)) \ +} + +struct nv40_texture_format { + boolean defined; + uint pipe; + int format; + int swizzle; + int sign; +}; + +static struct nv40_texture_format +nv40_texture_formats[] = { + _(B8G8R8X8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), + _(B8G8R8A8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(B5G5R5A1_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(B4G4R4A4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(B5G6R5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), + _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), + _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X, 0, 0, 0, 0), + _(R16_SNORM , A16 , ZERO, ZERO, S1, ONE, X, X, X, Y, 1, 1, 1, 1), + _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X, 0, 0, 0, 0), + _(L8A8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y, 0, 0, 0, 0), + _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), + _(S8Z24_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), + _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), + _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + {}, +}; + +static struct nv40_texture_format * +nv40_fragtex_format(uint pipe_format) +{ + struct nv40_texture_format *tf = nv40_texture_formats; + + while (tf->defined) { + if (tf->pipe == pipe_format) + return tf; + tf++; + } + + NOUVEAU_ERR("unknown texture format %s\n", util_format_name(pipe_format)); + return NULL; +} + + +struct nouveau_stateobj * +nv40_fragtex_build(struct nvfx_context *nvfx, int unit) +{ + struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit]; + struct nvfx_miptree *nv40mt = nvfx->tex_miptree[unit]; + struct nouveau_bo *bo = nouveau_bo(nv40mt->buffer); + struct pipe_texture *pt = &nv40mt->base; + struct nv40_texture_format *tf; + struct nouveau_stateobj *so; + uint32_t txf, txs, txp; + unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + + tf = nv40_fragtex_format(pt->format); + if (!tf) + assert(0); + + txf = ps->fmt; + txf |= tf->format | 0x8000; + txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT); + + if (1) /* XXX */ + txf |= NV34TCL_TX_FORMAT_NO_BORDER; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: + txf |= NV34TCL_TX_FORMAT_CUBIC; + /* fall-through */ + case PIPE_TEXTURE_2D: + txf |= NV34TCL_TX_FORMAT_DIMS_2D; + break; + case PIPE_TEXTURE_3D: + txf |= NV34TCL_TX_FORMAT_DIMS_3D; + break; + case PIPE_TEXTURE_1D: + txf |= NV34TCL_TX_FORMAT_DIMS_1D; + break; + default: + NOUVEAU_ERR("Unknown target %d\n", pt->target); + return NULL; + } + + if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + txp = 0; + } else { + txp = nv40mt->level[0].pitch; + txf |= NV40TCL_TEX_FORMAT_LINEAR; + } + + txs = tf->swizzle; + + so = so_new(2, 9, 2); + so_method(so, nvfx->screen->eng3d, NV34TCL_TX_OFFSET(unit), 8); + so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); + so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, + NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); + so_data (so, ps->wrap); + so_data (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en); + so_data (so, txs); + so_data (so, ps->filt | tf->sign | 0x2000 /*voodoo*/); + so_data (so, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | + pt->height0); + so_data (so, ps->bcol); + so_method(so, nvfx->screen->eng3d, NV40TCL_TEX_SIZE1(unit), 1); + so_data (so, (pt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); + + return so; +} diff --git a/src/gallium/drivers/nvfx/nv40_vertprog.h b/src/gallium/drivers/nvfx/nv40_vertprog.h new file mode 100644 index 00000000000..7337293babc --- /dev/null +++ b/src/gallium/drivers/nvfx/nv40_vertprog.h @@ -0,0 +1,177 @@ +#ifndef __NV40_SHADER_H__ +#define __NV40_SHADER_H__ + +/* Vertex programs instruction set + * + * The NV40 instruction set is very similar to NV30. Most fields are in + * a slightly different position in the instruction however. + * + * Merged instructions + * In some cases it is possible to put two instructions into one opcode + * slot. The rules for when this is OK is not entirely clear to me yet. + * + * There are separate writemasks and dest temp register fields for each + * grouping of instructions. There is however only one field with the + * ID of a result register. Writing to temp/result regs is selected by + * setting VEC_RESULT/SCA_RESULT. + * + * Temporary registers + * The source/dest temp register fields have been extended by 1 bit, to + * give a total of 32 temporary registers. + * + * Relative Addressing + * NV40 can use an address register to index into vertex attribute regs. + * This is done by putting the offset value into INPUT_SRC and setting + * the INDEX_INPUT flag. + * + * Conditional execution (see NV_vertex_program{2,3} for details) + * There is a second condition code register on NV40, it's use is enabled + * by setting the COND_REG_SELECT_1 flag. + * + * Texture lookup + * TODO + */ + +/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */ +#define NV40_VP_INST_VEC_RESULT (1 << 30) +/* uncertain.. */ +#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29) +/* use address reg as index into attribs */ +#define NV40_VP_INST_INDEX_INPUT (1 << 27) +#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25) +#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24) +#define NV40_VP_INST_SRC2_ABS (1 << 23) +#define NV40_VP_INST_SRC1_ABS (1 << 22) +#define NV40_VP_INST_SRC0_ABS (1 << 21) +#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15 +#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15) +#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13) +#define NV40_VP_INST_COND_SHIFT 10 +#define NV40_VP_INST_COND_MASK (0x7 << 10) +#define NV40_VP_INST_COND_SWZ_X_SHIFT 8 +#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8) +#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6 +#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6) +#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4 +#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4) +#define NV40_VP_INST_COND_SWZ_W_SHIFT 2 +#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2) +#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2 +#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2) +#define NV40_VP_INST_ADDR_SWZ_SHIFT 0 +#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0) +#define NV40_VP_INST0_KNOWN ( \ + NV40_VP_INST_INDEX_INPUT | \ + NV40_VP_INST_COND_REG_SELECT_1 | \ + NV40_VP_INST_ADDR_REG_SELECT_1 | \ + NV40_VP_INST_SRC2_ABS | \ + NV40_VP_INST_SRC1_ABS | \ + NV40_VP_INST_SRC0_ABS | \ + NV40_VP_INST_VEC_DEST_TEMP_MASK | \ + NV40_VP_INST_COND_TEST_ENABLE | \ + NV40_VP_INST_COND_MASK | \ + NV40_VP_INST_COND_SWZ_ALL_MASK | \ + NV40_VP_INST_ADDR_SWZ_MASK) + +/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */ +#define NV40_VP_INST_VEC_OPCODE_SHIFT 22 +#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22) +#define NV40_VP_INST_SCA_OPCODE_SHIFT 27 +#define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27) +#define NV40_VP_INST_CONST_SRC_SHIFT 12 +#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12) +#define NV40_VP_INST_INPUT_SRC_SHIFT 8 +#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8) +#define NV40_VP_INST_SRC0H_SHIFT 0 +#define NV40_VP_INST_SRC0H_MASK (0xFF << 0) +#define NV40_VP_INST1_KNOWN ( \ + NV40_VP_INST_VEC_OPCODE_MASK | \ + NV40_VP_INST_SCA_OPCODE_MASK | \ + NV40_VP_INST_CONST_SRC_MASK | \ + NV40_VP_INST_INPUT_SRC_MASK | \ + NV40_VP_INST_SRC0H_MASK \ + ) + +/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */ +#define NV40_VP_INST_SRC0L_SHIFT 23 +#define NV40_VP_INST_SRC0L_MASK (0x1FF << 23) +#define NV40_VP_INST_SRC1_SHIFT 6 +#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6) +#define NV40_VP_INST_SRC2H_SHIFT 0 +#define NV40_VP_INST_SRC2H_MASK (0x3F << 0) +#define NV40_VP_INST_IADDRH_SHIFT 0 +#define NV40_VP_INST_IADDRH_MASK (0x1F << 0) + +/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */ +#define NV40_VP_INST_IADDRL_SHIFT 29 +#define NV40_VP_INST_IADDRL_MASK (7 << 29) +#define NV40_VP_INST_SRC2L_SHIFT 21 +#define NV40_VP_INST_SRC2L_MASK (0x7FF << 21) +#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17 +#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17) +# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20) +# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19) +# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18) +# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17) +#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13 +#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13) +# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16) +# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15) +# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14) +# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13) +#define NV40_VP_INST_SCA_RESULT (1 << 12) +#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7 +#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7) +#define NV40_VP_INST_DEST_SHIFT 2 +#define NV40_VP_INST_DEST_MASK (31 << 2) +# define NV40_VP_INST_DEST_POS 0 +# define NV40_VP_INST_DEST_COL0 1 +# define NV40_VP_INST_DEST_COL1 2 +# define NV40_VP_INST_DEST_BFC0 3 +# define NV40_VP_INST_DEST_BFC1 4 +# define NV40_VP_INST_DEST_FOGC 5 +# define NV40_VP_INST_DEST_PSZ 6 +# define NV40_VP_INST_DEST_TC0 7 +# define NV40_VP_INST_DEST_TC(n) (7+n) +# define NV40_VP_INST_DEST_TEMP 0x1F +#define NV40_VP_INST_INDEX_CONST (1 << 1) +#define NV40_VP_INST3_KNOWN ( \ + NV40_VP_INST_SRC2L_MASK |\ + NV40_VP_INST_SCA_WRITEMASK_MASK |\ + NV40_VP_INST_VEC_WRITEMASK_MASK |\ + NV40_VP_INST_SCA_DEST_TEMP_MASK |\ + NV40_VP_INST_DEST_MASK |\ + NV40_VP_INST_INDEX_CONST) + +/* Useful to split the source selection regs into their pieces */ +#define NV40_VP_SRC0_HIGH_SHIFT 9 +#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00 +#define NV40_VP_SRC0_LOW_MASK 0x000001FF +#define NV40_VP_SRC2_HIGH_SHIFT 11 +#define NV40_VP_SRC2_HIGH_MASK 0x0001F800 +#define NV40_VP_SRC2_LOW_MASK 0x000007FF + +/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */ +#define NV40_VP_SRC_NEGATE (1 << 16) +#define NV40_VP_SRC_SWZ_X_SHIFT 14 +#define NV40_VP_SRC_SWZ_X_MASK (3 << 14) +#define NV40_VP_SRC_SWZ_Y_SHIFT 12 +#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12) +#define NV40_VP_SRC_SWZ_Z_SHIFT 10 +#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10) +#define NV40_VP_SRC_SWZ_W_SHIFT 8 +#define NV40_VP_SRC_SWZ_W_MASK (3 << 8) +#define NV40_VP_SRC_SWZ_ALL_SHIFT 8 +#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8) +#define NV40_VP_SRC_TEMP_SRC_SHIFT 2 +#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2) +#define NV40_VP_SRC_REG_TYPE_SHIFT 0 +#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0) +# define NV40_VP_SRC_REG_TYPE_UNK0 0 +# define NV40_VP_SRC_REG_TYPE_TEMP 1 +# define NV40_VP_SRC_REG_TYPE_INPUT 2 +# define NV40_VP_SRC_REG_TYPE_CONST 3 + +#include "nvfx_shader.h" + +#endif diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c index 8700e143297..7308f0667c7 100644 --- a/src/gallium/drivers/nvfx/nvfx_draw.c +++ b/src/gallium/drivers/nvfx/nvfx_draw.c @@ -9,8 +9,8 @@ #include "nvfx_context.h" #define NVFX_SHADER_NO_FUCKEDNESS -#include "nv30/nv30_shader.h" -#include "nv40/nv40_shader.h" +#include "nv30_vertprog.h" +#include "nv40_vertprog.h" /* Simple, but crappy, swtnl path, hopefully we wont need to hit this very * often at all. Uses "quadro style" vertex submission + a fixed vertex diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index 730361a9827..3d0e8c23a15 100644 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -33,8 +33,8 @@ #define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) #define DEF_SCALE 0 #define DEF_CTEST 0 -#include "nv30/nv30_shader.h" -#include "nv40/nv40_shader.h" +#include "nv30_vertprog.h" +#include "nv40_vertprog.h" #define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) #define neg(s) nvfx_sr_neg((s)) diff --git a/src/gallium/winsys/drm/nouveau/dri/Makefile b/src/gallium/winsys/drm/nouveau/dri/Makefile index 0cc60395ffe..50ac3f203e5 100644 --- a/src/gallium/winsys/drm/nouveau/dri/Makefile +++ b/src/gallium/winsys/drm/nouveau/dri/Makefile @@ -3,18 +3,10 @@ include $(TOP)/configs/current LIBNAME = nouveau_dri.so -# hideous hack --Wl,--start-group: --Wl,--end-group: - PIPE_DRIVERS = \ $(TOP)/src/gallium/state_trackers/dri/libdridrm.a \ $(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \ - -Wl,--start-group \ - $(TOP)/src/gallium/drivers/nv30/libnv30.a \ - $(TOP)/src/gallium/drivers/nv40/libnv40.a \ $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \ - -Wl,--end-group \ $(TOP)/src/gallium/drivers/nv50/libnv50.a \ $(TOP)/src/gallium/drivers/nouveau/libnouveau.a diff --git a/src/gallium/winsys/drm/nouveau/egl/Makefile b/src/gallium/winsys/drm/nouveau/egl/Makefile index 0f5e6d64aa4..47d11276155 100644 --- a/src/gallium/winsys/drm/nouveau/egl/Makefile +++ b/src/gallium/winsys/drm/nouveau/egl/Makefile @@ -5,17 +5,9 @@ EGL_DRIVER_NAME = nouveau EGL_DRIVER_SOURCES = dummy.c EGL_DRIVER_LIBS = -ldrm_nouveau -# hideous hack --Wl,--start-group: --Wl,--end-group: - EGL_DRIVER_PIPES = \ $(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \ - -Wl,--start-group \ - $(TOP)/src/gallium/drivers/nv30/libnv30.a \ - $(TOP)/src/gallium/drivers/nv40/libnv40.a \ - $(TOP)/src/gallium/drivers/nv40/libnvfx.a \ - -Wl,--end-group \ + $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \ $(TOP)/src/gallium/drivers/nv50/libnv50.a \ $(TOP)/src/gallium/drivers/nouveau/libnouveau.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a diff --git a/src/gallium/winsys/drm/nouveau/xorg/Makefile b/src/gallium/winsys/drm/nouveau/xorg/Makefile index 0607d82a6e4..f7f6fe17dd6 100644 --- a/src/gallium/winsys/drm/nouveau/xorg/Makefile +++ b/src/gallium/winsys/drm/nouveau/xorg/Makefile @@ -15,18 +15,10 @@ INCLUDES = \ -I$(TOP)/include \ -I$(TOP)/src/egl/main -# hideous hack --Wl,--start-group: --Wl,--end-group: - LIBS = \ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ $(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \ - --Wl,--start-group \ - $(TOP)/src/gallium/drivers/nv30/libnv30.a \ - $(TOP)/src/gallium/drivers/nv40/libnv40.a \ - $(TOP)/src/gallium/drivers/nv40/libnvfx.a \ - --Wl,--end-group \ + $(TOP)/src/gallium/drivers/nvfx/libnvfx.a \ $(TOP)/src/gallium/drivers/nv50/libnv50.a \ $(TOP)/src/gallium/drivers/nouveau/libnouveau.a \ $(GALLIUM_AUXILIARIES) -- cgit v1.2.3 From 0192a4a825d3d04b1588bdabad629a9888f325d6 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Tue, 2 Mar 2010 22:51:39 +0100 Subject: nvfx: fix viewport state after bypass removal for swtnl The adjustment of nv30/nv40 after the removal of bypass incorrectly removed the hardware viewport bypass code, which we still need for swtnl and also forgot to remove NVFX_NEW_RAST from pipe. --- src/gallium/drivers/nvfx/nvfx_state_viewport.c | 35 ++++++++++++++++++-------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nvfx/nvfx_state_viewport.c b/src/gallium/drivers/nvfx/nvfx_state_viewport.c index 72057a80f84..82e0e9220b0 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_viewport.c +++ b/src/gallium/drivers/nvfx/nvfx_state_viewport.c @@ -13,16 +13,29 @@ nvfx_state_viewport_validate(struct nvfx_context *nvfx) so = so_new(2, 9, 0); so_method(so, nvfx->screen->eng3d, NV34TCL_VIEWPORT_TRANSLATE_X, 8); - so_data (so, fui(vpt->translate[0])); - so_data (so, fui(vpt->translate[1])); - so_data (so, fui(vpt->translate[2])); - so_data (so, fui(vpt->translate[3])); - so_data (so, fui(vpt->scale[0])); - so_data (so, fui(vpt->scale[1])); - so_data (so, fui(vpt->scale[2])); - so_data (so, fui(vpt->scale[3])); - so_method(so, nvfx->screen->eng3d, 0x1d78, 1); - so_data (so, 1); + if(nvfx->render_mode == HW) { + so_data (so, fui(vpt->translate[0])); + so_data (so, fui(vpt->translate[1])); + so_data (so, fui(vpt->translate[2])); + so_data (so, fui(vpt->translate[3])); + so_data (so, fui(vpt->scale[0])); + so_data (so, fui(vpt->scale[1])); + so_data (so, fui(vpt->scale[2])); + so_data (so, fui(vpt->scale[3])); + so_method(so, nvfx->screen->eng3d, 0x1d78, 1); + so_data (so, 1); + } else { + so_data (so, fui(0.0f)); + so_data (so, fui(0.0f)); + so_data (so, fui(0.0f)); + so_data (so, fui(0.0f)); + so_data (so, fui(1.0f)); + so_data (so, fui(1.0f)); + so_data (so, fui(1.0f)); + so_data (so, fui(1.0f)); + so_method(so, nvfx->screen->eng3d, 0x1d78, 1); + so_data (so, nvfx->is_nv4x ? 0x110 : 1); + } so_ref(so, &nvfx->state.hw[NVFX_STATE_VIEWPORT]); so_ref(NULL, &so); @@ -32,7 +45,7 @@ nvfx_state_viewport_validate(struct nvfx_context *nvfx) struct nvfx_state_entry nvfx_state_viewport = { .validate = nvfx_state_viewport_validate, .dirty = { - .pipe = NVFX_NEW_VIEWPORT | NVFX_NEW_RAST, + .pipe = NVFX_NEW_VIEWPORT, .hw = NVFX_STATE_VIEWPORT } }; -- cgit v1.2.3 From dbe63ed3b688fc1b2714d1418a031210c9e00d3b Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Mon, 22 Feb 2010 20:14:32 +0100 Subject: nvfx: move nv04_surface_2d.c into nvfx directory It is only used on pre-nv50 and nvfx is the only Gallium pre-nv50 driver. --- src/gallium/drivers/nouveau/Makefile | 3 +- src/gallium/drivers/nouveau/nv04_surface_2d.c | 546 -------------------------- src/gallium/drivers/nouveau/nv04_surface_2d.h | 37 -- src/gallium/drivers/nvfx/Makefile | 1 + src/gallium/drivers/nvfx/nv04_surface_2d.c | 545 +++++++++++++++++++++++++ src/gallium/drivers/nvfx/nv04_surface_2d.h | 37 ++ src/gallium/drivers/nvfx/nvfx_miptree.c | 2 +- src/gallium/drivers/nvfx/nvfx_screen.h | 2 +- 8 files changed, 586 insertions(+), 587 deletions(-) delete mode 100644 src/gallium/drivers/nouveau/nv04_surface_2d.c delete mode 100644 src/gallium/drivers/nouveau/nv04_surface_2d.h create mode 100644 src/gallium/drivers/nvfx/nv04_surface_2d.c create mode 100644 src/gallium/drivers/nvfx/nv04_surface_2d.h (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nouveau/Makefile b/src/gallium/drivers/nouveau/Makefile index 0e02680bc63..0cb66041d50 100644 --- a/src/gallium/drivers/nouveau/Makefile +++ b/src/gallium/drivers/nouveau/Makefile @@ -4,7 +4,6 @@ include $(TOP)/configs/current LIBNAME = nouveau C_SOURCES = nouveau_screen.c \ - nouveau_context.c \ - nv04_surface_2d.c + nouveau_context.c include ../../Makefile.template diff --git a/src/gallium/drivers/nouveau/nv04_surface_2d.c b/src/gallium/drivers/nouveau/nv04_surface_2d.c deleted file mode 100644 index 93114465d5e..00000000000 --- a/src/gallium/drivers/nouveau/nv04_surface_2d.c +++ /dev/null @@ -1,546 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_format.h" -#include "util/u_format.h" -#include "util/u_math.h" -#include "util/u_memory.h" - -#include "nouveau/nouveau_winsys.h" -#include "nouveau/nouveau_util.h" -#include "nouveau/nouveau_screen.h" -#include "nv04_surface_2d.h" - -static INLINE int -nv04_surface_format(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_I8_UNORM: - return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8; - case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_L8A8_UNORM: - return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5; - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_B8G8R8A8_UNORM: - return NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8; - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32; - default: - return -1; - } -} - -static INLINE int -nv04_rect_format(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_A8_UNORM: - return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_L8A8_UNORM: - case PIPE_FORMAT_Z16_UNORM: - return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5; - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; - default: - return -1; - } -} - -static INLINE int -nv04_scaled_image_format(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_I8_UNORM: - return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8; - case PIPE_FORMAT_B5G5R5A1_UNORM: - return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5; - case PIPE_FORMAT_B8G8R8A8_UNORM: - return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8; - case PIPE_FORMAT_B8G8R8X8_UNORM: - return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8; - case PIPE_FORMAT_B5G6R5_UNORM: - case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_L8A8_UNORM: - return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5; - default: - return -1; - } -} - -static INLINE unsigned -nv04_swizzle_bits_square(unsigned x, unsigned y) -{ - unsigned u = (x & 0x001) << 0 | - (x & 0x002) << 1 | - (x & 0x004) << 2 | - (x & 0x008) << 3 | - (x & 0x010) << 4 | - (x & 0x020) << 5 | - (x & 0x040) << 6 | - (x & 0x080) << 7 | - (x & 0x100) << 8 | - (x & 0x200) << 9 | - (x & 0x400) << 10 | - (x & 0x800) << 11; - - unsigned v = (y & 0x001) << 1 | - (y & 0x002) << 2 | - (y & 0x004) << 3 | - (y & 0x008) << 4 | - (y & 0x010) << 5 | - (y & 0x020) << 6 | - (y & 0x040) << 7 | - (y & 0x080) << 8 | - (y & 0x100) << 9 | - (y & 0x200) << 10 | - (y & 0x400) << 11 | - (y & 0x800) << 12; - return v | u; -} - -/* rectangular swizzled textures are linear concatenations of swizzled square tiles */ -static INLINE unsigned -nv04_swizzle_bits(unsigned x, unsigned y, unsigned w, unsigned h) -{ - unsigned s = MIN2(w, h); - unsigned m = s - 1; - return (((x | y) & ~m) * s) | nv04_swizzle_bits_square(x & m, y & m); -} - -static int -nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, - struct pipe_surface *dst, int dx, int dy, - struct pipe_surface *src, int sx, int sy, - int w, int h) -{ - struct nouveau_channel *chan = ctx->swzsurf->channel; - struct nouveau_grobj *swzsurf = ctx->swzsurf; - struct nouveau_grobj *sifm = ctx->sifm; - struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src)); - struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst)); - const unsigned src_pitch = ((struct nv04_surface *)src)->pitch; - /* Max width & height may not be the same on all HW, but must be POT */ - const unsigned max_w = 1024; - const unsigned max_h = 1024; - unsigned sub_w = w > max_w ? max_w : w; - unsigned sub_h = h > max_h ? max_h : h; - unsigned x; - unsigned y; - - /* Swizzled surfaces must be POT */ - assert(util_is_pot(dst->width) && util_is_pot(dst->height)); - - /* If area is too large to copy in one shot we must copy it in POT chunks to meet alignment requirements */ - assert(sub_w == w || util_is_pot(sub_w)); - assert(sub_h == h || util_is_pot(sub_h)); - - MARK_RING (chan, 8 + ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*17, 2 + - ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*2); - - BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1); - OUT_RELOCo(chan, dst_bo, - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_FORMAT, 1); - OUT_RING (chan, nv04_surface_format(dst->format) | - log2i(dst->width) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT | - log2i(dst->height) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT); - - BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1); - OUT_RELOCo(chan, src_bo, - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1); - OUT_RING (chan, swzsurf->handle); - - for (y = 0; y < h; y += sub_h) { - sub_h = MIN2(sub_h, h - y); - - for (x = 0; x < w; x += sub_w) { - sub_w = MIN2(sub_w, w - x); - - assert(!(dst->offset & 63)); - - BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1); - OUT_RELOCl(chan, dst_bo, dst->offset, - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - BEGIN_RING(chan, sifm, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9); - OUT_RING (chan, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE); - OUT_RING (chan, nv04_scaled_image_format(src->format)); - OUT_RING (chan, NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY); - OUT_RING (chan, (x + dx) | ((y + dy) << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT)); - OUT_RING (chan, sub_h << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w); - OUT_RING (chan, (x + dx) | ((y + dy) << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT)); - OUT_RING (chan, sub_h << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w); - OUT_RING (chan, 1 << 20); - OUT_RING (chan, 1 << 20); - - BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_SIZE, 4); - OUT_RING (chan, sub_h << NV03_SCALED_IMAGE_FROM_MEMORY_SIZE_H_SHIFT | sub_w); - OUT_RING (chan, src_pitch | - NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER | - NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE); - OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * util_format_get_blocksize(src->texture->format), - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RING (chan, 0); - } - } - - return 0; -} - -static int -nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx, - struct pipe_surface *dst, int dx, int dy, - struct pipe_surface *src, int sx, int sy, int w, int h) -{ - struct nouveau_channel *chan = ctx->m2mf->channel; - struct nouveau_grobj *m2mf = ctx->m2mf; - struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src)); - struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst)); - unsigned src_pitch = ((struct nv04_surface *)src)->pitch; - unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; - unsigned dst_offset = dst->offset + dy * dst_pitch + - dx * util_format_get_blocksize(dst->texture->format); - unsigned src_offset = src->offset + sy * src_pitch + - sx * util_format_get_blocksize(src->texture->format); - - MARK_RING (chan, 3 + ((h / 2047) + 1) * 9, 2 + ((h / 2047) + 1) * 2); - BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2); - OUT_RELOCo(chan, src_bo, - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCo(chan, dst_bo, - NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - while (h) { - int count = (h > 2047) ? 2047 : h; - - BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8); - OUT_RELOCl(chan, src_bo, src_offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCl(chan, dst_bo, dst_offset, - NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR); - OUT_RING (chan, src_pitch); - OUT_RING (chan, dst_pitch); - OUT_RING (chan, w * util_format_get_blocksize(src->texture->format)); - OUT_RING (chan, count); - OUT_RING (chan, 0x0101); - OUT_RING (chan, 0); - - h -= count; - src_offset += src_pitch * count; - dst_offset += dst_pitch * count; - } - - return 0; -} - -static int -nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst, - int dx, int dy, struct pipe_surface *src, int sx, int sy, - int w, int h) -{ - struct nouveau_channel *chan = ctx->surf2d->channel; - struct nouveau_grobj *surf2d = ctx->surf2d; - struct nouveau_grobj *blit = ctx->blit; - struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src)); - struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst)); - unsigned src_pitch = ((struct nv04_surface *)src)->pitch; - unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; - int format; - - format = nv04_surface_format(dst->format); - if (format < 0) - return 1; - - MARK_RING (chan, 12, 4); - BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); - OUT_RELOCo(chan, src_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); - OUT_RING (chan, format); - OUT_RING (chan, (dst_pitch << 16) | src_pitch); - OUT_RELOCl(chan, src_bo, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - BEGIN_RING(chan, blit, 0x0300, 3); - OUT_RING (chan, (sy << 16) | sx); - OUT_RING (chan, (dy << 16) | dx); - OUT_RING (chan, ( h << 16) | w); - - return 0; -} - -static void -nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst, - int dx, int dy, struct pipe_surface *src, int sx, int sy, - int w, int h) -{ - unsigned src_pitch = ((struct nv04_surface *)src)->pitch; - unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; - int src_linear = src->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR; - int dst_linear = dst->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR; - - assert(src->format == dst->format); - - /* Setup transfer to swizzle the texture to vram if needed */ - if (src_linear && !dst_linear && w > 1 && h > 1) { - nv04_surface_copy_swizzle(ctx, dst, dx, dy, src, sx, sy, w, h); - return; - } - - /* NV_CONTEXT_SURFACES_2D has buffer alignment restrictions, fallback - * to NV_MEMORY_TO_MEMORY_FORMAT in this case. - */ - if ((src->offset & 63) || (dst->offset & 63) || - (src_pitch & 63) || (dst_pitch & 63)) { - nv04_surface_copy_m2mf(ctx, dst, dx, dy, src, sx, sy, w, h); - return; - } - - nv04_surface_copy_blit(ctx, dst, dx, dy, src, sx, sy, w, h); -} - -static void -nv04_surface_fill(struct nv04_surface_2d *ctx, struct pipe_surface *dst, - int dx, int dy, int w, int h, unsigned value) -{ - struct nouveau_channel *chan = ctx->surf2d->channel; - struct nouveau_grobj *surf2d = ctx->surf2d; - struct nouveau_grobj *rect = ctx->rect; - struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst)); - unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; - int cs2d_format, gdirect_format; - - cs2d_format = nv04_surface_format(dst->format); - assert(cs2d_format >= 0); - - gdirect_format = nv04_rect_format(dst->format); - assert(gdirect_format >= 0); - - MARK_RING (chan, 16, 4); - BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); - OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); - OUT_RING (chan, cs2d_format); - OUT_RING (chan, (dst_pitch << 16) | dst_pitch); - OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); - - BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1); - OUT_RING (chan, gdirect_format); - BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1); - OUT_RING (chan, value); - BEGIN_RING(chan, rect, - NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2); - OUT_RING (chan, (dx << 16) | dy); - OUT_RING (chan, ( w << 16) | h); -} - -void -nv04_surface_2d_takedown(struct nv04_surface_2d **pctx) -{ - struct nv04_surface_2d *ctx; - - if (!pctx || !*pctx) - return; - ctx = *pctx; - *pctx = NULL; - - nouveau_notifier_free(&ctx->ntfy); - nouveau_grobj_free(&ctx->m2mf); - nouveau_grobj_free(&ctx->surf2d); - nouveau_grobj_free(&ctx->swzsurf); - nouveau_grobj_free(&ctx->rect); - nouveau_grobj_free(&ctx->blit); - nouveau_grobj_free(&ctx->sifm); - - FREE(ctx); -} - -struct nv04_surface_2d * -nv04_surface_2d_init(struct nouveau_screen *screen) -{ - struct nv04_surface_2d *ctx = CALLOC_STRUCT(nv04_surface_2d); - struct nouveau_channel *chan = screen->channel; - unsigned handle = 0x88000000, class; - int ret; - - if (!ctx) - return NULL; - - ret = nouveau_notifier_alloc(chan, handle++, 1, &ctx->ntfy); - if (ret) { - nv04_surface_2d_takedown(&ctx); - return NULL; - } - - ret = nouveau_grobj_alloc(chan, handle++, 0x0039, &ctx->m2mf); - if (ret) { - nv04_surface_2d_takedown(&ctx); - return NULL; - } - - BEGIN_RING(chan, ctx->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1); - OUT_RING (chan, ctx->ntfy->handle); - - if (chan->device->chipset < 0x10) - class = NV04_CONTEXT_SURFACES_2D; - else - class = NV10_CONTEXT_SURFACES_2D; - - ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->surf2d); - if (ret) { - nv04_surface_2d_takedown(&ctx); - return NULL; - } - - BEGIN_RING(chan, ctx->surf2d, - NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); - OUT_RING (chan, chan->vram->handle); - OUT_RING (chan, chan->vram->handle); - - if (chan->device->chipset < 0x10) - class = NV04_IMAGE_BLIT; - else - class = NV12_IMAGE_BLIT; - - ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->blit); - if (ret) { - nv04_surface_2d_takedown(&ctx); - return NULL; - } - - BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_DMA_NOTIFY, 1); - OUT_RING (chan, ctx->ntfy->handle); - BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_SURFACE, 1); - OUT_RING (chan, ctx->surf2d->handle); - BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_OPERATION, 1); - OUT_RING (chan, NV01_IMAGE_BLIT_OPERATION_SRCCOPY); - - ret = nouveau_grobj_alloc(chan, handle++, NV04_GDI_RECTANGLE_TEXT, - &ctx->rect); - if (ret) { - nv04_surface_2d_takedown(&ctx); - return NULL; - } - - BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1); - OUT_RING (chan, ctx->ntfy->handle); - BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1); - OUT_RING (chan, ctx->surf2d->handle); - BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1); - OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY); - BEGIN_RING(chan, ctx->rect, - NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1); - OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE); - - switch (chan->device->chipset & 0xf0) { - case 0x00: - case 0x10: - class = NV04_SWIZZLED_SURFACE; - break; - case 0x20: - class = NV20_SWIZZLED_SURFACE; - break; - case 0x30: - class = NV30_SWIZZLED_SURFACE; - break; - case 0x40: - case 0x60: - class = NV40_SWIZZLED_SURFACE; - break; - default: - /* Famous last words: this really can't happen.. */ - assert(0); - break; - } - - ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->swzsurf); - if (ret) { - nv04_surface_2d_takedown(&ctx); - return NULL; - } - - switch (chan->device->chipset & 0xf0) { - case 0x10: - case 0x20: - class = NV10_SCALED_IMAGE_FROM_MEMORY; - break; - case 0x30: - class = NV30_SCALED_IMAGE_FROM_MEMORY; - break; - case 0x40: - case 0x60: - class = NV40_SCALED_IMAGE_FROM_MEMORY; - break; - default: - class = NV04_SCALED_IMAGE_FROM_MEMORY; - break; - } - - ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->sifm); - if (ret) { - nv04_surface_2d_takedown(&ctx); - return NULL; - } - - ctx->copy = nv04_surface_copy; - ctx->fill = nv04_surface_fill; - return ctx; -} - -struct nv04_surface* -nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns) -{ - int temp_flags; - - // printf("creating temp, flags is %i!\n", flags); - - if(ns->base.usage & PIPE_BUFFER_USAGE_DISCARD) - { - temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ; - ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_DISCARD; - } - else - { - temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE; - ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_GPU_READ; - } - - ns->base.usage = PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE; - - struct pipe_texture templ; - memset(&templ, 0, sizeof(templ)); - templ.format = ns->base.texture->format; - templ.target = PIPE_TEXTURE_2D; - templ.width0 = ns->base.width; - templ.height0 = ns->base.height; - templ.depth0 = 1; - templ.last_level = 0; - - // TODO: this is probably wrong and we should specifically handle multisampling somehow once it is implemented - templ.nr_samples = ns->base.texture->nr_samples; - - templ.tex_usage = ns->base.texture->tex_usage | PIPE_TEXTURE_USAGE_RENDER_TARGET; - - struct pipe_texture* temp_tex = pscreen->texture_create(pscreen, &templ); - struct nv04_surface* temp_ns = (struct nv04_surface*)pscreen->get_tex_surface(pscreen, temp_tex, 0, 0, 0, temp_flags); - temp_ns->backing = ns; - - if(ns->base.usage & PIPE_BUFFER_USAGE_GPU_READ) - eng2d->copy(eng2d, &temp_ns->backing->base, 0, 0, &ns->base, 0, 0, ns->base.width, ns->base.height); - - return temp_ns; -} - diff --git a/src/gallium/drivers/nouveau/nv04_surface_2d.h b/src/gallium/drivers/nouveau/nv04_surface_2d.h deleted file mode 100644 index ce696a11a39..00000000000 --- a/src/gallium/drivers/nouveau/nv04_surface_2d.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef __NV04_SURFACE_2D_H__ -#define __NV04_SURFACE_2D_H__ - -struct nv04_surface { - struct pipe_surface base; - unsigned pitch; - struct nv04_surface* backing; -}; - -struct nv04_surface_2d { - struct nouveau_notifier *ntfy; - struct nouveau_grobj *surf2d; - struct nouveau_grobj *swzsurf; - struct nouveau_grobj *m2mf; - struct nouveau_grobj *rect; - struct nouveau_grobj *blit; - struct nouveau_grobj *sifm; - - struct pipe_buffer *(*buf)(struct pipe_surface *); - - void (*copy)(struct nv04_surface_2d *, struct pipe_surface *dst, - int dx, int dy, struct pipe_surface *src, int sx, int sy, - int w, int h); - void (*fill)(struct nv04_surface_2d *, struct pipe_surface *dst, - int dx, int dy, int w, int h, unsigned value); -}; - -struct nv04_surface_2d * -nv04_surface_2d_init(struct nouveau_screen *screen); - -void -nv04_surface_2d_takedown(struct nv04_surface_2d **); - -struct nv04_surface* -nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns); - -#endif diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index e912177b211..dfe97e6ed5f 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -4,6 +4,7 @@ include $(TOP)/configs/current LIBNAME = nvfx C_SOURCES = \ + nv04_surface_2d.c \ nvfx_context.c \ nvfx_clear.c \ nvfx_draw.c \ diff --git a/src/gallium/drivers/nvfx/nv04_surface_2d.c b/src/gallium/drivers/nvfx/nv04_surface_2d.c new file mode 100644 index 00000000000..ed18c9f24dc --- /dev/null +++ b/src/gallium/drivers/nvfx/nv04_surface_2d.c @@ -0,0 +1,545 @@ +#include "pipe/p_context.h" +#include "pipe/p_format.h" +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_util.h" +#include "nouveau/nouveau_screen.h" +#include "nv04_surface_2d.h" + +static INLINE int +nv04_surface_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_I8_UNORM: + return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8; + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_L8A8_UNORM: + return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5; + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + return NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32; + default: + return -1; + } +} + +static INLINE int +nv04_rect_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_A8_UNORM: + return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; + case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5; + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; + default: + return -1; + } +} + +static INLINE int +nv04_scaled_image_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_I8_UNORM: + return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8; + case PIPE_FORMAT_B5G5R5A1_UNORM: + return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5; + case PIPE_FORMAT_B8G8R8A8_UNORM: + return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8; + case PIPE_FORMAT_B8G8R8X8_UNORM: + return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8; + case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_L8A8_UNORM: + return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5; + default: + return -1; + } +} + +static INLINE unsigned +nv04_swizzle_bits_square(unsigned x, unsigned y) +{ + unsigned u = (x & 0x001) << 0 | + (x & 0x002) << 1 | + (x & 0x004) << 2 | + (x & 0x008) << 3 | + (x & 0x010) << 4 | + (x & 0x020) << 5 | + (x & 0x040) << 6 | + (x & 0x080) << 7 | + (x & 0x100) << 8 | + (x & 0x200) << 9 | + (x & 0x400) << 10 | + (x & 0x800) << 11; + + unsigned v = (y & 0x001) << 1 | + (y & 0x002) << 2 | + (y & 0x004) << 3 | + (y & 0x008) << 4 | + (y & 0x010) << 5 | + (y & 0x020) << 6 | + (y & 0x040) << 7 | + (y & 0x080) << 8 | + (y & 0x100) << 9 | + (y & 0x200) << 10 | + (y & 0x400) << 11 | + (y & 0x800) << 12; + return v | u; +} + +/* rectangular swizzled textures are linear concatenations of swizzled square tiles */ +static INLINE unsigned +nv04_swizzle_bits(unsigned x, unsigned y, unsigned w, unsigned h) +{ + unsigned s = MIN2(w, h); + unsigned m = s - 1; + return (((x | y) & ~m) * s) | nv04_swizzle_bits_square(x & m, y & m); +} + +static int +nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, + struct pipe_surface *dst, int dx, int dy, + struct pipe_surface *src, int sx, int sy, + int w, int h) +{ + struct nouveau_channel *chan = ctx->swzsurf->channel; + struct nouveau_grobj *swzsurf = ctx->swzsurf; + struct nouveau_grobj *sifm = ctx->sifm; + struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src)); + struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst)); + const unsigned src_pitch = ((struct nv04_surface *)src)->pitch; + /* Max width & height may not be the same on all HW, but must be POT */ + const unsigned max_w = 1024; + const unsigned max_h = 1024; + unsigned sub_w = w > max_w ? max_w : w; + unsigned sub_h = h > max_h ? max_h : h; + unsigned x; + unsigned y; + + /* Swizzled surfaces must be POT */ + assert(util_is_pot(dst->width) && util_is_pot(dst->height)); + + /* If area is too large to copy in one shot we must copy it in POT chunks to meet alignment requirements */ + assert(sub_w == w || util_is_pot(sub_w)); + assert(sub_h == h || util_is_pot(sub_h)); + + MARK_RING (chan, 8 + ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*17, 2 + + ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*2); + + BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1); + OUT_RELOCo(chan, dst_bo, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_FORMAT, 1); + OUT_RING (chan, nv04_surface_format(dst->format) | + log2i(dst->width) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT | + log2i(dst->height) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT); + + BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1); + OUT_RELOCo(chan, src_bo, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1); + OUT_RING (chan, swzsurf->handle); + + for (y = 0; y < h; y += sub_h) { + sub_h = MIN2(sub_h, h - y); + + for (x = 0; x < w; x += sub_w) { + sub_w = MIN2(sub_w, w - x); + + assert(!(dst->offset & 63)); + + BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1); + OUT_RELOCl(chan, dst_bo, dst->offset, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(chan, sifm, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9); + OUT_RING (chan, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE); + OUT_RING (chan, nv04_scaled_image_format(src->format)); + OUT_RING (chan, NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY); + OUT_RING (chan, (x + dx) | ((y + dy) << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT)); + OUT_RING (chan, sub_h << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w); + OUT_RING (chan, (x + dx) | ((y + dy) << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT)); + OUT_RING (chan, sub_h << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w); + OUT_RING (chan, 1 << 20); + OUT_RING (chan, 1 << 20); + + BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_SIZE, 4); + OUT_RING (chan, sub_h << NV03_SCALED_IMAGE_FROM_MEMORY_SIZE_H_SHIFT | sub_w); + OUT_RING (chan, src_pitch | + NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER | + NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE); + OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * util_format_get_blocksize(src->texture->format), + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RING (chan, 0); + } + } + + return 0; +} + +static int +nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx, + struct pipe_surface *dst, int dx, int dy, + struct pipe_surface *src, int sx, int sy, int w, int h) +{ + struct nouveau_channel *chan = ctx->m2mf->channel; + struct nouveau_grobj *m2mf = ctx->m2mf; + struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src)); + struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst)); + unsigned src_pitch = ((struct nv04_surface *)src)->pitch; + unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; + unsigned dst_offset = dst->offset + dy * dst_pitch + + dx * util_format_get_blocksize(dst->texture->format); + unsigned src_offset = src->offset + sy * src_pitch + + sx * util_format_get_blocksize(src->texture->format); + + MARK_RING (chan, 3 + ((h / 2047) + 1) * 9, 2 + ((h / 2047) + 1) * 2); + BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2); + OUT_RELOCo(chan, src_bo, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCo(chan, dst_bo, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + while (h) { + int count = (h > 2047) ? 2047 : h; + + BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8); + OUT_RELOCl(chan, src_bo, src_offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCl(chan, dst_bo, dst_offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR); + OUT_RING (chan, src_pitch); + OUT_RING (chan, dst_pitch); + OUT_RING (chan, w * util_format_get_blocksize(src->texture->format)); + OUT_RING (chan, count); + OUT_RING (chan, 0x0101); + OUT_RING (chan, 0); + + h -= count; + src_offset += src_pitch * count; + dst_offset += dst_pitch * count; + } + + return 0; +} + +static int +nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst, + int dx, int dy, struct pipe_surface *src, int sx, int sy, + int w, int h) +{ + struct nouveau_channel *chan = ctx->surf2d->channel; + struct nouveau_grobj *surf2d = ctx->surf2d; + struct nouveau_grobj *blit = ctx->blit; + struct nouveau_bo *src_bo = nouveau_bo(ctx->buf(src)); + struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst)); + unsigned src_pitch = ((struct nv04_surface *)src)->pitch; + unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; + int format; + + format = nv04_surface_format(dst->format); + if (format < 0) + return 1; + + MARK_RING (chan, 12, 4); + BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); + OUT_RELOCo(chan, src_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); + OUT_RING (chan, format); + OUT_RING (chan, (dst_pitch << 16) | src_pitch); + OUT_RELOCl(chan, src_bo, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(chan, blit, 0x0300, 3); + OUT_RING (chan, (sy << 16) | sx); + OUT_RING (chan, (dy << 16) | dx); + OUT_RING (chan, ( h << 16) | w); + + return 0; +} + +static void +nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst, + int dx, int dy, struct pipe_surface *src, int sx, int sy, + int w, int h) +{ + unsigned src_pitch = ((struct nv04_surface *)src)->pitch; + unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; + int src_linear = src->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR; + int dst_linear = dst->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR; + + assert(src->format == dst->format); + + /* Setup transfer to swizzle the texture to vram if needed */ + if (src_linear && !dst_linear && w > 1 && h > 1) { + nv04_surface_copy_swizzle(ctx, dst, dx, dy, src, sx, sy, w, h); + return; + } + + /* NV_CONTEXT_SURFACES_2D has buffer alignment restrictions, fallback + * to NV_MEMORY_TO_MEMORY_FORMAT in this case. + */ + if ((src->offset & 63) || (dst->offset & 63) || + (src_pitch & 63) || (dst_pitch & 63)) { + nv04_surface_copy_m2mf(ctx, dst, dx, dy, src, sx, sy, w, h); + return; + } + + nv04_surface_copy_blit(ctx, dst, dx, dy, src, sx, sy, w, h); +} + +static void +nv04_surface_fill(struct nv04_surface_2d *ctx, struct pipe_surface *dst, + int dx, int dy, int w, int h, unsigned value) +{ + struct nouveau_channel *chan = ctx->surf2d->channel; + struct nouveau_grobj *surf2d = ctx->surf2d; + struct nouveau_grobj *rect = ctx->rect; + struct nouveau_bo *dst_bo = nouveau_bo(ctx->buf(dst)); + unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch; + int cs2d_format, gdirect_format; + + cs2d_format = nv04_surface_format(dst->format); + assert(cs2d_format >= 0); + + gdirect_format = nv04_rect_format(dst->format); + assert(gdirect_format >= 0); + + MARK_RING (chan, 16, 4); + BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); + OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); + OUT_RING (chan, cs2d_format); + OUT_RING (chan, (dst_pitch << 16) | dst_pitch); + OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1); + OUT_RING (chan, gdirect_format); + BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1); + OUT_RING (chan, value); + BEGIN_RING(chan, rect, + NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2); + OUT_RING (chan, (dx << 16) | dy); + OUT_RING (chan, ( w << 16) | h); +} + +void +nv04_surface_2d_takedown(struct nv04_surface_2d **pctx) +{ + struct nv04_surface_2d *ctx; + + if (!pctx || !*pctx) + return; + ctx = *pctx; + *pctx = NULL; + + nouveau_notifier_free(&ctx->ntfy); + nouveau_grobj_free(&ctx->m2mf); + nouveau_grobj_free(&ctx->surf2d); + nouveau_grobj_free(&ctx->swzsurf); + nouveau_grobj_free(&ctx->rect); + nouveau_grobj_free(&ctx->blit); + nouveau_grobj_free(&ctx->sifm); + + FREE(ctx); +} + +struct nv04_surface_2d * +nv04_surface_2d_init(struct nouveau_screen *screen) +{ + struct nv04_surface_2d *ctx = CALLOC_STRUCT(nv04_surface_2d); + struct nouveau_channel *chan = screen->channel; + unsigned handle = 0x88000000, class; + int ret; + + if (!ctx) + return NULL; + + ret = nouveau_notifier_alloc(chan, handle++, 1, &ctx->ntfy); + if (ret) { + nv04_surface_2d_takedown(&ctx); + return NULL; + } + + ret = nouveau_grobj_alloc(chan, handle++, 0x0039, &ctx->m2mf); + if (ret) { + nv04_surface_2d_takedown(&ctx); + return NULL; + } + + BEGIN_RING(chan, ctx->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1); + OUT_RING (chan, ctx->ntfy->handle); + + if (chan->device->chipset < 0x10) + class = NV04_CONTEXT_SURFACES_2D; + else + class = NV10_CONTEXT_SURFACES_2D; + + ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->surf2d); + if (ret) { + nv04_surface_2d_takedown(&ctx); + return NULL; + } + + BEGIN_RING(chan, ctx->surf2d, + NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); + + if (chan->device->chipset < 0x10) + class = NV04_IMAGE_BLIT; + else + class = NV12_IMAGE_BLIT; + + ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->blit); + if (ret) { + nv04_surface_2d_takedown(&ctx); + return NULL; + } + + BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_DMA_NOTIFY, 1); + OUT_RING (chan, ctx->ntfy->handle); + BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_SURFACE, 1); + OUT_RING (chan, ctx->surf2d->handle); + BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_OPERATION, 1); + OUT_RING (chan, NV01_IMAGE_BLIT_OPERATION_SRCCOPY); + + ret = nouveau_grobj_alloc(chan, handle++, NV04_GDI_RECTANGLE_TEXT, + &ctx->rect); + if (ret) { + nv04_surface_2d_takedown(&ctx); + return NULL; + } + + BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1); + OUT_RING (chan, ctx->ntfy->handle); + BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1); + OUT_RING (chan, ctx->surf2d->handle); + BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1); + OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY); + BEGIN_RING(chan, ctx->rect, + NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1); + OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE); + + switch (chan->device->chipset & 0xf0) { + case 0x00: + case 0x10: + class = NV04_SWIZZLED_SURFACE; + break; + case 0x20: + class = NV20_SWIZZLED_SURFACE; + break; + case 0x30: + class = NV30_SWIZZLED_SURFACE; + break; + case 0x40: + case 0x60: + class = NV40_SWIZZLED_SURFACE; + break; + default: + /* Famous last words: this really can't happen.. */ + assert(0); + break; + } + + ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->swzsurf); + if (ret) { + nv04_surface_2d_takedown(&ctx); + return NULL; + } + + switch (chan->device->chipset & 0xf0) { + case 0x10: + case 0x20: + class = NV10_SCALED_IMAGE_FROM_MEMORY; + break; + case 0x30: + class = NV30_SCALED_IMAGE_FROM_MEMORY; + break; + case 0x40: + case 0x60: + class = NV40_SCALED_IMAGE_FROM_MEMORY; + break; + default: + class = NV04_SCALED_IMAGE_FROM_MEMORY; + break; + } + + ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->sifm); + if (ret) { + nv04_surface_2d_takedown(&ctx); + return NULL; + } + + ctx->copy = nv04_surface_copy; + ctx->fill = nv04_surface_fill; + return ctx; +} + +struct nv04_surface* +nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns) +{ + int temp_flags; + + // printf("creating temp, flags is %i!\n", flags); + + if(ns->base.usage & PIPE_BUFFER_USAGE_DISCARD) + { + temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ; + ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_DISCARD; + } + else + { + temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE; + ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_GPU_READ; + } + + ns->base.usage = PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE; + + struct pipe_texture templ; + memset(&templ, 0, sizeof(templ)); + templ.format = ns->base.texture->format; + templ.target = PIPE_TEXTURE_2D; + templ.width0 = ns->base.width; + templ.height0 = ns->base.height; + templ.depth0 = 1; + templ.last_level = 0; + + // TODO: this is probably wrong and we should specifically handle multisampling somehow once it is implemented + templ.nr_samples = ns->base.texture->nr_samples; + + templ.tex_usage = ns->base.texture->tex_usage | PIPE_TEXTURE_USAGE_RENDER_TARGET; + + struct pipe_texture* temp_tex = pscreen->texture_create(pscreen, &templ); + struct nv04_surface* temp_ns = (struct nv04_surface*)pscreen->get_tex_surface(pscreen, temp_tex, 0, 0, 0, temp_flags); + temp_ns->backing = ns; + + if(ns->base.usage & PIPE_BUFFER_USAGE_GPU_READ) + eng2d->copy(eng2d, &temp_ns->backing->base, 0, 0, &ns->base, 0, 0, ns->base.width, ns->base.height); + + return temp_ns; +} diff --git a/src/gallium/drivers/nvfx/nv04_surface_2d.h b/src/gallium/drivers/nvfx/nv04_surface_2d.h new file mode 100644 index 00000000000..ce696a11a39 --- /dev/null +++ b/src/gallium/drivers/nvfx/nv04_surface_2d.h @@ -0,0 +1,37 @@ +#ifndef __NV04_SURFACE_2D_H__ +#define __NV04_SURFACE_2D_H__ + +struct nv04_surface { + struct pipe_surface base; + unsigned pitch; + struct nv04_surface* backing; +}; + +struct nv04_surface_2d { + struct nouveau_notifier *ntfy; + struct nouveau_grobj *surf2d; + struct nouveau_grobj *swzsurf; + struct nouveau_grobj *m2mf; + struct nouveau_grobj *rect; + struct nouveau_grobj *blit; + struct nouveau_grobj *sifm; + + struct pipe_buffer *(*buf)(struct pipe_surface *); + + void (*copy)(struct nv04_surface_2d *, struct pipe_surface *dst, + int dx, int dy, struct pipe_surface *src, int sx, int sy, + int w, int h); + void (*fill)(struct nv04_surface_2d *, struct pipe_surface *dst, + int dx, int dy, int w, int h, unsigned value); +}; + +struct nv04_surface_2d * +nv04_surface_2d_init(struct nouveau_screen *screen); + +void +nv04_surface_2d_takedown(struct nv04_surface_2d **); + +struct nv04_surface* +nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns); + +#endif diff --git a/src/gallium/drivers/nvfx/nvfx_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c index 5dced8dae47..0f5ed61aab7 100644 --- a/src/gallium/drivers/nvfx/nvfx_miptree.c +++ b/src/gallium/drivers/nvfx/nvfx_miptree.c @@ -5,7 +5,7 @@ #include "util/u_math.h" #include "nvfx_context.h" -#include "../nouveau/nv04_surface_2d.h" +#include "nv04_surface_2d.h" diff --git a/src/gallium/drivers/nvfx/nvfx_screen.h b/src/gallium/drivers/nvfx/nvfx_screen.h index e076b876b02..c0b4b9899dd 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.h +++ b/src/gallium/drivers/nvfx/nvfx_screen.h @@ -2,7 +2,7 @@ #define __NVFX_SCREEN_H__ #include "nouveau/nouveau_screen.h" -#include "nouveau/nv04_surface_2d.h" +#include "nv04_surface_2d.h" struct nvfx_screen { struct nouveau_screen base; -- cgit v1.2.3 From d40a069206ea76feafe35c417d79c8bd5d62b4c1 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Mon, 22 Feb 2010 11:44:31 +0100 Subject: nvfx: add NOUVEAU_SWTNL to force swtnl at runtime Replace the FORCE_SWTNL macro with a NOUVEAU_SWTNL environment variable. --- src/gallium/drivers/nvfx/nvfx_vbo.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index 9d501ef1da9..54369f9b64e 100644 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -10,7 +10,14 @@ #include "nouveau/nouveau_pushbuf.h" #include "nouveau/nouveau_util.h" -#define FORCE_SWTNL 0 +static boolean +nvfx_force_swtnl(struct nvfx_context *nvfx) +{ + static int force_swtnl = -1; + if(force_swtnl < 0) + force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", 0); + return force_swtnl; +} static INLINE int nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) @@ -175,7 +182,7 @@ nvfx_draw_arrays(struct pipe_context *pipe, unsigned restart = 0; nvfx_vbo_set_idxbuf(nvfx, NULL, 0); - if (FORCE_SWTNL || !nvfx_state_validate(nvfx)) { + if (nvfx_force_swtnl(nvfx) || !nvfx_state_validate(nvfx)) { nvfx_draw_elements_swtnl(pipe, NULL, 0, mode, start, count); return; @@ -465,7 +472,7 @@ nvfx_draw_elements(struct pipe_context *pipe, boolean idxbuf; idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize); - if (FORCE_SWTNL || !nvfx_state_validate(nvfx)) { + if (nvfx_force_swtnl(nvfx) || !nvfx_state_validate(nvfx)) { nvfx_draw_elements_swtnl(pipe, NULL, 0, mode, start, count); return; -- cgit v1.2.3 From ad2d0c26393760ebc7af52de9f693654381431f3 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Mon, 22 Feb 2010 12:35:18 +0100 Subject: nvfx: draw: create draw vp with ureg This avoids duplicating the vertex program generation logic and makes the same code work for both nv30 and nv40. --- src/gallium/drivers/nvfx/nvfx_draw.c | 67 +++++++++++++----------------------- 1 file changed, 24 insertions(+), 43 deletions(-) (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c index 7308f0667c7..494d5435d63 100644 --- a/src/gallium/drivers/nvfx/nvfx_draw.c +++ b/src/gallium/drivers/nvfx/nvfx_draw.c @@ -1,5 +1,6 @@ #include "pipe/p_shader_tokens.h" #include "util/u_inlines.h" +#include "tgsi/tgsi_ureg.h" #include "util/u_pack_color.h" @@ -173,50 +174,30 @@ nvfx_render_destroy(struct draw_stage *draw) FREE(draw); } -static INLINE void -emit_mov(struct nvfx_vertex_program *vp, - unsigned dst, unsigned src, unsigned vor, unsigned mask) -{ - struct nvfx_vertex_program_exec *inst; - - vp->insns = realloc(vp->insns, - sizeof(struct nvfx_vertex_program_exec) * - ++vp->nr_insns); - inst = &vp->insns[vp->nr_insns - 1]; - - inst->data[0] = 0x401f9c6c; - inst->data[1] = 0x0040000d | (src << 8); - inst->data[2] = 0x8106c083; - inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13); - inst->const_index = -1; - inst->has_branch_offset = FALSE; - - vp->ir |= (1 << src); - if (vor != ~0) - vp->or |= (1 << vor); -} - static struct nvfx_vertex_program * -create_drawvp(struct nvfx_context *nvfx) +nvfx_create_drawvp(struct nvfx_context *nvfx) { - struct nvfx_vertex_program *vp = CALLOC_STRUCT(nvfx_vertex_program); - unsigned i; - - // nv30 support comes in a later patch - assert(nvfx->is_nv4x); - - emit_mov(vp, NVFX_VP(INST_DEST_POS), 0, ~0, 0xf); - emit_mov(vp, NVFX_VP(INST_DEST_COL0), 3, 0, 0xf); - emit_mov(vp, NVFX_VP(INST_DEST_COL1), 4, 1, 0xf); - emit_mov(vp, NVFX_VP(INST_DEST_BFC0), 3, 2, 0xf); - emit_mov(vp, NVFX_VP(INST_DEST_BFC1), 4, 3, 0xf); - emit_mov(vp, NVFX_VP(INST_DEST_FOGC), 5, 4, 0x8); - for (i = 0; i < 8; i++) - emit_mov(vp, NVFX_VP(INST_DEST_TC(i)), 8 + i, 14 + i, 0xf); - - vp->insns[vp->nr_insns - 1].data[3] |= 1; - vp->translated = TRUE; - return vp; + struct ureg_program *ureg; + uint i; + + ureg = ureg_create( TGSI_PROCESSOR_VERTEX ); + if (ureg == NULL) + return NULL; + + ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0), ureg_DECL_vs_input(ureg, 0)); + ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0), ureg_DECL_vs_input(ureg, 3)); + ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1), ureg_DECL_vs_input(ureg, 4)); + ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_BCOLOR, 0), ureg_DECL_vs_input(ureg, 3)); + ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_BCOLOR, 1), ureg_DECL_vs_input(ureg, 4)); + ureg_MOV(ureg, + ureg_writemask(ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 1), TGSI_WRITEMASK_X), + ureg_DECL_vs_input(ureg, 5)); + for (i = 0; i < 8; ++i) + ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, i), ureg_DECL_vs_input(ureg, 8 + i)); + + ureg_END( ureg ); + + return ureg_create_shader_and_destroy( ureg, &nvfx->pipe ); } struct draw_stage * @@ -225,7 +206,7 @@ nvfx_draw_render_stage(struct nvfx_context *nvfx) struct nvfx_render_stage *render = CALLOC_STRUCT(nvfx_render_stage); if (!nvfx->swtnl.vertprog) - nvfx->swtnl.vertprog = create_drawvp(nvfx); + nvfx->swtnl.vertprog = nvfx_create_drawvp(nvfx); render->nvfx = nvfx; render->stage.draw = nvfx->draw; -- cgit v1.2.3 From ac7ae8bc6ae0d364103d655482a522c12504816b Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Mon, 22 Feb 2010 13:15:24 +0100 Subject: nvfx: draw: make perspective corrective texturing work We must divide everything in the position by w, and emit position as a 4-component vector. Not sure why we must divide, but it works (see progs/redbook/checker). --- src/gallium/drivers/nvfx/nvfx_draw.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c index 494d5435d63..4e0575885d5 100644 --- a/src/gallium/drivers/nvfx/nvfx_draw.c +++ b/src/gallium/drivers/nvfx/nvfx_draw.c @@ -67,6 +67,13 @@ nvfx_render_vertex(struct nvfx_context *nvfx, const struct vertex_header *v) OUT_RING (chan, fui(v->data[idx][2])); OUT_RING (chan, fui(v->data[idx][3])); break; + case 0xff: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4); + OUT_RING (chan, fui(v->data[idx][0] / v->data[idx][3])); + OUT_RING (chan, fui(v->data[idx][1] / v->data[idx][3])); + OUT_RING (chan, fui(v->data[idx][2] / v->data[idx][3])); + OUT_RING (chan, fui(1.0f / v->data[idx][3])); + break; case EMIT_4UB: BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4UB(hw), 1); OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][0]), @@ -330,7 +337,7 @@ nvfx_state_vtxfmt_validate(struct nvfx_context *nvfx) emit_attrib(nvfx, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0); } - emit_attrib(nvfx, 0, EMIT_3F, TGSI_SEMANTIC_POSITION, 0); + emit_attrib(nvfx, 0, 0xff, TGSI_SEMANTIC_POSITION, 0); return FALSE; } -- cgit v1.2.3 From 152dffd3e196208a85148c4a2f7a9a6df44f3bff Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Mon, 22 Feb 2010 13:18:10 +0100 Subject: nvfx: draw: emit color as floating point Don't lose precision by converting to u8. --- src/gallium/drivers/nvfx/nvfx_draw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c index 4e0575885d5..aea0baadf91 100644 --- a/src/gallium/drivers/nvfx/nvfx_draw.c +++ b/src/gallium/drivers/nvfx/nvfx_draw.c @@ -324,7 +324,7 @@ nvfx_state_vtxfmt_validate(struct nvfx_context *nvfx) for (i = 0; i < 2; i++) { if (!(colour & (1 << i))) continue; - emit_attrib(nvfx, 3 + i, EMIT_4UB, TGSI_SEMANTIC_COLOR, i); + emit_attrib(nvfx, 3 + i, EMIT_4F, TGSI_SEMANTIC_COLOR, i); } for (i = 0; i < 8; i++) { -- cgit v1.2.3 From a174db480b9dbe3ae3475ce0cf4b3591234e8f05 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Mon, 22 Feb 2010 13:22:24 +0100 Subject: nvfx: draw: make swtnl draw_elements actually work. It was totally broken: the index buffer was passed as NULL! --- src/gallium/drivers/nvfx/nvfx_vbo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index 54369f9b64e..257087f8f63 100644 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -473,7 +473,7 @@ nvfx_draw_elements(struct pipe_context *pipe, idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize); if (nvfx_force_swtnl(nvfx) || !nvfx_state_validate(nvfx)) { - nvfx_draw_elements_swtnl(pipe, NULL, 0, + nvfx_draw_elements_swtnl(pipe, indexBuffer, indexSize, mode, start, count); return; } -- cgit v1.2.3 From 49f2a89956203a99be37a6fb18a1fab79c5e5429 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 21 Feb 2010 18:17:03 +0100 Subject: nvfx: clean up shader header Currently the behavior of shader.h depends on some constants that are defined differently in vertex and fragment programs. This patch cleans that up by splitting the relevant symbols in vertex program and fragment program variants --- src/gallium/drivers/nvfx/nvfx_draw.c | 1 - src/gallium/drivers/nvfx/nvfx_fragprog.c | 91 +++++++++++++------------------- src/gallium/drivers/nvfx/nvfx_shader.h | 56 ++++++++++++++------ src/gallium/drivers/nvfx/nvfx_vertprog.c | 47 ++++++----------- 4 files changed, 92 insertions(+), 103 deletions(-) (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c index aea0baadf91..5379b29efd1 100644 --- a/src/gallium/drivers/nvfx/nvfx_draw.c +++ b/src/gallium/drivers/nvfx/nvfx_draw.c @@ -9,7 +9,6 @@ #include "draw/draw_pipe.h" #include "nvfx_context.h" -#define NVFX_SHADER_NO_FUCKEDNESS #include "nv30_vertprog.h" #include "nv40_vertprog.h" diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index ecc193877b8..8066528a5b5 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -8,25 +8,8 @@ #include "tgsi/tgsi_util.h" #include "nvfx_context.h" - -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 1 -#define MASK_Y 2 -#define MASK_Z 4 -#define MASK_W 8 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE NVFX_FP_OP_DST_SCALE_1X -#define DEF_CTEST NVFX_FP_OP_COND_TR #include "nvfx_shader.h" -#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nvfx_sr_neg((s)) -#define abs(s) nvfx_sr_abs((s)) -#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v) - #define MAX_CONSTS 128 #define MAX_IMM 32 struct nvfx_fpc { @@ -308,10 +291,10 @@ tgsi_mask(uint tgsi) { int mask = 0; - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_FP_MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_FP_MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_FP_MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_FP_MASK_W; return mask; } @@ -337,7 +320,7 @@ src_native_swz(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc, } } - if (mask == MASK_ALL) + if (mask == NVFX_FP_MASK_ALL) return TRUE; *src = temp(fpc); @@ -393,7 +376,7 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, + arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none); } break; @@ -404,7 +387,7 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, + arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none); } break; @@ -415,7 +398,7 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, + arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none); } break; @@ -448,22 +431,22 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, tmp = nvfx_sr(NVFXSR_NONE, 0); tmp.cc_update = 1; arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); - dst.cc_test = NVFX_VP_INST_COND_GE; + dst.cc_test = NVFX_COND_GE; arith(fpc, sat, MOV, dst, mask, src[2], none, none); - dst.cc_test = NVFX_VP_INST_COND_LT; + dst.cc_test = NVFX_COND_LT; arith(fpc, sat, MOV, dst, mask, src[1], none, none); break; case TGSI_OPCODE_COS: arith(fpc, sat, COS, dst, mask, src[0], none, none); break; case TGSI_OPCODE_DDX: - if (mask & (MASK_Z | MASK_W)) { + if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) { tmp = temp(fpc); - arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, + arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none); - arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, + arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none); - arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, src[0], + arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none); arith(fpc, 0, MOV, dst, mask, tmp, none, none); } else { @@ -471,13 +454,13 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, } break; case TGSI_OPCODE_DDY: - if (mask & (MASK_Z | MASK_W)) { + if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) { tmp = temp(fpc); - arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, + arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none); - arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, + arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none); - arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, src[0], + arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none); arith(fpc, 0, MOV, dst, mask, tmp, none, none); } else { @@ -492,7 +475,7 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, break; case TGSI_OPCODE_DPH: tmp = temp(fpc); - arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); + arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[1], none); arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), swz(src[1], W, W, W, W), none); break; @@ -514,8 +497,8 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, case TGSI_OPCODE_KIL: dst = nvfx_sr(NVFXSR_NONE, 0); dst.cc_update = 1; - arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); - dst.cc_update = 0; dst.cc_test = NVFX_FP_OP_COND_LT; + arith(fpc, 0, MOV, dst, NVFX_FP_MASK_ALL, src[0], none, none); + dst.cc_update = 0; dst.cc_test = NVFX_COND_LT; arith(fpc, 0, KIL, dst, 0, none, none, none); break; case TGSI_OPCODE_LG2: @@ -551,9 +534,9 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, arith(fpc, sat, POW_NV30, dst, mask, src[0], src[1], none); else { tmp = temp(fpc); - arith(fpc, 0, LG2, tmp, MASK_X, + arith(fpc, 0, LG2, tmp, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none); - arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + arith(fpc, 0, MUL, tmp, NVFX_FP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none); arith(fpc, sat, EX2, dst, mask, swz(tmp, X, X, X, X), none, none); @@ -570,9 +553,9 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, arith(fpc, 0, RFL_NV30, dst, mask, src[0], src[1], none); else { tmp = temp(fpc); - arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none); - arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none); - arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z, + arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[0], none); + arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_Y, src[0], src[1], none); + arith(fpc, 0, DIV, scale(tmp, 2X), NVFX_FP_MASK_Z, swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); arith(fpc, sat, MAD, dst, mask, swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])); @@ -583,7 +566,7 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, arith(fpc, sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); else { tmp = temp(fpc); - arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X, + arith(fpc, 0, LG2, scale(tmp, INV_2X), NVFX_FP_MASK_X, abs(swz(src[0], X, X, X, X)), none, none); arith(fpc, sat, EX2, dst, mask, neg(swz(tmp, X, X, X, X)), none, none); @@ -591,25 +574,25 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, break; case TGSI_OPCODE_SCS: /* avoid overwriting the source */ - if(src[0].swz[SWZ_X] != SWZ_X) + if(src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X) { - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, + if (mask & NVFX_FP_MASK_X) { + arith(fpc, sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none); } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, + if (mask & NVFX_FP_MASK_Y) { + arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none); } } else { - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, + if (mask & NVFX_FP_MASK_Y) { + arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none); } - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, + if (mask & NVFX_FP_MASK_X) { + arith(fpc, sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none); } } @@ -657,7 +640,7 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, tmp = temp(fpc); arith(fpc, 0, MUL, tmp, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(fpc, sat, MAD, dst, (mask & ~MASK_W), + arith(fpc, sat, MAD, dst, (mask & ~NVFX_FP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)); break; diff --git a/src/gallium/drivers/nvfx/nvfx_shader.h b/src/gallium/drivers/nvfx/nvfx_shader.h index 191131a40a1..0b2f044f7fe 100644 --- a/src/gallium/drivers/nvfx/nvfx_shader.h +++ b/src/gallium/drivers/nvfx/nvfx_shader.h @@ -9,15 +9,6 @@ #define NVFX_VP_INST_SLOT_VEC 0 #define NVFX_VP_INST_SLOT_SCA 1 -#define NVFX_VP_INST_COND_FL 0 /* guess */ -#define NVFX_VP_INST_COND_LT 1 -#define NVFX_VP_INST_COND_EQ 2 -#define NVFX_VP_INST_COND_LE 3 -#define NVFX_VP_INST_COND_GT 4 -#define NVFX_VP_INST_COND_NE 5 -#define NVFX_VP_INST_COND_GE 6 -#define NVFX_VP_INST_COND_TR 7 /* guess */ - #define NVFX_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */ #define NVFX_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */ #define NVFX_VP_INST_IN_NORMAL 2 @@ -327,13 +318,45 @@ # define NVFX_FP_SWIZZLE_W 3 #define NVFX_FP_REG_NEGATE (1 << 17) -#ifndef NVFX_SHADER_NO_FUCKEDNESS #define NVFXSR_NONE 0 #define NVFXSR_OUTPUT 1 #define NVFXSR_INPUT 2 #define NVFXSR_TEMP 3 #define NVFXSR_CONST 4 +#define NVFX_COND_FL 0 +#define NVFX_COND_LT 1 +#define NVFX_COND_EQ 2 +#define NVFX_COND_LE 3 +#define NVFX_COND_GT 4 +#define NVFX_COND_NE 5 +#define NVFX_COND_GE 6 +#define NVFX_COND_TR 7 + +/* Yes, this are ordered differently... */ + +#define NVFX_VP_MASK_X 8 +#define NVFX_VP_MASK_Y 4 +#define NVFX_VP_MASK_Z 2 +#define NVFX_VP_MASK_W 1 +#define NVFX_VP_MASK_ALL 0xf + +#define NVFX_FP_MASK_X 1 +#define NVFX_FP_MASK_Y 2 +#define NVFX_FP_MASK_Z 4 +#define NVFX_FP_MASK_W 8 +#define NVFX_FP_MASK_ALL 0xf + +#define NVFX_SWZ_X 0 +#define NVFX_SWZ_Y 1 +#define NVFX_SWZ_Z 2 +#define NVFX_SWZ_W 3 + +#define swz(s,x,y,z,w) nvfx_sr_swz((s), NVFX_SWZ_##x, NVFX_SWZ_##y, NVFX_SWZ_##z, NVFX_SWZ_##w) +#define neg(s) nvfx_sr_neg((s)) +#define abs(s) nvfx_sr_abs((s)) +#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v) + struct nvfx_sreg { int type; int index; @@ -357,13 +380,13 @@ nvfx_sr(int type, int index) struct nvfx_sreg temp = { .type = type, .index = index, - .dst_scale = DEF_SCALE, + .dst_scale = 0, .abs = 0, .negate = 0, .swz = { 0, 1, 2, 3 }, .cc_update = 0, .cc_update_reg = 0, - .cc_test = DEF_CTEST, + .cc_test = NVFX_COND_TR, .cc_test_reg = 0, .cc_swz = { 0, 1, 2, 3 }, }; @@ -375,10 +398,10 @@ nvfx_sr_swz(struct nvfx_sreg src, int x, int y, int z, int w) { struct nvfx_sreg dst = src; - dst.swz[SWZ_X] = src.swz[x]; - dst.swz[SWZ_Y] = src.swz[y]; - dst.swz[SWZ_Z] = src.swz[z]; - dst.swz[SWZ_W] = src.swz[w]; + dst.swz[NVFX_SWZ_X] = src.swz[x]; + dst.swz[NVFX_SWZ_Y] = src.swz[y]; + dst.swz[NVFX_SWZ_Z] = src.swz[z]; + dst.swz[NVFX_SWZ_W] = src.swz[w]; return dst; } @@ -402,6 +425,5 @@ nvfx_sr_scale(struct nvfx_sreg src, int scale) src.dst_scale = scale; return src; } -#endif #endif diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index 3d0e8c23a15..e5e49bf0c2e 100644 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -22,24 +22,9 @@ * 4. bugs */ -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 8 -#define MASK_Y 4 -#define MASK_Z 2 -#define MASK_W 1 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE 0 -#define DEF_CTEST 0 #include "nv30_vertprog.h" #include "nv40_vertprog.h" -#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nvfx_sr_neg((s)) -#define abs(s) nvfx_sr_abs((s)) - #define NVFX_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) struct nvfx_vpc { @@ -307,7 +292,7 @@ nvfx_vp_arith(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, int slot, int op, hw = vpc->vpi->data; - hw[0] |= (NVFX_VP_INST_COND_TR << NVFX_VP(INST_COND_SHIFT)); + hw[0] |= (NVFX_COND_TR << NVFX_VP(INST_COND_SHIFT)); hw[0] |= ((0 << NVFX_VP(INST_COND_SWZ_X_SHIFT)) | (1 << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) | (2 << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) | @@ -405,10 +390,10 @@ tgsi_mask(uint tgsi) { int mask = 0; - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_VP_MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_VP_MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_VP_MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_VP_MASK_W; return mask; } @@ -434,7 +419,7 @@ src_native_swz(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, const struct tgs } } - if (mask == MASK_ALL) + if (mask == NVFX_VP_MASK_ALL) return TRUE; *src = temp(vpc); @@ -490,7 +475,7 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); - arith(vpc, VEC, MOV, src[i], MASK_ALL, + arith(vpc, VEC, MOV, src[i], NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none); } break; @@ -501,7 +486,7 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); - arith(vpc, VEC, MOV, src[i], MASK_ALL, + arith(vpc, VEC, MOV, src[i], NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none); } break; @@ -512,7 +497,7 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); - arith(vpc, VEC, MOV, src[i], MASK_ALL, + arith(vpc, VEC, MOV, src[i], NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none); } break; @@ -588,9 +573,9 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, break; case TGSI_OPCODE_POW: tmp = temp(vpc); - arith(vpc, SCA, LG2, tmp, MASK_X, none, none, + arith(vpc, SCA, LG2, tmp, NVFX_VP_MASK_X, none, none, swz(src[0], X, X, X, X)); - arith(vpc, VEC, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + arith(vpc, VEC, MUL, tmp, NVFX_VP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none); arith(vpc, SCA, EX2, dst, mask, none, none, swz(tmp, X, X, X, X)); @@ -619,7 +604,7 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, tmp = temp(vpc); arith(vpc, VEC, MUL, tmp, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(vpc, VEC, MAD, dst, (mask & ~MASK_W), + arith(vpc, VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)); break; @@ -849,7 +834,7 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx, NVFX_VP(INST_DEST_POS)); struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx]; - arith(vpc, VEC, MOV, hpos, MASK_ALL, htmp, none, none); + arith(vpc, VEC, MOV, hpos, NVFX_VP_MASK_ALL, htmp, none, none); } /* Insert code to handle user clip planes */ @@ -865,9 +850,9 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx, unsigned mask; switch (i) { - case 0: case 3: mask = MASK_Y; break; - case 1: case 4: mask = MASK_Z; break; - case 2: case 5: mask = MASK_W; break; + case 0: case 3: mask = NVFX_VP_MASK_Y; break; + case 1: case 4: mask = NVFX_VP_MASK_Z; break; + case 2: case 5: mask = NVFX_VP_MASK_W; break; default: NOUVEAU_ERR("invalid clip dist #%d\n", i); goto out_err; -- cgit v1.2.3 From 9b3a908736130a62d79305586364f625a88a69ee Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sat, 13 Mar 2010 17:42:57 +0100 Subject: nvfx: remove src_native_swz, which was a no-op src_native_swz was used to translate 0/1 swizzles back when Gallium supported them. That support was later removed from Gallium, and the function currently always returns true. Remove it. --- src/gallium/drivers/nvfx/nvfx_fragprog.c | 44 -------------------------------- src/gallium/drivers/nvfx/nvfx_vertprog.c | 44 -------------------------------- 2 files changed, 88 deletions(-) (limited to 'src/gallium/drivers/nvfx') diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index 8066528a5b5..76351430f44 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -298,39 +298,6 @@ tgsi_mask(uint tgsi) return mask; } -static boolean -src_native_swz(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc, - struct nvfx_sreg *src) -{ - const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); - struct nvfx_sreg tgsi = tgsi_src(fpc, fsrc); - uint mask = 0; - uint c; - - for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - mask |= (1 << c); - break; - default: - assert(0); - } - } - - if (mask == NVFX_FP_MASK_ALL) - return TRUE; - - *src = temp(fpc); - - if (mask) - arith(fpc, 0, MOV, *src, mask, tgsi, none, none); - - return FALSE; -} - static boolean nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, const struct tgsi_full_instruction *finst) @@ -358,17 +325,6 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, fsrc = &finst->Src[i]; - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - case TGSI_FILE_CONSTANT: - case TGSI_FILE_TEMPORARY: - if (!src_native_swz(fpc, fsrc, &src[i])) - continue; - break; - default: - break; - } - switch (fsrc->Register.File) { case TGSI_FILE_INPUT: if (ai == -1 || ai == fsrc->Register.Index) { diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index e5e49bf0c2e..2d243be16a3 100644 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -397,39 +397,6 @@ tgsi_mask(uint tgsi) return mask; } -static boolean -src_native_swz(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc, - struct nvfx_sreg *src) -{ - const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); - struct nvfx_sreg tgsi = tgsi_src(vpc, fsrc); - uint mask = 0; - uint c; - - for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - mask |= tgsi_mask(1 << c); - break; - default: - assert(0); - } - } - - if (mask == NVFX_VP_MASK_ALL) - return TRUE; - - *src = temp(vpc); - - if (mask) - arith(vpc, VEC, MOV, *src, mask, tgsi, none, none); - - return FALSE; -} - static boolean nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, const struct tgsi_full_instruction *finst) @@ -457,17 +424,6 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, fsrc = &finst->Src[i]; - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - case TGSI_FILE_CONSTANT: - case TGSI_FILE_TEMPORARY: - if (!src_native_swz(nvfx, vpc, fsrc, &src[i])) - continue; - break; - default: - break; - } - switch (fsrc->Register.File) { case TGSI_FILE_INPUT: if (ai == -1 || ai == fsrc->Register.Index) { -- cgit v1.2.3