diff options
Diffstat (limited to 'src/gallium/state_trackers')
21 files changed, 4664 insertions, 0 deletions
diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile new file mode 100644 index 00000000000..50e3c843b55 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -0,0 +1,18 @@ +TARGET = libg3dvl.a +OBJECTS = vl_context.o vl_data.o vl_surface.o vl_shader_build.o vl_util.o +GALLIUMDIR = ../.. + +CFLAGS += -g -Wall -fPIC -Werror -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary + +############################################# + +.PHONY = all clean + +all: ${TARGET} + +${TARGET}: ${OBJECTS} + ar rcs $@ $^ + +clean: + rm -rf ${OBJECTS} ${TARGET} + diff --git a/src/gallium/state_trackers/g3dvl/tests/.gitignore b/src/gallium/state_trackers/g3dvl/tests/.gitignore new file mode 100644 index 00000000000..9b1ec4e2122 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/tests/.gitignore @@ -0,0 +1,6 @@ +test_context +test_surface +test_i_rendering +test_p_rendering +test_pf_rendering +test_b_rendering diff --git a/src/gallium/state_trackers/g3dvl/tests/Makefile b/src/gallium/state_trackers/g3dvl/tests/Makefile new file mode 100644 index 00000000000..45cefa2e570 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/tests/Makefile @@ -0,0 +1,45 @@ +GALLIUMDIR = ../../.. + +CFLAGS += -g -Wall -Werror \ + -I${GALLIUMDIR}/state_trackers/g3dvl \ + -I${GALLIUMDIR}/winsys/g3dvl \ + -I${GALLIUMDIR}/include \ + -I${GALLIUMDIR}/auxiliary \ + -I${GALLIUMDIR}/drivers +LDFLAGS += -L${GALLIUMDIR}/state_trackers/g3dvl \ + -L${GALLIUMDIR}/drivers/softpipe \ + -L${GALLIUMDIR}/auxiliary/tgsi \ + -L${GALLIUMDIR}/auxiliary/draw \ + -L${GALLIUMDIR}/auxiliary/util \ + -L${GALLIUMDIR}/auxiliary/translate \ + -L${GALLIUMDIR}/auxiliary/cso_cache \ + -L${GALLIUMDIR}/auxiliary/rtasm +LIBS += -lg3dvl -lsoftpipe -ldraw -ltgsi -ltranslate -lrtasm -lcso_cache -lutil -lX11 -lm + +############################################# + +.PHONY = all clean + +all: test_context test_surface test_i_rendering test_p_rendering test_pf_rendering test_b_rendering + +test_context: test_context.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o + $(CC) ${LDFLAGS} -o $@ $^ ${LIBS} + +test_surface: test_surface.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o + $(CC) ${LDFLAGS} -o $@ $^ ${LIBS} + +test_i_rendering: test_i_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o + $(CC) ${LDFLAGS} -o $@ $^ ${LIBS} + +test_p_rendering: test_p_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o + $(CC) ${LDFLAGS} -o $@ $^ ${LIBS} + +test_pf_rendering: test_pf_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o + $(CC) ${LDFLAGS} -o $@ $^ ${LIBS} + +test_b_rendering: test_b_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o + $(CC) ${LDFLAGS} -o $@ $^ ${LIBS} + +clean: + rm -rf *.o test_context test_surface test_i_rendering test_p_rendering test_pf_rendering test_b_rendering + diff --git a/src/gallium/state_trackers/g3dvl/tests/test_b_rendering.c b/src/gallium/state_trackers/g3dvl/tests/test_b_rendering.c new file mode 100644 index 00000000000..b78cc851ae4 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/tests/test_b_rendering.c @@ -0,0 +1,226 @@ +#include <stdio.h> +#include <X11/Xlib.h> +#include <vl_context.h> +#include <vl_surface.h> +#include <xsp_winsys.h> + +static const unsigned short ycbcr16x16_420[8*8*6] = +{ + 0x00A5,0x00A5,0x00A5,0x0072,0x00A5,0x0072,0x0072,0x0072, + 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x0072,0x0072,0x0072, + 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x00A5,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5,0x00A5, + + 0x004F,0x004F,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F,0x004F, + + 0x003E,0x003E,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E,0x003E +}; + +static const signed short ycbcr16x16_420_2[8*8*6] = +{ + -0x00A5,-0x00A5,-0x00A5,-0x0072,-0x00A5,-0x0072,-0x0072,-0x0072, + -0x0072,-0x00A5,-0x0072,-0x0072,-0x00A5,-0x0072,-0x0072,-0x0072, + -0x0072,-0x00A5,-0x0072,-0x0072,-0x00A5,-0x00A5,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x00A5, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x00A5,-0x00A5, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x00A5,-0x00A5,-0x00A5, + -0x0072,-0x0072,-0x0072,-0x0072,-0x00A5,-0x00A5,-0x00A5,-0x00A5, + + -0x004F,-0x004F,-0x004F,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2, + -0x00B2,-0x00B2,-0x004F,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2, + -0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2, + -0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2, + -0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2, + -0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2, + -0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x004F, + -0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x004F,-0x004F, + + -0x003E,-0x003E,-0x003E,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060, + -0x0060,-0x0060,-0x003E,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060, + -0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060, + -0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060, + -0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060, + -0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060, + -0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x003E, + -0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x003E,-0x003E +}; + +int main(int argc, char **argv) +{ + const unsigned int video_width = 32, video_height = 32; + const unsigned int window_width = video_width * 2, window_height = video_height * 2; + int quit = 0; + Display *display; + Window root, window; + Pixmap framebuffer; + XEvent event; + struct pipe_context *pipe; + struct VL_CONTEXT *ctx; + struct VL_SURFACE *sfc, *past_sfc, *future_sfc; + struct VL_MOTION_VECTOR motion_vector[2] = + { + { + {0, 0}, {0, 0} + }, + { + {0, 0}, {0, 0} + } + }; + + display = XOpenDisplay(NULL); + root = XDefaultRootWindow(display); + window = XCreateSimpleWindow(display, root, 0, 0, window_width, window_height, 0, 0, 0); + framebuffer = XCreatePixmap(display, root, window_width, window_height, 24); + + XSelectInput(display, window, ExposureMask | KeyPressMask); + XMapWindow(display, window); + XSync(display, 0); + + pipe = create_pipe_context(display); + vlCreateContext(display, pipe, video_width, video_height, VL_FORMAT_YCBCR_420, &ctx); + vlCreateSurface(ctx, &sfc); + vlCreateSurface(ctx, &past_sfc); + vlCreateSurface(ctx, &future_sfc); + + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, past_sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, past_sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, past_sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, past_sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, future_sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, future_sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, future_sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, future_sfc); + vlRenderBMacroBlock + ( + VL_FRAME_PICTURE, + VL_FIELD_FIRST, + 0, + 0, + VL_FRAME_MC, + motion_vector, + 0x3F, + VL_DCT_FRAME_CODED, + (short*)ycbcr16x16_420_2, + past_sfc, + future_sfc, + sfc + ); + vlPutSurface(sfc, framebuffer, 0, 0, video_width, video_height, 0, 0, window_width, window_height, VL_FRAME_PICTURE); + + puts("Press any key to continue..."); + + while (!quit) + { + XNextEvent(display, &event); + switch (event.type) + { + case Expose: + { + XCopyArea + ( + display, + framebuffer, + window, + XDefaultGC(display, XDefaultScreen(display)), + 0, + 0, + window_width, + window_height, + 0, + 0 + ); + break; + } + case KeyPress: + { + quit = 1; + break; + } + } + } + + vlDestroySurface(sfc); + vlDestroySurface(past_sfc); + vlDestroySurface(future_sfc); + vlDestroyContext(ctx); + + XFreePixmap(display, framebuffer); + XDestroyWindow(display, window); + XCloseDisplay(display); + + return 0; +} + diff --git a/src/gallium/state_trackers/g3dvl/tests/test_context.c b/src/gallium/state_trackers/g3dvl/tests/test_context.c new file mode 100644 index 00000000000..2002977ee24 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/tests/test_context.c @@ -0,0 +1,22 @@ +#include <vl_context.h> +#include <xsp_winsys.h> + +int main(int argc, char **argv) +{ + const unsigned int video_width = 32, video_height = 32; + + Display *display; + struct pipe_context *pipe; + struct VL_CONTEXT *ctx; + + display = XOpenDisplay(NULL); + pipe = create_pipe_context(display); + + vlCreateContext(display, pipe, video_width, video_height, VL_FORMAT_YCBCR_420, &ctx); + vlDestroyContext(ctx); + + XCloseDisplay(display); + + return 0; +} + diff --git a/src/gallium/state_trackers/g3dvl/tests/test_i_rendering.c b/src/gallium/state_trackers/g3dvl/tests/test_i_rendering.c new file mode 100644 index 00000000000..1f964711308 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/tests/test_i_rendering.c @@ -0,0 +1,137 @@ +#include <stdio.h> +#include <X11/Xlib.h> +#include <vl_context.h> +#include <vl_surface.h> +#include <xsp_winsys.h> + +static const unsigned short ycbcr16x16_420[8*8*6] = +{ + 0x00A5,0x00A5,0x00A5,0x0072,0x00A5,0x0072,0x0072,0x0072, + 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x0072,0x0072,0x0072, + 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x00A5,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5,0x00A5, + + 0x004F,0x004F,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F,0x004F, + + 0x003E,0x003E,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E,0x003E +}; + +int main(int argc, char **argv) +{ + const unsigned int video_width = 32, video_height = 32; + const unsigned int window_width = video_width * 2, window_height = video_height * 2; + int quit = 0; + Display *display; + Window root, window; + Pixmap framebuffer; + XEvent event; + struct pipe_context *pipe; + struct VL_CONTEXT *ctx; + struct VL_SURFACE *sfc; + + display = XOpenDisplay(NULL); + root = XDefaultRootWindow(display); + window = XCreateSimpleWindow(display, root, 0, 0, window_width, window_height, 0, 0, 0); + framebuffer = XCreatePixmap(display, root, window_width, window_height, 24); + + XSelectInput(display, window, ExposureMask | KeyPressMask); + XMapWindow(display, window); + XSync(display, 0); + + pipe = create_pipe_context(display); + vlCreateContext(display, pipe, video_width, video_height, VL_FORMAT_YCBCR_420, &ctx); + vlCreateSurface(ctx, &sfc); + + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, sfc); + vlPutSurface(sfc, framebuffer, 0, 0, video_width, video_height, 0, 0, window_width, window_height, VL_FRAME_PICTURE); + + puts("Press any key to continue..."); + + while (!quit) + { + XNextEvent(display, &event); + switch (event.type) + { + case Expose: + { + XCopyArea + ( + display, + framebuffer, + window, + XDefaultGC(display, XDefaultScreen(display)), + 0, + 0, + window_width, + window_height, + 0, + 0 + ); + break; + } + case KeyPress: + { + quit = 1; + break; + } + } + } + + vlDestroySurface(sfc); + vlDestroyContext(ctx); + + XFreePixmap(display, framebuffer); + XDestroyWindow(display, window); + XCloseDisplay(display); + + return 0; +} + diff --git a/src/gallium/state_trackers/g3dvl/tests/test_p_rendering.c b/src/gallium/state_trackers/g3dvl/tests/test_p_rendering.c new file mode 100644 index 00000000000..2203349784f --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/tests/test_p_rendering.c @@ -0,0 +1,214 @@ +#include <stdio.h> +#include <X11/Xlib.h> +#include <vl_context.h> +#include <vl_surface.h> +#include <xsp_winsys.h> + +static const unsigned short ycbcr16x16_420[8*8*6] = +{ + 0x00A5,0x00A5,0x00A5,0x0072,0x00A5,0x0072,0x0072,0x0072, + 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x0072,0x0072,0x0072, + 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x00A5,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5,0x00A5, + + 0x004F,0x004F,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F,0x004F, + + 0x003E,0x003E,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E,0x003E +}; + +static const signed short ycbcr16x16_420_2[8*8*6] = +{ + -51,-51,-51, 0,-51, 0, 0, 0, + 0,-51, 0, 0,-51, 0, 0, 0, + 0,-51, 0, 0,-51,-51, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 99, 99, 99, 0, 0, 0, 0, 0, + 0, 0, 99, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 33, 33, 33, 0, 0, 0, 0, 0, + 0, 0, 33, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; + +int main(int argc, char **argv) +{ + const unsigned int video_width = 32, video_height = 32; + const unsigned int window_width = video_width * 2, window_height = video_height * 2; + int quit = 0; + Display *display; + Window root, window; + Pixmap framebuffer; + XEvent event; + struct pipe_context *pipe; + struct VL_CONTEXT *ctx; + struct VL_SURFACE *sfc, *ref_sfc; + struct VL_MOTION_VECTOR motion_vector = + { + {0, 0}, {0, 0} + }; + + display = XOpenDisplay(NULL); + root = XDefaultRootWindow(display); + window = XCreateSimpleWindow(display, root, 0, 0, window_width, window_height, 0, 0, 0); + framebuffer = XCreatePixmap(display, root, window_width, window_height, 24); + + XSelectInput(display, window, ExposureMask | KeyPressMask); + XMapWindow(display, window); + XSync(display, 0); + + pipe = create_pipe_context(display); + vlCreateContext(display, pipe, video_width, video_height, VL_FORMAT_YCBCR_420, &ctx); + vlCreateSurface(ctx, &sfc); + vlCreateSurface(ctx, &ref_sfc); + + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); + vlRenderPMacroBlock + ( + VL_FRAME_PICTURE, + VL_FIELD_FIRST, + 0, + 0, + VL_FRAME_MC, + &motion_vector, + 0x3F, + VL_DCT_FRAME_CODED, + (short*)ycbcr16x16_420_2, + ref_sfc, + sfc + ); + vlPutSurface(sfc, framebuffer, 0, 0, video_width, video_height, 0, 0, window_width, window_height, VL_FRAME_PICTURE); + + puts("Press any key to continue..."); + + while (!quit) + { + XNextEvent(display, &event); + switch (event.type) + { + case Expose: + { + XCopyArea + ( + display, + framebuffer, + window, + XDefaultGC(display, XDefaultScreen(display)), + 0, + 0, + window_width, + window_height, + 0, + 0 + ); + break; + } + case KeyPress: + { + quit = 1; + break; + } + } + } + + vlDestroySurface(sfc); + vlDestroySurface(ref_sfc); + vlDestroyContext(ctx); + + XFreePixmap(display, framebuffer); + XDestroyWindow(display, window); + XCloseDisplay(display); + + return 0; +} + diff --git a/src/gallium/state_trackers/g3dvl/tests/test_pf_rendering.c b/src/gallium/state_trackers/g3dvl/tests/test_pf_rendering.c new file mode 100644 index 00000000000..43586fc553c --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/tests/test_pf_rendering.c @@ -0,0 +1,214 @@ +#include <stdio.h> +#include <X11/Xlib.h> +#include <vl_context.h> +#include <vl_surface.h> +#include <xsp_winsys.h> + +static const unsigned short ycbcr16x16_420[8*8*6] = +{ + 0x00A5,0x00A5,0x00A5,0x0072,0x00A5,0x0072,0x0072,0x0072, + 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x0072,0x0072,0x0072, + 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x00A5,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5,0x00A5, + + 0x004F,0x004F,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F,0x004F, + + 0x003E,0x003E,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E,0x003E +}; + +static const signed short ycbcr16x16_420_2[8*8*6] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; + +int main(int argc, char **argv) +{ + const unsigned int video_width = 32, video_height = 32; + const unsigned int window_width = video_width * 2, window_height = video_height * 2; + int quit = 0; + Display *display; + Window root, window; + Pixmap framebuffer; + XEvent event; + struct pipe_context *pipe; + struct VL_CONTEXT *ctx; + struct VL_SURFACE *sfc, *ref_sfc; + struct VL_MOTION_VECTOR motion_vector = + { + {0, 0}, {32, 32} + }; + + display = XOpenDisplay(NULL); + root = XDefaultRootWindow(display); + window = XCreateSimpleWindow(display, root, 0, 0, window_width, window_height, 0, 0, 0); + framebuffer = XCreatePixmap(display, root, window_width, window_height, 24); + + XSelectInput(display, window, ExposureMask | KeyPressMask); + XMapWindow(display, window); + XSync(display, 0); + + pipe = create_pipe_context(display); + vlCreateContext(display, pipe, video_width, video_height, VL_FORMAT_YCBCR_420, &ctx); + vlCreateSurface(ctx, &sfc); + vlCreateSurface(ctx, &ref_sfc); + + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); + vlRenderPMacroBlock + ( + VL_FRAME_PICTURE, + VL_FIELD_FIRST, + 0, + 0, + VL_FIELD_MC, + &motion_vector, + 0x3F, + VL_DCT_FRAME_CODED, + (short*)ycbcr16x16_420_2, + ref_sfc, + sfc + ); + vlPutSurface(sfc, framebuffer, 0, 0, video_width, video_height, 0, 0, window_width, window_height, VL_FRAME_PICTURE); + + puts("Press any key to continue..."); + + while (!quit) + { + XNextEvent(display, &event); + switch (event.type) + { + case Expose: + { + XCopyArea + ( + display, + framebuffer, + window, + XDefaultGC(display, XDefaultScreen(display)), + 0, + 0, + window_width, + window_height, + 0, + 0 + ); + break; + } + case KeyPress: + { + quit = 1; + break; + } + } + } + + vlDestroySurface(sfc); + vlDestroySurface(ref_sfc); + vlDestroyContext(ctx); + + XFreePixmap(display, framebuffer); + XDestroyWindow(display, window); + XCloseDisplay(display); + + return 0; +} + diff --git a/src/gallium/state_trackers/g3dvl/tests/test_surface.c b/src/gallium/state_trackers/g3dvl/tests/test_surface.c new file mode 100644 index 00000000000..4d1946396af --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/tests/test_surface.c @@ -0,0 +1,26 @@ +#include <vl_context.h> +#include <vl_surface.h> +#include <xsp_winsys.h> + +int main(int argc, char **argv) +{ + const unsigned int video_width = 32, video_height = 32; + + Display *display; + struct pipe_context *pipe; + struct VL_CONTEXT *ctx; + struct VL_SURFACE *sfc; + + display = XOpenDisplay(NULL); + pipe = create_pipe_context(display); + + vlCreateContext(display, pipe, video_width, video_height, VL_FORMAT_YCBCR_420, &ctx); + vlCreateSurface(ctx, &sfc); + vlDestroySurface(sfc); + vlDestroyContext(ctx); + + XCloseDisplay(display); + + return 0; +} + diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c new file mode 100644 index 00000000000..1668ad1651b --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -0,0 +1,2258 @@ +#include "vl_context.h" +#include <assert.h> +#include <stdlib.h> +#include <pipe/p_context.h> +#include <pipe/p_winsys.h> +#include <pipe/p_screen.h> +#include <pipe/p_state.h> +#include <pipe/p_inlines.h> +#include <pipe/p_shader_tokens.h> +#include <tgsi/util/tgsi_parse.h> +#include <tgsi/util/tgsi_build.h> +#include "vl_shader_build.h" +#include "vl_data.h" +#include "vl_defs.h" +#include "vl_util.h" + +static int vlCreateVertexShaderFrameIDCT(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move pos in to pos out + * mov o1, i1 ; Move texcoord in to texcoord out */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + //context->states.idct.frame_vs = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFrameIDCT(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* decl i0 ; Texcoords for s0 */ + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl s0 ; Sampler for tex containing picture to display */ + decl = vl_decl_samplers(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* tex2d t0, i0, s0 ; Read src pixel */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t0, t0, c0 ; Subtract bias vector from pixel */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix + * dp4 o0.y, t0, c2 + * dp4 o0.z, t0, c3 + * dp4 o0.w, t0, c4 ; XXX: Don't need 4th coefficient + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + //context->states.idct.frame_fs = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlInitIDCT(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + struct pipe_sampler_state sampler; + struct pipe_texture template; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + context->states.idct.viewport.scale[0] = VL_BLOCK_WIDTH; + context->states.idct.viewport.scale[1] = VL_BLOCK_HEIGHT; + context->states.idct.viewport.scale[2] = 1; + context->states.idct.viewport.scale[3] = 1; + context->states.idct.viewport.translate[0] = 0; + context->states.idct.viewport.translate[1] = 0; + context->states.idct.viewport.translate[2] = 0; + context->states.idct.viewport.translate[3] = 0; + + context->states.idct.render_target.width = VL_BLOCK_WIDTH; + context->states.idct.render_target.height = VL_BLOCK_HEIGHT; + context->states.idct.render_target.num_cbufs = 1; + context->states.idct.render_target.zsbuf = NULL; + + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ;*/ + /*sampler.shadow_ambient = ;*/ + /*sampler.lod_bias = ;*/ + sampler.min_lod = 0; + /*sampler.max_lod = ;*/ + /*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + context->states.idct.sampler = pipe->create_sampler_state(pipe, &sampler); + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_A8L8_UNORM; + template.last_level = 0; + template.width[0] = 8; + template.height[0] = 8; + template.depth[0] = 1; + template.compressed = 0; + pf_get_block(template.format, &template.block); + + context->states.idct.texture = pipe->screen->texture_create(pipe->screen, &template); + + template.format = PIPE_FORMAT_A8R8G8B8_UNORM; + template.width[0] = 16; + template.height[0] = 1; + + context->states.idct.basis = pipe->screen->texture_create(pipe->screen, &template); + + for (i = 0; i < 2; ++i) + { + context->states.idct.vertex_bufs[i] = &context->states.csc.vertex_bufs[i]; + context->states.idct.vertex_buf_elems[i] = &context->states.csc.vertex_buf_elems[i]; + /* + context->states.idct.vertex_bufs[i].pitch = sizeof(struct VL_VERTEX2F); + context->states.idct.vertex_bufs[i].max_index = 3; + context->states.idct.vertex_bufs[i].buffer_offset = 0; + context->states.idct.vertex_bufs[i].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct VL_VERTEX2F) * 4 + ); + + context->states.idct.vertex_buf_elems[i].src_offset = 0; + context->states.idct.vertex_buf_elems[i].vertex_buffer_index = i; + context->states.idct.vertex_buf_elems[i].nr_components = 2; + context->states.idct.vertex_buf_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT; + */ + } + + vlCreateVertexShaderFrameIDCT(context); + vlCreateFragmentShaderFrameIDCT(context); + + return 0; +} + +static int vlDestroyIDCT(struct VL_CONTEXT *context) +{ + //unsigned int i; + + assert(context); + + context->pipe->delete_sampler_state(context->pipe, context->states.idct.sampler); + + //for (i = 0; i < 2; ++i) + //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.vertex_bufs[i].buffer); + + pipe_texture_release(&context->states.idct.texture); + pipe_texture_release(&context->states.idct.basis); + + //context->pipe->delete_vs_state(context->pipe, context->states.idct.frame_vs); + //context->pipe->delete_fs_state(context->pipe, context->states.idct.frame_fs); + + //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.vs_const_buf.buffer); + //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.fs_const_buf.buffer); + + return 0; +} + +static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + context->states.mc.i_vs = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + */ + for (i = 0; i < 2; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + */ + for (i = 0; i < 3; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti); + } + + /* + * tex2d o0.x, i0, s0 ; Read texel from luma texture into .x channel + * tex2d o0.y, i1, s1 ; Read texel from chroma Cb texture into .y channel + * tex2d o0.z, i1, s2 ; Read texel from chroma Cr texture into .z channel + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + context->states.mc.i_fs = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Unused + * decl c3 ; Translation vector to move ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Ref macroblock texcoords + */ + for (i = 0; i < 4; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* add o3, t0, c3 ; Translate rect into position on ref macroblock */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + context->states.mc.p_vs[0] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Denorm coefficients + * decl c3 ; Translation vector to move top field ref macroblock texcoords into position + * decl c4 ; Translation vector to move bottom field ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Top field ref macroblock texcoords + * decl o4 ; Bottom field ref macroblock texcoords + * decl o5 ; Denormalized vertex pos + */ + for (i = 0; i < 6; i++) + { + decl = vl_decl_output((i == 0 || i == 5) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add t1, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mov o0, t1 ; Move vertex pos to output */ + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + mov o1, i1 ; Move input luma texcoords to output + mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* add o3, t0, c3 ; Translate top field rect into position on ref macroblock + add o4, t0, c4 ; Translate bottom field rect into position on ref macroblock */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o5, t1, c2 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + context->states.mc.p_vs[1] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + */ + for (i = 0; i < 3; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Multiplier to shift 9th bit of differential into place + * decl c1 ; Bias to get differential back to a signed value + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels + * mov t1.x, t0.w ; Move 9th bit from .w channel to .x + * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels + * mov t1.y, t0.w ; Move 9th bit from .w channel to .y + * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels + * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t0, t0, c1 ; Subtract bias to get back signed values */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* tex2d t1, i2, s3 ; Read texel from ref macroblock */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 2, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + context->states.mc.p_fs[0] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 200; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + * decl i3 ; Texcoords for s3 + * decl i4 ; Denormalized vertex pos + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Multiplier to shift 9th bit of differential into place + * decl c1 ; Bias to get differential back to a signed value + * decl c2 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels + * mov t1.x, t0.w ; Move 9th bit from .w channel to .x + * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels + * mov t1.y, t0.w ; Move 9th bit from .w channel to .y + * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels + * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t0, t0, c1 ; Subtract bias to get back signed values */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* tex2d t1, i2, s3 ; Read texel from ref macroblock top field + tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* XXX: Pos values off by 0.5? */ + /* sub t4, i4.y, c2.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_INPUT, 2); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t4, c2.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 2); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* floor t3, t3 ; Get rid of fractional part */ + inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t3, c2.y ; Multiply by 2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 2); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + context->states.mc.p_fs[1] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Unused + * decl c3 ; Translation vector to move past ref macroblock texcoords into position + * decl c4 ; Unused + * decl c5 ; Translation vector to move future ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 5); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Past ref macroblock texcoords + * decl o4 ; Future ref macroblock texcoords + */ + for (i = 0; i < 5; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* add o3, t0, c3 ; Translate rect into position on past ref macroblock + add o4, t0, c5 ; Translate rect into position on future ref macroblock */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i * 2 + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + context->states.mc.b_vs[0] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Denorm coefficients + * decl c3 ; Translation vector to move top field past ref macroblock texcoords into position + * decl c4 ; Translation vector to move bottom field past ref macroblock texcoords into position + * decl c5 ; Translation vector to move top field future ref macroblock texcoords into position + * decl c6 ; Translation vector to move bottom field future ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Top field past ref macroblock texcoords + * decl o4 ; Bottom field past ref macroblock texcoords + * decl o5 ; Top field future ref macroblock texcoords + * decl o6 ; Bottom field future ref macroblock texcoords + * decl o7 ; Denormalized vertex pos + */ + for (i = 0; i < 8; i++) + { + decl = vl_decl_output((i == 0 || i == 7) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add t1, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mov o0, t1 ; Move vertex pos to output */ + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* + * add o3, t0, c3 ; Translate top field rect into position on past ref macroblock + * add o4, t0, c4 ; Translate bottom field rect into position on past ref macroblock + * add o5, t0, c5 ; Translate top field rect into position on future ref macroblock + * add o6, t0, c6 ; Translate bottom field rect into position on future ref macroblock + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o7, t1, c2 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 7, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + context->states.mc.b_vs[1] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + * decl i3 ; Texcoords for s4 + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Multiplier to shift 9th bit of differential into place + * decl c1 ; Bias to get differential back to a signed value + * decl c2 ; Constant 1/2 in .x channel to use as weight to blend past and future texels + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for past ref surface texture + * decl s4 ; Sampler for future ref surface texture + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels + * mov t1.x, t0.w ; Move 9th bit from .w channel to .x + * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels + * mov t1.y, t0.w ; Move 9th bit from .w channel to .y + * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels + * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t0, t0, c1 ; Subtract bias to get back signed values */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i2, s3 ; Read texel from past ref macroblock + * tex2d t2, i3, s4 ; Read texel from future ref macroblock + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* lerp t1, c2.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + context->states.mc.b_fs[0] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 200; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + * decl i3 ; Texcoords for s3 + * decl i4 ; Texcoords for s4 + * decl i5 ; Texcoords for s4 + * decl i6 ; Denormalized vertex pos + */ + for (i = 0; i < 7; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Multiplier to shift 9th bit of differential into place + * decl c1 ; Bias to get differential back to a signed value + * decl c2 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels + * ; and for Y-mod-2 top/bottom field selection + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for past ref surface texture + * decl s4 ; Sampler for future ref surface texture + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels + * mov t1.x, t0.w ; Move 9th bit from .w channel to .x + * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels + * mov t1.y, t0.w ; Move 9th bit from .w channel to .y + * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels + * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t0, t0, c1 ; Subtract bias to get back signed values */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* XXX: Pos values off by 0.5? */ + /* sub t4, i6.y, c2.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 2); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t4, c2.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 2); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* floor t3, t3 ; Get rid of fractional part */ + inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t3, c2.y ; Multiply by 2 */ + inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 2); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i2, s3 ; Read texel from past ref macroblock top field + * tex2d t2, i3, s3 ; Read texel from past ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t4, i4, s4 ; Read texel from future ref macroblock top field + * tex2d t5, i5, s4 ; Read texel from future ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 4, TGSI_FILE_SAMPLER, 4); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* lerp t1, c2.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + context->states.mc.b_fs[1] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +int vlCreateDataBufsMC(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + /* Create our vertex buffer and vertex buffer element */ + context->states.mc.vertex_bufs[0].pitch = sizeof(struct VL_VERTEX2F); + context->states.mc.vertex_bufs[0].max_index = 23; + context->states.mc.vertex_bufs[0].buffer_offset = 0; + context->states.mc.vertex_bufs[0].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct VL_VERTEX2F) * 24 + ); + + context->states.mc.vertex_buf_elems[0].src_offset = 0; + context->states.mc.vertex_buf_elems[0].vertex_buffer_index = 0; + context->states.mc.vertex_buf_elems[0].nr_components = 2; + context->states.mc.vertex_buf_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Create our texcoord buffers and texcoord buffer elements */ + for (i = 1; i < 3; ++i) + { + context->states.mc.vertex_bufs[i].pitch = sizeof(struct VL_TEXCOORD2F); + context->states.mc.vertex_bufs[i].max_index = 23; + context->states.mc.vertex_bufs[i].buffer_offset = 0; + context->states.mc.vertex_bufs[i].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct VL_TEXCOORD2F) * 24 + ); + + context->states.mc.vertex_buf_elems[i].src_offset = 0; + context->states.mc.vertex_buf_elems[i].vertex_buffer_index = i; + context->states.mc.vertex_buf_elems[i].nr_components = 2; + context->states.mc.vertex_buf_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT; + } + + /* Fill buffers */ + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + vl_chroma_420_texcoords, + sizeof(struct VL_VERTEX2F) * 24 + ); + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + vl_luma_texcoords, + sizeof(struct VL_TEXCOORD2F) * 24 + ); + /* TODO: Accomodate 422, 444 */ + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[2].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + vl_chroma_420_texcoords, + sizeof(struct VL_TEXCOORD2F) * 24 + ); + + for (i = 0; i < 3; ++i) + pipe->winsys->buffer_unmap(pipe->winsys, context->states.mc.vertex_bufs[i].buffer); + + /* Create our constant buffer */ + context->states.mc.vs_const_buf.size = sizeof(struct VL_MC_VS_CONSTS); + context->states.mc.vs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + context->states.mc.vs_const_buf.size + ); + + context->states.mc.fs_const_buf.size = sizeof(struct VL_MC_FS_CONSTS); + context->states.mc.fs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + context->states.mc.fs_const_buf.size + ); + + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.mc.fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + &vl_mc_fs_consts, + sizeof(struct VL_MC_FS_CONSTS) + ); + + pipe->winsys->buffer_unmap(pipe->winsys, context->states.mc.fs_const_buf.buffer); + + return 0; +} + +static int vlInitMC(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + struct pipe_sampler_state sampler; + struct pipe_texture template; + unsigned int filters[5]; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + /* For MC we render to textures, which are rounded up to nearest POT */ + context->states.mc.viewport.scale[0] = vlRoundUpPOT(context->video_width); + context->states.mc.viewport.scale[1] = vlRoundUpPOT(context->video_height); + context->states.mc.viewport.scale[2] = 1; + context->states.mc.viewport.scale[3] = 1; + context->states.mc.viewport.translate[0] = 0; + context->states.mc.viewport.translate[1] = 0; + context->states.mc.viewport.translate[2] = 0; + context->states.mc.viewport.translate[3] = 0; + + context->states.mc.render_target.width = context->video_width; + context->states.mc.render_target.height = context->video_height; + context->states.mc.render_target.num_cbufs = 1; + /* FB for MC stage is a VL_SURFACE, set in vlSetRenderSurface() */ + context->states.mc.render_target.zsbuf = NULL; + + filters[0] = PIPE_TEX_FILTER_NEAREST; + filters[1] = context->video_format == VL_FORMAT_YCBCR_444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; + filters[2] = context->video_format == VL_FORMAT_YCBCR_444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; + filters[3] = PIPE_TEX_FILTER_LINEAR; + filters[4] = PIPE_TEX_FILTER_LINEAR; + + for (i = 0; i < 5; ++i) + { + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = filters[i]; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = filters[i]; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ;*/ + /*sampler.shadow_ambient = ;*/ + /*sampler.lod_bias = ;*/ + sampler.min_lod = 0; + /*sampler.max_lod = ;*/ + /*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + context->states.mc.samplers[i] = pipe->create_sampler_state(pipe, &sampler); + } + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_A8L8_UNORM; + template.last_level = 0; + template.width[0] = 8; + template.height[0] = 8 * 4; + template.depth[0] = 1; + template.compressed = 0; + pf_get_block(template.format, &template.block); + + context->states.mc.textures[0] = pipe->screen->texture_create(pipe->screen, &template); + + if (context->video_format == VL_FORMAT_YCBCR_420) + template.height[0] = 8; + else if (context->video_format == VL_FORMAT_YCBCR_422) + template.height[0] = 8 * 2; + else if (context->video_format == VL_FORMAT_YCBCR_444) + template.height[0] = 8 * 4; + else + assert(0); + + context->states.mc.textures[1] = pipe->screen->texture_create(pipe->screen, &template); + context->states.mc.textures[2] = pipe->screen->texture_create(pipe->screen, &template); + + /* textures[3] & textures[4] are assigned from VL_SURFACEs for P and B macroblocks at render time */ + + vlCreateVertexShaderIMC(context); + vlCreateFragmentShaderIMC(context); + vlCreateVertexShaderFramePMC(context); + vlCreateVertexShaderFieldPMC(context); + vlCreateFragmentShaderFramePMC(context); + vlCreateFragmentShaderFieldPMC(context); + vlCreateVertexShaderFrameBMC(context); + vlCreateVertexShaderFieldBMC(context); + vlCreateFragmentShaderFrameBMC(context); + vlCreateFragmentShaderFieldBMC(context); + vlCreateDataBufsMC(context); + + return 0; +} + +static int vlDestroyMC(struct VL_CONTEXT *context) +{ + unsigned int i; + + assert(context); + + for (i = 0; i < 5; ++i) + context->pipe->delete_sampler_state(context->pipe, context->states.mc.samplers[i]); + + for (i = 0; i < 3; ++i) + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vertex_bufs[i].buffer); + + /* Textures 3 & 4 are not created directly, no need to release them here */ + for (i = 0; i < 3; ++i) + pipe_texture_release(&context->states.mc.textures[i]); + + context->pipe->delete_vs_state(context->pipe, context->states.mc.i_vs); + context->pipe->delete_fs_state(context->pipe, context->states.mc.i_fs); + + for (i = 0; i < 2; ++i) + { + context->pipe->delete_vs_state(context->pipe, context->states.mc.p_vs[i]); + context->pipe->delete_fs_state(context->pipe, context->states.mc.p_fs[i]); + context->pipe->delete_vs_state(context->pipe, context->states.mc.b_vs[i]); + context->pipe->delete_fs_state(context->pipe, context->states.mc.b_fs[i]); + } + + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vs_const_buf.buffer); + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.fs_const_buf.buffer); + + return 0; +} + +static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale texcoord rect to source size + * decl c1 ; Translation vector to move texcoord rect into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* mov o0, i0 ; Move pos in to pos out */ + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i1, c0 ; Scale unit texcoord rect to source size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 1, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o1, t0, c1 ; Translate texcoord rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + context->states.csc.vertex_shader = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderCSC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* decl i0 ; Texcoords for s0 */ + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl c0 ; Bias vector for CSC + * decl c1-c4 ; CSC matrix c1-c4 + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl s0 ; Sampler for tex containing picture to display */ + decl = vl_decl_samplers(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* tex2d t0, i0, s0 ; Read src pixel */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t0, t0, c0 ; Subtract bias vector from pixel */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix + * dp4 o0.y, t0, c2 + * dp4 o0.z, t0, c3 + * dp4 o0.w, t0, c4 ; XXX: Don't need 4th coefficient + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + context->states.csc.fragment_shader = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateDataBufsCSC(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + + assert(context); + + pipe = context->pipe; + + /* + Create our vertex buffer and vertex buffer element + VB contains 4 vertices that render a quad covering the entire window + to display a rendered surface + Quad is rendered as a tri strip + */ + context->states.csc.vertex_bufs[0].pitch = sizeof(struct VL_VERTEX2F); + context->states.csc.vertex_bufs[0].max_index = 3; + context->states.csc.vertex_bufs[0].buffer_offset = 0; + context->states.csc.vertex_bufs[0].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct VL_VERTEX2F) * 4 + ); + + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.csc.vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + vl_surface_vertex_positions, + sizeof(struct VL_VERTEX2F) * 4 + ); + + pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.vertex_bufs[0].buffer); + + context->states.csc.vertex_buf_elems[0].src_offset = 0; + context->states.csc.vertex_buf_elems[0].vertex_buffer_index = 0; + context->states.csc.vertex_buf_elems[0].nr_components = 2; + context->states.csc.vertex_buf_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* + Create our texcoord buffer and texcoord buffer element + Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices + */ + context->states.csc.vertex_bufs[1].pitch = sizeof(struct VL_TEXCOORD2F); + context->states.csc.vertex_bufs[1].max_index = 3; + context->states.csc.vertex_bufs[1].buffer_offset = 0; + context->states.csc.vertex_bufs[1].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct VL_TEXCOORD2F) * 4 + ); + + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.csc.vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + vl_surface_texcoords, + sizeof(struct VL_TEXCOORD2F) * 4 + ); + + pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.vertex_bufs[1].buffer); + + context->states.csc.vertex_buf_elems[1].src_offset = 0; + context->states.csc.vertex_buf_elems[1].vertex_buffer_index = 1; + context->states.csc.vertex_buf_elems[1].nr_components = 2; + context->states.csc.vertex_buf_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* + Create our vertex shader's constant buffer + Const buffer contains scaling and translation vectors + */ + context->states.csc.vs_const_buf.size = sizeof(struct VL_CSC_VS_CONSTS); + context->states.csc.vs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + context->states.csc.vs_const_buf.size + ); + + /* + Create our fragment shader's constant buffer + Const buffer contains the color conversion matrix and bias vectors + */ + context->states.csc.fs_const_buf.size = sizeof(struct VL_CSC_FS_CONSTS); + context->states.csc.fs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + context->states.csc.fs_const_buf.size + ); + + /* + TODO: Refactor this into a seperate function, + allow changing the CSC matrix at runtime to switch between regular & full versions + */ + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.csc.fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + &vl_csc_fs_consts_601, + sizeof(struct VL_CSC_FS_CONSTS) + ); + + pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.fs_const_buf.buffer); + + return 0; +} + +static int vlInitCSC(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + struct pipe_sampler_state sampler; + + assert(context); + + pipe = context->pipe; + + /* Delay creating the FB until vlPutSurface() so we know window size */ + context->states.csc.framebuffer.num_cbufs = 1; + context->states.csc.framebuffer.cbufs[0] = NULL; + context->states.csc.framebuffer.zsbuf = NULL; + + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ;*/ + /*sampler.shadow_ambient = ;*/ + /*sampler.lod_bias = ;*/ + /*sampler.min_lod = ;*/ + /*sampler.max_lod = ;*/ + /*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + context->states.csc.sampler = pipe->create_sampler_state(pipe, &sampler); + + vlCreateVertexShaderCSC(context); + vlCreateFragmentShaderCSC(context); + vlCreateDataBufsCSC(context); + + return 0; +} + +static int vlDestroyCSC(struct VL_CONTEXT *context) +{ + assert(context); + + /* + Since we create the final FB when we display our first surface, + it may not be created if vlPutSurface() is never called + */ + if (context->states.csc.framebuffer.cbufs[0]) + context->pipe->winsys->surface_release(context->pipe->winsys, &context->states.csc.framebuffer.cbufs[0]); + context->pipe->delete_sampler_state(context->pipe, context->states.csc.sampler); + context->pipe->delete_vs_state(context->pipe, context->states.csc.vertex_shader); + context->pipe->delete_fs_state(context->pipe, context->states.csc.fragment_shader); + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vertex_bufs[0].buffer); + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vertex_bufs[1].buffer); + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vs_const_buf.buffer); + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.fs_const_buf.buffer); + + return 0; +} + +static int vlInitCommon(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + struct pipe_rasterizer_state rast; + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state dsa; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + rast.flatshade = 1; + rast.flatshade_first = 0; + rast.light_twoside = 0; + rast.front_winding = PIPE_WINDING_CCW; + rast.cull_mode = PIPE_WINDING_CW; + rast.fill_cw = PIPE_POLYGON_MODE_FILL; + rast.fill_ccw = PIPE_POLYGON_MODE_FILL; + rast.offset_cw = 0; + rast.offset_ccw = 0; + rast.scissor = 0; + rast.poly_smooth = 0; + rast.poly_stipple_enable = 0; + rast.point_sprite = 0; + rast.point_size_per_vertex = 0; + rast.multisample = 0; + rast.line_smooth = 0; + rast.line_stipple_enable = 0; + rast.line_stipple_factor = 0; + rast.line_stipple_pattern = 0; + rast.line_last_pixel = 0; + /* Don't need clipping, but viewport mapping done here */ + rast.bypass_clipping = 0; + rast.bypass_vs = 0; + rast.origin_lower_left = 0; + rast.line_width = 1; + rast.point_smooth = 0; + rast.point_size = 1; + rast.offset_units = 1; + rast.offset_scale = 1; + /*rast.sprite_coord_mode[i] = ;*/ + context->states.common.raster = pipe->create_rasterizer_state(pipe, &rast); + pipe->bind_rasterizer_state(pipe, context->states.common.raster); + + blend.blend_enable = 0; + blend.rgb_func = PIPE_BLEND_ADD; + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_func = PIPE_BLEND_ADD; + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.logicop_enable = 0; + blend.logicop_func = PIPE_LOGICOP_CLEAR; + /* Needed to allow color writes to FB, even if blending disabled */ + blend.colormask = PIPE_MASK_RGBA; + blend.dither = 0; + context->states.common.blend = pipe->create_blend_state(pipe, &blend); + pipe->bind_blend_state(pipe, context->states.common.blend); + + dsa.depth.enabled = 0; + dsa.depth.writemask = 0; + dsa.depth.func = PIPE_FUNC_ALWAYS; + dsa.depth.occlusion_count = 0; + for (i = 0; i < 2; ++i) + { + dsa.stencil[i].enabled = 0; + dsa.stencil[i].func = PIPE_FUNC_ALWAYS; + dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].ref_value = 0; + dsa.stencil[i].value_mask = 0; + dsa.stencil[i].write_mask = 0; + } + dsa.alpha.enabled = 0; + dsa.alpha.func = PIPE_FUNC_ALWAYS; + dsa.alpha.ref = 0; + context->states.common.dsa = pipe->create_depth_stencil_alpha_state(pipe, &dsa); + pipe->bind_depth_stencil_alpha_state(pipe, context->states.common.dsa); + + return 0; +} + +static int vlDestroyCommon(struct VL_CONTEXT *context) +{ + assert(context); + + context->pipe->delete_blend_state(context->pipe, context->states.common.blend); + context->pipe->delete_rasterizer_state(context->pipe, context->states.common.raster); + context->pipe->delete_depth_stencil_alpha_state(context->pipe, context->states.common.dsa); + + return 0; +} + +static int vlInit(struct VL_CONTEXT *context) +{ + assert(context); + + vlInitCommon(context); + vlInitCSC(context); + vlInitMC(context); + vlInitIDCT(context); + + return 0; +} + +static int vlDestroy(struct VL_CONTEXT *context) +{ + assert(context); + + /* XXX: Must unbind shaders before we can delete them for some reason */ + context->pipe->bind_vs_state(context->pipe, NULL); + context->pipe->bind_fs_state(context->pipe, NULL); + + vlDestroyCommon(context); + vlDestroyCSC(context); + vlDestroyMC(context); + vlDestroyIDCT(context); + + return 0; +} + +int vlCreateContext +( + Display *display, + struct pipe_context *pipe, + unsigned int video_width, + unsigned int video_height, + enum VL_FORMAT video_format, + struct VL_CONTEXT **context +) +{ + struct VL_CONTEXT *ctx; + + assert(display); + assert(pipe); + assert(context); + + ctx = calloc(1, sizeof(struct VL_CONTEXT)); + + ctx->display = display; + ctx->pipe = pipe; + ctx->video_width = video_width; + ctx->video_height = video_height; + ctx->video_format = video_format; + + vlInit(ctx); + + /* Since we only change states in vlPutSurface() we need to start in render mode */ + vlBeginRender(ctx); + + *context = ctx; + + return 0; +} + +int vlDestroyContext(struct VL_CONTEXT *context) +{ + assert(context); + + vlDestroy(context); + + context->pipe->destroy(context->pipe); + + free(context); + + return 0; +} + +int vlBeginRender(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + + assert(context); + + pipe = context->pipe; + + /* Frame buffer set in vlRender*Macroblock() */ + /* Shaders, samplers, textures set in vlRender*Macroblock() */ + pipe->set_vertex_buffers(pipe, 3, context->states.mc.vertex_bufs); + pipe->set_vertex_elements(pipe, 3, context->states.mc.vertex_buf_elems); + pipe->set_viewport_state(pipe, &context->states.mc.viewport); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->states.mc.vs_const_buf); + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->states.mc.fs_const_buf); + + return 0; +} + +int vlEndRender(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + + assert(context); + + pipe = context->pipe; + + pipe->set_framebuffer_state(pipe, &context->states.csc.framebuffer); + pipe->set_viewport_state(pipe, &context->states.csc.viewport); + pipe->bind_sampler_states(pipe, 1, (void**)&context->states.csc.sampler); + /* Source texture set in vlPutSurface() */ + pipe->bind_vs_state(pipe, context->states.csc.vertex_shader); + pipe->bind_fs_state(pipe, context->states.csc.fragment_shader); + pipe->set_vertex_buffers(pipe, 2, context->states.csc.vertex_bufs); + pipe->set_vertex_elements(pipe, 2, context->states.csc.vertex_buf_elems); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->states.csc.vs_const_buf); + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->states.csc.fs_const_buf); + + return 0; +} + diff --git a/src/gallium/state_trackers/g3dvl/vl_context.h b/src/gallium/state_trackers/g3dvl/vl_context.h new file mode 100644 index 00000000000..bff318854aa --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_context.h @@ -0,0 +1,83 @@ +#ifndef vl_context_h +#define vl_context_h + +#include <X11/Xlib.h> +#include <pipe/p_state.h> +#include "vl_types.h" + +struct pipe_context; + +struct VL_CONTEXT +{ + Display *display; + struct pipe_context *pipe; + unsigned int video_width; + unsigned int video_height; + enum VL_FORMAT video_format; + + struct + { + struct + { + struct pipe_rasterizer_state *raster; + struct pipe_depth_stencil_alpha_state *dsa; + struct pipe_blend_state *blend; + } common; + + struct + { + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state render_target; + struct pipe_sampler_state *sampler; + struct pipe_texture *texture; + struct pipe_texture *basis; + struct pipe_shader_state *frame_vs; + struct pipe_shader_state *frame_fs; + struct pipe_vertex_buffer *vertex_bufs[2]; + struct pipe_vertex_element *vertex_buf_elems[2]; + //struct pipe_constant_buffer vs_const_buf, fs_const_buf; + } idct; + + struct + { + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state render_target; + struct pipe_sampler_state *samplers[5]; + struct pipe_texture *textures[5]; + struct pipe_shader_state *i_vs, *p_vs[2], *b_vs[2]; + struct pipe_shader_state *i_fs, *p_fs[2], *b_fs[2]; + struct pipe_vertex_buffer vertex_bufs[3]; + struct pipe_vertex_element vertex_buf_elems[3]; + struct pipe_constant_buffer vs_const_buf, fs_const_buf; + } mc; + + struct + { + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; + struct pipe_sampler_state *sampler; + struct pipe_shader_state *vertex_shader, *fragment_shader; + struct pipe_vertex_buffer vertex_bufs[2]; + struct pipe_vertex_element vertex_buf_elems[2]; + struct pipe_constant_buffer vs_const_buf, fs_const_buf; + } csc; + } states; +}; + +int vlCreateContext +( + Display *display, + struct pipe_context *pipe, + unsigned int video_width, + unsigned int video_height, + enum VL_FORMAT video_format, + struct VL_CONTEXT **context +); + +int vlDestroyContext(struct VL_CONTEXT *context); + +int vlBeginRender(struct VL_CONTEXT *context); +int vlEndRender(struct VL_CONTEXT *context); + +#endif + diff --git a/src/gallium/state_trackers/g3dvl/vl_data.c b/src/gallium/state_trackers/g3dvl/vl_data.c new file mode 100644 index 00000000000..7e6ee8ac12f --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_data.c @@ -0,0 +1,181 @@ +#include "vl_data.h" + +/* + * Represents 8 triangles (4 quads, 1 per block) in noormalized coords + * that render a macroblock. + * Need to be scaled to cover mbW*mbH macroblock pixels and translated into + * position on target surface. + */ +const struct VL_VERTEX2F vl_mb_vertex_positions[24] = +{ + {0.0f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.0f}, + {0.5f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.5f}, + + {0.5f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.0f}, + {1.0f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.5f}, + + {0.0f, 0.5f}, {0.0f, 1.0f}, {0.5f, 0.5f}, + {0.5f, 0.5f}, {0.0f, 1.0f}, {0.5f, 1.0f}, + + {0.5f, 0.5f}, {0.5f, 1.0f}, {1.0f, 0.5f}, + {1.0f, 0.5f}, {0.5f, 1.0f}, {1.0f, 1.0f} +}; + +/* + * Represents texcoords for the above for rendering 4 luma blocks arranged + * in a bW*(bH*4) texture. First luma block located at 0,0->bW,bH; second at + * 0,bH->bW,2bH; third at 0,2bH->bW,3bH; fourth at 0,3bH->bW,4bH. + */ +const struct VL_TEXCOORD2F vl_luma_texcoords[24] = +{ + {0.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.0f}, + {1.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.25f}, + + {0.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.25f}, + {1.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.5f}, + + {0.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.5f}, + {1.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.75f}, + + {0.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 0.75f}, + {1.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 1.0f} +}; + +/* + * Represents texcoords for the above for rendering 1 chroma block. + * Straight forward 0,0->1,1 mapping so we can reuse the MB pos vectors. + */ +const struct VL_TEXCOORD2F *vl_chroma_420_texcoords = (const struct VL_TEXCOORD2F*)vl_mb_vertex_positions; + +/* + * Represents texcoords for the above for rendering 2 chroma blocks arranged + * in a bW*(bH*2) texture. First chroma block located at 0,0->bW,bH; second at + * 0,bH->bW,2bH. We can render this with 0,0->1,1 mapping. + * Straight forward 0,0->1,1 mapping so we can reuse MB pos vectors. + */ +const struct VL_TEXCOORD2F *vl_chroma_422_texcoords = (const struct VL_TEXCOORD2F*)vl_mb_vertex_positions; + +/* + * Represents texcoords for the above for rendering 4 chroma blocks. + * Same case as 4 luma blocks. + */ +const struct VL_TEXCOORD2F *vl_chroma_444_texcoords = vl_luma_texcoords; + +/* + * Represents 2 triangles in a strip in normalized coords. + * Used to render the surface onto the frame buffer. + */ +const struct VL_VERTEX2F vl_surface_vertex_positions[4] = +{ + {0.0f, 0.0f}, + {0.0f, 1.0f}, + {1.0f, 0.0f}, + {1.0f, 1.0f} +}; + +/* + * Represents texcoords for the above. We can use the position values directly. + */ +const struct VL_TEXCOORD2F *vl_surface_texcoords = (const struct VL_TEXCOORD2F*)vl_surface_vertex_positions; + +/* + * Used when rendering P and B macroblocks, multiplier is applied to the A channel, + * which is then added to the L channel, then the bias is subtracted from that to + * get back the differential. The differential is then added to the samples from the + * reference surface(s). + */ +const struct VL_MC_FS_CONSTS vl_mc_fs_consts = +{ + {256.0f, 256.0f, 256.0f, 0.0f}, + {256.0f / 255.0f, 256.0f / 255.0f, 256.0f / 255.0f, 0.0f}, + {0.5f, 2.0f, 0.0f, 0.0f} +}; + +/* + * Identity color conversion constants, for debugging + */ +const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_identity = +{ + { + 0.0f, 0.0f, 0.0f, 0.0f + }, + { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601 = +{ + { + 0.0f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.0f, 0.0f, 1.371f, 0.0f, + 1.0f, -0.336f, -0.698f, 0.0f, + 1.0f, 1.732f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601_full = +{ + { + 0.062745098f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.164f, 0.0f, 1.596f, 0.0f, + 1.164f, -0.391f, -0.813f, 0.0f, + 1.164f, 2.018f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709 = +{ + { + 0.0f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.0f, 0.0f, 1.540f, 0.0f, + 1.0f, -0.183f, -0.459f, 0.0f, + 1.0f, 1.816f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709_full = +{ + { + 0.062745098f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.164f, 0.0f, 1.793f, 0.0f, + 1.164f, -0.213f, -0.534f, 0.0f, + 1.164f, 2.115f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + diff --git a/src/gallium/state_trackers/g3dvl/vl_data.h b/src/gallium/state_trackers/g3dvl/vl_data.h new file mode 100644 index 00000000000..8f347273ad7 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_data.h @@ -0,0 +1,24 @@ +#ifndef vl_data_h +#define vl_data_h + +#include "vl_types.h" + +extern const struct VL_VERTEX2F vl_mb_vertex_positions[24]; +extern const struct VL_TEXCOORD2F vl_luma_texcoords[24]; +extern const struct VL_TEXCOORD2F *vl_chroma_420_texcoords; +extern const struct VL_TEXCOORD2F *vl_chroma_422_texcoords; +extern const struct VL_TEXCOORD2F *vl_chroma_444_texcoords; + +extern const struct VL_VERTEX2F vl_surface_vertex_positions[4]; +extern const struct VL_TEXCOORD2F *vl_surface_texcoords; + +extern const struct VL_MC_FS_CONSTS vl_mc_fs_consts; + +extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_identity; +extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601; +extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601_full; +extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709; +extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709_full; + +#endif + diff --git a/src/gallium/state_trackers/g3dvl/vl_defs.h b/src/gallium/state_trackers/g3dvl/vl_defs.h new file mode 100644 index 00000000000..e668a7a10e0 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_defs.h @@ -0,0 +1,12 @@ +#ifndef vl_defs_h +#define vl_defs_h + +#define VL_BLOCK_WIDTH 8 +#define VL_BLOCK_HEIGHT 8 +#define VL_BLOCK_SIZE (VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT) +#define VL_MACROBLOCK_WIDTH 16 +#define VL_MACROBLOCK_HEIGHT 16 +#define VL_MACROBLOCK_SIZE (VL_MACROBLOCK_WIDTH * VL_MACROBLOCK_HEIGHT) + +#endif + diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.c b/src/gallium/state_trackers/g3dvl/vl_shader_build.c new file mode 100644 index 00000000000..365ad697257 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.c @@ -0,0 +1,204 @@ +#include "vl_shader_build.h" +#include <assert.h> +#include <tgsi/util/tgsi_parse.h> +#include <tgsi/util/tgsi_build.h> + +struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_interpolated_input +( + unsigned int name, + unsigned int index, + unsigned int first, + unsigned int last, + int interpolation +) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + assert + ( + interpolation == TGSI_INTERPOLATE_CONSTANT || + interpolation == TGSI_INTERPOLATE_LINEAR || + interpolation == TGSI_INTERPOLATE_PERSPECTIVE + ); + + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.Declaration.Interpolate = interpolation;; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_instruction vl_inst2 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src_file, + unsigned int src_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = src_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src_index; + + return inst; +} + +struct tgsi_full_instruction vl_inst3 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = src1_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; + inst.FullSrcRegisters[1].SrcRegister.File = src2_file; + inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + + return inst; +} + +struct tgsi_full_instruction vl_tex +( + int tex, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = tex; + inst.FullSrcRegisters[0].SrcRegister.File = src1_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; + inst.FullSrcRegisters[1].SrcRegister.File = src2_file; + inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + + return inst; +} + +struct tgsi_full_instruction vl_inst4 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index, + enum tgsi_file_type src3_file, + unsigned int src3_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 3; + inst.FullSrcRegisters[0].SrcRegister.File = src1_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; + inst.FullSrcRegisters[1].SrcRegister.File = src2_file; + inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + inst.FullSrcRegisters[2].SrcRegister.File = src3_file; + inst.FullSrcRegisters[2].SrcRegister.Index = src3_index; + + return inst; +} + +struct tgsi_full_instruction vl_end(void) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + + return inst; +} + diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.h b/src/gallium/state_trackers/g3dvl/vl_shader_build.h new file mode 100644 index 00000000000..9e64bbeeae0 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.h @@ -0,0 +1,61 @@ +#ifndef vl_shader_build_h +#define vl_shader_build_h + +#include <pipe/p_shader_tokens.h> + +struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_interpolated_input +( + unsigned int name, + unsigned int index, + unsigned int first, + unsigned int last, + int interpolation +); +struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last); +struct tgsi_full_instruction vl_inst2 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src_file, + unsigned int src_index +); +struct tgsi_full_instruction vl_inst3 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +); +struct tgsi_full_instruction vl_tex +( + int tex, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +); +struct tgsi_full_instruction vl_inst4 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index, + enum tgsi_file_type src3_file, + unsigned int src3_index +); +struct tgsi_full_instruction vl_end(void); + +#endif + diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c new file mode 100644 index 00000000000..145ea32892a --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -0,0 +1,726 @@ +#include "vl_surface.h" +#include <assert.h> +#include <stdlib.h> +#include <pipe/p_context.h> +#include <pipe/p_state.h> +#include <pipe/p_format.h> +#include <pipe/p_inlines.h> +#include "vl_context.h" +#include "vl_defs.h" +#include "vl_util.h" + +static int vlTransformBlock(short *src, short *dst, short bias) +{ + static const float basis[8][8] = + { + {0.3536, 0.4904, 0.4619, 0.4157, 0.3536, 0.2778, 0.1913, 0.0975}, + {0.3536, 0.4157, 0.1913, -0.0975, -0.3536, -0.4904, -0.4619, -0.2778}, + {0.3536, 0.2778, -0.1913, -0.4904, -0.3536, 0.0975, 0.4619, 0.4157}, + {0.3536, 0.0975, -0.4619, -0.2778, 0.3536, 0.4157, -0.1913, -0.4904}, + {0.3536, -0.0975, -0.4619, 0.2778, 0.3536, -0.4157, -0.1913, 0.4904}, + {0.3536, -0.2778, -0.1913, 0.4904, -0.3536, -0.0975, 0.4619, -0.4157}, + {0.3536, -0.4157, 0.1913, 0.0975, -0.3536, 0.4904, -0.4619, 0.2778}, + {0.3536, -0.4904, 0.4619, -0.4157, 0.3536, -0.2778, 0.1913, -0.0975} + }; + + unsigned int x, y; + short tmp[64]; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + tmp[y * VL_BLOCK_WIDTH + x] = (short) + ( + src[y * VL_BLOCK_WIDTH + 0] * basis[x][0] + + src[y * VL_BLOCK_WIDTH + 1] * basis[x][1] + + src[y * VL_BLOCK_WIDTH + 2] * basis[x][2] + + src[y * VL_BLOCK_WIDTH + 3] * basis[x][3] + + src[y * VL_BLOCK_WIDTH + 4] * basis[x][4] + + src[y * VL_BLOCK_WIDTH + 5] * basis[x][5] + + src[y * VL_BLOCK_WIDTH + 6] * basis[x][6] + + src[y * VL_BLOCK_WIDTH + 7] * basis[x][7] + ); + + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + { + dst[y * VL_BLOCK_WIDTH + x] = bias + (short) + ( + tmp[0 * VL_BLOCK_WIDTH + x] * basis[y][0] + + tmp[1 * VL_BLOCK_WIDTH + x] * basis[y][1] + + tmp[2 * VL_BLOCK_WIDTH + x] * basis[y][2] + + tmp[3 * VL_BLOCK_WIDTH + x] * basis[y][3] + + tmp[4 * VL_BLOCK_WIDTH + x] * basis[y][4] + + tmp[5 * VL_BLOCK_WIDTH + x] * basis[y][5] + + tmp[6 * VL_BLOCK_WIDTH + x] * basis[y][6] + + tmp[7 * VL_BLOCK_WIDTH + x] * basis[y][7] + ); + if (dst[y * VL_BLOCK_WIDTH + x] > 255) + dst[y * VL_BLOCK_WIDTH + x] = 255; + else if (bias > 0 && dst[y * VL_BLOCK_WIDTH + x] < 0) + dst[y * VL_BLOCK_WIDTH + x] = 0; + } + return 0; +} + +static int vlGrabFrameCodedFullBlock(short *src, short *dst, unsigned int dst_pitch) +{ + unsigned int y; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + memcpy + ( + dst + y * dst_pitch, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + return 0; +} + +static int vlGrabFrameCodedDiffBlock(short *src, short *dst, unsigned int dst_pitch) +{ + unsigned int x, y; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + dst[y * dst_pitch + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100; + + return 0; +} + +static int vlGrabFieldCodedFullBlock(short *src, short *dst, unsigned int dst_pitch) +{ + unsigned int y; + + for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y) + memcpy + ( + dst + y * dst_pitch * 2, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + dst += VL_BLOCK_HEIGHT * dst_pitch; + + for (; y < VL_BLOCK_HEIGHT; ++y) + memcpy + ( + dst + y * dst_pitch * 2, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + return 0; +} + +static int vlGrabFieldCodedDiffBlock(short *src, short *dst, unsigned int dst_pitch) +{ + unsigned int x, y; + + for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + dst[y * dst_pitch * 2 + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100; + + dst += VL_BLOCK_HEIGHT * dst_pitch; + + for (; y < VL_BLOCK_HEIGHT; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + dst[y * dst_pitch * 2 + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100; + + return 0; +} + +static int vlGrabNoBlock(short *dst, unsigned int dst_pitch) +{ + unsigned int x, y; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + dst[y * dst_pitch + x] = 0x100; + + return 0; +} + +static int vlGrabBlocks +( + struct VL_CONTEXT *context, + unsigned int coded_block_pattern, + enum VL_DCT_TYPE dct_type, + enum VL_SAMPLE_TYPE sample_type, + short *blocks +) +{ + struct pipe_surface *tex_surface; + short *texels; + unsigned int tex_pitch; + unsigned int tb, sb = 0; + + const int do_idct = 1; + short temp_block[64]; + + assert(context); + assert(blocks); + + tex_surface = context->pipe->screen->get_tex_surface + ( + context->pipe->screen, + context->states.mc.textures[0], + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE + ); + + texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); + tex_pitch = tex_surface->stride / tex_surface->block.size; + + for (tb = 0; tb < 4; ++tb) + { + if ((coded_block_pattern >> (5 - tb)) & 1) + { + if (dct_type == VL_DCT_FRAME_CODED) + if (sample_type == VL_FULL_SAMPLE) + if (do_idct) + { + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); + vlGrabFrameCodedFullBlock + ( + temp_block, + texels + tb * tex_pitch * VL_BLOCK_HEIGHT, + tex_pitch + ); + } + else + vlGrabFrameCodedFullBlock + ( + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels + tb * tex_pitch * VL_BLOCK_HEIGHT, + tex_pitch + ); + else + if (do_idct) + { + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); + vlGrabFrameCodedDiffBlock + ( + temp_block, + texels + tb * tex_pitch * VL_BLOCK_HEIGHT, + tex_pitch + ); + } + else + vlGrabFrameCodedDiffBlock + ( + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels + tb * tex_pitch * VL_BLOCK_HEIGHT, + tex_pitch + ); + else + if (sample_type == VL_FULL_SAMPLE) + if (do_idct) + { + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); + vlGrabFieldCodedFullBlock + ( + temp_block, + texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, + tex_pitch + ); + } + else + vlGrabFieldCodedFullBlock + ( + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, + tex_pitch + ); + else + if (do_idct) + { + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); + vlGrabFieldCodedDiffBlock + ( + temp_block, + texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, + tex_pitch + ); + } + else + vlGrabFieldCodedDiffBlock + ( + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, + tex_pitch + ); + ++sb; + } + else + vlGrabNoBlock(texels + tb * tex_pitch * VL_BLOCK_HEIGHT, tex_pitch); + } + + pipe_surface_unmap(tex_surface); + + /* TODO: Implement 422, 444 */ + for (tb = 0; tb < 2; ++tb) + { + tex_surface = context->pipe->screen->get_tex_surface + ( + context->pipe->screen, + context->states.mc.textures[tb + 1], + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE + ); + + texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); + tex_pitch = tex_surface->stride / tex_surface->block.size; + + if ((coded_block_pattern >> (1 - tb)) & 1) + { + if (sample_type == VL_FULL_SAMPLE) + if (do_idct) + { + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); + vlGrabFrameCodedFullBlock + ( + temp_block, + texels, + tex_pitch + ); + } + else + vlGrabFrameCodedFullBlock + ( + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels, + tex_pitch + ); + else + if (do_idct) + { + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); + vlGrabFrameCodedDiffBlock + ( + temp_block, + texels, + tex_pitch + ); + } + else + vlGrabFrameCodedDiffBlock + ( + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels, + tex_pitch + ); + + ++sb; + } + else + vlGrabNoBlock(texels, tex_pitch); + + pipe_surface_unmap(tex_surface); + } + + return 0; +} + +int vlCreateSurface(struct VL_CONTEXT *context, struct VL_SURFACE **surface) +{ + struct pipe_context *pipe; + struct pipe_texture template; + struct VL_SURFACE *sfc; + + assert(context); + assert(surface); + + pipe = context->pipe; + + sfc = calloc(1, sizeof(struct VL_SURFACE)); + + sfc->context = context; + sfc->width = vlRoundUpPOT(context->video_width); + sfc->height = vlRoundUpPOT(context->video_height); + sfc->format = context->video_format; + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_A8R8G8B8_UNORM; + template.last_level = 0; + template.width[0] = sfc->width; + template.height[0] = sfc->height; + template.depth[0] = 1; + template.compressed = 0; + pf_get_block(template.format, &template.block); + /* XXX: Needed? */ + template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; + + sfc->texture = pipe->screen->texture_create(pipe->screen, &template); + + *surface = sfc; + + return 0; +} + +int vlDestroySurface(struct VL_SURFACE *surface) +{ + assert(surface); + pipe_texture_release(&surface->texture); + free(surface); + + return 0; +} + +int vlRenderIMacroBlock +( + enum VL_PICTURE picture_type, + enum VL_FIELD_ORDER field_order, + unsigned int mbx, + unsigned int mby, + unsigned int coded_block_pattern, + enum VL_DCT_TYPE dct_type, + short *blocks, + struct VL_SURFACE *surface +) +{ + struct pipe_context *pipe; + struct VL_MC_VS_CONSTS *vs_consts; + + assert(blocks); + assert(surface); + + /* TODO: Implement interlaced rendering */ + if (picture_type != VL_FRAME_PICTURE) + return 0; + + vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_FULL_SAMPLE, blocks); + + pipe = surface->context->pipe; + + vs_consts = pipe->winsys->buffer_map + ( + pipe->winsys, + surface->context->states.mc.vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); + + vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; + vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; + vs_consts->scale.z = 1.0f; + vs_consts->scale.w = 1.0f; + vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; + vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; + vs_consts->mb_pos_trans.z = 0.0f; + vs_consts->mb_pos_trans.w = 0.0f; + + pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer); + + surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface + ( + pipe->screen, + surface->texture, + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE + ); + pipe->set_framebuffer_state(pipe, &surface->context->states.mc.render_target); + pipe->set_sampler_textures(pipe, 3, surface->context->states.mc.textures); + pipe->bind_sampler_states(pipe, 3, (void**)surface->context->states.mc.samplers); + pipe->bind_vs_state(pipe, surface->context->states.mc.i_vs); + pipe->bind_fs_state(pipe, surface->context->states.mc.i_fs); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); + + return 0; +} + +int vlRenderPMacroBlock +( + enum VL_PICTURE picture_type, + enum VL_FIELD_ORDER field_order, + unsigned int mbx, + unsigned int mby, + enum VL_MC_TYPE mc_type, + struct VL_MOTION_VECTOR *motion_vector, + unsigned int coded_block_pattern, + enum VL_DCT_TYPE dct_type, + short *blocks, + struct VL_SURFACE *ref_surface, + struct VL_SURFACE *surface +) +{ + struct pipe_context *pipe; + struct VL_MC_VS_CONSTS *vs_consts; + + assert(motion_vectors); + assert(blocks); + assert(ref_surface); + assert(surface); + + /* TODO: Implement interlaced rendering */ + if (picture_type != VL_FRAME_PICTURE) + return 0; + /* TODO: Implement other MC types */ + if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC) + return 0; + + vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); + + pipe = surface->context->pipe; + + vs_consts = pipe->winsys->buffer_map + ( + pipe->winsys, + surface->context->states.mc.vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); + + vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; + vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; + vs_consts->scale.z = 1.0f; + vs_consts->scale.w = 1.0f; + vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; + vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; + vs_consts->mb_pos_trans.z = 0.0f; + vs_consts->mb_pos_trans.w = 0.0f; + vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector->top_field.x * 0.5f) / (float)surface->width; + vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector->top_field.y * 0.5f) / (float)surface->height; + vs_consts->mb_tc_trans[0].top_field.z = 0.0f; + vs_consts->mb_tc_trans[0].top_field.w = 0.0f; + + if (mc_type == VL_FIELD_MC) + { + vs_consts->denorm.x = (float)surface->width; + vs_consts->denorm.y = (float)surface->height; + + vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector->bottom_field.x * 0.5f) / (float)surface->width; + vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector->bottom_field.y * 0.5f) / (float)surface->height; + vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; + vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; + + pipe->bind_vs_state(pipe, surface->context->states.mc.p_vs[1]); + pipe->bind_fs_state(pipe, surface->context->states.mc.p_fs[1]); + } + else + { + pipe->bind_vs_state(pipe, surface->context->states.mc.p_vs[0]); + pipe->bind_fs_state(pipe, surface->context->states.mc.p_fs[0]); + } + + pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer); + + surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface + ( + pipe->screen, + surface->texture, + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE + ); + pipe->set_framebuffer_state(pipe, &surface->context->states.mc.render_target); + + surface->context->states.mc.textures[3] = ref_surface->texture; + pipe->set_sampler_textures(pipe, 4, surface->context->states.mc.textures); + pipe->bind_sampler_states(pipe, 4, (void**)surface->context->states.mc.samplers); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); + + return 0; +} + +int vlRenderBMacroBlock +( + enum VL_PICTURE picture_type, + enum VL_FIELD_ORDER field_order, + unsigned int mbx, + unsigned int mby, + enum VL_MC_TYPE mc_type, + struct VL_MOTION_VECTOR *motion_vector, + unsigned int coded_block_pattern, + enum VL_DCT_TYPE dct_type, + short *blocks, + struct VL_SURFACE *past_surface, + struct VL_SURFACE *future_surface, + struct VL_SURFACE *surface +) +{ + struct pipe_context *pipe; + struct VL_MC_VS_CONSTS *vs_consts; + + assert(motion_vectors); + assert(blocks); + assert(ref_surface); + assert(surface); + + /* TODO: Implement interlaced rendering */ + if (picture_type != VL_FRAME_PICTURE) + return 0; + /* TODO: Implement other MC types */ + if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC) + return 0; + + vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); + + pipe = surface->context->pipe; + + vs_consts = pipe->winsys->buffer_map + ( + pipe->winsys, + surface->context->states.mc.vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); + + vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; + vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; + vs_consts->scale.z = 1.0f; + vs_consts->scale.w = 1.0f; + vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; + vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; + vs_consts->mb_pos_trans.z = 0.0f; + vs_consts->mb_pos_trans.w = 0.0f; + vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[0].top_field.x * 0.5f) / (float)surface->width; + vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[0].top_field.y * 0.5f) / (float)surface->height; + vs_consts->mb_tc_trans[0].top_field.z = 0.0f; + vs_consts->mb_tc_trans[0].top_field.w = 0.0f; + vs_consts->mb_tc_trans[1].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[1].top_field.x * 0.5f) / (float)surface->width; + vs_consts->mb_tc_trans[1].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[1].top_field.y * 0.5f) / (float)surface->height; + vs_consts->mb_tc_trans[1].top_field.z = 0.0f; + vs_consts->mb_tc_trans[1].top_field.w = 0.0f; + + if (mc_type == VL_FIELD_MC) + { + vs_consts->denorm.x = (float)surface->width; + vs_consts->denorm.y = (float)surface->height; + + vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[0].bottom_field.x * 0.5f) / (float)surface->width; + vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[0].bottom_field.y * 0.5f) / (float)surface->height; + vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; + vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; + vs_consts->mb_tc_trans[1].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[1].bottom_field.x * 0.5f) / (float)surface->width; + vs_consts->mb_tc_trans[1].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[1].bottom_field.y * 0.5f) / (float)surface->height; + vs_consts->mb_tc_trans[1].bottom_field.z = 0.0f; + vs_consts->mb_tc_trans[1].bottom_field.w = 0.0f; + + pipe->bind_vs_state(pipe, surface->context->states.mc.b_vs[1]); + pipe->bind_fs_state(pipe, surface->context->states.mc.b_fs[1]); + } + else + { + pipe->bind_vs_state(pipe, surface->context->states.mc.b_vs[0]); + pipe->bind_fs_state(pipe, surface->context->states.mc.b_fs[0]); + } + + pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer); + + surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface + ( + pipe->screen, + surface->texture, + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE + ); + pipe->set_framebuffer_state(pipe, &surface->context->states.mc.render_target); + + surface->context->states.mc.textures[3] = past_surface->texture; + surface->context->states.mc.textures[4] = future_surface->texture; + pipe->set_sampler_textures(pipe, 5, surface->context->states.mc.textures); + pipe->bind_sampler_states(pipe, 5, (void**)surface->context->states.mc.samplers); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); + + return 0; +} + +int vlPutSurface +( + struct VL_SURFACE *surface, + Drawable drawable, + unsigned int srcx, + unsigned int srcy, + unsigned int srcw, + unsigned int srch, + unsigned int destx, + unsigned int desty, + unsigned int destw, + unsigned int desth, + enum VL_PICTURE picture_type +) +{ + unsigned int create_fb = 0; + struct pipe_context *pipe; + struct VL_CSC_VS_CONSTS *vs_consts; + + assert(surface); + + pipe = surface->context->pipe; + + if (!surface->context->states.csc.framebuffer.cbufs[0]) + create_fb = 1; + else if + ( + surface->context->states.csc.framebuffer.width != destw || + surface->context->states.csc.framebuffer.height != desth + ) + { + pipe->winsys->surface_release + ( + pipe->winsys, + &surface->context->states.csc.framebuffer.cbufs[0] + ); + + create_fb = 1; + } + + if (create_fb) + { + surface->context->states.csc.viewport.scale[0] = destw; + surface->context->states.csc.viewport.scale[1] = desth; + surface->context->states.csc.viewport.scale[2] = 1; + surface->context->states.csc.viewport.scale[3] = 1; + surface->context->states.csc.viewport.translate[0] = 0; + surface->context->states.csc.viewport.translate[1] = 0; + surface->context->states.csc.viewport.translate[2] = 0; + surface->context->states.csc.viewport.translate[3] = 0; + + surface->context->states.csc.framebuffer.width = destw; + surface->context->states.csc.framebuffer.height = desth; + surface->context->states.csc.framebuffer.cbufs[0] = pipe->winsys->surface_alloc(pipe->winsys); + pipe->winsys->surface_alloc_storage + ( + pipe->winsys, + surface->context->states.csc.framebuffer.cbufs[0], + destw, + desth, + PIPE_FORMAT_A8R8G8B8_UNORM, + /* XXX: SoftPipe doesn't change GPU usage to CPU like it does for textures */ + PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE, + 0 + ); + } + + vlEndRender(surface->context); + + vs_consts = pipe->winsys->buffer_map + ( + pipe->winsys, + surface->context->states.csc.vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); + + vs_consts->src_scale.x = srcw / (float)surface->width; + vs_consts->src_scale.y = srch / (float)surface->height; + vs_consts->src_scale.z = 1; + vs_consts->src_scale.w = 1; + vs_consts->src_trans.x = srcx / (float)surface->width; + vs_consts->src_trans.y = srcy / (float)surface->height; + vs_consts->src_trans.z = 0; + vs_consts->src_trans.w = 0; + + pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.csc.vs_const_buf.buffer); + + pipe->set_sampler_textures(pipe, 1, &surface->texture); + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + /* TODO: Need to take destx, desty into consideration */ + pipe->winsys->flush_frontbuffer + ( + pipe->winsys, + surface->context->states.csc.framebuffer.cbufs[0], + &drawable + ); + + vlBeginRender(surface->context); + + return 0; +} + diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.h b/src/gallium/state_trackers/g3dvl/vl_surface.h new file mode 100644 index 00000000000..9f56b77e1e7 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_surface.h @@ -0,0 +1,81 @@ +#ifndef vl_surface_h +#define vl_surface_h + +#include <X11/Xlib.h> +#include "vl_types.h" + +struct pipe_texture; + +struct VL_SURFACE +{ + struct VL_CONTEXT *context; + unsigned int width; + unsigned int height; + enum VL_FORMAT format; + struct pipe_texture *texture; +}; + +int vlCreateSurface(struct VL_CONTEXT *context, struct VL_SURFACE **surface); + +int vlDestroySurface(struct VL_SURFACE *surface); + +int vlRenderIMacroBlock +( + enum VL_PICTURE picture_type, + enum VL_FIELD_ORDER field_order, + unsigned int mbx, + unsigned int mby, + unsigned int coded_block_pattern, + enum VL_DCT_TYPE dct_type, + short *blocks, + struct VL_SURFACE *surface +); + +int vlRenderPMacroBlock +( + enum VL_PICTURE picture_type, + enum VL_FIELD_ORDER field_order, + unsigned int mbx, + unsigned int mby, + enum VL_MC_TYPE mc_type, + struct VL_MOTION_VECTOR *motion_vector, + unsigned int coded_block_pattern, + enum VL_DCT_TYPE dct_type, + short *blocks, + struct VL_SURFACE *ref_surface, + struct VL_SURFACE *surface +); + +int vlRenderBMacroBlock +( + enum VL_PICTURE picture_type, + enum VL_FIELD_ORDER field_order, + unsigned int mbx, + unsigned int mby, + enum VL_MC_TYPE mc_type, + struct VL_MOTION_VECTOR *motion_vector, + unsigned int coded_block_pattern, + enum VL_DCT_TYPE dct_type, + short *blocks, + struct VL_SURFACE *past_surface, + struct VL_SURFACE *future_surface, + struct VL_SURFACE *surface +); + +int vlPutSurface +( + struct VL_SURFACE *surface, + Drawable drawable, + unsigned int srcx, + unsigned int srcy, + unsigned int srcw, + unsigned int srch, + unsigned int destx, + unsigned int desty, + unsigned int destw, + unsigned int desth, + enum VL_PICTURE picture_type +); + +#endif + diff --git a/src/gallium/state_trackers/g3dvl/vl_types.h b/src/gallium/state_trackers/g3dvl/vl_types.h new file mode 100644 index 00000000000..4d210c9e0aa --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_types.h @@ -0,0 +1,102 @@ +#ifndef vl_types_h +#define vl_types_h + +enum VL_FORMAT +{ + VL_FORMAT_YCBCR_420, + VL_FORMAT_YCBCR_422, + VL_FORMAT_YCBCR_444 +}; + +enum VL_PICTURE +{ + VL_TOP_FIELD, + VL_BOTTOM_FIELD, + VL_FRAME_PICTURE +}; + +enum VL_FIELD_ORDER +{ + VL_FIELD_FIRST, + VL_FIELD_SECOND +}; + +enum VL_DCT_TYPE +{ + VL_DCT_FIELD_CODED, + VL_DCT_FRAME_CODED +}; + +enum VL_SAMPLE_TYPE +{ + VL_FULL_SAMPLE, + VL_DIFFERENCE_SAMPLE +}; + +enum VL_MC_TYPE +{ + VL_FIELD_MC, + VL_FRAME_MC, + VL_DUAL_PRIME_MC, + VL_16x8_MC = VL_FRAME_MC +}; + +struct VL_VERTEX4F +{ + float x, y, z, w; +}; + +struct VL_VERTEX2F +{ + float x, y; +}; + +struct VL_TEXCOORD2F +{ + float s, t; +}; + +struct VL_MC_VS_CONSTS +{ + struct VL_VERTEX4F scale; + struct VL_VERTEX4F mb_pos_trans; + struct VL_VERTEX4F denorm; + struct + { + struct VL_VERTEX4F top_field; + struct VL_VERTEX4F bottom_field; + } mb_tc_trans[2]; +}; + +struct VL_MC_FS_CONSTS +{ + struct VL_VERTEX4F multiplier; + struct VL_VERTEX4F bias; + struct VL_VERTEX4F y_divider; +}; + +struct VL_CSC_VS_CONSTS +{ + struct VL_VERTEX4F src_scale; + struct VL_VERTEX4F src_trans; +}; + +struct VL_CSC_FS_CONSTS +{ + struct VL_VERTEX4F bias; + float matrix[16]; +}; + +struct VL_MOTION_VECTOR +{ + struct + { + int x, y; + } top_field, bottom_field; +}; + +struct VL_CONTEXT; +struct VL_SURFACE; + +#endif + diff --git a/src/gallium/state_trackers/g3dvl/vl_util.c b/src/gallium/state_trackers/g3dvl/vl_util.c new file mode 100644 index 00000000000..2421ae22101 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_util.c @@ -0,0 +1,17 @@ +#include "vl_util.h" +#include <assert.h> + +unsigned int vlRoundUpPOT(unsigned int x) +{ + unsigned int i; + + assert(x > 0); + + --x; + + for (i = 1; i < sizeof(unsigned int) * 8; i <<= 1) + x |= x >> i; + + return x + 1; +} + diff --git a/src/gallium/state_trackers/g3dvl/vl_util.h b/src/gallium/state_trackers/g3dvl/vl_util.h new file mode 100644 index 00000000000..e4b72c4f870 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_util.h @@ -0,0 +1,7 @@ +#ifndef vl_util_h +#define vl_util_h + +unsigned int vlRoundUpPOT(unsigned int x); + +#endif + |