diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965')
113 files changed, 6592 insertions, 16536 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 9e4ff112dc3..005460f3547 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -5,25 +5,30 @@ include $(TOP)/configs/current LIBNAME = i965_dri.so DRIVER_SOURCES = \ - bufmgr_fake.c \ intel_batchbuffer.c \ intel_blit.c \ intel_buffer_objects.c \ intel_buffers.c \ intel_context.c \ - intel_ioctl.c \ + intel_decode.c \ + intel_depthstencil.c \ + intel_fbo.c \ intel_mipmap_tree.c \ intel_regions.c \ intel_screen.c \ intel_span.c \ - intel_pixel_copy.c \ + intel_pixel.c \ intel_pixel_bitmap.c \ + intel_pixel_copy.c \ + intel_pixel_draw.c \ intel_state.c \ intel_tex.c \ + intel_tex_copy.c \ + intel_tex_format.c \ + intel_tex_image.c \ intel_tex_layout.c \ + intel_tex_subimage.c \ intel_tex_validate.c \ - brw_aub.c \ - brw_aub_playback.c \ brw_cc.c \ brw_clip.c \ brw_clip_line.c \ @@ -44,16 +49,16 @@ DRIVER_SOURCES = \ brw_gs.c \ brw_gs_emit.c \ brw_gs_state.c \ - brw_hal.c \ brw_metaops.c \ brw_misc_state.c \ brw_program.c \ + brw_queryobj.c \ brw_sf.c \ brw_sf_emit.c \ brw_sf_state.c \ brw_state_batch.c \ brw_state_cache.c \ - brw_state_pool.c \ + brw_state_dump.c \ brw_state_upload.c \ brw_tex.c \ brw_tex_layout.c \ @@ -70,6 +75,7 @@ DRIVER_SOURCES = \ brw_wm_emit.c \ brw_wm_fp.c \ brw_wm_iz.c \ + brw_wm_glsl.c \ brw_wm_pass0.c \ brw_wm_pass1.c \ brw_wm_pass2.c \ @@ -84,10 +90,12 @@ C_SOURCES = \ ASM_SOURCES = -DRIVER_DEFINES = -I../intel +DRIVER_DEFINES = -I../intel -I../intel/server -include ../Makefile.template +DRI_LIB_DEPS += -ldrm_intel -intel_tex_layout.o: ../intel/intel_tex_layout.c +include ../Makefile.template symlinks: +intel_decode.o: ../intel/intel_decode.c +intel_tex_layout.o: ../intel/intel_tex_layout.c diff --git a/src/mesa/drivers/dri/i965/brw_aub.c b/src/mesa/drivers/dri/i965/brw_aub.c deleted file mode 100644 index f851a5b7955..00000000000 --- a/src/mesa/drivers/dri/i965/brw_aub.c +++ /dev/null @@ -1,353 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <[email protected]> - */ - -#include "brw_context.h" -#include "brw_aub.h" -#include "intel_regions.h" -#include <stdio.h> - -extern char *__progname; - - -/* Registers to control page table - */ -#define PGETBL_CTL 0x2020 -#define PGETBL_ENABLED 0x1 - -#define NR_GTT_ENTRIES 65536 /* 256 mb */ - -#define FAIL \ -do { \ - fprintf(stderr, "failed to write aub data at %s/%d\n", __FUNCTION__, __LINE__); \ - exit(1); \ -} while (0) - - -/* Emit the headers at the top of each aubfile. Initialize the GTT. - */ -static void init_aubfile( FILE *aub_file ) -{ - struct aub_file_header fh; - struct aub_block_header bh; - unsigned int data; - - static int nr; - - nr++; - - /* Emit the aub header: - */ - memset(&fh, 0, sizeof(fh)); - - fh.instruction_type = AUB_FILE_HEADER; - fh.minor = 0x0; - fh.major = 0x7; - memcpy(fh.application, __progname, sizeof(fh.application)); - fh.day = (nr>>24) & 0xff; - fh.month = 0x0; - fh.year = 0x0; - fh.timezone = 0x0; - fh.second = nr & 0xff; - fh.minute = (nr>>8) & 0xff; - fh.hour = (nr>>16) & 0xff; - fh.comment_length = 0x0; - - if (fwrite(&fh, sizeof(fh), 1, aub_file) < 0) - FAIL; - - /* Setup the GTT starting at main memory address zero (!): - */ - memset(&bh, 0, sizeof(bh)); - - bh.instruction_type = AUB_BLOCK_HEADER; - bh.operation = BH_MMI0_WRITE32; - bh.type = 0x0; - bh.address_space = ADDR_GTT; /* ??? */ - bh.general_state_type = 0x0; - bh.surface_state_type = 0x0; - bh.address = PGETBL_CTL; - bh.length = 0x4; - - if (fwrite(&bh, sizeof(bh), 1, aub_file) < 0) - FAIL; - - data = 0x0 | PGETBL_ENABLED; - - if (fwrite(&data, sizeof(data), 1, aub_file) < 0) - FAIL; -} - - -static void init_aub_gtt( struct brw_context *brw, - GLuint start_offset, - GLuint size ) -{ - FILE *aub_file = brw->intel.aub_file; - struct aub_block_header bh; - unsigned int i; - - assert(start_offset + size < NR_GTT_ENTRIES * 4096); - - - memset(&bh, 0, sizeof(bh)); - - bh.instruction_type = AUB_BLOCK_HEADER; - bh.operation = BH_DATA_WRITE; - bh.type = 0x0; - bh.address_space = ADDR_MAIN; - bh.general_state_type = 0x0; - bh.surface_state_type = 0x0; - bh.address = start_offset / 4096 * 4; - bh.length = size / 4096 * 4; - - if (fwrite(&bh, sizeof(bh), 1, aub_file) < 0) - FAIL; - - for (i = 0; i < size / 4096; i++) { - GLuint data = brw->next_free_page | 1; - - brw->next_free_page += 4096; - - if (fwrite(&data, sizeof(data), 1, aub_file) < 0) - FAIL; - } - -} - -static void write_block_header( FILE *aub_file, - struct aub_block_header *bh, - const GLuint *data, - GLuint sz ) -{ - sz = (sz + 3) & ~3; - - if (fwrite(bh, sizeof(*bh), 1, aub_file) < 0) - FAIL; - - if (fwrite(data, sz, 1, aub_file) < 0) - FAIL; - - fflush(aub_file); -} - - -static void write_dump_bmp( FILE *aub_file, - struct aub_dump_bmp *db ) -{ - if (fwrite(db, sizeof(*db), 1, aub_file) < 0) - FAIL; - - fflush(aub_file); -} - - - -static void brw_aub_gtt_data( struct intel_context *intel, - GLuint offset, - const void *data, - GLuint sz, - GLuint type, - GLuint state_type ) -{ - struct aub_block_header bh; - - bh.instruction_type = AUB_BLOCK_HEADER; - bh.operation = BH_DATA_WRITE; - bh.type = type; - bh.address_space = ADDR_GTT; - bh.pad0 = 0; - - if (type == DW_GENERAL_STATE) { - bh.general_state_type = state_type; - bh.surface_state_type = 0; - } - else { - bh.general_state_type = 0; - bh.surface_state_type = state_type; - } - - bh.pad1 = 0; - bh.address = offset; - bh.length = sz; - - write_block_header(intel->aub_file, &bh, data, sz); -} - - - -static void brw_aub_gtt_cmds( struct intel_context *intel, - GLuint offset, - const void *data, - GLuint sz ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - struct aub_block_header bh; - GLuint type = CW_PRIMARY_RING_A; - - - bh.instruction_type = AUB_BLOCK_HEADER; - bh.operation = BH_COMMAND_WRITE; - bh.type = type; - bh.address_space = ADDR_GTT; - bh.pad0 = 0; - bh.general_state_type = 0; - bh.surface_state_type = 0; - bh.pad1 = 0; - bh.address = offset; - bh.length = sz; - - write_block_header(brw->intel.aub_file, &bh, data, sz); -} - -static void brw_aub_dump_bmp( struct intel_context *intel, - GLuint buffer ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - intelScreenPrivate *intelScreen = brw->intel.intelScreen; - struct aub_dump_bmp db; - GLuint format; - - if (intelScreen->cpp == 4) - format = 0x7; - else - format = 0x3; - - - if (buffer == 0) { - db.instruction_type = AUB_DUMP_BMP; - db.xmin = 0; - db.ymin = 0; - db.format = format; - db.bpp = intelScreen->cpp * 8; - db.pitch = intelScreen->front.pitch / intelScreen->cpp; - db.xsize = intelScreen->width; - db.ysize = intelScreen->height; - db.addr = intelScreen->front.offset; - db.unknown = 0x0; /* 4: xmajor tiled, 0: not tiled */ - - write_dump_bmp(brw->intel.aub_file, &db); - } - else { - db.instruction_type = AUB_DUMP_BMP; - db.xmin = 0; - db.ymin = 0; - db.format = format; - db.bpp = intel->back_region->cpp * 8; - db.pitch = intel->back_region->pitch; - db.xsize = intel->back_region->pitch; - db.ysize = intel->back_region->height; - db.addr = intelScreen->back.offset; - db.unknown = intel->back_region->tiled ? 0x4 : 0x0; - - write_dump_bmp(brw->intel.aub_file, &db); - } -} - -/* Attempt to prevent monster aubfiles by closing and reopening when - * the state pools wrap. - */ -static void brw_aub_wrap( struct intel_context *intel ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - if (intel->aub_file) { - brw_aub_destroy(brw); - brw_aub_init(brw); - } - brw->wrap = 1; /* ??? */ -} - - -int brw_aub_init( struct brw_context *brw ) -{ - struct intel_context *intel = &brw->intel; - intelScreenPrivate *intelScreen = intel->intelScreen; - char filename[80]; - int val; - static int i = 0; - - i++; - - if (_mesa_getenv("INTEL_REPLAY")) - return 0; - - if (_mesa_getenv("INTEL_AUBFILE")) { - val = snprintf(filename, sizeof(filename), "%s%d.aub", _mesa_getenv("INTEL_AUBFILE"), i%4); - _mesa_printf("--> Aub file: %s\n", filename); - brw->intel.aub_file = fopen(filename, "w"); - } - else if (_mesa_getenv("INTEL_AUB")) { - val = snprintf(filename, sizeof(filename), "%s.aub", __progname); - if (val < 0 || val > sizeof(filename)) - strcpy(filename, "default.aub"); - - _mesa_printf("--> Aub file: %s\n", filename); - brw->intel.aub_file = fopen(filename, "w"); - } - else { - return 0; - } - - if (!brw->intel.aub_file) { - _mesa_printf("couldn't open aubfile\n"); - exit(1); - } - - brw->intel.vtbl.aub_commands = brw_aub_gtt_cmds; - brw->intel.vtbl.aub_dump_bmp = brw_aub_dump_bmp; - brw->intel.vtbl.aub_gtt_data = brw_aub_gtt_data; - brw->intel.vtbl.aub_wrap = brw_aub_wrap; - - init_aubfile(brw->intel.aub_file); - - /* The GTT is located starting address zero in main memory. Pages - * to populate the gtt start after this point. - */ - brw->next_free_page = (NR_GTT_ENTRIES * 4 + 4095) & ~4095; - - /* More or less correspond with all the agp regions mapped by the - * driver: - */ - init_aub_gtt(brw, 0, 4096*4); /* so new fulsim doesn't crash */ - init_aub_gtt(brw, intelScreen->front.offset, intelScreen->back.size); - init_aub_gtt(brw, intelScreen->back.offset, intelScreen->back.size); - init_aub_gtt(brw, intelScreen->depth.offset, intelScreen->back.size); - init_aub_gtt(brw, intelScreen->tex.offset, intelScreen->tex.size); - - return 0; -} - -void brw_aub_destroy( struct brw_context *brw ) -{ - if (brw->intel.aub_file) { - fclose(brw->intel.aub_file); - brw->intel.aub_file = NULL; - } -} diff --git a/src/mesa/drivers/dri/i965/brw_aub.h b/src/mesa/drivers/dri/i965/brw_aub.h deleted file mode 100644 index 198e36dc3c0..00000000000 --- a/src/mesa/drivers/dri/i965/brw_aub.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <[email protected]> - */ - -#ifndef BRW_AUB_H -#define BRW_AUB_H - -struct aub_file_header { - unsigned int instruction_type; - unsigned int pad0:16; - unsigned int minor:8; - unsigned int major:8; - unsigned char application[8*4]; - unsigned int day:8; - unsigned int month:8; - unsigned int year:16; - unsigned int timezone:8; - unsigned int second:8; - unsigned int minute:8; - unsigned int hour:8; - unsigned int comment_length:16; - unsigned int pad1:16; -}; - -struct aub_block_header { - unsigned int instruction_type; - unsigned int operation:8; - unsigned int type:8; - unsigned int address_space:8; - unsigned int pad0:8; - unsigned int general_state_type:8; - unsigned int surface_state_type:8; - unsigned int pad1:16; - unsigned int address; - unsigned int length; -}; - -struct aub_dump_bmp { - unsigned int instruction_type; - unsigned int xmin:16; - unsigned int ymin:16; - unsigned int pitch:16; - unsigned int bpp:8; - unsigned int format:8; - unsigned int xsize:16; - unsigned int ysize:16; - unsigned int addr; - unsigned int unknown; -}; - -enum bh_operation { - BH_COMMENT, - BH_DATA_WRITE, - BH_COMMAND_WRITE, - BH_MMI0_WRITE32, - BH_END_SCENE, - BH_CONFIG_MEMORY_MAP, - BH_MAX_OPERATION -}; - -enum command_write_type { - CW_HWB_RING = 1, - CW_PRIMARY_RING_A, - CW_PRIMARY_RING_B, /* XXX - disagreement with listaub! */ - CW_PRIMARY_RING_C, - CW_MAX_TYPE -}; - -enum data_write_type { - DW_NOTYPE, - DW_BATCH_BUFFER, - DW_BIN_BUFFER, - DW_BIN_POINTER_LIST, - DW_SLOW_STATE_BUFFER, - DW_VERTEX_BUFFER, - DW_2D_MAP, - DW_CUBE_MAP, - DW_INDIRECT_STATE_BUFFER, - DW_VOLUME_MAP, - DW_1D_MAP, - DW_CONSTANT_BUFFER, - DW_CONSTANT_URB_ENTRY, - DW_INDEX_BUFFER, - DW_GENERAL_STATE, - DW_SURFACE_STATE, - DW_MEDIA_OBJECT_INDIRECT_DATA, - DW_MAX_TYPE -}; - -enum data_write_general_state_type { - DWGS_NOTYPE, - DWGS_VERTEX_SHADER_STATE, - DWGS_GEOMETRY_SHADER_STATE , - DWGS_CLIPPER_STATE, - DWGS_STRIPS_FANS_STATE, - DWGS_WINDOWER_IZ_STATE, - DWGS_COLOR_CALC_STATE, - DWGS_CLIPPER_VIEWPORT_STATE, /* was 0x7 */ - DWGS_STRIPS_FANS_VIEWPORT_STATE, - DWGS_COLOR_CALC_VIEWPORT_STATE, /* was 0x9 */ - DWGS_SAMPLER_STATE, - DWGS_KERNEL_INSTRUCTIONS, - DWGS_SCRATCH_SPACE, - DWGS_SAMPLER_DEFAULT_COLOR, - DWGS_INTERFACE_DESCRIPTOR, - DWGS_VLD_STATE, - DWGS_VFE_STATE, - DWGS_MAX_TYPE -}; - -enum data_write_surface_state_type { - DWSS_NOTYPE, - DWSS_BINDING_TABLE_STATE, - DWSS_SURFACE_STATE, - DWSS_MAX_TYPE -}; - -enum memory_map_type { - MM_DEFAULT, - MM_DYNAMIC, - MM_MAX_TYPE -}; - -enum address_space { - ADDR_GTT, - ADDR_LOCAL, - ADDR_MAIN, - ADDR_MAX -}; - - -#define AUB_FILE_HEADER 0xe085000b -#define AUB_BLOCK_HEADER 0xe0c10003 -#define AUB_DUMP_BMP 0xe09e0004 - -struct brw_context; -struct intel_context; - -int brw_aub_init( struct brw_context *brw ); -void brw_aub_destroy( struct brw_context *brw ); - -int brw_playback_aubfile(struct brw_context *brw, - const char *filename); - -#endif diff --git a/src/mesa/drivers/dri/i965/brw_aub_playback.c b/src/mesa/drivers/dri/i965/brw_aub_playback.c deleted file mode 100644 index 2433d50c116..00000000000 --- a/src/mesa/drivers/dri/i965/brw_aub_playback.c +++ /dev/null @@ -1,443 +0,0 @@ - -#include <stdio.h> -#include <sys/mman.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <fcntl.h> - -#include "brw_aub.h" -#include "brw_defines.h" -#include "brw_context.h" -#include "intel_ioctl.h" -#include "bufmgr.h" - -struct aub_state { - struct intel_context *intel; - const char *map; - unsigned int csr; - unsigned int sz; -}; - - -static int gobble( struct aub_state *s, int size ) -{ - if (s->csr + size > s->sz) { - _mesa_printf("EOF in %s\n", __FUNCTION__); - return 1; - } - - s->csr += size; - return 0; -} - -static void flush_and_fence( struct aub_state *s ) -{ - struct intel_context *intel = s->intel; - GLuint buf[2]; - - buf[0] = intel->vtbl.flush_cmd(); - buf[1] = 0; - - intel_cmd_ioctl(intel, (char *)&buf, sizeof(buf)); - - intelWaitIrq( intel, intelEmitIrqLocked( intel )); -} - -static void flush_cmds( struct aub_state *s, - const void *data, - int len ) -{ - DBG("%s %d\n", __FUNCTION__, len); - - if (len & 0x4) { - unsigned int *tmp = malloc(len + 4); - DBG("padding to octword\n"); - memcpy(tmp, data, len); - tmp[len/4] = MI_NOOP; - flush_cmds(s, tmp, len+4); - free(tmp); - return; - } - - /* For ring data, just send off immediately via an ioctl. - * This differs slightly from how the stream was executed - * initially as this would have been a batchbuffer. - */ - intel_cmd_ioctl(s->intel, (void *)data, len); - - if (1) - flush_and_fence(s); -} - -static const char *pstrings[] = { - "none", - "POINTLIST", - "LINELIST", - "LINESTRIP", - "TRILIST", - "TRISTRIP", - "TRIFAN", - "QUADLIST", - "QUADSTRIP", - "LINELIST_ADJ", - "LINESTRIP_ADJ", - "TRILIST_ADJ", - "TRISTRIP_ADJ", - "TRISTRIP_REVERSE", - "POLYGON", - "RECTLIST", - "LINELOOP", - "POINTLIST_BF", - "LINESTRIP_CONT", - "LINESTRIP_BF", - "LINESTRIP_CONT_BF", - "TRIFAN_NOSTIPPLE", -}; - -static void do_3d_prim( struct aub_state *s, - const void *data, - int len ) -{ - struct brw_3d_primitive prim; - const struct brw_3d_primitive *orig = data; - int i; - - assert(len == sizeof(prim)); - memcpy(&prim, data, sizeof(prim)); - -#define START 0 -#define BLOCK (12*28) - - if (orig->verts_per_instance < BLOCK) - flush_cmds(s, &prim, sizeof(prim)); - else { - for (i = START; i + BLOCK < orig->verts_per_instance; i += BLOCK/2) { - prim.start_vert_location = i; - prim.verts_per_instance = BLOCK; - _mesa_printf("%sprim %d/%s verts %d..%d (of %d)\n", - prim.header.indexed ? "INDEXED " : "", - prim.header.topology, pstrings[prim.header.topology%16], - prim.start_vert_location, - prim.start_vert_location + prim.verts_per_instance, - orig->verts_per_instance); - flush_cmds(s, &prim, sizeof(prim)); - } - } -} - - - -static struct { - int cmd; - const char *name; - int has_length; -} cmd_info[] = { - { 0, "NOOP", 0 }, - { 0x5410, "XY_COLOR_BLT_RGB", 1 }, - { 0x5430, "XY_COLOR_BLT_RGBA", 1 }, - { 0x54d0, "XY_SRC_COPY_BLT_RGB", 1 }, - { 0x54f0, "XY_SRC_COPY_BLT_RGBA", 1 }, - { CMD_URB_FENCE, "URB_FENCE", 1 }, - { CMD_CONST_BUFFER_STATE, "CONST_BUFFER_STATE", 1 }, - { CMD_CONST_BUFFER, "CONST_BUFFER", 1 }, - { CMD_STATE_BASE_ADDRESS, "STATE_BASE_ADDRESS", 1 }, - { CMD_STATE_INSN_POINTER, "STATE_INSN_POINTER", 1 }, - { CMD_PIPELINE_SELECT, "PIPELINE_SELECT", 0, }, - { CMD_PIPELINED_STATE_POINTERS, "PIPELINED_STATE_POINTERS", 1 }, - { CMD_BINDING_TABLE_PTRS, "BINDING_TABLE_PTRS", 1 }, - { CMD_VERTEX_BUFFER, "VERTEX_BUFFER", 1 }, - { CMD_VERTEX_ELEMENT, "VERTEX_ELEMENT", 1 }, - { CMD_INDEX_BUFFER, "INDEX_BUFFER", 1 }, - { CMD_VF_STATISTICS, "VF_STATISTICS", 0 }, - { CMD_DRAW_RECT, "DRAW_RECT", 1 }, - { CMD_BLEND_CONSTANT_COLOR, "BLEND_CONSTANT_COLOR", 1 }, - { CMD_CHROMA_KEY, "CHROMA_KEY", 1 }, - { CMD_DEPTH_BUFFER, "DEPTH_BUFFER", 1 }, - { CMD_POLY_STIPPLE_OFFSET, "POLY_STIPPLE_OFFSET", 1 }, - { CMD_POLY_STIPPLE_PATTERN, "POLY_STIPPLE_PATTERN", 1 }, - { CMD_LINE_STIPPLE_PATTERN, "LINE_STIPPLE_PATTERN", 1 }, - { CMD_GLOBAL_DEPTH_OFFSET_CLAMP, "GLOBAL_DEPTH_OFFSET_CLAMP", 1 }, - { CMD_PIPE_CONTROL, "PIPE_CONTROL", 1 }, - { CMD_MI_FLUSH, "MI_FLUSH", 0 }, - { CMD_3D_PRIM, "3D_PRIM", 1 }, -}; - -#define NR_CMDS (sizeof(cmd_info)/sizeof(cmd_info[0])) - - -static int find_command( unsigned int cmd ) -{ - int i; - - for (i = 0; i < NR_CMDS; i++) - if (cmd == cmd_info[i].cmd) - return i; - - return -1; -} - - - -static int parse_commands( struct aub_state *s, - const unsigned int *data, - int len ) -{ - while (len) { - int cmd = data[0] >> 16; - int dwords; - int i; - - i = find_command(cmd); - - if (i < 0) { - _mesa_printf("couldn't find info for cmd %x\n", cmd); - return 1; - } - - if (cmd_info[i].has_length) - dwords = (data[0] & 0xff) + 2; - else - dwords = 1; - - _mesa_printf("%s (%d dwords) 0x%x\n", cmd_info[i].name, dwords, data[0]); - - if (len < dwords * 4) { - _mesa_printf("EOF in %s (%d bytes)\n", __FUNCTION__, len); - return 1; - } - - - if (0 && cmd == CMD_3D_PRIM) - do_3d_prim(s, data, dwords * 4); - else - flush_cmds(s, data, dwords * 4); - - data += dwords; - len -= dwords * 4; - } - - return 0; -} - - - -static void parse_data_write( struct aub_state *s, - const struct aub_block_header *bh, - void *dest, - const unsigned int *data, - int len ) -{ - switch (bh->type) { - case DW_GENERAL_STATE: - switch (bh->general_state_type) { - case DWGS_VERTEX_SHADER_STATE: { - struct brw_vs_unit_state vs; - assert(len == sizeof(vs)); - - _mesa_printf("DWGS_VERTEX_SHADER_STATE\n"); - memcpy(&vs, data, sizeof(vs)); - -/* vs.vs6.vert_cache_disable = 1; */ -/* vs.thread4.max_threads = 4; */ - - memcpy(dest, &vs, sizeof(vs)); - return; - } - case DWGS_CLIPPER_STATE: { - struct brw_clip_unit_state clip; - assert(len == sizeof(clip)); - - _mesa_printf("DWGS_CLIPPER_STATE\n"); - memcpy(&clip, data, sizeof(clip)); - -/* clip.thread4.max_threads = 0; */ -/* clip.clip5.clip_mode = BRW_CLIPMODE_REJECT_ALL; */ - - memcpy(dest, &clip, sizeof(clip)); - return; - } - - case DWGS_NOTYPE: - case DWGS_GEOMETRY_SHADER_STATE: - case DWGS_STRIPS_FANS_STATE: - break; - - case DWGS_WINDOWER_IZ_STATE: { - struct brw_wm_unit_state wm; - assert(len == sizeof(wm)); - - _mesa_printf("DWGS_WINDOWER_IZ_STATE\n"); - memcpy(&wm, data, sizeof(wm)); - -/* wm.wm5.max_threads = 10; */ - - memcpy(dest, &wm, sizeof(wm)); - return; - } - - case DWGS_COLOR_CALC_STATE: - case DWGS_CLIPPER_VIEWPORT_STATE: - case DWGS_STRIPS_FANS_VIEWPORT_STATE: - case DWGS_COLOR_CALC_VIEWPORT_STATE: - case DWGS_SAMPLER_STATE: - case DWGS_KERNEL_INSTRUCTIONS: - case DWGS_SCRATCH_SPACE: - case DWGS_SAMPLER_DEFAULT_COLOR: - case DWGS_INTERFACE_DESCRIPTOR: - case DWGS_VLD_STATE: - case DWGS_VFE_STATE: - default: - break; - } - break; - case DW_SURFACE_STATE: - break; - case DW_1D_MAP: - case DW_2D_MAP: - case DW_CUBE_MAP: - case DW_VOLUME_MAP: - case DW_CONSTANT_BUFFER: - case DW_CONSTANT_URB_ENTRY: - case DW_VERTEX_BUFFER: - case DW_INDEX_BUFFER: - default: - break; - } - - memcpy(dest, data, len); -} - - -/* In order to work, the memory layout has to be the same as the X - * server which created the aubfile. - */ -static int parse_block_header( struct aub_state *s ) -{ - struct aub_block_header *bh = (struct aub_block_header *)(s->map + s->csr); - void *data = (void *)(bh + 1); - unsigned int len = (bh->length + 3) & ~3; - - _mesa_printf("block header at 0x%x\n", s->csr); - - if (s->csr + len + sizeof(*bh) > s->sz) { - _mesa_printf("EOF in data in %s\n", __FUNCTION__); - return 1; - } - - if (bh->address_space == ADDR_GTT) { - - switch (bh->operation) - { - case BH_DATA_WRITE: { - void *dest = bmFindVirtual( s->intel, bh->address, len ); - if (dest == NULL) { - _mesa_printf("Couldn't find virtual address for offset %x\n", bh->address); - return 1; - } - -#if 1 - parse_data_write(s, bh, dest, data, len); -#else - memcpy(dest, data, len); -#endif - break; - } - case BH_COMMAND_WRITE: -#if 0 - intel_cmd_ioctl(s->intel, (void *)data, len); -#else - if (parse_commands(s, data, len) != 0) - _mesa_printf("parse_commands failed\n"); -#endif - break; - default: - break; - } - } - - s->csr += sizeof(*bh) + len; - return 0; -} - - -#define AUB_FILE_HEADER 0xe085000b -#define AUB_BLOCK_HEADER 0xe0c10003 -#define AUB_DUMP_BMP 0xe09e0004 - -int brw_playback_aubfile(struct brw_context *brw, - const char *filename) -{ - struct intel_context *intel = &brw->intel; - struct aub_state state; - struct stat sb; - int fd; - int retval = 0; - - state.intel = intel; - - fd = open(filename, O_RDONLY, 0); - if (fd < 0) { - _mesa_printf("couldn't open aubfile: %s\n", filename); - return 1; - } - - if (fstat(fd, &sb) != 0) { - _mesa_printf("couldn't open %s\n", filename); - return 1; - } - - state.csr = 0; - state.sz = sb.st_size; - state.map = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); - - if (state.map == NULL) { - _mesa_printf("couldn't mmap %s\n", filename); - return 1; - } - - LOCK_HARDWARE(intel); - { - /* Make sure we don't confuse anything that might happen to be - * going on with the hardware: - */ -/* bmEvictAll(intel); */ -/* intel->vtbl.lost_hardware(intel); */ - - - /* Replay the aubfile item by item: - */ - while (retval == 0 && - state.csr != state.sz) { - unsigned int insn = *(unsigned int *)(state.map + state.csr); - - switch (insn) { - case AUB_FILE_HEADER: - retval = gobble(&state, sizeof(struct aub_file_header)); - break; - - case AUB_BLOCK_HEADER: - retval = parse_block_header(&state); - break; - - case AUB_DUMP_BMP: - retval = gobble(&state, sizeof(struct aub_dump_bmp)); - break; - - default: - _mesa_printf("unknown instruction %x\n", insn); - retval = 1; - break; - } - } - } - UNLOCK_HARDWARE(intel); - return retval; -} - - - - - - - diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index 8a1d1527db3..fa8121e02d3 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -34,10 +34,10 @@ #include "brw_state.h" #include "brw_defines.h" #include "brw_util.h" -#include "macros.h" -#include "enums.h" +#include "main/macros.h" +#include "main/enums.h" -static void upload_cc_vp( struct brw_context *brw ) +static void prepare_cc_vp( struct brw_context *brw ) { struct brw_cc_viewport ccv; @@ -46,7 +46,8 @@ static void upload_cc_vp( struct brw_context *brw ) ccv.min_depth = 0.0; ccv.max_depth = 1.0; - brw->cc.vp_gs_offset = brw_cache_data( &brw->cache[BRW_CC_VP], &ccv ); + dri_bo_unreference(brw->cc.vp_bo); + brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 ); } const struct brw_tracked_state brw_cc_vp = { @@ -55,57 +56,148 @@ const struct brw_tracked_state brw_cc_vp = { .brw = BRW_NEW_CONTEXT, .cache = 0 }, - .update = upload_cc_vp + .prepare = prepare_cc_vp }; +struct brw_cc_unit_key { + GLboolean stencil, stencil_two_side, color_blend, alpha_enabled; -static void upload_cc_unit( struct brw_context *brw ) + GLenum stencil_func[2], stencil_fail_op[2]; + GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2]; + GLubyte stencil_ref[2], stencil_write_mask[2], stencil_test_mask[2]; + GLenum logic_op; + + GLenum blend_eq_rgb, blend_eq_a; + GLenum blend_src_rgb, blend_src_a; + GLenum blend_dst_rgb, blend_dst_a; + + GLenum alpha_func; + GLclampf alpha_ref; + + GLboolean dither; + + GLboolean depth_test, depth_write; + GLenum depth_func; +}; + +static void +cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) +{ + struct gl_stencil_attrib *stencil = brw->attribs.Stencil; + + memset(key, 0, sizeof(*key)); + + key->stencil = stencil->Enabled; + key->stencil_two_side = stencil->_TestTwoSide; + + if (key->stencil) { + key->stencil_func[0] = stencil->Function[0]; + key->stencil_fail_op[0] = stencil->FailFunc[0]; + key->stencil_pass_depth_fail_op[0] = stencil->ZFailFunc[0]; + key->stencil_pass_depth_pass_op[0] = stencil->ZPassFunc[0]; + key->stencil_ref[0] = stencil->Ref[0]; + key->stencil_write_mask[0] = stencil->WriteMask[0]; + key->stencil_test_mask[0] = stencil->ValueMask[0]; + } + if (key->stencil_two_side) { + key->stencil_func[1] = stencil->Function[1]; + key->stencil_fail_op[1] = stencil->FailFunc[1]; + key->stencil_pass_depth_fail_op[1] = stencil->ZFailFunc[1]; + key->stencil_pass_depth_pass_op[1] = stencil->ZPassFunc[1]; + key->stencil_ref[1] = stencil->Ref[1]; + key->stencil_write_mask[1] = stencil->WriteMask[1]; + key->stencil_test_mask[1] = stencil->ValueMask[1]; + } + + if (brw->attribs.Color->_LogicOpEnabled) + key->logic_op = brw->attribs.Color->LogicOp; + else + key->logic_op = GL_COPY; + + key->color_blend = brw->attribs.Color->BlendEnabled; + if (key->color_blend) { + key->blend_eq_rgb = brw->attribs.Color->BlendEquationRGB; + key->blend_eq_a = brw->attribs.Color->BlendEquationA; + key->blend_src_rgb = brw->attribs.Color->BlendSrcRGB; + key->blend_dst_rgb = brw->attribs.Color->BlendDstRGB; + key->blend_src_a = brw->attribs.Color->BlendSrcA; + key->blend_dst_a = brw->attribs.Color->BlendDstA; + } + + key->alpha_enabled = brw->attribs.Color->AlphaEnabled; + if (key->alpha_enabled) { + key->alpha_func = brw->attribs.Color->AlphaFunc; + key->alpha_ref = brw->attribs.Color->AlphaRef; + } + + key->dither = brw->attribs.Color->DitherFlag; + + key->depth_test = brw->attribs.Depth->Test; + if (key->depth_test) { + key->depth_func = brw->attribs.Depth->Func; + key->depth_write = brw->attribs.Depth->Mask; + } +} + +/** + * Creates the state cache entry for the given CC unit key. + */ +static dri_bo * +cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) { struct brw_cc_unit_state cc; - + dri_bo *bo; + memset(&cc, 0, sizeof(cc)); /* _NEW_STENCIL */ - if (brw->attribs.Stencil->Enabled) { - cc.cc0.stencil_enable = brw->attribs.Stencil->Enabled; - cc.cc0.stencil_func = intel_translate_compare_func(brw->attribs.Stencil->Function[0]); - cc.cc0.stencil_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->FailFunc[0]); - cc.cc0.stencil_pass_depth_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->ZFailFunc[0]); - cc.cc0.stencil_pass_depth_pass_op = intel_translate_stencil_op(brw->attribs.Stencil->ZPassFunc[0]); - cc.cc1.stencil_ref = brw->attribs.Stencil->Ref[0]; - cc.cc1.stencil_write_mask = brw->attribs.Stencil->WriteMask[0]; - cc.cc1.stencil_test_mask = brw->attribs.Stencil->ValueMask[0]; - - if (brw->attribs.Stencil->TestTwoSide) { - cc.cc0.bf_stencil_enable = brw->attribs.Stencil->TestTwoSide; - cc.cc0.bf_stencil_func = intel_translate_compare_func(brw->attribs.Stencil->Function[1]); - cc.cc0.bf_stencil_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->FailFunc[1]); - cc.cc0.bf_stencil_pass_depth_fail_op = intel_translate_stencil_op(brw->attribs.Stencil->ZFailFunc[1]); - cc.cc0.bf_stencil_pass_depth_pass_op = intel_translate_stencil_op(brw->attribs.Stencil->ZPassFunc[1]); - cc.cc1.bf_stencil_ref = brw->attribs.Stencil->Ref[1]; - cc.cc2.bf_stencil_write_mask = brw->attribs.Stencil->WriteMask[1]; - cc.cc2.bf_stencil_test_mask = brw->attribs.Stencil->ValueMask[1]; + if (key->stencil) { + cc.cc0.stencil_enable = 1; + cc.cc0.stencil_func = + intel_translate_compare_func(key->stencil_func[0]); + cc.cc0.stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[0]); + cc.cc0.stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); + cc.cc0.stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); + cc.cc1.stencil_ref = key->stencil_ref[0]; + cc.cc1.stencil_write_mask = key->stencil_write_mask[0]; + cc.cc1.stencil_test_mask = key->stencil_test_mask[0]; + + if (key->stencil_two_side) { + cc.cc0.bf_stencil_enable = 1; + cc.cc0.bf_stencil_func = + intel_translate_compare_func(key->stencil_func[1]); + cc.cc0.bf_stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[1]); + cc.cc0.bf_stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); + cc.cc0.bf_stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); + cc.cc1.bf_stencil_ref = key->stencil_ref[1]; + cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1]; + cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1]; } /* Not really sure about this: */ - if (brw->attribs.Stencil->WriteMask[0] || - (brw->attribs.Stencil->TestTwoSide && brw->attribs.Stencil->WriteMask[1])) + if (key->stencil_write_mask[0] || + (key->stencil_two_side && key->stencil_write_mask[1])) cc.cc0.stencil_write_enable = 1; } /* _NEW_COLOR */ - if (brw->attribs.Color->_LogicOpEnabled) { + if (key->logic_op != GL_COPY) { cc.cc2.logicop_enable = 1; - cc.cc5.logicop_func = intel_translate_logic_op( brw->attribs.Color->LogicOp ); - } - else if (brw->attribs.Color->BlendEnabled) { - GLenum eqRGB = brw->attribs.Color->BlendEquationRGB; - GLenum eqA = brw->attribs.Color->BlendEquationA; - GLenum srcRGB = brw->attribs.Color->BlendSrcRGB; - GLenum dstRGB = brw->attribs.Color->BlendDstRGB; - GLenum srcA = brw->attribs.Color->BlendSrcA; - GLenum dstA = brw->attribs.Color->BlendDstA; + cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op); + } else if (key->color_blend) { + GLenum eqRGB = key->blend_eq_rgb; + GLenum eqA = key->blend_eq_a; + GLenum srcRGB = key->blend_src_rgb; + GLenum dstRGB = key->blend_dst_rgb; + GLenum srcA = key->blend_src_a; + GLenum dstA = key->blend_dst_a; if (eqRGB == GL_MIN || eqRGB == GL_MAX) { srcRGB = dstRGB = GL_ONE; @@ -115,49 +207,78 @@ static void upload_cc_unit( struct brw_context *brw ) srcA = dstA = GL_ONE; } - cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); - cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); - cc.cc6.blend_function = brw_translate_blend_equation( eqRGB ); + cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); + cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); + cc.cc6.blend_function = brw_translate_blend_equation(eqRGB); - cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); - cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); - cc.cc5.ia_blend_function = brw_translate_blend_equation( eqA ); + cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); + cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA); cc.cc3.blend_enable = 1; - cc.cc3.ia_blend_enable = (srcA != srcRGB || - dstA != dstRGB || + cc.cc3.ia_blend_enable = (srcA != srcRGB || + dstA != dstRGB || eqA != eqRGB); } - if (brw->attribs.Color->AlphaEnabled) { + if (key->alpha_enabled) { cc.cc3.alpha_test = 1; - cc.cc3.alpha_test_func = intel_translate_compare_func(brw->attribs.Color->AlphaFunc); - - UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], brw->attribs.Color->AlphaRef); - + cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func); cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + + UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref); } - if (brw->attribs.Color->DitherFlag) { + if (key->dither) { cc.cc5.dither_enable = 1; - cc.cc6.y_dither_offset = 0; - cc.cc6.x_dither_offset = 0; + cc.cc6.y_dither_offset = 0; + cc.cc6.x_dither_offset = 0; } /* _NEW_DEPTH */ - if (brw->attribs.Depth->Test) { - cc.cc2.depth_test = brw->attribs.Depth->Test; - cc.cc2.depth_test_function = intel_translate_compare_func(brw->attribs.Depth->Func); - cc.cc2.depth_write_enable = brw->attribs.Depth->Mask; + if (key->depth_test) { + cc.cc2.depth_test = 1; + cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func); + cc.cc2.depth_write_enable = key->depth_write; } - + /* CACHE_NEW_CC_VP */ - cc.cc4.cc_viewport_state_offset = brw->cc.vp_gs_offset >> 5; - + cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */ + if (INTEL_DEBUG & DEBUG_STATS) - cc.cc5.statistics_enable = 1; + cc.cc5.statistics_enable = 1; + + bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT, + key, sizeof(*key), + &brw->cc.vp_bo, 1, + &cc, sizeof(cc), + NULL, NULL); + + /* Emit CC viewport relocation */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, + 0, + 0, + offsetof(struct brw_cc_unit_state, cc4), + brw->cc.vp_bo); + + return bo; +} + +static void prepare_cc_unit( struct brw_context *brw ) +{ + struct brw_cc_unit_key key; + + cc_unit_populate_key(brw, &key); + + dri_bo_unreference(brw->cc.state_bo); + brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_CC_UNIT, + &key, sizeof(key), + &brw->cc.vp_bo, 1, + NULL); - brw->cc.state_gs_offset = brw_cache_data( &brw->cache[BRW_CC_UNIT], &cc ); + if (brw->cc.state_bo == NULL) + brw->cc.state_bo = cc_unit_create_from_key(brw, &key); } const struct brw_tracked_state brw_cc_unit = { @@ -166,7 +287,7 @@ const struct brw_tracked_state brw_cc_unit = { .brw = 0, .cache = CACHE_NEW_CC_VP }, - .update = upload_cc_unit + .prepare = prepare_cc_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 3bec153075a..38d8b704d7e 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -29,9 +29,9 @@ * Keith Whitwell <[email protected]> */ -#include "glheader.h" -#include "macros.h" -#include "enums.h" +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" #include "intel_batchbuffer.h" @@ -60,7 +60,7 @@ static void compile_clip_prog( struct brw_context *brw, /* Begin the compilation: */ - brw_init_compile(&c.func); + brw_init_compile(brw, &c.func); c.func.single_program_flow = 1; @@ -119,31 +119,19 @@ static void compile_clip_prog( struct brw_context *brw, /* Upload */ - brw->clip.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_CLIP_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->clip.prog_data ); + dri_bo_unreference(brw->clip.prog_bo); + brw->clip.prog_bo = brw_upload_cache( &brw->cache, + BRW_CLIP_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->clip.prog_data ); } - -static GLboolean search_cache( struct brw_context *brw, - struct brw_clip_prog_key *key ) -{ - return brw_search_cache(&brw->cache[BRW_CLIP_PROG], - key, sizeof(*key), - &brw->clip.prog_data, - &brw->clip.prog_gs_offset); -} - - - - /* Calculate interpolants for triangle and line rasterization. */ -static void upload_clip_prog( struct brw_context *brw ) +static void upload_clip_prog(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct brw_clip_prog_key key; @@ -180,12 +168,10 @@ static void upload_clip_prog( struct brw_context *brw ) offset_front = 0; break; case GL_LINE: - key.do_unfilled = 1; fill_front = CLIP_LINE; offset_front = brw->attribs.Polygon->OffsetLine; break; case GL_POINT: - key.do_unfilled = 1; fill_front = CLIP_POINT; offset_front = brw->attribs.Polygon->OffsetPoint; break; @@ -200,22 +186,23 @@ static void upload_clip_prog( struct brw_context *brw ) offset_back = 0; break; case GL_LINE: - key.do_unfilled = 1; fill_back = CLIP_LINE; offset_back = brw->attribs.Polygon->OffsetLine; break; case GL_POINT: - key.do_unfilled = 1; fill_back = CLIP_POINT; offset_back = brw->attribs.Polygon->OffsetPoint; break; } } - /* Most cases the fixed function units will handle. Cases where - * one or more polygon faces are unfilled will require help: - */ - if (key.do_unfilled) { + if (brw->attribs.Polygon->BackMode != GL_FILL || + brw->attribs.Polygon->FrontMode != GL_FILL) { + key.do_unfilled = 1; + + /* Most cases the fixed function units will handle. Cases where + * one or more polygon faces are unfilled will require help: + */ key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; if (offset_back || offset_front) { @@ -248,7 +235,12 @@ static void upload_clip_prog( struct brw_context *brw ) } } - if (!search_cache(brw, &key)) + dri_bo_unreference(brw->clip.prog_bo); + brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG, + &key, sizeof(key), + NULL, 0, + &brw->clip.prog_data); + if (brw->clip.prog_bo == NULL) compile_clip_prog( brw, &key ); } @@ -262,5 +254,5 @@ const struct brw_tracked_state brw_clip_prog = { .brw = (BRW_NEW_REDUCED_PRIMITIVE), .cache = CACHE_NEW_VS_PROG }, - .update = upload_clip_prog + .prepare = upload_clip_prog }; diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h index 49b2770a514..e06747864b5 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.h +++ b/src/mesa/drivers/dri/i965/brw_clip.h @@ -42,7 +42,7 @@ * up polygon offset and flatshading at this point: */ struct brw_clip_prog_key { - GLuint attrs:16; + GLuint attrs:32; GLuint primitive:4; GLuint nr_userclip:3; GLuint do_flat_shading:1; @@ -51,7 +51,7 @@ struct brw_clip_prog_key { GLuint fill_ccw:2; /* includes cull information */ GLuint offset_cw:1; GLuint offset_ccw:1; - GLuint pad0:1; + GLuint pad0:17; GLuint copy_bfc_cw:1; GLuint copy_bfc_ccw:1; @@ -167,4 +167,9 @@ void brw_clip_copy_colors( struct brw_clip_compile *c, void brw_clip_init_clipmask( struct brw_clip_compile *c ); +struct brw_reg get_tmp( struct brw_clip_compile *c ); + +void brw_clip_project_position(struct brw_clip_compile *c, + struct brw_reg pos ); + #endif diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c index 83182270eac..c45d48dff8e 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_line.c +++ b/src/mesa/drivers/dri/i965/brw_clip_line.c @@ -29,11 +29,11 @@ * Keith Whitwell <[email protected]> */ -#include "glheader.h" -#include "macros.h" -#include "enums.h" - +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" #include "shader/program.h" + #include "intel_batchbuffer.h" #include "brw_defines.h" @@ -130,6 +130,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) struct brw_instruction *plane_loop; struct brw_instruction *plane_active; struct brw_instruction *is_negative; + struct brw_instruction *is_neg2; struct brw_instruction *not_culled; struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); @@ -146,6 +147,16 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_clip_init_planes(c); brw_clip_init_clipmask(c); + /* -ve rhw workaround */ + if (!BRW_IS_G4X(p->brw)) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), + brw_imm_ud(1<<20)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f)); + } + + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + plane_loop = brw_DO(p, BRW_EXECUTE_1); { /* if (planemask & 1) @@ -183,13 +194,20 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) /* Coming back in. We know that both cannot be negative * because the line would have been culled in that case. */ - brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); - brw_math_invert(p, c->reg.t, c->reg.t); - brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); - brw_MOV(p, c->reg.t0, c->reg.t); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); + /* If both are positive, do nothing */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); + is_neg2 = brw_IF(p, BRW_EXECUTE_1); + { + brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); + brw_MOV(p, c->reg.t0, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + brw_ENDIF(p, is_neg2); } brw_ENDIF(p, is_negative); } diff --git a/src/mesa/drivers/dri/i965/brw_clip_point.c b/src/mesa/drivers/dri/i965/brw_clip_point.c index 2346980a562..d17b199b898 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_point.c +++ b/src/mesa/drivers/dri/i965/brw_clip_point.c @@ -29,11 +29,11 @@ * Keith Whitwell <[email protected]> */ -#include "glheader.h" -#include "macros.h" -#include "enums.h" - +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" #include "shader/program.h" + #include "intel_batchbuffer.h" #include "brw_defines.h" diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 1e6d6fa1762..740c7cbd109 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -32,55 +32,118 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "macros.h" +#include "main/macros.h" +struct brw_clip_unit_key { + unsigned int total_grf; + unsigned int urb_entry_read_length; + unsigned int curb_entry_read_length; + unsigned int clip_mode; + unsigned int curbe_offset; -static void upload_clip_unit( struct brw_context *brw ) -{ - struct brw_clip_unit_state clip; + unsigned int nr_urb_entries, urb_size; +}; - memset(&clip, 0, sizeof(clip)); +static void +clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) +{ + memset(key, 0, sizeof(*key)); /* CACHE_NEW_CLIP_PROG */ - clip.thread0.grf_reg_count = ((brw->clip.prog_data->total_grf-1) & ~15) / 16; - clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6; - clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length; - clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length; - clip.clip5.clip_mode = brw->clip.prog_data->clip_mode; + key->total_grf = brw->clip.prog_data->total_grf; + key->urb_entry_read_length = brw->clip.prog_data->urb_read_length; + key->curb_entry_read_length = brw->clip.prog_data->curb_read_length; + key->clip_mode = brw->clip.prog_data->clip_mode; /* BRW_NEW_CURBE_OFFSETS */ - clip.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + key->curbe_offset = brw->curbe.clip_start; /* BRW_NEW_URB_FENCE */ - clip.thread4.nr_urb_entries = brw->urb.nr_clip_entries; - clip.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; - clip.thread4.max_threads = 0; /* Hmm, maybe the max is 1 or 2 threads */ + key->nr_urb_entries = brw->urb.nr_clip_entries; + key->urb_size = brw->urb.vsize; +} - if (INTEL_DEBUG & DEBUG_STATS) - clip.thread4.stats_enable = 1; +static dri_bo * +clip_unit_create_from_key(struct brw_context *brw, + struct brw_clip_unit_key *key) +{ + struct brw_clip_unit_state clip; + dri_bo *bo; + + memset(&clip, 0, sizeof(clip)); + + clip.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + /* reloc */ + clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6; - /* CONSTANT */ clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; clip.thread1.single_program_flow = 1; + + clip.thread3.urb_entry_read_length = key->urb_entry_read_length; + clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length; + clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; clip.thread3.dispatch_grf_start_reg = 1; clip.thread3.urb_entry_read_offset = 0; + + clip.thread4.nr_urb_entries = key->nr_urb_entries; + clip.thread4.urb_entry_allocation_size = key->urb_size - 1; + clip.thread4.max_threads = 1; /* 2 threads */ + + if (INTEL_DEBUG & DEBUG_STATS) + clip.thread4.stats_enable = 1; + clip.clip5.userclip_enable_flags = 0x7f; clip.clip5.userclip_must_clip = 1; clip.clip5.guard_band_enable = 0; clip.clip5.viewport_z_clip_enable = 1; clip.clip5.viewport_xy_clip_enable = 1; clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE; - clip.clip5.api_mode = BRW_CLIP_API_OGL; + clip.clip5.api_mode = BRW_CLIP_API_OGL; + clip.clip5.clip_mode = key->clip_mode; + + if (BRW_IS_G4X(brw)) + clip.clip5.negative_w_clip_test = 1; + clip.clip6.clipper_viewport_state_ptr = 0; clip.viewport_xmin = -1; clip.viewport_xmax = 1; clip.viewport_ymin = -1; clip.viewport_ymax = 1; - brw->clip.state_gs_offset = brw_cache_data( &brw->cache[BRW_CLIP_UNIT], &clip ); + bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT, + key, sizeof(*key), + &brw->clip.prog_bo, 1, + &clip, sizeof(clip), + NULL, NULL); + + /* Emit clip program relocation */ + assert(brw->clip.prog_bo); + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, + 0, + clip.thread0.grf_reg_count << 1, + offsetof(struct brw_clip_unit_state, thread0), + brw->clip.prog_bo); + + return bo; } +static void upload_clip_unit( struct brw_context *brw ) +{ + struct brw_clip_unit_key key; + + clip_unit_populate_key(brw, &key); + + dri_bo_unreference(brw->clip.state_bo); + brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT, + &key, sizeof(key), + &brw->clip.prog_bo, 1, + NULL); + if (brw->clip.state_bo == NULL) { + brw->clip.state_bo = clip_unit_create_from_key(brw, &key); + } +} const struct brw_tracked_state brw_clip_unit = { .dirty = { @@ -89,5 +152,5 @@ const struct brw_tracked_state brw_clip_unit = { BRW_NEW_URB_FENCE), .cache = CACHE_NEW_CLIP_PROG }, - .update = upload_clip_unit + .prepare = upload_clip_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c index f62b02cedfd..1dbba37fe7e 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_tri.c +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -29,11 +29,11 @@ * Keith Whitwell <[email protected]> */ -#include "glheader.h" -#include "macros.h" -#include "enums.h" - +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" #include "shader/program.h" + #include "intel_batchbuffer.h" #include "brw_defines.h" @@ -42,6 +42,10 @@ #include "brw_util.h" #include "brw_clip.h" +static void release_tmps( struct brw_clip_compile *c ) +{ + c->last_tmp = c->first_tmp; +} void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, @@ -78,7 +82,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, } c->reg.t = brw_vec1_grf(i, 0); - c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_UD); + c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D); c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD); c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); c->reg.plane_equation = brw_vec4_grf(i, 4); @@ -435,15 +439,103 @@ static void maybe_do_clip_tri( struct brw_clip_compile *c ) brw_ENDIF(p, do_clip); } - +static void brw_clip_test( struct brw_clip_compile *c ) +{ + struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + + struct brw_reg v0 = get_tmp(c); + struct brw_reg v1 = get_tmp(c); + struct brw_reg v2 = get_tmp(c); + + struct brw_indirect vt0 = brw_indirect(0, 0); + struct brw_indirect vt1 = brw_indirect(1, 0); + struct brw_indirect vt2 = brw_indirect(2, 0); + + struct brw_compile *p = &c->func; + + brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); + brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); + brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); + brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS])); + + /* test nearz, xmin, ymin plane */ + brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, t2, t3); + brw_OR(p, t, t, t1); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* test farz, xmax, ymax plane */ + brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, t2, t3); + brw_OR(p, t, t, t1); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + release_tmps(c); +} void brw_emit_tri_clip( struct brw_clip_compile *c ) { + struct brw_instruction *neg_rhw; + struct brw_compile *p = &c->func; brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); brw_clip_tri_init_vertices(c); brw_clip_init_clipmask(c); + /* if -ve rhw workaround bit is set, + do cliptest */ + if (!BRW_IS_G4X(p->brw)) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), + brw_imm_ud(1<<20)); + neg_rhw = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_test(c); + } + brw_ENDIF(p, neg_rhw); + } /* Can't push into do_clip_tri because with polygon (or quad) * flatshading, need to apply the flatshade here because we don't * respect the PV when converting to trifan for emit: @@ -462,6 +554,3 @@ void brw_emit_tri_clip( struct brw_clip_compile *c ) */ brw_clip_kill_thread(c); } - - - diff --git a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c index 918e0001870..d7ca517927b 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c +++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c @@ -29,11 +29,11 @@ * Keith Whitwell <[email protected]> */ -#include "glheader.h" -#include "macros.h" -#include "enums.h" - +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" #include "shader/program.h" + #include "intel_batchbuffer.h" #include "brw_defines.h" @@ -58,10 +58,30 @@ static void compute_tri_direction( struct brw_clip_compile *c ) struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); + struct brw_reg v0n = get_tmp(c); + struct brw_reg v1n = get_tmp(c); + struct brw_reg v2n = get_tmp(c); + + /* Convert to NDC. + * NOTE: We can't modify the original vertex coordinates, + * as it may impact further operations. + * So, we have to keep normalized coordinates in temp registers. + * + * TBD-KC + * Try to optimize unnecessary MOV's. + */ + brw_MOV(p, v0n, v0); + brw_MOV(p, v1n, v1); + brw_MOV(p, v2n, v2); + + brw_clip_project_position(c, v0n); + brw_clip_project_position(c, v1n); + brw_clip_project_position(c, v2n); + /* Calculate the vectors of two edges of the triangle: */ - brw_ADD(p, e, v0, negate(v2)); - brw_ADD(p, f, v1, negate(v2)); + brw_ADD(p, e, v0n, negate(v2n)); + brw_ADD(p, f, v1n, negate(v2n)); /* Take their crossproduct: */ @@ -220,8 +240,8 @@ static void apply_one_offset( struct brw_clip_compile *c, struct brw_indirect vert ) { struct brw_compile *p = &c->func; - struct brw_reg pos = deref_4f(vert, c->offset[VERT_RESULT_HPOS]); - struct brw_reg z = get_element(pos, 2); + struct brw_reg z = deref_1f(vert, c->header_position_offset + + 2 * type_sz(BRW_REGISTER_TYPE_F)); brw_ADD(p, z, z, vec1(c->reg.offset)); } diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c index 19bef19801a..9d3b0be694a 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_util.c +++ b/src/mesa/drivers/dri/i965/brw_clip_util.c @@ -30,11 +30,11 @@ */ -#include "glheader.h" -#include "macros.h" -#include "enums.h" - +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" #include "shader/program.h" + #include "intel_batchbuffer.h" #include "brw_defines.h" @@ -46,8 +46,7 @@ - -static struct brw_reg get_tmp( struct brw_clip_compile *c ) +struct brw_reg get_tmp( struct brw_clip_compile *c ) { struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); @@ -90,7 +89,7 @@ void brw_clip_init_planes( struct brw_clip_compile *c ) /* Project 'pos' to screen space (or back again), overwrite with results: */ -static void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ) +void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ) { struct brw_compile *p = &c->func; @@ -262,7 +261,7 @@ void brw_clip_kill_thread(struct brw_clip_compile *c) c->reg.R0, 0, /* allocate */ 0, /* used */ - 0, /* msg len */ + 1, /* msg len */ 0, /* response len */ 1, /* eot */ 1, /* writes complete */ @@ -272,6 +271,7 @@ void brw_clip_kill_thread(struct brw_clip_compile *c) + struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ) { return brw_address(c->reg.fixed_planes); @@ -327,8 +327,7 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c ) /* Shift so that lowest outcode bit is rightmost: */ - brw_MOV(p, c->reg.planemask, incoming); - brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(26)); + brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26)); if (c->key.nr_userclip) { struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD); @@ -342,13 +341,5 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c ) release_tmp(c, tmp); } - - /* Test for -ve rhw workaround - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_AND(p, vec1(brw_null_reg()), incoming, brw_imm_ud(1<<20)); - brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 397a9bd3f5c..e2bc08a6cb7 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -30,42 +30,47 @@ */ +#include "main/imports.h" +#include "main/api_noop.h" +#include "main/vtxfmt.h" +#include "main/simple_list.h" +#include "shader/shader_api.h" + #include "brw_context.h" -#include "brw_aub.h" #include "brw_defines.h" #include "brw_draw.h" +#include "brw_state.h" #include "brw_vs.h" -#include "imports.h" #include "intel_tex.h" #include "intel_blit.h" #include "intel_batchbuffer.h" +#include "intel_pixel.h" +#include "intel_span.h" +#include "tnl/t_pipeline.h" #include "utils.h" -#include "api_noop.h" -#include "vtxfmt.h" + /*************************************** * Mesa's Driver Functions ***************************************/ -static const struct dri_extension brw_extensions[] = +static void brwUseProgram(GLcontext *ctx, GLuint program) { - { "GL_ARB_depth_texture", NULL }, - { "GL_ARB_fragment_program", NULL }, - { "GL_ARB_shadow", NULL }, - { "GL_EXT_shadow_funcs", NULL }, - /* ARB extn won't work if not enabled */ - { "GL_SGIX_depth_texture", NULL }, - { "GL_ARB_texture_env_crossbar", NULL }, - { NULL, NULL } -}; - + _mesa_use_program(ctx, program); +} +static void brwInitProgFuncs( struct dd_function_table *functions ) +{ + functions->UseProgram = brwUseProgram; +} static void brwInitDriverFunctions( struct dd_function_table *functions ) { intelInitDriverFunctions( functions ); - brwInitTextureFuncs( functions ); + brwInitFragProgFuncs( functions ); + brwInitProgFuncs( functions ); + brw_init_queryobj_functions(functions); } @@ -116,10 +121,15 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, return GL_FALSE; } + /* Initialize swrast, tnl driver tables: */ + intelInitSpanFuncs(ctx); + + TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; + ctx->Const.MaxTextureUnits = BRW_MAX_TEX_UNIT; ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT; ctx->Const.MaxTextureCoordUnits = BRW_MAX_TEX_UNIT; - + ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */ /* Advertise the full hardware capabilities. The new memory * manager should cope much better with overload situations: @@ -128,15 +138,9 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, ctx->Const.Max3DTextureLevels = 9; ctx->Const.MaxCubeTextureLevels = 12; ctx->Const.MaxTextureRectSize = (1<<11); - ctx->Const.MaxTextureUnits = BRW_MAX_TEX_UNIT; /* ctx->Const.MaxNativeVertexProgramTemps = 32; */ - - driInitExtensions( ctx, brw_extensions, GL_FALSE ); - - brw_aub_init( brw ); - brw_init_attribs( brw ); brw_init_metaops( brw ); brw_init_state( brw ); @@ -144,25 +148,14 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, brw->state.dirty.mesa = ~0; brw->state.dirty.brw = ~0; - memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind)); - brw->emit_state_always = 0; - ctx->FragmentProgram._MaintainTexEnvProgram = 1; + ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; + ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - brw_draw_init( brw ); - - brw_ProgramCacheInit( ctx ); - - brw_FrameBufferTexInit( brw ); + make_empty_list(&brw->query.active_head); - { - const char *filename = getenv("INTEL_REPLAY"); - if (filename) { - brw_playback_aubfile(brw, filename); - exit(0); - } - } + brw_draw_init( brw ); return GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 08fdc545205..e3904be977f 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -35,7 +35,7 @@ #include "intel_context.h" #include "brw_structs.h" -#include "imports.h" +#include "main/imports.h" /* Glossary: @@ -130,18 +130,29 @@ struct brw_context; #define BRW_NEW_CONTEXT 0x80 #define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 #define BRW_NEW_INPUT_VARYING 0x200 -#define BRW_NEW_TNL_PROGRAM 0x400 #define BRW_NEW_PSP 0x800 #define BRW_NEW_METAOPS 0x1000 #define BRW_NEW_FENCE 0x2000 #define BRW_NEW_LOCK 0x4000 - - +#define BRW_NEW_INDICES 0x8000 +#define BRW_NEW_VERTICES 0x10000 +/** + * Used for any batch entry with a relocated pointer that will be used + * by any 3D rendering. + */ +#define BRW_NEW_BATCH 0x8000 +/** brw->depth_region updated */ +#define BRW_NEW_DEPTH_BUFFER 0x10000 struct brw_state_flags { + /** State update flags signalled by mesa internals */ GLuint mesa; - GLuint cache; + /** + * State update flags signalled as the result of brw_tracked_state updates + */ GLuint brw; + /** State update flags signalled by brw_state_cache.c searches */ + GLuint cache; }; struct brw_vertex_program { @@ -230,32 +241,46 @@ struct brw_vs_ouput_sizes { #define BRW_MAX_TEX_UNIT 8 -#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 +#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + MAX_DRAW_BUFFERS -/* Create a fixed sized struct for caching binding tables: - */ -struct brw_surface_binding_table { - GLuint surf_ss_offset[BRW_WM_MAX_SURF]; -}; - - -struct brw_cache; - -struct brw_mem_pool { - struct buffer *buffer; - - GLuint size; - GLuint offset; /* offset of first free byte */ +enum brw_cache_id { + BRW_CC_VP, + BRW_CC_UNIT, + BRW_WM_PROG, + BRW_SAMPLER_DEFAULT_COLOR, + BRW_SAMPLER, + BRW_WM_UNIT, + BRW_SF_PROG, + BRW_SF_VP, + BRW_SF_UNIT, + BRW_VS_UNIT, + BRW_VS_PROG, + BRW_GS_UNIT, + BRW_GS_PROG, + BRW_CLIP_VP, + BRW_CLIP_UNIT, + BRW_CLIP_PROG, + BRW_SS_SURFACE, + BRW_SS_SURF_BIND, - struct brw_context *brw; + BRW_MAX_CACHE }; struct brw_cache_item { + /** + * Effectively part of the key, cache_id identifies what kind of state + * buffer is involved, and also which brw->state.dirty.cache flag should + * be set when this cache item is chosen. + */ + enum brw_cache_id cache_id; + /** 32-bit hash of the key data */ GLuint hash; GLuint key_size; /* for variable-sized keys */ const void *key; + dri_bo **reloc_bufs; + GLuint nr_reloc_bufs; - GLuint offset; /* offset within pool's buffer */ + dri_bo *bo; GLuint data_size; struct brw_cache_item *next; @@ -264,23 +289,19 @@ struct brw_cache_item { struct brw_cache { - GLuint id; - - const char *name; - struct brw_context *brw; - struct brw_mem_pool *pool; struct brw_cache_item **items; GLuint size, n_items; - - GLuint key_size; /* for fixed-size keys */ - GLuint aux_size; - GLuint aub_type; - GLuint aub_sub_type; - - GLuint last_addr; /* offset of active item */ + GLuint key_size[BRW_MAX_CACHE]; /* for fixed-size keys */ + GLuint aux_size[BRW_MAX_CACHE]; + char *name[BRW_MAX_CACHE]; + + /* Record of the last BOs chosen for each cache_id. Used to set + * brw->state.dirty.cache when a new cache item is chosen. + */ + dri_bo *last_bo[BRW_MAX_CACHE]; }; @@ -312,34 +333,8 @@ struct brw_state_pointers { */ struct brw_tracked_state { struct brw_state_flags dirty; - void (*update)( struct brw_context *brw ); -}; - - -enum brw_cache_id { - BRW_CC_VP, - BRW_CC_UNIT, - BRW_WM_PROG, - BRW_SAMPLER_DEFAULT_COLOR, - BRW_SAMPLER, - BRW_WM_UNIT, - BRW_SF_PROG, - BRW_SF_VP, - BRW_SF_UNIT, - BRW_VS_UNIT, - BRW_VS_PROG, - BRW_GS_UNIT, - BRW_GS_PROG, - BRW_CLIP_VP, - BRW_CLIP_UNIT, - BRW_CLIP_PROG, - - /* These two are in the SS pool: - */ - BRW_SS_SURFACE, - BRW_SS_SURF_BIND, - - BRW_MAX_CACHE + void (*prepare)( struct brw_context *brw ); + void (*emit)( struct brw_context *brw ); }; /* Flags for brw->state.cache. @@ -363,16 +358,6 @@ enum brw_cache_id { #define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE) #define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND) - - - -enum brw_mempool_id { - BRW_GS_POOL, - BRW_SS_POOL, - BRW_MAX_POOL -}; - - struct brw_cached_batch_item { struct header *header; GLuint sz; @@ -389,12 +374,16 @@ struct brw_cached_batch_item { struct brw_vertex_element { const struct gl_client_array *glarray; - struct brw_vertex_element_state *vep; - - GLuint index; + /** Size of a complete element */ GLuint element_size; + /** Number of uploaded elements for this input. */ GLuint count; - GLuint vbo_rebase_offset; + /** Byte stride between elements in the uploaded array */ + GLuint stride; + /** Offset of the first element within the buffer object */ + unsigned int offset; + /** Buffer object containing the uploaded vertex data */ + dri_bo *bo; }; @@ -421,7 +410,22 @@ struct brw_tnl_cache { GLuint size, n_items; }; +struct brw_query_object { + struct gl_query_object Base; + + /** Doubly linked list of active query objects in the context. */ + struct brw_query_object *prev, *next; + + /** Last query BO associated with this query. */ + dri_bo *bo; + /** First index in bo with query data for this object. */ + int first_index; + /** Last index in bo with query data for this object. */ + int last_index; + /* Total count of pixels from previous BOs */ + unsigned int count; +}; struct brw_context { @@ -429,51 +433,67 @@ struct brw_context GLuint primitive; GLboolean emit_state_always; - GLboolean wrap; GLboolean tmp_fallback; + GLboolean no_batch_wrap; struct { struct brw_state_flags dirty; struct brw_tracked_state **atoms; GLuint nr_atoms; - - struct intel_region *draw_region; + GLuint nr_draw_regions; + struct intel_region *draw_regions[MAX_DRAW_BUFFERS]; struct intel_region *depth_region; + + /** + * List of buffers accumulated in brw_validate_state to receive + * dri_bo_check_aperture treatment before exec, so we can know if we + * should flush the batch and try again before emitting primitives. + * + * This can be a fixed number as we only have a limited number of + * objects referenced from the batchbuffer in a primitive emit, + * consisting of the vertex buffers, pipelined state pointers, + * the CURBE, the depth buffer, and a query BO. + */ + dri_bo *validated_bos[VERT_ATTRIB_MAX + 16]; + int validated_bo_count; } state; struct brw_state_pointers attribs; - struct brw_mem_pool pool[BRW_MAX_POOL]; - struct brw_cache cache[BRW_MAX_CACHE]; + struct brw_cache cache; struct brw_cached_batch_item *cached_batch_items; struct { - - /* Arrays with buffer objects to copy non-bufferobj arrays into - * for upload: - */ - struct gl_client_array vbo_array[VERT_ATTRIB_MAX]; - struct brw_vertex_element inputs[VERT_ATTRIB_MAX]; #define BRW_NR_UPLOAD_BUFS 17 #define BRW_UPLOAD_INIT_SIZE (128*1024) struct { - struct gl_buffer_object *vbo[BRW_NR_UPLOAD_BUFS]; - GLuint buf; + dri_bo *bo; GLuint offset; - GLuint size; - GLuint wrap; } upload; /* Summary of size and varying of active arrays, so we can check * for changes to this state: */ struct brw_vertex_info info; + unsigned int min_index, max_index; } vb; struct { + /** + * Index buffer for this draw_prims call. + * + * Updates are signaled by BRW_NEW_INDICES. + */ + const struct _mesa_index_buffer *ib; + + dri_bo *bo; + unsigned int offset; + } ib; + + struct { /* Will be allocated on demand if needed. */ struct brw_state_pointers attribs; @@ -483,18 +503,17 @@ struct brw_context struct gl_buffer_object *vbo; struct intel_region *saved_draw_region; + GLuint saved_nr_draw_regions; struct intel_region *saved_depth_region; - GLuint restore_draw_mask; + GLuint restore_draw_buffers[MAX_DRAW_BUFFERS]; + GLuint restore_num_draw_buffers; + struct gl_fragment_program *restore_fp; GLboolean active; } metaops; - /* Track fixed function t&l in a vertex program: - */ - struct gl_vertex_program *tnl_program; - struct brw_tnl_cache tnl_program_cache; /* Active vertex program: */ @@ -552,42 +571,51 @@ struct brw_context */ struct brw_tracked_state tracked_state; - GLuint gs_offset; + dri_bo *curbe_bo; + /** Offset within curbe_bo of space for current curbe entry */ + GLuint curbe_offset; + /** Offset within curbe_bo of space for next curbe entry */ + GLuint curbe_next_offset; GLfloat *last_buf; GLuint last_bufsz; + /** + * Whether we should create a new bo instead of reusing the old one + * (if we just dispatch the batch pointing at the old one. + */ + GLboolean need_new_bo; } curbe; struct { struct brw_vs_prog_data *prog_data; - GLuint prog_gs_offset; - GLuint state_gs_offset; + dri_bo *prog_bo; + dri_bo *state_bo; } vs; struct { struct brw_gs_prog_data *prog_data; GLboolean prog_active; - GLuint prog_gs_offset; - GLuint state_gs_offset; + dri_bo *prog_bo; + dri_bo *state_bo; } gs; struct { struct brw_clip_prog_data *prog_data; - GLuint prog_gs_offset; - GLuint vp_gs_offset; - GLuint state_gs_offset; + dri_bo *prog_bo; + dri_bo *state_bo; + dri_bo *vp_bo; } clip; struct { struct brw_sf_prog_data *prog_data; - GLuint prog_gs_offset; - GLuint vp_gs_offset; - GLuint state_gs_offset; + dri_bo *prog_bo; + dri_bo *state_bo; + dri_bo *vp_bo; } sf; struct { @@ -598,36 +626,39 @@ struct brw_context */ GLuint input_size_masks[4]; - - /* State structs - */ - struct brw_sampler_default_color sdc[BRW_MAX_TEX_UNIT]; - struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; + /** Array of surface default colors (texture border color) */ + dri_bo *sdc_bo[BRW_MAX_TEX_UNIT]; GLuint render_surf; GLuint nr_surfaces; GLuint max_threads; - struct buffer *scratch_buffer; - GLuint scratch_buffer_size; + dri_bo *scratch_buffer; GLuint sampler_count; - GLuint sampler_gs_offset; + dri_bo *sampler_bo; - struct brw_surface_binding_table bind; - GLuint bind_ss_offset; + /** Binding table of pointers to surf_bo entries */ + dri_bo *bind_bo; + dri_bo *surf_bo[BRW_WM_MAX_SURF]; - GLuint prog_gs_offset; - GLuint state_gs_offset; + dri_bo *prog_bo; + dri_bo *state_bo; } wm; struct { - GLuint vp_gs_offset; - GLuint state_gs_offset; + dri_bo *prog_bo; + dri_bo *state_bo; + dri_bo *vp_bo; } cc; - + struct { + struct brw_query_object active_head; + dri_bo *bo; + int index; + GLboolean active; + } query; /* Used to give every program string a unique id */ GLuint program_id; @@ -652,24 +683,27 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, __DRIcontextPrivate *driContextPriv, void *sharedContextPrivate); - - /*====================================================================== - * brw_state.c + * brw_queryobj.c */ -void brw_validate_state( struct brw_context *brw ); -void brw_init_state( struct brw_context *brw ); -void brw_destroy_state( struct brw_context *brw ); - +void brw_init_queryobj_functions(struct dd_function_table *functions); +void brw_prepare_query_begin(struct brw_context *brw); +void brw_emit_query_begin(struct brw_context *brw); +void brw_emit_query_end(struct brw_context *brw); +/*====================================================================== + * brw_state_dump.c + */ +void brw_debug_batch(struct intel_context *intel); /*====================================================================== * brw_tex.c */ void brwUpdateTextureState( struct intel_context *intel ); -void brwInitTextureFuncs( struct dd_function_table *functions ); -void brw_FrameBufferTexInit( struct brw_context *brw ); +void brw_FrameBufferTexInit( struct brw_context *brw, + struct intel_region *region ); void brw_FrameBufferTexDestroy( struct brw_context *brw ); +void brw_validate_textures( struct brw_context *brw ); /*====================================================================== * brw_metaops.c @@ -696,11 +730,13 @@ void brw_upload_constant_buffer_state(struct brw_context *brw); * Inline conversion functions. These are better-typed than the * macros used previously: */ -static inline struct brw_context * +static INLINE struct brw_context * brw_context( GLcontext *ctx ) { return (struct brw_context *)ctx; } +#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1) + #endif diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 3f0aaa1f86d..c7bac7b0c52 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -31,10 +31,10 @@ -#include "glheader.h" -#include "context.h" -#include "macros.h" -#include "enums.h" +#include "main/glheader.h" +#include "main/context.h" +#include "main/macros.h" +#include "main/enums.h" #include "shader/prog_parameter.h" #include "shader/prog_statevars.h" #include "intel_batchbuffer.h" @@ -42,7 +42,6 @@ #include "brw_defines.h" #include "brw_state.h" #include "brw_util.h" -#include "brw_aub.h" /* Partition the CURBE between the various users of constant values: @@ -90,7 +89,7 @@ static void calculate_curbe_offsets( struct brw_context *brw ) */ if (nr_fp_regs > brw->curbe.wm_size || nr_vp_regs > brw->curbe.vs_size || - nr_clip_regs > brw->curbe.clip_size || + nr_clip_regs != brw->curbe.clip_size || (total_regs < brw->curbe.total_size / 4 && brw->curbe.total_size > 16)) { @@ -127,7 +126,7 @@ const struct brw_tracked_state brw_curbe_offsets = { .brw = BRW_NEW_VERTEX_PROGRAM, .cache = CACHE_NEW_WM_PROG }, - .update = calculate_curbe_offsets + .prepare = calculate_curbe_offsets }; @@ -156,19 +155,7 @@ void brw_upload_constant_buffer_state(struct brw_context *brw) assert(brw->urb.nr_cs_entries); BRW_CACHED_BATCH_STRUCT(brw, &cbs); -} - -#if 0 -const struct brw_tracked_state brw_constant_buffer_state = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_URB_FENCE, - .cache = 0 - }, - .update = brw_upload_constant_buffer_state -}; -#endif - +} static GLfloat fixed_plane[6][4] = { { 0, 0, -1, 1 }, @@ -183,12 +170,11 @@ static GLfloat fixed_plane[6][4] = { * cache mechanism, but maybe would benefit from a comparison against * the current uploaded set of constants. */ -static void upload_constant_buffer(struct brw_context *brw) +static void prepare_constant_buffer(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; - struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL]; GLuint sz = brw->curbe.total_size; GLuint bufsz = sz * 16 * sizeof(GLfloat); GLfloat *buf; @@ -202,20 +188,13 @@ static void upload_constant_buffer(struct brw_context *brw) brw->curbe.tracked_state.dirty.mesa |= fp->param_state; if (sz == 0) { - struct brw_constant_buffer cb; - cb.header.opcode = CMD_CONST_BUFFER; - cb.header.length = sizeof(cb)/4 - 2; - cb.header.valid = 0; - cb.bits0.buffer_length = 0; - cb.bits0.buffer_address = 0; - BRW_BATCH_STRUCT(brw, &cb); if (brw->curbe.last_buf) { free(brw->curbe.last_buf); brw->curbe.last_buf = NULL; brw->curbe.last_bufsz = 0; } - + return; } @@ -290,11 +269,11 @@ static void upload_constant_buffer(struct brw_context *brw) brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); } - if (brw->curbe.last_buf && + if (brw->curbe.curbe_bo != NULL && + brw->curbe.last_buf && bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { free(buf); -/* return; */ } else { if (brw->curbe.last_buf) @@ -302,61 +281,66 @@ static void upload_constant_buffer(struct brw_context *brw) brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; - - if (!brw_pool_alloc(pool, - bufsz, - 6, - &brw->curbe.gs_offset)) { - _mesa_printf("out of GS memory for curbe\n"); - assert(0); - return; + if (brw->curbe.curbe_bo != NULL && + (brw->curbe.need_new_bo || + brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)) + { + dri_bo_unreference(brw->curbe.curbe_bo); + brw->curbe.curbe_bo = NULL; + } + + if (brw->curbe.curbe_bo == NULL) { + /* Allocate a single page for CURBE entries for this batchbuffer. + * They're generally around 64b. + */ + brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE", + 4096, 1 << 6); + brw->curbe.curbe_next_offset = 0; } - + + brw->curbe.curbe_offset = brw->curbe.curbe_next_offset; + brw->curbe.curbe_next_offset += bufsz; + brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64); /* Copy data to the buffer: */ - bmBufferSubDataAUB(&brw->intel, - pool->buffer, - brw->curbe.gs_offset, - bufsz, - buf, - DW_CONSTANT_BUFFER, - 0); + dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf); } - /* TODO: only emit the constant_buffer packet when necessary, ie: - - contents have changed - - offset has changed - - hw requirements due to other packets emitted. - */ - { - struct brw_constant_buffer cb; - - memset(&cb, 0, sizeof(cb)); - - cb.header.opcode = CMD_CONST_BUFFER; - cb.header.length = sizeof(cb)/4 - 2; - cb.header.valid = 1; - cb.bits0.buffer_length = sz - 1; - cb.bits0.buffer_address = brw->curbe.gs_offset >> 6; - - /* Because this provokes an action (ie copy the constants into the - * URB), it shouldn't be shortcircuited if identical to the - * previous time - because eg. the urb destination may have - * changed, or the urb contents different to last time. - * - * Note that the data referred to is actually copied internally, - * not just used in place according to passed pointer. - * - * It appears that the CS unit takes care of using each available - * URB entry (Const URB Entry == CURBE) in turn, and issuing - * flushes as necessary when doublebuffering of CURBEs isn't - * possible. - */ -/* intel_batchbuffer_align(brw->intel.batch, 64, sizeof(cb)); */ - BRW_BATCH_STRUCT(brw, &cb); -/* intel_batchbuffer_align(brw->intel.batch, 64, 0); */ + brw_add_validated_bo(brw, brw->curbe.curbe_bo); + + /* Because this provokes an action (ie copy the constants into the + * URB), it shouldn't be shortcircuited if identical to the + * previous time - because eg. the urb destination may have + * changed, or the urb contents different to last time. + * + * Note that the data referred to is actually copied internally, + * not just used in place according to passed pointer. + * + * It appears that the CS unit takes care of using each available + * URB entry (Const URB Entry == CURBE) in turn, and issuing + * flushes as necessary when doublebuffering of CURBEs isn't + * possible. + */ +} + + +static void emit_constant_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLuint sz = brw->curbe.total_size; + + BEGIN_BATCH(2, IGNORE_CLIPRECTS); + if (sz == 0) { + OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); + OUT_BATCH(0); + } else { + OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2)); + OUT_RELOC(brw->curbe.curbe_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + (sz - 1) + brw->curbe.curbe_offset); } + ADVANCE_BATCH(); } /* This tracked state is unique in that the state it monitors varies @@ -372,9 +356,11 @@ const struct brw_tracked_state brw_constant_buffer = { BRW_NEW_VERTEX_PROGRAM | BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */ - BRW_NEW_CURBE_OFFSETS), + BRW_NEW_CURBE_OFFSETS | + BRW_NEW_BATCH), .cache = (CACHE_NEW_WM_PROG) }, - .update = upload_constant_buffer + .prepare = prepare_constant_buffer, + .emit = emit_constant_buffer, }; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index e8f878a7018..39c32255f8b 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -33,69 +33,6 @@ #ifndef BRW_DEFINES_H #define BRW_DEFINES_H -/* - */ -#define MI_NOOP 0x00 -#define MI_USER_INTERRUPT 0x02 -#define MI_WAIT_FOR_EVENT 0x03 -#define MI_FLUSH 0x04 -#define MI_REPORT_HEAD 0x07 -#define MI_ARB_ON_OFF 0x08 -#define MI_BATCH_BUFFER_END 0x0A -#define MI_OVERLAY_FLIP 0x11 -#define MI_LOAD_SCAN_LINES_INCL 0x12 -#define MI_LOAD_SCAN_LINES_EXCL 0x13 -#define MI_DISPLAY_BUFFER_INFO 0x14 -#define MI_SET_CONTEXT 0x18 -#define MI_STORE_DATA_IMM 0x20 -#define MI_STORE_DATA_INDEX 0x21 -#define MI_LOAD_REGISTER_IMM 0x22 -#define MI_STORE_REGISTER_MEM 0x24 -#define MI_BATCH_BUFFER_START 0x31 - -#define MI_SYNCHRONOUS_FLIP 0x0 -#define MI_ASYNCHRONOUS_FLIP 0x1 - -#define MI_BUFFER_SECURE 0x0 -#define MI_BUFFER_NONSECURE 0x1 - -#define MI_ARBITRATE_AT_CHAIN_POINTS 0x0 -#define MI_ARBITRATE_BETWEEN_INSTS 0x1 -#define MI_NO_ARBITRATION 0x3 - -#define MI_CONDITION_CODE_WAIT_DISABLED 0x0 -#define MI_CONDITION_CODE_WAIT_0 0x1 -#define MI_CONDITION_CODE_WAIT_1 0x2 -#define MI_CONDITION_CODE_WAIT_2 0x3 -#define MI_CONDITION_CODE_WAIT_3 0x4 -#define MI_CONDITION_CODE_WAIT_4 0x5 - -#define MI_DISPLAY_PIPE_A 0x0 -#define MI_DISPLAY_PIPE_B 0x1 - -#define MI_DISPLAY_PLANE_A 0x0 -#define MI_DISPLAY_PLANE_B 0x1 -#define MI_DISPLAY_PLANE_C 0x2 - -#define MI_STANDARD_FLIP 0x0 -#define MI_ENQUEUE_FLIP_PERFORM_BASE_FRAME_NUMBER_LOAD 0x1 -#define MI_ENQUEUE_FLIP_TARGET_FRAME_NUMBER_RELATIVE 0x2 -#define MI_ENQUEUE_FLIP_ABSOLUTE_TARGET_FRAME_NUMBER 0x3 - -#define MI_PHYSICAL_ADDRESS 0x0 -#define MI_VIRTUAL_ADDRESS 0x1 - -#define MI_BUFFER_MEMORY_MAIN 0x0 -#define MI_BUFFER_MEMORY_GTT 0x2 -#define MI_BUFFER_MEMORY_PER_PROCESS_GTT 0x3 - -#define MI_FLIP_CONTINUE 0x0 -#define MI_FLIP_ON 0x1 -#define MI_FLIP_OFF 0x2 - -#define MI_UNTRUSTED_REGISTER_SPACE 0x0 -#define MI_TRUSTED_REGISTER_SPACE 0x1 - /* 3D state: */ #define _3DOP_3DSTATE_PIPELINED 0x0 @@ -119,7 +56,6 @@ #define _3DSTATE_LINE_STIPPLE 0x08 #define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 #define _3DCONTROL 0x00 -#define _3DPRIMITIVE 0x00 #define PIPE_CONTROL_NOWRITE 0x00 #define PIPE_CONTROL_WRITEIMMEDIATE 0x01 @@ -240,6 +176,8 @@ #define BRW_FRONTWINDING_CW 0 #define BRW_FRONTWINDING_CCW 1 +#define BRW_SPRITE_POINT_ENABLE 16 + #define BRW_INDEX_BYTE 0 #define BRW_INDEX_WORD 1 #define BRW_INDEX_DWORD 2 @@ -485,20 +423,6 @@ #define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 #define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 -#define BRW_VERTEXBUFFER_ACCESS_VERTEXDATA 0 -#define BRW_VERTEXBUFFER_ACCESS_INSTANCEDATA 1 - -#define BRW_VFCOMPONENT_NOSTORE 0 -#define BRW_VFCOMPONENT_STORE_SRC 1 -#define BRW_VFCOMPONENT_STORE_0 2 -#define BRW_VFCOMPONENT_STORE_1_FLT 3 -#define BRW_VFCOMPONENT_STORE_1_INT 4 -#define BRW_VFCOMPONENT_STORE_VID 5 -#define BRW_VFCOMPONENT_STORE_IID 6 -#define BRW_VFCOMPONENT_STORE_PID 7 - - - /* Execution Unit (EU) defines */ @@ -815,14 +739,40 @@ #define CMD_STATE_BASE_ADDRESS 0x6101 #define CMD_STATE_INSN_POINTER 0x6102 -#define CMD_PIPELINE_SELECT 0x6104 +#define CMD_PIPELINE_SELECT_965 0x6104 +#define CMD_PIPELINE_SELECT_GM45 0x6904 #define CMD_PIPELINED_STATE_POINTERS 0x7800 #define CMD_BINDING_TABLE_PTRS 0x7801 + #define CMD_VERTEX_BUFFER 0x7808 +# define BRW_VB0_INDEX_SHIFT 27 +# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26) +# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26) +# define BRW_VB0_PITCH_SHIFT 0 + #define CMD_VERTEX_ELEMENT 0x7809 +# define BRW_VE0_INDEX_SHIFT 27 +# define BRW_VE0_FORMAT_SHIFT 16 +# define BRW_VE0_VALID (1 << 26) +# define BRW_VE0_SRC_OFFSET_SHIFT 0 +# define BRW_VE1_COMPONENT_NOSTORE 0 +# define BRW_VE1_COMPONENT_STORE_SRC 1 +# define BRW_VE1_COMPONENT_STORE_0 2 +# define BRW_VE1_COMPONENT_STORE_1_FLT 3 +# define BRW_VE1_COMPONENT_STORE_1_INT 4 +# define BRW_VE1_COMPONENT_STORE_VID 5 +# define BRW_VE1_COMPONENT_STORE_IID 6 +# define BRW_VE1_COMPONENT_STORE_PID 7 +# define BRW_VE1_COMPONENT_0_SHIFT 28 +# define BRW_VE1_COMPONENT_1_SHIFT 24 +# define BRW_VE1_COMPONENT_2_SHIFT 20 +# define BRW_VE1_COMPONENT_3_SHIFT 16 +# define BRW_VE1_DST_OFFSET_SHIFT 0 + #define CMD_INDEX_BUFFER 0x780a -#define CMD_VF_STATISTICS 0x780b +#define CMD_VF_STATISTICS_965 0x780b +#define CMD_VF_STATISTICS_GM45 0x680b #define CMD_DRAW_RECT 0x7900 #define CMD_BLEND_CONSTANT_COLOR 0x7901 @@ -832,6 +782,7 @@ #define CMD_POLY_STIPPLE_PATTERN 0x7907 #define CMD_LINE_STIPPLE_PATTERN 0x7908 #define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 +#define CMD_AA_LINE_PARAMETERS 0x790a #define CMD_PIPE_CONTROL 0x7a00 @@ -845,6 +796,11 @@ #define R02_PRIM_END 0x1 #define R02_PRIM_START 0x2 +#include "intel_chipset.h" +#define BRW_IS_G4X(brw) (IS_G4X((brw)->intel.intelScreen->deviceID)) +#define CMD_PIPELINE_SELECT(brw) (BRW_IS_G4X(brw) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965) +#define CMD_VF_STATISTICS(brw) (BRW_IS_G4X(brw) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965) +#define URB_SIZES(brw) (BRW_IS_G4X(brw) ? 384 : 256) /* 512 bit units */ #endif diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 0c64d7e756d..d87b8f8a848 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -27,20 +27,18 @@ #include <stdlib.h> -#include "glheader.h" -#include "context.h" -#include "state.h" -#include "api_validate.h" -#include "enums.h" +#include "main/glheader.h" +#include "main/context.h" +#include "main/state.h" +#include "main/api_validate.h" +#include "main/enums.h" #include "brw_draw.h" #include "brw_defines.h" #include "brw_context.h" -#include "brw_aub.h" #include "brw_state.h" #include "brw_fallback.h" -#include "intel_ioctl.h" #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" @@ -49,7 +47,7 @@ #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" - +#define FILE_DEBUG_FLAG DEBUG_BATCH static GLuint hw_prim[GL_POLYGON+1] = { _3DPRIM_POINTLIST, @@ -124,25 +122,6 @@ static GLuint trim(GLenum prim, GLuint length) } -static void brw_emit_cliprect( struct brw_context *brw, - const drm_clip_rect_t *rect ) -{ - struct brw_drawrect bdr; - - bdr.header.opcode = CMD_DRAW_RECT; - bdr.header.length = sizeof(bdr)/4 - 2; - bdr.xmin = rect->x1; - bdr.xmax = rect->x2 - 1; - bdr.ymin = rect->y1; - bdr.ymax = rect->y2 - 1; - bdr.xorg = brw->intel.drawX; - bdr.yorg = brw->intel.drawY; - - intel_batchbuffer_data( brw->intel.batch, &bdr, sizeof(bdr), - INTEL_BATCH_NO_CLIPRECTS); -} - - static void brw_emit_prim( struct brw_context *brw, const struct _mesa_prim *prim ) @@ -165,20 +144,25 @@ static void brw_emit_prim( struct brw_context *brw, prim_packet.start_instance_location = 0; prim_packet.base_vert_location = 0; + /* Can't wrap here, since we rely on the validated state. */ + brw->no_batch_wrap = GL_TRUE; if (prim_packet.verts_per_instance) { - intel_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet), - INTEL_BATCH_NO_CLIPRECTS); + intel_batchbuffer_data( brw->intel.batch, &prim_packet, + sizeof(prim_packet), LOOP_CLIPRECTS); } + brw->no_batch_wrap = GL_FALSE; } static void brw_merge_inputs( struct brw_context *brw, const struct gl_client_array *arrays[]) { - struct brw_vertex_element *inputs = brw->vb.inputs; struct brw_vertex_info old = brw->vb.info; GLuint i; - memset(inputs, 0, sizeof(*inputs)); + for (i = 0; i < VERT_ATTRIB_MAX; i++) + dri_bo_unreference(brw->vb.inputs[i].bo); + + memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs)); memset(&brw->vb.info, 0, sizeof(brw->vb.info)); for (i = 0; i < VERT_ATTRIB_MAX; i++) { @@ -189,7 +173,8 @@ static void brw_merge_inputs( struct brw_context *brw, if (arrays[i]->StrideB != 0) brw->vb.info.varying |= 1 << i; - brw->vb.info.sizes[i/16] |= (inputs[i].glarray->Size - 1) << ((i%16) * 2); + brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) << + ((i%16) * 2); } } @@ -271,15 +256,24 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, struct intel_context *intel = intel_context(ctx); struct brw_context *brw = brw_context(ctx); GLboolean retval = GL_FALSE; - GLuint i, j; + GLboolean warn = GL_FALSE; + GLuint i; if (ctx->NewState) _mesa_update_state( ctx ); + brw_validate_textures( brw ); + /* Bind all inputs, derive varying and size information: */ brw_merge_inputs( brw, arrays ); - + + brw->ib.ib = ib; + brw->state.dirty.brw |= BRW_NEW_INDICES; + + brw->vb.min_index = min_index; + brw->vb.max_index = max_index; + brw->state.dirty.brw |= BRW_NEW_VERTICES; /* Have to validate state quite late. Will rebuild tnl_program, * which depends on varying information. * @@ -289,19 +283,25 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, LOCK_HARDWARE(intel); - if (brw->intel.numClipRects == 0) { - assert(intel->batch->ptr == intel->batch->map + intel->batch->offset); + if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) { UNLOCK_HARDWARE(intel); return GL_TRUE; } + /* Flush the batch if it's approaching full, so that we don't wrap while + * we've got validated state that needs to be in the same batch as the + * primitives. This fraction is just a guess (minimal full state plus + * a primitive is around 512 bytes), and would be better if we had + * an upper bound of how much we might emit in a single + * brw_try_draw_prims(). + */ + intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4, + LOOP_CLIPRECTS); { /* Set the first primitive early, ahead of validate_state: */ brw_set_prim(brw, prim[0].mode); - /* XXX: Need to separate validate and upload of state. - */ brw_validate_state( brw ); /* Various fallback checks: @@ -311,73 +311,46 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, if (check_fallbacks( brw, prim, nr_prims )) goto out; - - /* Upload index, vertex data: - */ - if (ib) - brw_upload_indices( brw, ib ); - if (!brw_upload_vertices( brw, min_index, max_index)) { - goto out; - } - - /* For single cliprect, state is already emitted: + /* Check that we can fit our state in with our existing batchbuffer, or + * flush otherwise. */ - if (brw->intel.numClipRects == 1) { - for (i = 0; i < nr_prims; i++) { - brw_emit_prim(brw, &prim[i]); - } - } - else { - /* Otherwise, explicitly do the cliprects at this point: + if (dri_bufmgr_check_aperture_space(brw->state.validated_bos, + brw->state.validated_bo_count)) { + static GLboolean warned; + intel_batchbuffer_flush(intel->batch); + + /* Validate the state after we flushed the batch (which would have + * changed the set of dirty state). If we still fail to + * check_aperture, warn of what's happening, but attempt to continue + * on since it may succeed anyway, and the user would probably rather + * see a failure and a warning than a fallback. */ - for (j = 0; j < brw->intel.numClipRects; j++) { - brw_emit_cliprect(brw, &brw->intel.pClipRects[j]); - - /* Emit prims to batchbuffer: - */ - for (i = 0; i < nr_prims; i++) { - brw_emit_prim(brw, &prim[i]); - } + brw_validate_state(brw); + if (!warned && + dri_bufmgr_check_aperture_space(brw->state.validated_bos, + brw->state.validated_bo_count)) { + warn = GL_TRUE; + warned = GL_TRUE; } } - - intel->need_flush = GL_TRUE; - retval = GL_TRUE; - } - out: + brw_upload_state(brw); - /* Currently have to do this to synchronize with the map/unmap of - * the vertex buffer in brw_exec_api.c. Not sure if there is any - * way around this, as not every flush is due to a buffer filling - * up. - */ - if (!intel_batchbuffer_flush( brw->intel.batch )) { - DBG("%s intel_batchbuffer_flush failed\n", __FUNCTION__); - retval = GL_FALSE; - } - - if (retval && intel->thrashing) { - bmSetFence(intel); - } + for (i = 0; i < nr_prims; i++) { + brw_emit_prim(brw, &prim[i]); + } - /* Free any old data so it doesn't clog up texture memory - we - * won't be referencing it again. - */ - while (brw->vb.upload.wrap != brw->vb.upload.buf) { - ctx->Driver.BufferData(ctx, - GL_ARRAY_BUFFER_ARB, - BRW_UPLOAD_INIT_SIZE, - NULL, - GL_DYNAMIC_DRAW_ARB, - brw->vb.upload.vbo[brw->vb.upload.wrap]); - brw->vb.upload.wrap++; - brw->vb.upload.wrap %= BRW_NR_UPLOAD_BUFS; + retval = GL_TRUE; } + out: UNLOCK_HARDWARE(intel); + if (warn) + fprintf(stderr, "i965: Single primitive emit potentially exceeded " + "available aperture space\n"); + if (!retval) DBG("%s failed\n", __FUNCTION__); @@ -420,7 +393,6 @@ void brw_draw_prims( GLcontext *ctx, GLuint min_index, GLuint max_index ) { - struct intel_context *intel = intel_context(ctx); GLboolean retval; /* Decide if we want to rebase. If so we end up recursing once @@ -435,25 +407,10 @@ void brw_draw_prims( GLcontext *ctx, return; } - /* Make a first attempt at drawing: */ retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); - - /* This looks like out-of-memory but potentially we have - * situation where there is enough memory but it has become - * fragmented. Clear out all heaps and start from scratch by - * faking a contended lock event: (done elsewhere) - */ - if (!retval && !intel->Fallback && bmError(intel)) { - DBG("retrying\n"); - /* Then try a second time only to upload textures and draw the - * primitives: - */ - retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); - } - /* Otherwise, we really are out of memory. Pass the drawing * command to the software tnl module and which will in turn call * swrast to do the drawing. @@ -463,56 +420,32 @@ void brw_draw_prims( GLcontext *ctx, _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); } - if (intel->aub_file && (INTEL_DEBUG & DEBUG_SYNC)) { - intelFinish( &intel->ctx ); - intel->aub_wrap = 1; - } -} - - -static void brw_invalidate_vbo_cb( struct intel_context *intel, void *ptr ) -{ - /* nothing to do, we don't rely on the contents being preserved */ } - void brw_draw_init( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; struct vbo_context *vbo = vbo_context(ctx); - GLuint i; - + /* Register our drawing function: */ vbo->draw_prims = brw_draw_prims; +} - brw->vb.upload.size = BRW_UPLOAD_INIT_SIZE; +void brw_draw_destroy( struct brw_context *brw ) +{ + int i; - for (i = 0; i < BRW_NR_UPLOAD_BUFS; i++) { - brw->vb.upload.vbo[i] = ctx->Driver.NewBufferObject(ctx, 1, GL_ARRAY_BUFFER_ARB); - - /* NOTE: These are set to no-backing-store. - */ - bmBufferSetInvalidateCB(&brw->intel, - intel_bufferobj_buffer(intel_buffer_object(brw->vb.upload.vbo[i])), - brw_invalidate_vbo_cb, - &brw->intel, - GL_TRUE); + if (brw->vb.upload.bo != NULL) { + dri_bo_unreference(brw->vb.upload.bo); + brw->vb.upload.bo = NULL; } - ctx->Driver.BufferData( ctx, - GL_ARRAY_BUFFER_ARB, - BRW_UPLOAD_INIT_SIZE, - NULL, - GL_DYNAMIC_DRAW_ARB, - brw->vb.upload.vbo[0] ); -} + for (i = 0; i < VERT_ATTRIB_MAX; i++) { + dri_bo_unreference(brw->vb.inputs[i].bo); + brw->vb.inputs[i].bo = NULL; + } -void brw_draw_destroy( struct brw_context *brw ) -{ - GLcontext *ctx = &brw->intel.ctx; - GLuint i; - - for (i = 0; i < BRW_NR_UPLOAD_BUFS; i++) - ctx->Driver.DeleteBuffer(ctx, brw->vb.upload.vbo[i]); + dri_bo_unreference(brw->ib.bo); + brw->ib.bo = NULL; } diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h index 0f7b7383102..9aebbdb1b86 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.h +++ b/src/mesa/drivers/dri/i965/brw_draw.h @@ -28,7 +28,7 @@ #ifndef BRW_DRAW_H #define BRW_DRAW_H -#include "mtypes.h" /* for GLcontext... */ +#include "main/mtypes.h" /* for GLcontext... */ #include "vbo/vbo.h" struct brw_context; @@ -50,16 +50,4 @@ void brw_draw_destroy( struct brw_context *brw ); void brw_init_current_values(GLcontext *ctx, struct gl_client_array *arrays); - -/* brw_draw_upload.c - */ -void brw_upload_indices( struct brw_context *brw, - const struct _mesa_index_buffer *index_buffer); - -GLboolean brw_upload_vertices( struct brw_context *brw, - GLuint min_index, - GLuint max_index ); - - - #endif diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 6150cac4aa3..4080c5e3228 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -27,52 +27,21 @@ #include <stdlib.h> -#include "glheader.h" -#include "context.h" -#include "state.h" -#include "api_validate.h" -#include "enums.h" +#include "main/glheader.h" +#include "main/context.h" +#include "main/state.h" +#include "main/api_validate.h" +#include "main/enums.h" #include "brw_draw.h" #include "brw_defines.h" #include "brw_context.h" -#include "brw_aub.h" #include "brw_state.h" #include "brw_fallback.h" -#include "intel_ioctl.h" #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" - - -struct brw_array_state { - union header_union header; - - struct { - union { - struct { - GLuint pitch:11; - GLuint pad:15; - GLuint access_type:1; - GLuint vb_index:5; - } bits; - GLuint dword; - } vb0; - - struct buffer *buffer; - GLuint offset; - - GLuint max_index; - GLuint instance_data_step_rate; - - } vb[BRW_VBP_MAX]; -}; - - -static struct buffer *array_buffer( const struct gl_client_array *array ) -{ - return intel_bufferobj_buffer(intel_buffer_object(array->BufferObj)); -} +#include "intel_tex.h" static GLuint double_types[5] = { 0, @@ -247,194 +216,175 @@ static GLuint get_index_type(GLenum type) } } -static void copy_strided_array( GLubyte *dest, - const GLubyte *src, - GLuint size, - GLuint stride, - GLuint count ) -{ - if (size == stride) - do_memcpy(dest, src, count * size); - else { - GLuint i,j; - - for (i = 0; i < count; i++) { - for (j = 0; j < size; j++) - *dest++ = *src++; - src += (stride - size); - } - } -} - static void wrap_buffers( struct brw_context *brw, GLuint size ) { - GLcontext *ctx = &brw->intel.ctx; - if (size < BRW_UPLOAD_INIT_SIZE) size = BRW_UPLOAD_INIT_SIZE; - brw->vb.upload.buf++; - brw->vb.upload.buf %= BRW_NR_UPLOAD_BUFS; brw->vb.upload.offset = 0; - ctx->Driver.BufferData(ctx, - GL_ARRAY_BUFFER_ARB, - size, - NULL, - GL_DYNAMIC_DRAW_ARB, - brw->vb.upload.vbo[brw->vb.upload.buf]); + if (brw->vb.upload.bo != NULL) + dri_bo_unreference(brw->vb.upload.bo); + brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO", + size, 1); + + /* Set the internal VBO\ to no-backing-store. We only use them as a + * temporary within a brw_try_draw_prims while the lock is held. + */ + /* DON'T DO THIS AS IF WE HAVE TO RE-ORG MEMORY WE NEED SOMEWHERE WITH + FAKE TO PUSH THIS STUFF */ +// if (!brw->intel.ttm) +// dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL); } static void get_space( struct brw_context *brw, GLuint size, - struct gl_buffer_object **vbo_return, + dri_bo **bo_return, GLuint *offset_return ) { - size = (size + 63) & ~63; - - if (brw->vb.upload.offset + size > BRW_UPLOAD_INIT_SIZE) + size = ALIGN(size, 64); + + if (brw->vb.upload.bo == NULL || + brw->vb.upload.offset + size > brw->vb.upload.bo->size) { wrap_buffers(brw, size); + } - *vbo_return = brw->vb.upload.vbo[brw->vb.upload.buf]; + assert(*bo_return == NULL); + dri_bo_reference(brw->vb.upload.bo); + *bo_return = brw->vb.upload.bo; *offset_return = brw->vb.upload.offset; - brw->vb.upload.offset += size; } - - -static struct gl_client_array * +static void copy_array_to_vbo_array( struct brw_context *brw, - GLuint i, - const struct gl_client_array *array, - GLuint element_size, - GLuint count) + struct brw_vertex_element *element, + GLuint dst_stride) { - GLcontext *ctx = &brw->intel.ctx; - struct gl_client_array *vbo_array = &brw->vb.vbo_array[i]; - GLuint size = count * element_size; - struct gl_buffer_object *vbo; - GLuint offset; - GLuint new_stride; + GLuint size = element->count * dst_stride; - get_space(brw, size, &vbo, &offset); + get_space(brw, size, &element->bo, &element->offset); - if (array->StrideB == 0) { - assert(count == 1); - new_stride = 0; + if (element->glarray->StrideB == 0) { + assert(element->count == 1); + element->stride = 0; + } else { + element->stride = dst_stride; } - else - new_stride = element_size; - - vbo_array->Size = array->Size; - vbo_array->Type = array->Type; - vbo_array->Stride = new_stride; - vbo_array->StrideB = new_stride; - vbo_array->Ptr = (const void *)offset; - vbo_array->Enabled = 1; - vbo_array->Normalized = array->Normalized; - vbo_array->_MaxElement = array->_MaxElement; /* ? */ - vbo_array->BufferObj = vbo; - - { - GLubyte *map = ctx->Driver.MapBuffer(ctx, - GL_ARRAY_BUFFER_ARB, - GL_DYNAMIC_DRAW_ARB, - vbo); - - map += offset; - copy_strided_array( map, - array->Ptr, - element_size, - array->StrideB, - count); + if (dst_stride == element->glarray->StrideB) { + dri_bo_subdata(element->bo, + element->offset, + size, + element->glarray->Ptr); + } else { + void *data; + char *dest; + const char *src = element->glarray->Ptr; + int i; + + data = _mesa_malloc(dst_stride * element->count); + dest = data; + for (i = 0; i < element->count; i++) { + memcpy(dest, src, dst_stride); + src += element->glarray->StrideB; + dest += dst_stride; + } - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, vbo_array->BufferObj); + dri_bo_subdata(element->bo, + element->offset, + size, + data); + _mesa_free(data); } - - return vbo_array; -} - - - -static struct gl_client_array * -interleaved_vbo_array( struct brw_context *brw, - GLuint i, - const struct gl_client_array *uploaded_array, - const struct gl_client_array *array, - const char *ptr) -{ - struct gl_client_array *vbo_array = &brw->vb.vbo_array[i]; - - vbo_array->Size = array->Size; - vbo_array->Type = array->Type; - vbo_array->Stride = array->Stride; - vbo_array->StrideB = array->StrideB; - vbo_array->Ptr = (const void *)((const char *)uploaded_array->Ptr + - ((const char *)array->Ptr - ptr)); - vbo_array->Enabled = 1; - vbo_array->Normalized = array->Normalized; - vbo_array->_MaxElement = array->_MaxElement; - vbo_array->BufferObj = uploaded_array->BufferObj; - - return vbo_array; } - -GLboolean brw_upload_vertices( struct brw_context *brw, - GLuint min_index, - GLuint max_index ) +static void brw_prepare_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); GLuint tmp = brw->vs.prog_data->inputs_read; - struct brw_vertex_element_packet vep; - struct brw_array_state vbp; GLuint i; - const void *ptr = NULL; + const unsigned char *ptr = NULL; GLuint interleave = 0; + unsigned int min_index = brw->vb.min_index; + unsigned int max_index = brw->vb.max_index; struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; GLuint nr_enabled = 0; struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; GLuint nr_uploads = 0; - - - memset(&vbp, 0, sizeof(vbp)); - memset(&vep, 0, sizeof(vep)); /* First build an array of pointers to ve's in vb.inputs_read */ if (0) _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); - + + /* Accumulate the list of enabled arrays. */ while (tmp) { GLuint i = _mesa_ffsll(tmp)-1; struct brw_vertex_element *input = &brw->vb.inputs[i]; tmp &= ~(1<<i); enabled[nr_enabled++] = input; + } + + /* XXX: In the rare cases where this happens we fallback all + * the way to software rasterization, although a tnl fallback + * would be sufficient. I don't know of *any* real world + * cases with > 17 vertex attributes enabled, so it probably + * isn't an issue at this point. + */ + if (nr_enabled >= BRW_VEP_MAX) { + intel->Fallback = 1; + return; + } + + for (i = 0; i < nr_enabled; i++) { + struct brw_vertex_element *input = enabled[i]; - input->index = i; input->element_size = get_size(input->glarray->Type) * input->glarray->Size; input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1; - if (!input->glarray->BufferObj->Name) { + if (input->glarray->BufferObj->Name != 0) { + struct intel_buffer_object *intel_buffer = + intel_buffer_object(input->glarray->BufferObj); + + /* Named buffer object: Just reference its contents directly. */ + input->bo = intel_bufferobj_buffer(intel, intel_buffer, + INTEL_READ); + dri_bo_reference(input->bo); + input->offset = (unsigned long)input->glarray->Ptr; + input->stride = input->glarray->StrideB; + } else { + if (input->bo != NULL) { + /* Already-uploaded vertex data is present from a previous + * prepare_vertices, but we had to re-validate state due to + * check_aperture failing and a new batch being produced. + */ + continue; + } + + /* Queue the buffer object up to be uploaded in the next pass, + * when we've decided if we're doing interleaved or not. + */ if (i == 0) { /* Position array not properly enabled: */ - if (input->glarray->StrideB == 0) - return GL_FALSE; + if (input->glarray->StrideB == 0) { + intel->Fallback = 1; + return; + } interleave = input->glarray->StrideB; ptr = input->glarray->Ptr; } else if (interleave != input->glarray->StrideB || - (const char *)input->glarray->Ptr - (const char *)ptr < 0 || - (const char *)input->glarray->Ptr - (const char *)ptr > interleave) { + (const unsigned char *)input->glarray->Ptr - ptr < 0 || + (const unsigned char *)input->glarray->Ptr - ptr > interleave) + { interleave = 0; } @@ -451,131 +401,140 @@ GLboolean brw_upload_vertices( struct brw_context *brw, } } - /* Upload interleaved arrays if all uploads are interleaved - */ - if (nr_uploads > 1 && - interleave && - interleave <= 256) { - struct brw_vertex_element *input0 = upload[0]; - - input0->glarray = copy_array_to_vbo_array(brw, 0, - input0->glarray, - interleave, - input0->count); + /* Handle any arrays to be uploaded. */ + if (nr_uploads > 1 && interleave && interleave <= 256) { + /* All uploads are interleaved, so upload the arrays together as + * interleaved. First, upload the contents and set up upload[0]. + */ + copy_array_to_vbo_array(brw, upload[0], interleave); for (i = 1; i < nr_uploads; i++) { - upload[i]->glarray = interleaved_vbo_array(brw, - i, - input0->glarray, - upload[i]->glarray, - ptr); + /* Then, just point upload[i] at upload[0]'s buffer. */ + upload[i]->stride = interleave; + upload[i]->offset = upload[0]->offset + + ((const unsigned char *)upload[i]->glarray->Ptr - ptr); + upload[i]->bo = upload[0]->bo; + dri_bo_reference(upload[i]->bo); } } else { + /* Upload non-interleaved arrays */ for (i = 0; i < nr_uploads; i++) { - struct brw_vertex_element *input = upload[i]; - - input->glarray = copy_array_to_vbo_array(brw, i, - input->glarray, - input->element_size, - input->count); - + copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size); } } - /* XXX: In the rare cases where this happens we fallback all - * the way to software rasterization, although a tnl fallback - * would be sufficient. I don't know of *any* real world - * cases with > 17 vertex attributes enabled, so it probably - * isn't an issue at this point. - */ - if (nr_enabled >= BRW_VEP_MAX) - return GL_FALSE; + brw_prepare_query_begin(brw); - /* This still defines a hardware VB for each input, even if they - * are interleaved or from the same VBO. TBD if this makes a - * performance difference. - */ for (i = 0; i < nr_enabled; i++) { struct brw_vertex_element *input = enabled[i]; - input->vep = &vep.ve[i]; - input->vep->ve0.src_format = get_surface_type(input->glarray->Type, - input->glarray->Size, - input->glarray->Normalized); - input->vep->ve0.valid = 1; - input->vep->ve1.dst_offset = (i) * 4; - input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; - input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; - input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; - input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; + brw_add_validated_bo(brw, input->bo); + } +} - switch (input->glarray->Size) { - case 0: input->vep->ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_0; - case 1: input->vep->ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; - case 2: input->vep->ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; - case 3: input->vep->ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; - break; - } +static void brw_emit_vertices(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + struct intel_context *intel = intel_context(ctx); + GLuint tmp = brw->vs.prog_data->inputs_read; + struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; + GLuint i; + GLuint nr_enabled = 0; - input->vep->ve0.vertex_buffer_index = i; - input->vep->ve0.src_offset = 0; + /* Accumulate the list of enabled arrays. */ + while (tmp) { + i = _mesa_ffsll(tmp)-1; + struct brw_vertex_element *input = &brw->vb.inputs[i]; - vbp.vb[i].vb0.bits.pitch = input->glarray->StrideB; - vbp.vb[i].vb0.bits.pad = 0; - vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA; - vbp.vb[i].vb0.bits.vb_index = i; - vbp.vb[i].offset = (GLuint)input->glarray->Ptr; - vbp.vb[i].buffer = array_buffer(input->glarray); - vbp.vb[i].max_index = max_index; + tmp &= ~(1<<i); + enabled[nr_enabled++] = input; } + brw_emit_query_begin(brw); - - /* Now emit VB and VEP state packets: + /* Now emit VB and VEP state packets. + * + * This still defines a hardware VB for each input, even if they + * are interleaved or from the same VBO. TBD if this makes a + * performance difference. */ - vbp.header.bits.length = (1 + nr_enabled * 4) - 2; - vbp.header.bits.opcode = CMD_VERTEX_BUFFER; + BEGIN_BATCH(1 + nr_enabled * 4, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_BUFFER << 16) | + ((1 + nr_enabled * 4) - 2)); - BEGIN_BATCH(vbp.header.bits.length+2, 0); - OUT_BATCH( vbp.header.dword ); - for (i = 0; i < nr_enabled; i++) { - OUT_BATCH( vbp.vb[i].vb0.dword ); - OUT_BATCH( bmBufferOffset(&brw->intel, vbp.vb[i].buffer) + vbp.vb[i].offset); - OUT_BATCH( vbp.vb[i].max_index ); - OUT_BATCH( vbp.vb[i].instance_data_step_rate ); + struct brw_vertex_element *input = enabled[i]; + + OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | + BRW_VB0_ACCESS_VERTEXDATA | + (input->stride << BRW_VB0_PITCH_SHIFT)); + OUT_RELOC(input->bo, + I915_GEM_DOMAIN_VERTEX, 0, + input->offset); + OUT_BATCH(brw->vb.max_index); + OUT_BATCH(0); /* Instance data step rate */ } ADVANCE_BATCH(); - vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0])/4) - 2; - vep.header.opcode = CMD_VERTEX_ELEMENT; - brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0])); - - return GL_TRUE; -} + BEGIN_BATCH(1 + nr_enabled * 2, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr_enabled * 2) - 2)); + for (i = 0; i < nr_enabled; i++) { + struct brw_vertex_element *input = enabled[i]; + uint32_t format = get_surface_type(input->glarray->Type, + input->glarray->Size, + input->glarray->Normalized); + uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; + switch (input->glarray->Size) { + case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; + case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; + case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; + case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT; + break; + } -static GLuint element_size( GLenum type ) -{ - switch(type) { - case GL_UNSIGNED_INT: return 4; - case GL_UNSIGNED_SHORT: return 2; - case GL_UNSIGNED_BYTE: return 1; - default: assert(0); return 0; + OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (format << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | + (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | + (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | + (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | + ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); } + ADVANCE_BATCH(); } +const struct brw_tracked_state brw_vertices = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH | BRW_NEW_VERTICES, + .cache = 0, + }, + .prepare = brw_prepare_vertices, + .emit = brw_emit_vertices, +}; - -void brw_upload_indices( struct brw_context *brw, - const struct _mesa_index_buffer *index_buffer ) +static void brw_prepare_indices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; - GLuint ib_size = get_size(index_buffer->type) * index_buffer->count; - struct gl_buffer_object *bufferobj = index_buffer->obj; - GLuint offset = (GLuint)index_buffer->ptr; + const struct _mesa_index_buffer *index_buffer = brw->ib.ib; + GLuint ib_size; + dri_bo *bo = NULL; + struct gl_buffer_object *bufferobj; + GLuint offset; + + if (index_buffer == NULL) + return; + + ib_size = get_size(index_buffer->type) * index_buffer->count; + bufferobj = index_buffer->obj;; /* Turn into a proper VBO: */ @@ -583,23 +542,58 @@ void brw_upload_indices( struct brw_context *brw, /* Get new bufferobj, offset: */ - get_space(brw, ib_size, &bufferobj, &offset); + get_space(brw, ib_size, &bo, &offset); /* Straight upload */ - ctx->Driver.BufferSubData( ctx, - GL_ELEMENT_ARRAY_BUFFER_ARB, - offset, - ib_size, - index_buffer->ptr, - bufferobj); + dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); + } else { + offset = (GLuint)index_buffer->ptr; + + /* If the index buffer isn't aligned to its element size, we have to + * rebase it into a temporary. + */ + if ((get_size(index_buffer->type) - 1) & offset) { + GLubyte *map = ctx->Driver.MapBuffer(ctx, + GL_ELEMENT_ARRAY_BUFFER_ARB, + GL_DYNAMIC_DRAW_ARB, + bufferobj); + map += offset; + + get_space(brw, ib_size, &bo, &offset); + + dri_bo_subdata(bo, offset, ib_size, map); + + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj); + } else { + bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj), + INTEL_READ); + dri_bo_reference(bo); + } } + dri_bo_unreference(brw->ib.bo); + brw->ib.bo = bo; + brw->ib.offset = offset; + + brw_add_validated_bo(brw, brw->ib.bo); +} + +static void brw_emit_indices(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + const struct _mesa_index_buffer *index_buffer = brw->ib.ib; + GLuint ib_size; + + if (index_buffer == NULL) + return; + + ib_size = get_size(index_buffer->type) * index_buffer->count; + /* Emit the indexbuffer packet: */ { struct brw_indexbuffer ib; - struct buffer *buffer = intel_bufferobj_buffer(intel_buffer_object(bufferobj)); memset(&ib, 0, sizeof(ib)); @@ -609,11 +603,25 @@ void brw_upload_indices( struct brw_context *brw, ib.header.bits.cut_index_enable = 0; - BEGIN_BATCH(4, 0); + BEGIN_BATCH(4, IGNORE_CLIPRECTS); OUT_BATCH( ib.header.dword ); - OUT_BATCH( bmBufferOffset(intel, buffer) + offset ); - OUT_BATCH( bmBufferOffset(intel, buffer) + offset + ib_size ); + OUT_RELOC(brw->ib.bo, + I915_GEM_DOMAIN_VERTEX, 0, + brw->ib.offset); + OUT_RELOC(brw->ib.bo, + I915_GEM_DOMAIN_VERTEX, 0, + brw->ib.offset + ib_size); OUT_BATCH( 0 ); ADVANCE_BATCH(); } } + +const struct brw_tracked_state brw_indices = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH | BRW_NEW_INDICES, + .cache = 0, + }, + .prepare = brw_prepare_indices, + .emit = brw_emit_indices, +}; diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index d1244befd78..b3ae4eef334 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -101,8 +101,9 @@ void brw_pop_insn_state( struct brw_compile *p ) /*********************************************************************** */ -void brw_init_compile( struct brw_compile *p ) +void brw_init_compile( struct brw_context *brw, struct brw_compile *p ) { + p->brw = brw; p->nr_insn = 0; p->current = p->stack; memset(p->current, 0, sizeof(p->current[0])); diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 52f89d577ca..8cbe4215fbf 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -65,7 +65,7 @@ struct brw_reg GLuint abs:1; /* source only */ GLuint vstride:4; /* source only */ GLuint width:3; /* src only, align1 only */ - GLuint hstride:2; /* src only, align1 only */ + GLuint hstride:2; /* align1 only */ GLuint address_mode:1; /* relative addressing, hopefully! */ GLuint pad0:1; @@ -105,11 +105,12 @@ struct brw_compile { GLuint flag_value; GLboolean single_program_flow; + struct brw_context *brw; }; -static __inline int type_sz( GLuint type ) +static INLINE int type_sz( GLuint type ) { switch( type ) { case BRW_REGISTER_TYPE_UD: @@ -128,7 +129,7 @@ static __inline int type_sz( GLuint type ) } } -static __inline struct brw_reg brw_reg( GLuint file, +static INLINE struct brw_reg brw_reg( GLuint file, GLuint nr, GLuint subnr, GLuint type, @@ -165,7 +166,7 @@ static __inline struct brw_reg brw_reg( GLuint file, return reg; } -static __inline struct brw_reg brw_vec16_reg( GLuint file, +static INLINE struct brw_reg brw_vec16_reg( GLuint file, GLuint nr, GLuint subnr ) { @@ -180,7 +181,7 @@ static __inline struct brw_reg brw_vec16_reg( GLuint file, WRITEMASK_XYZW); } -static __inline struct brw_reg brw_vec8_reg( GLuint file, +static INLINE struct brw_reg brw_vec8_reg( GLuint file, GLuint nr, GLuint subnr ) { @@ -196,7 +197,7 @@ static __inline struct brw_reg brw_vec8_reg( GLuint file, } -static __inline struct brw_reg brw_vec4_reg( GLuint file, +static INLINE struct brw_reg brw_vec4_reg( GLuint file, GLuint nr, GLuint subnr ) { @@ -212,7 +213,7 @@ static __inline struct brw_reg brw_vec4_reg( GLuint file, } -static __inline struct brw_reg brw_vec2_reg( GLuint file, +static INLINE struct brw_reg brw_vec2_reg( GLuint file, GLuint nr, GLuint subnr ) { @@ -227,7 +228,7 @@ static __inline struct brw_reg brw_vec2_reg( GLuint file, WRITEMASK_XY); } -static __inline struct brw_reg brw_vec1_reg( GLuint file, +static INLINE struct brw_reg brw_vec1_reg( GLuint file, GLuint nr, GLuint subnr ) { @@ -243,14 +244,14 @@ static __inline struct brw_reg brw_vec1_reg( GLuint file, } -static __inline struct brw_reg retype( struct brw_reg reg, +static INLINE struct brw_reg retype( struct brw_reg reg, GLuint type ) { reg.type = type; return reg; } -static __inline struct brw_reg suboffset( struct brw_reg reg, +static INLINE struct brw_reg suboffset( struct brw_reg reg, GLuint delta ) { reg.subnr += delta * type_sz(reg.type); @@ -258,7 +259,7 @@ static __inline struct brw_reg suboffset( struct brw_reg reg, } -static __inline struct brw_reg offset( struct brw_reg reg, +static INLINE struct brw_reg offset( struct brw_reg reg, GLuint delta ) { reg.nr += delta; @@ -266,7 +267,7 @@ static __inline struct brw_reg offset( struct brw_reg reg, } -static __inline struct brw_reg byte_offset( struct brw_reg reg, +static INLINE struct brw_reg byte_offset( struct brw_reg reg, GLuint bytes ) { GLuint newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; @@ -276,28 +277,28 @@ static __inline struct brw_reg byte_offset( struct brw_reg reg, } -static __inline struct brw_reg brw_uw16_reg( GLuint file, +static INLINE struct brw_reg brw_uw16_reg( GLuint file, GLuint nr, GLuint subnr ) { return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); } -static __inline struct brw_reg brw_uw8_reg( GLuint file, +static INLINE struct brw_reg brw_uw8_reg( GLuint file, GLuint nr, GLuint subnr ) { return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); } -static __inline struct brw_reg brw_uw1_reg( GLuint file, +static INLINE struct brw_reg brw_uw1_reg( GLuint file, GLuint nr, GLuint subnr ) { return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); } -static __inline struct brw_reg brw_imm_reg( GLuint type ) +static INLINE struct brw_reg brw_imm_reg( GLuint type ) { return brw_reg( BRW_IMMEDIATE_VALUE, 0, @@ -310,38 +311,38 @@ static __inline struct brw_reg brw_imm_reg( GLuint type ) 0); } -static __inline struct brw_reg brw_imm_f( GLfloat f ) +static INLINE struct brw_reg brw_imm_f( GLfloat f ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); imm.dw1.f = f; return imm; } -static __inline struct brw_reg brw_imm_d( GLint d ) +static INLINE struct brw_reg brw_imm_d( GLint d ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); imm.dw1.d = d; return imm; } -static __inline struct brw_reg brw_imm_ud( GLuint ud ) +static INLINE struct brw_reg brw_imm_ud( GLuint ud ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); imm.dw1.ud = ud; return imm; } -static __inline struct brw_reg brw_imm_uw( GLushort uw ) +static INLINE struct brw_reg brw_imm_uw( GLushort uw ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); - imm.dw1.ud = uw; + imm.dw1.ud = uw | (uw << 16); return imm; } -static __inline struct brw_reg brw_imm_w( GLshort w ) +static INLINE struct brw_reg brw_imm_w( GLshort w ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); - imm.dw1.d = w; + imm.dw1.d = w | (w << 16); return imm; } @@ -351,7 +352,7 @@ static __inline struct brw_reg brw_imm_w( GLshort w ) /* Vector of eight signed half-byte values: */ -static __inline struct brw_reg brw_imm_v( GLuint v ) +static INLINE struct brw_reg brw_imm_v( GLuint v ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); imm.vstride = BRW_VERTICAL_STRIDE_0; @@ -363,7 +364,7 @@ static __inline struct brw_reg brw_imm_v( GLuint v ) /* Vector of four 8-bit float values: */ -static __inline struct brw_reg brw_imm_vf( GLuint v ) +static INLINE struct brw_reg brw_imm_vf( GLuint v ) { struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); imm.vstride = BRW_VERTICAL_STRIDE_0; @@ -377,7 +378,7 @@ static __inline struct brw_reg brw_imm_vf( GLuint v ) #define VF_ONE 0x30 #define VF_NEG (1<<7) -static __inline struct brw_reg brw_imm_vf4( GLuint v0, +static INLINE struct brw_reg brw_imm_vf4( GLuint v0, GLuint v1, GLuint v2, GLuint v3) @@ -394,51 +395,51 @@ static __inline struct brw_reg brw_imm_vf4( GLuint v0, } -static __inline struct brw_reg brw_address( struct brw_reg reg ) +static INLINE struct brw_reg brw_address( struct brw_reg reg ) { return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); } -static __inline struct brw_reg brw_vec1_grf( GLuint nr, +static INLINE struct brw_reg brw_vec1_grf( GLuint nr, GLuint subnr ) { return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } -static __inline struct brw_reg brw_vec8_grf( GLuint nr, +static INLINE struct brw_reg brw_vec8_grf( GLuint nr, GLuint subnr ) { return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } -static __inline struct brw_reg brw_vec4_grf( GLuint nr, +static INLINE struct brw_reg brw_vec4_grf( GLuint nr, GLuint subnr ) { return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } -static __inline struct brw_reg brw_vec2_grf( GLuint nr, +static INLINE struct brw_reg brw_vec2_grf( GLuint nr, GLuint subnr ) { return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } -static __inline struct brw_reg brw_uw8_grf( GLuint nr, +static INLINE struct brw_reg brw_uw8_grf( GLuint nr, GLuint subnr ) { return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } -static __inline struct brw_reg brw_null_reg( void ) +static INLINE struct brw_reg brw_null_reg( void ) { return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0); } -static __inline struct brw_reg brw_address_reg( GLuint subnr ) +static INLINE struct brw_reg brw_address_reg( GLuint subnr ) { return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ADDRESS, @@ -449,7 +450,7 @@ static __inline struct brw_reg brw_address_reg( GLuint subnr ) * aren't xyzw. This goes against the convention for other scalar * regs: */ -static __inline struct brw_reg brw_ip_reg( void ) +static INLINE struct brw_reg brw_ip_reg( void ) { return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_IP, @@ -462,7 +463,7 @@ static __inline struct brw_reg brw_ip_reg( void ) WRITEMASK_XYZW); /* NOTE! */ } -static __inline struct brw_reg brw_acc_reg( void ) +static INLINE struct brw_reg brw_acc_reg( void ) { return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ACCUMULATOR, @@ -470,7 +471,7 @@ static __inline struct brw_reg brw_acc_reg( void ) } -static __inline struct brw_reg brw_flag_reg( void ) +static INLINE struct brw_reg brw_flag_reg( void ) { return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_FLAG, @@ -478,14 +479,14 @@ static __inline struct brw_reg brw_flag_reg( void ) } -static __inline struct brw_reg brw_mask_reg( GLuint subnr ) +static INLINE struct brw_reg brw_mask_reg( GLuint subnr ) { return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_MASK, subnr); } -static __inline struct brw_reg brw_message_reg( GLuint nr ) +static INLINE struct brw_reg brw_message_reg( GLuint nr ) { return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, @@ -498,7 +499,7 @@ static __inline struct brw_reg brw_message_reg( GLuint nr ) /* This is almost always called with a numeric constant argument, so * make things easy to evaluate at compile time: */ -static __inline GLuint cvt( GLuint val ) +static INLINE GLuint cvt( GLuint val ) { switch (val) { case 0: return 0; @@ -512,7 +513,7 @@ static __inline GLuint cvt( GLuint val ) return 0; } -static __inline struct brw_reg stride( struct brw_reg reg, +static INLINE struct brw_reg stride( struct brw_reg reg, GLuint vstride, GLuint width, GLuint hstride ) @@ -524,43 +525,43 @@ static __inline struct brw_reg stride( struct brw_reg reg, return reg; } -static __inline struct brw_reg vec16( struct brw_reg reg ) +static INLINE struct brw_reg vec16( struct brw_reg reg ) { return stride(reg, 16,16,1); } -static __inline struct brw_reg vec8( struct brw_reg reg ) +static INLINE struct brw_reg vec8( struct brw_reg reg ) { return stride(reg, 8,8,1); } -static __inline struct brw_reg vec4( struct brw_reg reg ) +static INLINE struct brw_reg vec4( struct brw_reg reg ) { return stride(reg, 4,4,1); } -static __inline struct brw_reg vec2( struct brw_reg reg ) +static INLINE struct brw_reg vec2( struct brw_reg reg ) { return stride(reg, 2,2,1); } -static __inline struct brw_reg vec1( struct brw_reg reg ) +static INLINE struct brw_reg vec1( struct brw_reg reg ) { return stride(reg, 0,1,0); } -static __inline struct brw_reg get_element( struct brw_reg reg, GLuint elt ) +static INLINE struct brw_reg get_element( struct brw_reg reg, GLuint elt ) { return vec1(suboffset(reg, elt)); } -static __inline struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt ) +static INLINE struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt ) { return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt)); } -static __inline struct brw_reg brw_swizzle( struct brw_reg reg, +static INLINE struct brw_reg brw_swizzle( struct brw_reg reg, GLuint x, GLuint y, GLuint z, @@ -574,33 +575,33 @@ static __inline struct brw_reg brw_swizzle( struct brw_reg reg, } -static __inline struct brw_reg brw_swizzle1( struct brw_reg reg, +static INLINE struct brw_reg brw_swizzle1( struct brw_reg reg, GLuint x ) { return brw_swizzle(reg, x, x, x, x); } -static __inline struct brw_reg brw_writemask( struct brw_reg reg, +static INLINE struct brw_reg brw_writemask( struct brw_reg reg, GLuint mask ) { reg.dw1.bits.writemask &= mask; return reg; } -static __inline struct brw_reg brw_set_writemask( struct brw_reg reg, +static INLINE struct brw_reg brw_set_writemask( struct brw_reg reg, GLuint mask ) { reg.dw1.bits.writemask = mask; return reg; } -static __inline struct brw_reg negate( struct brw_reg reg ) +static INLINE struct brw_reg negate( struct brw_reg reg ) { reg.negate ^= 1; return reg; } -static __inline struct brw_reg brw_abs( struct brw_reg reg ) +static INLINE struct brw_reg brw_abs( struct brw_reg reg ) { reg.abs = 1; return reg; @@ -608,7 +609,7 @@ static __inline struct brw_reg brw_abs( struct brw_reg reg ) /*********************************************************************** */ -static __inline struct brw_reg brw_vec4_indirect( GLuint subnr, +static INLINE struct brw_reg brw_vec4_indirect( GLuint subnr, GLint offset ) { struct brw_reg reg = brw_vec4_grf(0, 0); @@ -618,7 +619,7 @@ static __inline struct brw_reg brw_vec4_indirect( GLuint subnr, return reg; } -static __inline struct brw_reg brw_vec1_indirect( GLuint subnr, +static INLINE struct brw_reg brw_vec1_indirect( GLuint subnr, GLint offset ) { struct brw_reg reg = brw_vec1_grf(0, 0); @@ -628,38 +629,48 @@ static __inline struct brw_reg brw_vec1_indirect( GLuint subnr, return reg; } -static __inline struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset) +static INLINE struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset) { return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); } -static __inline struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset) +static INLINE struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset) { return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); } -static __inline struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset) +static INLINE struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset) { return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); } -static __inline struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset) +static INLINE struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset) { return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); } -static __inline struct brw_reg get_addr_reg(struct brw_indirect ptr) +static INLINE struct brw_reg deref_1d(struct brw_indirect ptr, GLint offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D); +} + +static INLINE struct brw_reg deref_1ud(struct brw_indirect ptr, GLint offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); +} + +static INLINE struct brw_reg get_addr_reg(struct brw_indirect ptr) { return brw_address_reg(ptr.addr_subnr); } -static __inline struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset ) +static INLINE struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset ) { ptr.addr_offset += offset; return ptr; } -static __inline struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset ) +static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset ) { struct brw_indirect ptr; ptr.addr_subnr = addr_subnr; @@ -668,7 +679,10 @@ static __inline struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offse return ptr; } - +static INLINE struct brw_instruction *current_insn( struct brw_compile *p) +{ + return &p->store[p->nr_insn]; +} void brw_pop_insn_state( struct brw_compile *p ); void brw_push_insn_state( struct brw_compile *p ); @@ -680,7 +694,7 @@ void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value ) void brw_set_predicate_control( struct brw_compile *p, GLuint pc ); void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ); -void brw_init_compile( struct brw_compile *p ); +void brw_init_compile( struct brw_context *, struct brw_compile *p ); const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz ); @@ -808,9 +822,11 @@ void brw_ENDIF(struct brw_compile *p, struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size); -void brw_WHILE(struct brw_compile *p, +struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *patch_insn); +struct brw_instruction *brw_BREAK(struct brw_compile *p); +struct brw_instruction *brw_CONT(struct brw_compile *p); /* Forward jumps: */ void brw_land_fwd_jump(struct brw_compile *p, @@ -860,5 +876,6 @@ void brw_math_invert( struct brw_compile *p, struct brw_reg dst, struct brw_reg src); - +void brw_set_src1( struct brw_instruction *insn, + struct brw_reg reg ); #endif diff --git a/src/mesa/drivers/dri/i965/brw_eu_debug.c b/src/mesa/drivers/dri/i965/brw_eu_debug.c index 2dff1ad2244..91dbbd5af62 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_debug.c +++ b/src/mesa/drivers/dri/i965/brw_eu_debug.c @@ -30,9 +30,9 @@ */ -#include "mtypes.h" +#include "main/mtypes.h" +#include "main/imports.h" #include "brw_eu.h" -#include "imports.h" void brw_print_reg( struct brw_reg hwreg ) { diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 9992b47d8ae..ce4cf46cfa6 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -64,7 +64,9 @@ static void brw_set_dest( struct brw_instruction *insn, if (insn->header.access_mode == BRW_ALIGN_1) { insn->bits1.da1.dest_subreg_nr = dest.subnr; - insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.da1.dest_horiz_stride = dest.hstride; } else { insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; @@ -78,7 +80,9 @@ static void brw_set_dest( struct brw_instruction *insn, */ if (insn->header.access_mode == BRW_ALIGN_1) { insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; - insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.ia1.dest_horiz_stride = dest.hstride; } else { insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; @@ -164,7 +168,7 @@ static void brw_set_src0( struct brw_instruction *insn, } -static void brw_set_src1( struct brw_instruction *insn, +void brw_set_src1( struct brw_instruction *insn, struct brw_reg reg ) { assert(reg.file != BRW_MESSAGE_REGISTER_FILE); @@ -186,7 +190,7 @@ static void brw_set_src1( struct brw_instruction *insn, * in the future: */ assert (reg.address_mode == BRW_ADDRESS_DIRECT); - assert (reg.file == BRW_GENERAL_REGISTER_FILE); + //assert (reg.file == BRW_GENERAL_REGISTER_FILE); if (insn->header.access_mode == BRW_ALIGN_1) { insn->bits3.da1.src1_subreg_nr = reg.subnr; @@ -318,7 +322,8 @@ static void brw_set_dp_read_message( struct brw_instruction *insn, insn->bits3.dp_read.end_of_thread = end_of_thread; } -static void brw_set_sampler_message( struct brw_instruction *insn, +static void brw_set_sampler_message(struct brw_context *brw, + struct brw_instruction *insn, GLuint binding_table_index, GLuint sampler, GLuint msg_type, @@ -328,14 +333,24 @@ static void brw_set_sampler_message( struct brw_instruction *insn, { brw_set_src1(insn, brw_imm_d(0)); - insn->bits3.sampler.binding_table_index = binding_table_index; - insn->bits3.sampler.sampler = sampler; - insn->bits3.sampler.msg_type = msg_type; - insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; - insn->bits3.sampler.response_length = response_length; - insn->bits3.sampler.msg_length = msg_length; - insn->bits3.sampler.end_of_thread = eot; - insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; + if (BRW_IS_G4X(brw)) { + insn->bits3.sampler_g4x.binding_table_index = binding_table_index; + insn->bits3.sampler_g4x.sampler = sampler; + insn->bits3.sampler_g4x.msg_type = msg_type; + insn->bits3.sampler_g4x.response_length = response_length; + insn->bits3.sampler_g4x.msg_length = msg_length; + insn->bits3.sampler_g4x.end_of_thread = eot; + insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; + } else { + insn->bits3.sampler.binding_table_index = binding_table_index; + insn->bits3.sampler.sampler = sampler; + insn->bits3.sampler.msg_type = msg_type; + insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; + insn->bits3.sampler.response_length = response_length; + insn->bits3.sampler.msg_length = msg_length; + insn->bits3.sampler.end_of_thread = eot; + insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; + } } @@ -502,6 +517,8 @@ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.predicate_control = BRW_PREDICATE_NORMAL; insn->header.mask_control = BRW_MASK_ENABLE; + if (!p->single_program_flow) + insn->header.thread_control = BRW_THREAD_SWITCH; p->current->header.predicate_control = BRW_PREDICATE_NONE; @@ -527,6 +544,8 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = if_insn->header.execution_size; insn->header.mask_control = BRW_MASK_ENABLE; + if (!p->single_program_flow) + insn->header.thread_control = BRW_THREAD_SWITCH; /* Patch the if instruction to point at this instruction. */ @@ -568,6 +587,7 @@ void brw_ENDIF(struct brw_compile *p, insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = patch_insn->header.execution_size; insn->header.mask_control = BRW_MASK_ENABLE; + insn->header.thread_control = BRW_THREAD_SWITCH; assert(patch_insn->bits3.if_else.jump_count == 0); @@ -597,6 +617,34 @@ void brw_ENDIF(struct brw_compile *p, } } +struct brw_instruction *brw_BREAK(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_BREAK); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + insn->bits3.if_else.pad0 = 0; + return insn; +} + +struct brw_instruction *brw_CONT(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + insn->bits3.if_else.pad0 = 0; + return insn; +} + /* DO/WHILE loop: */ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) @@ -608,13 +656,15 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) /* Override the defaults for this instruction: */ - brw_set_dest(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src1(insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_dest(insn, brw_null_reg()); + brw_set_src0(insn, brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = execute_size; + insn->header.predicate_control = BRW_PREDICATE_NONE; /* insn->header.mask_control = BRW_MASK_ENABLE; */ + /* insn->header.mask_control = BRW_MASK_DISABLE; */ return insn; } @@ -622,7 +672,7 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) -void brw_WHILE(struct brw_compile *p, +struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *do_insn) { struct brw_instruction *insn; @@ -646,14 +696,16 @@ void brw_WHILE(struct brw_compile *p, insn->header.execution_size = do_insn->header.execution_size; assert(do_insn->header.opcode == BRW_OPCODE_DO); - insn->bits3.if_else.jump_count = do_insn - insn; + insn->bits3.if_else.jump_count = do_insn - insn + 1; insn->bits3.if_else.pop_count = 0; insn->bits3.if_else.pad0 = 0; } /* insn->header.mask_control = BRW_MASK_ENABLE; */ + /* insn->header.mask_control = BRW_MASK_DISABLE; */ p->current->header.predicate_control = BRW_PREDICATE_NONE; + return insn; } @@ -985,7 +1037,7 @@ void brw_SAMPLE(struct brw_compile *p, brw_set_dest(insn, dest); brw_set_src0(insn, src0); - brw_set_sampler_message(insn, + brw_set_sampler_message(p->brw, insn, binding_table_index, sampler, msg_type, diff --git a/src/mesa/drivers/dri/i965/brw_exec_generic.c b/src/mesa/drivers/dri/i965/brw_exec_generic.c deleted file mode 100644 index 11d1ef76f59..00000000000 --- a/src/mesa/drivers/dri/i965/brw_exec_generic.c +++ /dev/null @@ -1,530 +0,0 @@ -/************************************************************************** - -Copyright 2004 Tungsten Graphics Inc., Cedar Park, Texas. - -All Rights Reserved. - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation -on the rights to use, copy, modify, merge, publish, distribute, sub -license, and/or sell copies of the Software, and to permit persons to whom -the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice (including the next -paragraph) shall be included in all copies or substantial portions of the -Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, -DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE -USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Keith Whitwell <[email protected]> - */ - -#include "glheader.h" -#include "context.h" -#include "macros.h" -#include "vtxfmt.h" -#include "dlist.h" -#include "state.h" -#include "light.h" -#include "api_arrayelt.h" -#include "api_noop.h" - -#include "brw_exec.h" - - -/* Versions of all the entrypoints for situations where codegen isn't - * available. - * - * Note: Only one size for each attribute may be active at once. - * Eg. if Color3f is installed/active, then Color4f may not be, even - * if the vertex actually contains 4 color coordinates. This is - * because the 3f version won't otherwise set color[3] to 1.0 -- this - * is the job of the chooser function when switching between Color4f - * and Color3f. - */ -#define ATTRFV( ATTR, N ) \ -static void attrib_##ATTR##_##N( const GLfloat *v ) \ -{ \ - GET_CURRENT_CONTEXT( ctx ); \ - struct brw_exec_context *exec = IMM_CONTEXT(ctx)->exec; \ - \ - if ((ATTR) == 0) { \ - GLuint i; \ - \ - if (N>0) exec->vtx.vbptr[0] = v[0]; \ - if (N>1) exec->vtx.vbptr[1] = v[1]; \ - if (N>2) exec->vtx.vbptr[2] = v[2]; \ - if (N>3) exec->vtx.vbptr[3] = v[3]; \ - \ - for (i = N; i < exec->vtx.vertex_size; i++) \ - exec->vtx.vbptr[i] = exec->vtx.vertex[i]; \ - \ - exec->vtx.vbptr += exec->vtx.vertex_size; \ - exec->ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; \ - \ - if (++exec->vtx.vert_count >= exec->vtx.max_vert) \ - brw_exec_vtx_wrap( exec ); \ - } \ - else { \ - GLfloat *dest = exec->vtx.attrptr[ATTR]; \ - if (N>0) dest[0] = v[0]; \ - if (N>1) dest[1] = v[1]; \ - if (N>2) dest[2] = v[2]; \ - if (N>3) dest[3] = v[3]; \ - } \ -} - -#define INIT(TAB, ATTR) \ - TAB[ATTR][0] = attrib_##ATTR##_1; \ - TAB[ATTR][1] = attrib_##ATTR##_2; \ - TAB[ATTR][2] = attrib_##ATTR##_3; \ - TAB[ATTR][3] = attrib_##ATTR##_4; - - -#define ATTRS( ATTRIB ) \ - ATTRFV( ATTRIB, 1 ) \ - ATTRFV( ATTRIB, 2 ) \ - ATTRFV( ATTRIB, 3 ) \ - ATTRFV( ATTRIB, 4 ) - -ATTRS( 0 ) -ATTRS( 1 ) -ATTRS( 2 ) -ATTRS( 3 ) -ATTRS( 4 ) -ATTRS( 5 ) -ATTRS( 6 ) -ATTRS( 7 ) -ATTRS( 8 ) -ATTRS( 9 ) -ATTRS( 10 ) -ATTRS( 11 ) -ATTRS( 12 ) -ATTRS( 13 ) -ATTRS( 14 ) -ATTRS( 15 ) - -void brw_exec_generic_attr_table_init( brw_attrfv_func (*tab)[4] ) -{ - INIT( tab, 0 ); - INIT( tab, 1 ); - INIT( tab, 2 ); - INIT( tab, 3 ); - INIT( tab, 4 ); - INIT( tab, 5 ); - INIT( tab, 6 ); - INIT( tab, 7 ); - INIT( tab, 8 ); - INIT( tab, 9 ); - INIT( tab, 10 ); - INIT( tab, 11 ); - INIT( tab, 12 ); - INIT( tab, 13 ); - INIT( tab, 14 ); - INIT( tab, 15 ); -} - -/* These can be made efficient with codegen. Further, by adding more - * logic to do_choose(), the double-dispatch for legacy entrypoints - * like glVertex3f() can be removed. - */ -#define DISPATCH_ATTRFV( ATTR, COUNT, P ) \ -do { \ - GET_CURRENT_CONTEXT( ctx ); \ - struct brw_exec_context *exec = IMM_CONTEXT(ctx)->exec; \ - exec->vtx.tabfv[ATTR][COUNT-1]( P ); \ -} while (0) - -#define DISPATCH_ATTR1FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 1, V ) -#define DISPATCH_ATTR2FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 2, V ) -#define DISPATCH_ATTR3FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 3, V ) -#define DISPATCH_ATTR4FV( ATTR, V ) DISPATCH_ATTRFV( ATTR, 4, V ) - -#define DISPATCH_ATTR1F( ATTR, S ) DISPATCH_ATTRFV( ATTR, 1, &(S) ) - -#define DISPATCH_ATTR2F( ATTR, S,T ) \ -do { \ - GLfloat v[2]; \ - v[0] = S; v[1] = T; \ - DISPATCH_ATTR2FV( ATTR, v ); \ -} while (0) -#define DISPATCH_ATTR3F( ATTR, S,T,R ) \ -do { \ - GLfloat v[3]; \ - v[0] = S; v[1] = T; v[2] = R; \ - DISPATCH_ATTR3FV( ATTR, v ); \ -} while (0) -#define DISPATCH_ATTR4F( ATTR, S,T,R,Q ) \ -do { \ - GLfloat v[4]; \ - v[0] = S; v[1] = T; v[2] = R; v[3] = Q; \ - DISPATCH_ATTR4FV( ATTR, v ); \ -} while (0) - - -static void GLAPIENTRY brw_Vertex2f( GLfloat x, GLfloat y ) -{ - DISPATCH_ATTR2F( BRW_ATTRIB_POS, x, y ); -} - -static void GLAPIENTRY brw_Vertex2fv( const GLfloat *v ) -{ - DISPATCH_ATTR2FV( BRW_ATTRIB_POS, v ); -} - -static void GLAPIENTRY brw_Vertex3f( GLfloat x, GLfloat y, GLfloat z ) -{ - DISPATCH_ATTR3F( BRW_ATTRIB_POS, x, y, z ); -} - -static void GLAPIENTRY brw_Vertex3fv( const GLfloat *v ) -{ - DISPATCH_ATTR3FV( BRW_ATTRIB_POS, v ); -} - -static void GLAPIENTRY brw_Vertex4f( GLfloat x, GLfloat y, GLfloat z, - GLfloat w ) -{ - DISPATCH_ATTR4F( BRW_ATTRIB_POS, x, y, z, w ); -} - -static void GLAPIENTRY brw_Vertex4fv( const GLfloat *v ) -{ - DISPATCH_ATTR4FV( BRW_ATTRIB_POS, v ); -} - -static void GLAPIENTRY brw_TexCoord1f( GLfloat x ) -{ - DISPATCH_ATTR1F( BRW_ATTRIB_TEX0, x ); -} - -static void GLAPIENTRY brw_TexCoord1fv( const GLfloat *v ) -{ - DISPATCH_ATTR1FV( BRW_ATTRIB_TEX0, v ); -} - -static void GLAPIENTRY brw_TexCoord2f( GLfloat x, GLfloat y ) -{ - DISPATCH_ATTR2F( BRW_ATTRIB_TEX0, x, y ); -} - -static void GLAPIENTRY brw_TexCoord2fv( const GLfloat *v ) -{ - DISPATCH_ATTR2FV( BRW_ATTRIB_TEX0, v ); -} - -static void GLAPIENTRY brw_TexCoord3f( GLfloat x, GLfloat y, GLfloat z ) -{ - DISPATCH_ATTR3F( BRW_ATTRIB_TEX0, x, y, z ); -} - -static void GLAPIENTRY brw_TexCoord3fv( const GLfloat *v ) -{ - DISPATCH_ATTR3FV( BRW_ATTRIB_TEX0, v ); -} - -static void GLAPIENTRY brw_TexCoord4f( GLfloat x, GLfloat y, GLfloat z, - GLfloat w ) -{ - DISPATCH_ATTR4F( BRW_ATTRIB_TEX0, x, y, z, w ); -} - -static void GLAPIENTRY brw_TexCoord4fv( const GLfloat *v ) -{ - DISPATCH_ATTR4FV( BRW_ATTRIB_TEX0, v ); -} - -static void GLAPIENTRY brw_Normal3f( GLfloat x, GLfloat y, GLfloat z ) -{ - DISPATCH_ATTR3F( BRW_ATTRIB_NORMAL, x, y, z ); -} - -static void GLAPIENTRY brw_Normal3fv( const GLfloat *v ) -{ - DISPATCH_ATTR3FV( BRW_ATTRIB_NORMAL, v ); -} - -static void GLAPIENTRY brw_FogCoordfEXT( GLfloat x ) -{ - DISPATCH_ATTR1F( BRW_ATTRIB_FOG, x ); -} - -static void GLAPIENTRY brw_FogCoordfvEXT( const GLfloat *v ) -{ - DISPATCH_ATTR1FV( BRW_ATTRIB_FOG, v ); -} - -static void GLAPIENTRY brw_Color3f( GLfloat x, GLfloat y, GLfloat z ) -{ - DISPATCH_ATTR3F( BRW_ATTRIB_COLOR0, x, y, z ); -} - -static void GLAPIENTRY brw_Color3fv( const GLfloat *v ) -{ - DISPATCH_ATTR3FV( BRW_ATTRIB_COLOR0, v ); -} - -static void GLAPIENTRY brw_Color4f( GLfloat x, GLfloat y, GLfloat z, - GLfloat w ) -{ - DISPATCH_ATTR4F( BRW_ATTRIB_COLOR0, x, y, z, w ); -} - -static void GLAPIENTRY brw_Color4fv( const GLfloat *v ) -{ - DISPATCH_ATTR4FV( BRW_ATTRIB_COLOR0, v ); -} - -static void GLAPIENTRY brw_SecondaryColor3fEXT( GLfloat x, GLfloat y, - GLfloat z ) -{ - DISPATCH_ATTR3F( BRW_ATTRIB_COLOR1, x, y, z ); -} - -static void GLAPIENTRY brw_SecondaryColor3fvEXT( const GLfloat *v ) -{ - DISPATCH_ATTR3FV( BRW_ATTRIB_COLOR1, v ); -} - -static void GLAPIENTRY brw_MultiTexCoord1f( GLenum target, GLfloat x ) -{ - GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0; - DISPATCH_ATTR1F( attr, x ); -} - -static void GLAPIENTRY brw_MultiTexCoord1fv( GLenum target, - const GLfloat *v ) -{ - GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0; - DISPATCH_ATTR1FV( attr, v ); -} - -static void GLAPIENTRY brw_MultiTexCoord2f( GLenum target, GLfloat x, - GLfloat y ) -{ - GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0; - DISPATCH_ATTR2F( attr, x, y ); -} - -static void GLAPIENTRY brw_MultiTexCoord2fv( GLenum target, - const GLfloat *v ) -{ - GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0; - DISPATCH_ATTR2FV( attr, v ); -} - -static void GLAPIENTRY brw_MultiTexCoord3f( GLenum target, GLfloat x, - GLfloat y, GLfloat z) -{ - GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0; - DISPATCH_ATTR3F( attr, x, y, z ); -} - -static void GLAPIENTRY brw_MultiTexCoord3fv( GLenum target, - const GLfloat *v ) -{ - GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0; - DISPATCH_ATTR3FV( attr, v ); -} - -static void GLAPIENTRY brw_MultiTexCoord4f( GLenum target, GLfloat x, - GLfloat y, GLfloat z, - GLfloat w ) -{ - GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0; - DISPATCH_ATTR4F( attr, x, y, z, w ); -} - -static void GLAPIENTRY brw_MultiTexCoord4fv( GLenum target, - const GLfloat *v ) -{ - GLuint attr = (target & 0x7) + BRW_ATTRIB_TEX0; - DISPATCH_ATTR4FV( attr, v ); -} - - -static void GLAPIENTRY brw_VertexAttrib1fNV( GLuint index, GLfloat x ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR1F( index, x ); -} - -static void GLAPIENTRY brw_VertexAttrib1fvNV( GLuint index, - const GLfloat *v ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR1FV( index, v ); -} - -static void GLAPIENTRY brw_VertexAttrib2fNV( GLuint index, GLfloat x, - GLfloat y ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR2F( index, x, y ); -} - -static void GLAPIENTRY brw_VertexAttrib2fvNV( GLuint index, - const GLfloat *v ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR2FV( index, v ); -} - -static void GLAPIENTRY brw_VertexAttrib3fNV( GLuint index, GLfloat x, - GLfloat y, GLfloat z ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR3F( index, x, y, z ); -} - -static void GLAPIENTRY brw_VertexAttrib3fvNV( GLuint index, - const GLfloat *v ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR3FV( index, v ); -} - -static void GLAPIENTRY brw_VertexAttrib4fNV( GLuint index, GLfloat x, - GLfloat y, GLfloat z, - GLfloat w ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR4F( index, x, y, z, w ); -} - -static void GLAPIENTRY brw_VertexAttrib4fvNV( GLuint index, - const GLfloat *v ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR4FV( index, v ); -} - - -/* - * XXX adjust index - */ - -static void GLAPIENTRY brw_VertexAttrib1fARB( GLuint index, GLfloat x ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR1F( index, x ); -} - -static void GLAPIENTRY brw_VertexAttrib1fvARB( GLuint index, - const GLfloat *v ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR1FV( index, v ); -} - -static void GLAPIENTRY brw_VertexAttrib2fARB( GLuint index, GLfloat x, - GLfloat y ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR2F( index, x, y ); -} - -static void GLAPIENTRY brw_VertexAttrib2fvARB( GLuint index, - const GLfloat *v ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR2FV( index, v ); -} - -static void GLAPIENTRY brw_VertexAttrib3fARB( GLuint index, GLfloat x, - GLfloat y, GLfloat z ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR3F( index, x, y, z ); -} - -static void GLAPIENTRY brw_VertexAttrib3fvARB( GLuint index, - const GLfloat *v ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR3FV( index, v ); -} - -static void GLAPIENTRY brw_VertexAttrib4fARB( GLuint index, GLfloat x, - GLfloat y, GLfloat z, - GLfloat w ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR4F( index, x, y, z, w ); -} - -static void GLAPIENTRY brw_VertexAttrib4fvARB( GLuint index, - const GLfloat *v ) -{ - if (index >= BRW_ATTRIB_FIRST_MATERIAL) index = ERROR_ATTRIB; - DISPATCH_ATTR4FV( index, v ); -} - - -/* Install the generic versions of the 2nd level dispatch - * functions. Some of these have a codegen alternative. - */ -void brw_exec_vtx_generic_init( struct brw_exec_context *exec ) -{ - GLvertexformat *vfmt = &exec->vtxfmt; - - vfmt->Color3f = brw_Color3f; - vfmt->Color3fv = brw_Color3fv; - vfmt->Color4f = brw_Color4f; - vfmt->Color4fv = brw_Color4fv; - vfmt->FogCoordfEXT = brw_FogCoordfEXT; - vfmt->FogCoordfvEXT = brw_FogCoordfvEXT; - vfmt->MultiTexCoord1fARB = brw_MultiTexCoord1f; - vfmt->MultiTexCoord1fvARB = brw_MultiTexCoord1fv; - vfmt->MultiTexCoord2fARB = brw_MultiTexCoord2f; - vfmt->MultiTexCoord2fvARB = brw_MultiTexCoord2fv; - vfmt->MultiTexCoord3fARB = brw_MultiTexCoord3f; - vfmt->MultiTexCoord3fvARB = brw_MultiTexCoord3fv; - vfmt->MultiTexCoord4fARB = brw_MultiTexCoord4f; - vfmt->MultiTexCoord4fvARB = brw_MultiTexCoord4fv; - vfmt->Normal3f = brw_Normal3f; - vfmt->Normal3fv = brw_Normal3fv; - vfmt->SecondaryColor3fEXT = brw_SecondaryColor3fEXT; - vfmt->SecondaryColor3fvEXT = brw_SecondaryColor3fvEXT; - vfmt->TexCoord1f = brw_TexCoord1f; - vfmt->TexCoord1fv = brw_TexCoord1fv; - vfmt->TexCoord2f = brw_TexCoord2f; - vfmt->TexCoord2fv = brw_TexCoord2fv; - vfmt->TexCoord3f = brw_TexCoord3f; - vfmt->TexCoord3fv = brw_TexCoord3fv; - vfmt->TexCoord4f = brw_TexCoord4f; - vfmt->TexCoord4fv = brw_TexCoord4fv; - vfmt->Vertex2f = brw_Vertex2f; - vfmt->Vertex2fv = brw_Vertex2fv; - vfmt->Vertex3f = brw_Vertex3f; - vfmt->Vertex3fv = brw_Vertex3fv; - vfmt->Vertex4f = brw_Vertex4f; - vfmt->Vertex4fv = brw_Vertex4fv; - vfmt->VertexAttrib1fNV = brw_VertexAttrib1fNV; - vfmt->VertexAttrib1fvNV = brw_VertexAttrib1fvNV; - vfmt->VertexAttrib2fNV = brw_VertexAttrib2fNV; - vfmt->VertexAttrib2fvNV = brw_VertexAttrib2fvNV; - vfmt->VertexAttrib3fNV = brw_VertexAttrib3fNV; - vfmt->VertexAttrib3fvNV = brw_VertexAttrib3fvNV; - vfmt->VertexAttrib4fNV = brw_VertexAttrib4fNV; - vfmt->VertexAttrib4fvNV = brw_VertexAttrib4fvNV; - vfmt->VertexAttrib1fARB = brw_VertexAttrib1fARB; - vfmt->VertexAttrib1fvARB = brw_VertexAttrib1fvARB; - vfmt->VertexAttrib2fARB = brw_VertexAttrib2fARB; - vfmt->VertexAttrib2fvARB = brw_VertexAttrib2fvARB; - vfmt->VertexAttrib3fARB = brw_VertexAttrib3fARB; - vfmt->VertexAttrib3fvARB = brw_VertexAttrib3fvARB; - vfmt->VertexAttrib4fARB = brw_VertexAttrib4fARB; - vfmt->VertexAttrib4fvARB = brw_VertexAttrib4fvARB; -} diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c index 86464b2ec5f..4ea660a51a3 100644 --- a/src/mesa/drivers/dri/i965/brw_fallback.c +++ b/src/mesa/drivers/dri/i965/brw_fallback.c @@ -25,52 +25,47 @@ * **************************************************************************/ +#include "main/glheader.h" +#include "main/context.h" +#include "main/enums.h" +#include "main/imports.h" +#include "main/macros.h" +#include "main/mtypes.h" + #include "swrast_setup/swrast_setup.h" #include "swrast/swrast.h" #include "tnl/tnl.h" -#include "context.h" #include "brw_context.h" #include "brw_fallback.h" -#include "glheader.h" -#include "enums.h" -#include "glapi.h" -#include "imports.h" -#include "macros.h" -#include "mtypes.h" - - - - - +#include "glapi/glapi.h" +#define FILE_DEBUG_FLAG DEBUG_FALLBACKS static GLboolean do_check_fallback(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; GLuint i; - + /* BRW_NEW_METAOPS */ if (brw->metaops.active) return GL_FALSE; - if (brw->intel.no_rast) - return GL_TRUE; - - /* _NEW_BUFFERS - */ - if (ctx->DrawBuffer->_ColorDrawBufferMask[0] != BUFFER_BIT_FRONT_LEFT && - ctx->DrawBuffer->_ColorDrawBufferMask[0] != BUFFER_BIT_BACK_LEFT) + if (brw->intel.no_rast) { + DBG("FALLBACK: rasterization disabled\n"); return GL_TRUE; + } /* _NEW_RENDERMODE * * XXX: need to save/restore RenderMode in metaops state, or * somehow move to a new attribs pointer: */ - if (ctx->RenderMode != GL_RENDER) + if (ctx->RenderMode != GL_RENDER) { + DBG("FALLBACK: render mode\n"); return GL_TRUE; + } /* _NEW_TEXTURE: */ @@ -79,8 +74,10 @@ static GLboolean do_check_fallback(struct brw_context *brw) if (texUnit->_ReallyEnabled) { struct intel_texture_object *intelObj = intel_texture_object(texUnit->_Current); struct gl_texture_image *texImage = intelObj->base.Image[0][intelObj->firstLevel]; - if (texImage->Border) + if (texImage->Border) { + DBG("FALLBACK: texture border\n"); return GL_TRUE; + } } } @@ -88,6 +85,7 @@ static GLboolean do_check_fallback(struct brw_context *brw) */ if (brw->attribs.Stencil->Enabled && !brw->intel.hw_stencil) { + DBG("FALLBACK: stencil\n"); return GL_TRUE; } @@ -106,7 +104,7 @@ const struct brw_tracked_state brw_check_fallback = { .brw = BRW_NEW_METAOPS, .cache = 0 }, - .update = check_fallback + .prepare = check_fallback }; diff --git a/src/mesa/drivers/dri/i965/brw_fallback.h b/src/mesa/drivers/dri/i965/brw_fallback.h index 684a46cd170..50dcdacd17a 100644 --- a/src/mesa/drivers/dri/i965/brw_fallback.h +++ b/src/mesa/drivers/dri/i965/brw_fallback.h @@ -28,7 +28,7 @@ #ifndef BRW_FALLBACK_H #define BRW_FALLBACK_H -#include "mtypes.h" /* for GLcontext... */ +#include "main/mtypes.h" /* for GLcontext... */ struct brw_context; struct vbo_prim; diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 73263a5fff4..a8b74a0afe6 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -29,9 +29,9 @@ * Keith Whitwell <[email protected]> */ -#include "glheader.h" -#include "macros.h" -#include "enums.h" +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" #include "intel_batchbuffer.h" @@ -65,7 +65,7 @@ static void compile_gs_prog( struct brw_context *brw, /* Begin the compilation: */ - brw_init_compile(&c.func); + brw_init_compile(brw, &c.func); c.func.single_program_flow = 1; @@ -119,26 +119,15 @@ static void compile_gs_prog( struct brw_context *brw, /* Upload */ - brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->gs.prog_data ); + dri_bo_unreference(brw->gs.prog_bo); + brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->gs.prog_data ); } - -static GLboolean search_cache( struct brw_context *brw, - struct brw_gs_prog_key *key ) -{ - return brw_search_cache(&brw->cache[BRW_GS_PROG], - key, sizeof(*key), - &brw->gs.prog_data, - &brw->gs.prog_gs_offset); -} - - static const GLenum gs_prim[GL_POLYGON+1] = { GL_POINTS, GL_LINES, @@ -173,10 +162,9 @@ static void populate_key( struct brw_context *brw, /* Calculate interpolants for triangle and line rasterization. */ -static void upload_gs_prog( struct brw_context *brw ) +static void prepare_gs_prog(struct brw_context *brw) { struct brw_gs_prog_key key; - /* Populate the key: */ populate_key(brw, &key); @@ -187,7 +175,12 @@ static void upload_gs_prog( struct brw_context *brw ) } if (brw->gs.prog_active) { - if (!search_cache(brw, &key)) + dri_bo_unreference(brw->gs.prog_bo); + brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG, + &key, sizeof(key), + NULL, 0, + &brw->gs.prog_data); + if (brw->gs.prog_bo == NULL) compile_gs_prog( brw, &key ); } } @@ -199,5 +192,5 @@ const struct brw_tracked_state brw_gs_prog = { .brw = BRW_NEW_PRIMITIVE, .cache = CACHE_NEW_VS_PROG }, - .update = upload_gs_prog + .prepare = prepare_gs_prog }; diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h index 29a4e80ce1b..18a4537c323 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.h +++ b/src/mesa/drivers/dri/i965/brw_gs.h @@ -40,11 +40,11 @@ #define MAX_GS_VERTS (4) struct brw_gs_prog_key { + GLuint attrs:32; GLuint primitive:4; - GLuint attrs:16; GLuint hint_gs_always:1; GLuint need_gs_prog:1; - GLuint pad:10; + GLuint pad:26; }; struct brw_gs_compile { diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c index 9abb94d82ed..22e0d25c2e7 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c @@ -30,9 +30,9 @@ */ -#include "glheader.h" -#include "macros.h" -#include "enums.h" +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" #include "shader/program.h" #include "intel_batchbuffer.h" diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index 5826c01d4f9..27023cf0344 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -34,49 +34,103 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "macros.h" +#include "main/macros.h" +struct brw_gs_unit_key { + unsigned int total_grf; + unsigned int urb_entry_read_length; + unsigned int curbe_offset; -static void upload_gs_unit( struct brw_context *brw ) -{ - struct brw_gs_unit_state gs; + unsigned int nr_urb_entries, urb_size; + GLboolean prog_active; +}; - memset(&gs, 0, sizeof(gs)); +static void +gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key) +{ + memset(key, 0, sizeof(*key)); /* CACHE_NEW_GS_PROG */ - if (brw->gs.prog_active) { - gs.thread0.grf_reg_count = ((brw->gs.prog_data->total_grf-1) & ~15) / 16; - gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6; - gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length; - } - else { - gs.thread0.grf_reg_count = 0; - gs.thread0.kernel_start_pointer = 0; - gs.thread3.urb_entry_read_length = 1; + key->prog_active = brw->gs.prog_active; + if (key->prog_active) { + key->total_grf = brw->gs.prog_data->total_grf; + key->urb_entry_read_length = brw->gs.prog_data->urb_read_length; + } else { + key->total_grf = 1; + key->urb_entry_read_length = 1; } + /* BRW_NEW_CURBE_OFFSETS */ + key->curbe_offset = brw->curbe.clip_start; + /* BRW_NEW_URB_FENCE */ - gs.thread4.nr_urb_entries = brw->urb.nr_gs_entries; - gs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; + key->nr_urb_entries = brw->urb.nr_gs_entries; + key->urb_size = brw->urb.vsize; +} - gs.thread4.max_threads = 0; /* Hardware requirement */ +static dri_bo * +gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) +{ + struct brw_gs_unit_state gs; + dri_bo *bo; - if (INTEL_DEBUG & DEBUG_STATS) - gs.thread4.stats_enable = 1; + memset(&gs, 0, sizeof(gs)); + + gs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + if (key->prog_active) /* reloc */ + gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6; - /* CONSTANT */ gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; gs.thread1.single_program_flow = 1; + gs.thread3.dispatch_grf_start_reg = 1; gs.thread3.const_urb_entry_read_offset = 0; gs.thread3.const_urb_entry_read_length = 0; gs.thread3.urb_entry_read_offset = 0; - + gs.thread3.urb_entry_read_length = key->urb_entry_read_length; + + gs.thread4.nr_urb_entries = key->nr_urb_entries; + gs.thread4.urb_entry_allocation_size = key->urb_size - 1; + + gs.thread4.max_threads = 0; /* Hardware requirement */ + + if (INTEL_DEBUG & DEBUG_STATS) + gs.thread4.stats_enable = 1; + + bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT, + key, sizeof(*key), + &brw->gs.prog_bo, 1, + &gs, sizeof(gs), + NULL, NULL); - brw->gs.state_gs_offset = brw_cache_data( &brw->cache[BRW_GS_UNIT], &gs ); + if (key->prog_active) { + /* Emit GS program relocation */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + gs.thread0.grf_reg_count << 1, + offsetof(struct brw_gs_unit_state, thread0), + brw->gs.prog_bo); + } + + return bo; } +static void prepare_gs_unit(struct brw_context *brw) +{ + struct brw_gs_unit_key key; + + gs_unit_populate_key(brw, &key); + + dri_bo_unreference(brw->gs.state_bo); + brw->gs.state_bo = brw_search_cache(&brw->cache, BRW_GS_UNIT, + &key, sizeof(key), + &brw->gs.prog_bo, 1, + NULL); + if (brw->gs.state_bo == NULL) { + brw->gs.state_bo = gs_unit_create_from_key(brw, &key); + } +} const struct brw_tracked_state brw_gs_unit = { .dirty = { @@ -85,5 +139,5 @@ const struct brw_tracked_state brw_gs_unit = { BRW_NEW_URB_FENCE), .cache = CACHE_NEW_GS_PROG }, - .update = upload_gs_unit + .prepare = prepare_gs_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_hal.c b/src/mesa/drivers/dri/i965/brw_hal.c deleted file mode 100644 index 3126102749b..00000000000 --- a/src/mesa/drivers/dri/i965/brw_hal.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - -#include "intel_batchbuffer.h" -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "brw_hal.h" -#include <dlfcn.h> - -static void *brw_hal_lib; -static GLboolean brw_hal_tried; - -void * -brw_hal_find_symbol (char *symbol) -{ - if (!brw_hal_tried) - { - char *brw_hal_name = getenv ("INTEL_HAL"); - - if (!brw_hal_name) - brw_hal_name = "/usr/lib/xorg/modules/drivers/intel_hal.so"; - - brw_hal_lib = dlopen (brw_hal_name, RTLD_LAZY|RTLD_LOCAL); - brw_hal_tried = 1; - } - if (!brw_hal_lib) - return NULL; - return dlsym (brw_hal_lib, symbol); -} diff --git a/src/mesa/drivers/dri/i965/brw_hal.h b/src/mesa/drivers/dri/i965/brw_hal.h deleted file mode 100644 index cd86e395b59..00000000000 --- a/src/mesa/drivers/dri/i965/brw_hal.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - -void * -brw_hal_find_symbol (char *symbol); diff --git a/src/mesa/drivers/dri/i965/brw_metaops.c b/src/mesa/drivers/dri/i965/brw_metaops.c index 6e030f191ef..41bfa2e2567 100644 --- a/src/mesa/drivers/dri/i965/brw_metaops.c +++ b/src/mesa/drivers/dri/i965/brw_metaops.c @@ -32,15 +32,16 @@ -#include "glheader.h" -#include "context.h" -#include "macros.h" +#include "main/glheader.h" +#include "main/context.h" +#include "main/macros.h" #include "shader/arbprogparse.h" #include "intel_screen.h" #include "intel_batchbuffer.h" #include "intel_regions.h" +#include "intel_buffers.h" #include "brw_context.h" #include "brw_defines.h" @@ -156,7 +157,7 @@ static const char *fp_tex_prog = * FragmentProgram->_Current * VertexProgram->_Enabled * brw->vertex_program - * DrawBuffer->_ColorDrawBufferMask[0] + * DrawBuffer->_ColorDrawBufferIndexes[0] * * * More if drawpixels-through-texture is added. @@ -195,7 +196,7 @@ static void init_metaops_state( struct brw_context *brw ) vp_prog, strlen(vp_prog), brw->metaops.vp); - brw->metaops.attribs.VertexProgram->Current = brw->metaops.vp; + brw->metaops.attribs.VertexProgram->_Current = brw->metaops.vp; brw->metaops.attribs.VertexProgram->_Enabled = GL_TRUE; brw->metaops.attribs.FragmentProgram->_Current = brw->metaops.fp; @@ -361,13 +362,21 @@ static void meta_draw_region( struct intel_context *intel, struct brw_context *brw = brw_context(&intel->ctx); if (!brw->metaops.saved_draw_region) { - brw->metaops.saved_draw_region = brw->state.draw_region; + brw->metaops.saved_draw_region = brw->state.draw_regions[0]; + brw->metaops.saved_nr_draw_regions = brw->state.nr_draw_regions; brw->metaops.saved_depth_region = brw->state.depth_region; } - brw->state.draw_region = draw_region; + brw->state.draw_regions[0] = draw_region; + brw->state.nr_draw_regions = 1; brw->state.depth_region = depth_region; + if (intel->frame_buffer_texobj != NULL) + brw_FrameBufferTexDestroy(brw); + + if (draw_region) + brw_FrameBufferTexInit(brw, draw_region); + brw->state.dirty.mesa |= _NEW_BUFFERS; } @@ -376,8 +385,7 @@ static void meta_draw_quad(struct intel_context *intel, GLfloat x0, GLfloat x1, GLfloat y0, GLfloat y1, GLfloat z, - GLubyte red, GLubyte green, - GLubyte blue, GLubyte alpha, + GLuint color, GLfloat s0, GLfloat s1, GLfloat t0, GLfloat t1) { @@ -388,7 +396,6 @@ static void meta_draw_quad(struct intel_context *intel, struct gl_client_array *attribs[VERT_ATTRIB_MAX]; struct _mesa_prim prim[1]; GLfloat pos[4][3]; - GLubyte color[4]; ctx->Driver.BufferData(ctx, GL_ARRAY_BUFFER_ARB, @@ -413,7 +420,6 @@ static void meta_draw_quad(struct intel_context *intel, pos[3][1] = y1; pos[3][2] = z; - ctx->Driver.BufferSubData(ctx, GL_ARRAY_BUFFER_ARB, 0, @@ -421,16 +427,15 @@ static void meta_draw_quad(struct intel_context *intel, pos, brw->metaops.vbo); - color[0] = red; - color[1] = green; - color[2] = blue; - color[3] = alpha; + /* Convert incoming ARGB to required RGBA */ + /* Note this color is stored as GL_UNSIGNED_BYTE */ + color = (color & 0xff00ff00) | (((color >> 16) | (color << 16)) & 0xff00ff); ctx->Driver.BufferSubData(ctx, GL_ARRAY_BUFFER_ARB, sizeof(pos), sizeof(color), - color, + &color, brw->metaops.vbo); /* Ignoring texture coords. @@ -486,6 +491,7 @@ static void install_meta_state( struct intel_context *intel ) { GLcontext *ctx = &intel->ctx; struct brw_context *brw = brw_context(ctx); + GLuint i; if (!brw->metaops.vbo) { init_metaops_state(brw); @@ -495,12 +501,18 @@ static void install_meta_state( struct intel_context *intel ) meta_no_texture(&brw->intel); meta_flat_shade(&brw->intel); - brw->metaops.restore_draw_mask = ctx->DrawBuffer->_ColorDrawBufferMask[0]; + for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { + brw->metaops.restore_draw_buffers[i] + = ctx->DrawBuffer->_ColorDrawBufferIndexes[i]; + } + brw->metaops.restore_num_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers; + brw->metaops.restore_fp = ctx->FragmentProgram.Current; /* This works without adjusting refcounts. Fix later? */ - brw->metaops.saved_draw_region = brw->state.draw_region; + brw->metaops.saved_draw_region = brw->state.draw_regions[0]; + brw->metaops.saved_nr_draw_regions = brw->state.nr_draw_regions; brw->metaops.saved_depth_region = brw->state.depth_region; brw->metaops.active = 1; @@ -511,13 +523,20 @@ static void leave_meta_state( struct intel_context *intel ) { GLcontext *ctx = &intel->ctx; struct brw_context *brw = brw_context(ctx); + GLuint i; restore_attribs(brw); - ctx->DrawBuffer->_ColorDrawBufferMask[0] = brw->metaops.restore_draw_mask; + for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { + ctx->DrawBuffer->_ColorDrawBufferIndexes[i] + = brw->metaops.restore_draw_buffers[i]; + } + ctx->DrawBuffer->_NumColorDrawBuffers = brw->metaops.restore_num_draw_buffers; + ctx->FragmentProgram.Current = brw->metaops.restore_fp; - brw->state.draw_region = brw->metaops.saved_draw_region; + brw->state.draw_regions[0] = brw->metaops.saved_draw_region; + brw->state.nr_draw_regions = brw->metaops.saved_nr_draw_regions; brw->state.depth_region = brw->metaops.saved_depth_region; brw->metaops.saved_draw_region = NULL; brw->metaops.saved_depth_region = NULL; diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index d5779680ffc..627705fa9ba 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -68,148 +68,114 @@ const struct brw_tracked_state brw_blend_constant_color = { .brw = 0, .cache = 0 }, - .update = upload_blend_constant_color + .emit = upload_blend_constant_color }; -/*********************************************************************** - * Drawing rectangle -- Need for AUB file only. - */ +/* Constant single cliprect for framebuffer object or DRI2 drawing */ static void upload_drawing_rect(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - __DRIdrawablePrivate *dPriv = intel->driDrawable; - struct brw_drawrect bdr; - int x1, y1; - int x2, y2; + GLcontext *ctx = &intel->ctx; - /* If there is a single cliprect, set it here. Otherwise iterate - * over them in brw_draw_prim(). - */ - if (brw->intel.numClipRects > 1) - return; - - x1 = brw->intel.pClipRects[0].x1; - y1 = brw->intel.pClipRects[0].y1; - x2 = brw->intel.pClipRects[0].x2; - y2 = brw->intel.pClipRects[0].y2; - - if (x1 < 0) x1 = 0; - if (y1 < 0) y1 = 0; - if (x2 > intel->intelScreen->width) x2 = intel->intelScreen->width; - if (y2 > intel->intelScreen->height) y2 = intel->intelScreen->height; - - memset(&bdr, 0, sizeof(bdr)); - bdr.header.opcode = CMD_DRAW_RECT; - bdr.header.length = sizeof(bdr)/4 - 2; - bdr.xmin = x1; - bdr.ymin = y1; - bdr.xmax = x2; - bdr.ymax = y2; - bdr.xorg = dPriv->x; - bdr.yorg = dPriv->y; - - /* Can't use BRW_CACHED_BATCH_STRUCT because this is also emitted - * uncached in brw_draw.c: - */ - BRW_BATCH_STRUCT(brw, &bdr); + if (!intel->constant_cliprect) + return; + + BEGIN_BATCH(4, NO_LOOP_CLIPRECTS); + OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965); + OUT_BATCH(0); /* xmin, ymin */ + OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) | + ((ctx->DrawBuffer->Height - 1) << 16)); + OUT_BATCH(0); + ADVANCE_BATCH(); } const struct brw_tracked_state brw_drawing_rect = { .dirty = { - .mesa = _NEW_WINDOW_POS, + .mesa = _NEW_BUFFERS, .brw = 0, .cache = 0 }, - .update = upload_drawing_rect + .emit = upload_drawing_rect }; -/*********************************************************************** - * Binding table pointers - */ +static void prepare_binding_table_pointers(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->wm.bind_bo); +} +/** + * Upload the binding table pointers, which point each stage's array of surface + * state pointers. + * + * The binding table pointers are relative to the surface state base address, + * which is 0. + */ static void upload_binding_table_pointers(struct brw_context *brw) { - struct brw_binding_table_pointers btp; - memset(&btp, 0, sizeof(btp)); - - /* The binding table has been emitted to the SS pool already, so we - * know what its offset is. When the batch buffer is fired, the - * binding table and surface structs will get fixed up to point to - * where the textures actually landed, but that won't change the - * value of the offsets here: - */ - btp.header.opcode = CMD_BINDING_TABLE_PTRS; - btp.header.length = sizeof(btp)/4 - 2; - btp.vs = 0; - btp.gs = 0; - btp.clp = 0; - btp.sf = 0; - btp.wm = brw->wm.bind_ss_offset; - - BRW_CACHED_BATCH_STRUCT(brw, &btp); + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(6, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); + OUT_BATCH(0); /* vs */ + OUT_BATCH(0); /* gs */ + OUT_BATCH(0); /* clip */ + OUT_BATCH(0); /* sf */ + OUT_RELOC(brw->wm.bind_bo, + I915_GEM_DOMAIN_SAMPLER, 0, + 0); + ADVANCE_BATCH(); } const struct brw_tracked_state brw_binding_table_pointers = { .dirty = { .mesa = 0, - .brw = 0, - .cache = CACHE_NEW_SURF_BIND + .brw = BRW_NEW_BATCH, + .cache = CACHE_NEW_SURF_BIND, }, - .update = upload_binding_table_pointers + .prepare = prepare_binding_table_pointers, + .emit = upload_binding_table_pointers, }; -/*********************************************************************** - * Pipelined state pointers. This is the key state packet from which - * the hardware chases pointers to all the uploaded state in VRAM. +/** + * Upload pointers to the per-stage state. + * + * The state pointers in this packet are all relative to the general state + * base address set by CMD_STATE_BASE_ADDRESS, which is 0. */ - static void upload_pipelined_state_pointers(struct brw_context *brw ) { - struct brw_pipelined_state_pointers psp; - memset(&psp, 0, sizeof(psp)); - - psp.header.opcode = CMD_PIPELINED_STATE_POINTERS; - psp.header.length = sizeof(psp)/4 - 2; - - psp.vs.offset = brw->vs.state_gs_offset >> 5; - psp.sf.offset = brw->sf.state_gs_offset >> 5; - psp.wm.offset = brw->wm.state_gs_offset >> 5; - psp.cc.offset = brw->cc.state_gs_offset >> 5; - - /* GS gets turned on and off regularly. Need to re-emit URB fence - * after this occurs. - */ - if (brw->gs.prog_active) { - psp.gs.offset = brw->gs.state_gs_offset >> 5; - psp.gs.enable = 1; - } + struct intel_context *intel = &brw->intel; - if (!brw->metaops.active) { - psp.clp.offset = brw->clip.state_gs_offset >> 5; - psp.clp.enable = 1; - } + BEGIN_BATCH(7, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2)); + OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + if (brw->gs.prog_active) + OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + else + OUT_BATCH(0); + if (!brw->metaops.active) + OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + else + OUT_BATCH(0); + OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + ADVANCE_BATCH(); + + brw->state.dirty.brw |= BRW_NEW_PSP; +} - if (BRW_CACHED_BATCH_STRUCT(brw, &psp)) - brw->state.dirty.brw |= BRW_NEW_PSP; +static void prepare_psp_urb_cbs(struct brw_context *brw) +{ + brw_add_validated_bo(brw, brw->vs.state_bo); + brw_add_validated_bo(brw, brw->gs.state_bo); + brw_add_validated_bo(brw, brw->clip.state_bo); + brw_add_validated_bo(brw, brw->wm.state_bo); + brw_add_validated_bo(brw, brw->cc.state_bo); } -const struct brw_tracked_state brw_pipelined_state_pointers = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_METAOPS, - .cache = (CACHE_NEW_VS_UNIT | - CACHE_NEW_GS_UNIT | - CACHE_NEW_GS_PROG | - CACHE_NEW_CLIP_UNIT | - CACHE_NEW_SF_UNIT | - CACHE_NEW_WM_UNIT | - CACHE_NEW_CC_UNIT) - }, - .update = upload_pipelined_state_pointers -}; - static void upload_psp_urb_cbs(struct brw_context *brw ) { upload_pipelined_state_pointers(brw); @@ -217,11 +183,10 @@ static void upload_psp_urb_cbs(struct brw_context *brw ) brw_upload_constant_buffer_state(brw); } - const struct brw_tracked_state brw_psp_urb_cbs = { .dirty = { .mesa = 0, - .brw = BRW_NEW_URB_FENCE | BRW_NEW_METAOPS, + .brw = BRW_NEW_URB_FENCE | BRW_NEW_METAOPS | BRW_NEW_BATCH, .cache = (CACHE_NEW_VS_UNIT | CACHE_NEW_GS_UNIT | CACHE_NEW_GS_PROG | @@ -230,74 +195,85 @@ const struct brw_tracked_state brw_psp_urb_cbs = { CACHE_NEW_WM_UNIT | CACHE_NEW_CC_UNIT) }, - .update = upload_psp_urb_cbs + .prepare = prepare_psp_urb_cbs, + .emit = upload_psp_urb_cbs, }; +static void prepare_depthbuffer(struct brw_context *brw) +{ + struct intel_region *region = brw->state.depth_region; + if (region != NULL) + brw_add_validated_bo(brw, region->buffer); +} - -/*********************************************************************** - * Depthbuffer - currently constant, but rotation would change that. - */ - -static void upload_depthbuffer(struct brw_context *brw) +static void emit_depthbuffer(struct brw_context *brw) { - /* 0x79050003 Depth Buffer */ struct intel_context *intel = &brw->intel; struct intel_region *region = brw->state.depth_region; - struct brw_depthbuffer bd; - memset(&bd, 0, sizeof(bd)); - - bd.header.bits.opcode = CMD_DEPTH_BUFFER; - bd.header.bits.length = sizeof(bd)/4-2; - bd.dword1.bits.pitch = (region->pitch * region->cpp) - 1; - - switch (region->cpp) { - case 2: - bd.dword1.bits.format = BRW_DEPTHFORMAT_D16_UNORM; - break; - case 4: - if (intel->depth_buffer_is_float) - bd.dword1.bits.format = BRW_DEPTHFORMAT_D32_FLOAT; - else - bd.dword1.bits.format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; - break; - default: - assert(0); - return; + unsigned int len = BRW_IS_G4X(brw) ? 6 : 5; + + if (region == NULL) { + BEGIN_BATCH(len, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); + OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | + (BRW_SURFACE_NULL << 29)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + if (BRW_IS_G4X(brw)) + OUT_BATCH(0); + + ADVANCE_BATCH(); + } else { + unsigned int format; + + switch (region->cpp) { + case 2: + format = BRW_DEPTHFORMAT_D16_UNORM; + break; + case 4: + if (intel->depth_buffer_is_float) + format = BRW_DEPTHFORMAT_D32_FLOAT; + else + format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; + break; + default: + assert(0); + return; + } + + BEGIN_BATCH(len, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); + OUT_BATCH(((region->pitch * region->cpp) - 1) | + (format << 18) | + (BRW_TILEWALK_YMAJOR << 26) | + ((region->tiling != I915_TILING_NONE) << 27) | + (BRW_SURFACE_2D << 29)); + OUT_RELOC(region->buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | + ((region->pitch - 1) << 6) | + ((region->height - 1) << 19)); + OUT_BATCH(0); + + if (BRW_IS_G4X(brw)) + OUT_BATCH(0); + + ADVANCE_BATCH(); } - - bd.dword1.bits.depth_offset_disable = 0; /* coordinate offset */ - - /* The depthbuffer can only use YMAJOR tiling... This is a bit of - * a shame as it clashes with the 2d blitter which only supports - * XMAJOR tiling... - */ - bd.dword1.bits.tile_walk = BRW_TILEWALK_YMAJOR; - bd.dword1.bits.tiled_surface = intel->depth_region->tiled; - bd.dword1.bits.surface_type = BRW_SURFACE_2D; - - /* BRW_NEW_LOCK */ - bd.dword2_base_addr = bmBufferOffset(intel, region->buffer); - - bd.dword3.bits.mipmap_layout = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - bd.dword3.bits.lod = 0; - bd.dword3.bits.width = region->pitch - 1; /* XXX: width ? */ - bd.dword3.bits.height = region->height - 1; - - bd.dword4.bits.min_array_element = 0; - bd.dword4.bits.depth = 0; - - BRW_CACHED_BATCH_STRUCT(brw, &bd); } const struct brw_tracked_state brw_depthbuffer = { .dirty = { .mesa = 0, - .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK, - .cache = 0 + .brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH, + .cache = 0, }, - .update = upload_depthbuffer + .prepare = prepare_depthbuffer, + .emit = emit_depthbuffer, }; @@ -327,7 +303,7 @@ const struct brw_tracked_state brw_polygon_stipple = { .brw = 0, .cache = 0 }, - .update = upload_polygon_stipple + .emit = upload_polygon_stipple }; @@ -350,13 +326,42 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) BRW_CACHED_BATCH_STRUCT(brw, &bpso); } +#define _NEW_WINDOW_POS 0x40000000 + const struct brw_tracked_state brw_polygon_stipple_offset = { .dirty = { .mesa = _NEW_WINDOW_POS, .brw = 0, .cache = 0 }, - .update = upload_polygon_stipple_offset + .emit = upload_polygon_stipple_offset +}; + +/********************************************************************** + * AA Line parameters + */ +static void upload_aa_line_parameters(struct brw_context *brw) +{ + struct brw_aa_line_parameters balp; + + if (!BRW_IS_G4X(brw)) + return; + + /* use legacy aa line coverage computation */ + memset(&balp, 0, sizeof(balp)); + balp.header.opcode = CMD_AA_LINE_PARAMETERS; + balp.header.length = sizeof(balp) / 4 - 2; + + BRW_CACHED_BATCH_STRUCT(brw, &balp); +} + +const struct brw_tracked_state brw_aa_line_parameters = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CONTEXT, + .cache = 0 + }, + .emit = upload_aa_line_parameters }; /*********************************************************************** @@ -391,41 +396,7 @@ const struct brw_tracked_state brw_line_stipple = { .brw = 0, .cache = 0 }, - .update = upload_line_stipple -}; - - - -/*********************************************************************** - * Misc constant state packets - */ - -static void upload_pipe_control(struct brw_context *brw) -{ - struct brw_pipe_control pc; - - return; - - memset(&pc, 0, sizeof(pc)); - - pc.header.opcode = CMD_PIPE_CONTROL; - pc.header.length = sizeof(pc)/4 - 2; - pc.header.post_sync_operation = PIPE_CONTROL_NOWRITE; - - pc.header.instruction_state_cache_flush_enable = 1; - - pc.bits1.dest_addr_type = PIPE_CONTROL_GTTWRITE_GLOBAL; - - BRW_BATCH_STRUCT(brw, &pc); -} - -const struct brw_tracked_state brw_pipe_control = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_CONTEXT, - .cache = 0 - }, - .update = upload_pipe_control + .emit = upload_line_stipple }; @@ -441,7 +412,7 @@ static void upload_invarient_state( struct brw_context *brw ) struct brw_pipeline_select ps; memset(&ps, 0, sizeof(ps)); - ps.header.opcode = CMD_PIPELINE_SELECT; + ps.header.opcode = CMD_PIPELINE_SELECT(brw); ps.header.pipeline_select = 0; BRW_BATCH_STRUCT(brw, &ps); } @@ -477,7 +448,7 @@ static void upload_invarient_state( struct brw_context *brw ) struct brw_vf_statistics vfs; memset(&vfs, 0, sizeof(vfs)); - vfs.opcode = CMD_VF_STATISTICS; + vfs.opcode = CMD_VF_STATISTICS(brw); if (INTEL_DEBUG & DEBUG_STATS) vfs.statistics_enable = 1; @@ -491,43 +462,39 @@ const struct brw_tracked_state brw_invarient_state = { .brw = BRW_NEW_CONTEXT, .cache = 0 }, - .update = upload_invarient_state + .emit = upload_invarient_state }; - -/* State pool addresses: +/** + * Define the base addresses which some state is referenced from. + * + * This allows us to avoid having to emit relocations in many places for + * cached state, and instead emit pointers inside of large, mostly-static + * state pools. This comes at the expense of memory, and more expensive cache + * misses. */ static void upload_state_base_address( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; - struct brw_state_base_address sba; - - memset(&sba, 0, sizeof(sba)); - - sba.header.opcode = CMD_STATE_BASE_ADDRESS; - sba.header.length = 0x4; - - /* BRW_NEW_LOCK */ - sba.bits0.general_state_address = bmBufferOffset(intel, brw->pool[BRW_GS_POOL].buffer) >> 5; - sba.bits0.modify_enable = 1; - /* BRW_NEW_LOCK */ - sba.bits1.surface_state_address = bmBufferOffset(intel, brw->pool[BRW_SS_POOL].buffer) >> 5; - sba.bits1.modify_enable = 1; - - sba.bits2.modify_enable = 1; - sba.bits3.modify_enable = 1; - sba.bits4.modify_enable = 1; - - BRW_CACHED_BATCH_STRUCT(brw, &sba); + /* Output the structure (brw_state_base_address) directly to the + * batchbuffer, so we can emit relocations inline. + */ + BEGIN_BATCH(6, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); + OUT_BATCH(1); /* General state base address */ + OUT_BATCH(1); /* Surface state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + ADVANCE_BATCH(); } - const struct brw_tracked_state brw_state_base_address = { .dirty = { .mesa = 0, - .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK, - .cache = 0 + .brw = BRW_NEW_CONTEXT, + .cache = 0, }, - .update = upload_state_base_address + .emit = upload_state_base_address }; diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 752fe49bcbf..c38610b24ed 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -29,15 +29,15 @@ * Keith Whitwell <[email protected]> */ +#include "main/imports.h" +#include "main/enums.h" #include "shader/prog_parameter.h" -#include "brw_context.h" -#include "brw_aub.h" -#include "brw_util.h" -#include "program.h" -#include "imports.h" -#include "enums.h" +#include "shader/program.h" +#include "shader/programopt.h" #include "tnl/tnl.h" +#include "brw_context.h" +#include "brw_util.h" static void brwBindProgram( GLcontext *ctx, GLenum target, @@ -125,6 +125,9 @@ static void brwProgramStringNotify( GLcontext *ctx, struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; if (p == vp) brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; + if (p->program.IsPositionInvariant) { + _mesa_insert_mvp_code(ctx, &p->program); + } p->id = brw->program_id++; p->param_state = p->program.Base.Parameters->StateFlags; diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c new file mode 100644 index 00000000000..cb9169e2eef --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -0,0 +1,259 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +/** @file support for ARB_query_object + * + * ARB_query_object is implemented by using the PIPE_CONTROL command to stall + * execution on the completion of previous depth tests, and write the + * current PS_DEPTH_COUNT to a buffer object. + * + * We use before and after counts when drawing during a query so that + * we don't pick up other clients' query data in ours. To reduce overhead, + * a single BO is used to record the query data for all active queries at + * once. This also gives us a simple bound on how much batchbuffer space is + * required for handling queries, so that we can be sure that we won't + * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT. + */ +#include "main/simple_list.h" +#include "main/imports.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "intel_batchbuffer.h" +#include "intel_reg.h" + +/** Waits on the query object's BO and totals the results for this query */ +static void +brw_queryobj_get_results(struct brw_query_object *query) +{ + int i; + uint64_t *results; + + if (query->bo == NULL) + return; + + /* Map and count the pixels from the current query BO */ + dri_bo_map(query->bo, GL_FALSE); + results = query->bo->virtual; + for (i = query->first_index; i <= query->last_index; i++) { + query->Base.Result += results[i * 2 + 1] - results[i * 2]; + } + dri_bo_unmap(query->bo); + + dri_bo_unreference(query->bo); + query->bo = NULL; +} + +static struct gl_query_object * +brw_new_query_object(GLcontext *ctx, GLuint id) +{ + struct brw_query_object *query; + + query = _mesa_calloc(sizeof(struct brw_query_object)); + + query->Base.Id = id; + query->Base.Result = 0; + query->Base.Active = GL_FALSE; + query->Base.Ready = GL_TRUE; + + return &query->Base; +} + +static void +brw_delete_query(GLcontext *ctx, struct gl_query_object *q) +{ + struct brw_query_object *query = (struct brw_query_object *)q; + + dri_bo_unreference(query->bo); + _mesa_free(query); +} + +static void +brw_begin_query(GLcontext *ctx, struct gl_query_object *q) +{ + struct brw_context *brw = brw_context(ctx); + struct intel_context *intel = intel_context(ctx); + struct brw_query_object *query = (struct brw_query_object *)q; + + /* Reset our driver's tracking of query state. */ + dri_bo_unreference(query->bo); + query->bo = NULL; + query->first_index = -1; + query->last_index = -1; + + insert_at_head(&brw->query.active_head, query); + intel->stats_wm++; +} + +/** + * Begin the ARB_occlusion_query query on a query object. + */ +static void +brw_end_query(GLcontext *ctx, struct gl_query_object *q) +{ + struct brw_context *brw = brw_context(ctx); + struct intel_context *intel = intel_context(ctx); + struct brw_query_object *query = (struct brw_query_object *)q; + + /* Flush the batchbuffer in case it has writes to our query BO. + * Have later queries write to a new query BO so that further rendering + * doesn't delay the collection of our results. + */ + if (query->bo) { + brw_emit_query_end(brw); + intel_batchbuffer_flush(intel->batch); + + dri_bo_unreference(brw->query.bo); + brw->query.bo = NULL; + } + + remove_from_list(query); + + intel->stats_wm--; +} + +static void brw_wait_query(GLcontext *ctx, struct gl_query_object *q) +{ + struct brw_query_object *query = (struct brw_query_object *)q; + + brw_queryobj_get_results(query); + query->Base.Ready = GL_TRUE; +} + +static void brw_check_query(GLcontext *ctx, struct gl_query_object *q) +{ + /* XXX: Need to expose dri_bo_is_idle from bufmgr. */ +#if 0 + struct brw_query_object *query = (struct brw_query_object *)q; + + if (dri_bo_is_idle(query->bo)) { + brw_queryobj_get_results(query); + query->Base.Ready = GL_TRUE; + } +#else + brw_wait_query(ctx, q); +#endif +} + +/** Called to set up the query BO and account for its aperture space */ +void +brw_prepare_query_begin(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + /* Skip if we're not doing any queries. */ + if (is_empty_list(&brw->query.active_head)) + return; + + /* Get a new query BO if we're going to need it. */ + if (brw->query.bo == NULL || + brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) { + dri_bo_unreference(brw->query.bo); + brw->query.bo = NULL; + + brw->query.bo = dri_bo_alloc(intel->bufmgr, "query", 4096, 1); + brw->query.index = 0; + } + + brw_add_validated_bo(brw, brw->query.bo); +} + +/** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */ +void +brw_emit_query_begin(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + struct brw_query_object *query; + + /* Skip if we're not doing any queries, or we've emitted the start. */ + if (brw->query.active || is_empty_list(&brw->query.active_head)) + return; + + BEGIN_BATCH(4, IGNORE_CLIPRECTS); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_WRITE_DEPTH_COUNT); + /* This object could be mapped cacheable, but we don't have an exposed + * mechanism to support that. Since it's going uncached, tell GEM that + * we're writing to it. The usual clflush should be all that's required + * to pick up the results. + */ + OUT_RELOC(brw->query.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + PIPE_CONTROL_GLOBAL_GTT_WRITE | + ((brw->query.index * 2) * sizeof(uint64_t))); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + foreach(query, &brw->query.active_head) { + if (query->bo != brw->query.bo) { + if (query->bo != NULL) + brw_queryobj_get_results(query); + dri_bo_reference(brw->query.bo); + query->bo = brw->query.bo; + query->first_index = brw->query.index; + } + query->last_index = brw->query.index; + } + brw->query.active = GL_TRUE; +} + +/** Called at batchbuffer flush to get an ending PS_DEPTH_COUNT */ +void +brw_emit_query_end(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + if (!brw->query.active) + return; + + BEGIN_BATCH(4, IGNORE_CLIPRECTS); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_WRITE_DEPTH_COUNT); + OUT_RELOC(brw->query.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + PIPE_CONTROL_GLOBAL_GTT_WRITE | + ((brw->query.index * 2 + 1) * sizeof(uint64_t))); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + brw->query.active = GL_FALSE; + brw->query.index++; +} + +void brw_init_queryobj_functions(struct dd_function_table *functions) +{ + functions->NewQueryObject = brw_new_query_object; + functions->DeleteQuery = brw_delete_query; + functions->BeginQuery = brw_begin_query; + functions->EndQuery = brw_end_query; + functions->CheckQuery = brw_check_query; + functions->WaitQuery = brw_wait_query; +} diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index d5175399d6c..9dce6cd8e6c 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -30,9 +30,9 @@ */ -#include "glheader.h" -#include "macros.h" -#include "enums.h" +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" #include "intel_batchbuffer.h" @@ -43,8 +43,6 @@ #include "brw_sf.h" #include "brw_state.h" -#define DO_SETUP_BITS ((1<<FRAG_ATTRIB_MAX)-1) - static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { @@ -57,7 +55,7 @@ static void compile_sf_prog( struct brw_context *brw, /* Begin the compilation: */ - brw_init_compile(&c.func); + brw_init_compile(brw, &c.func); c.key = *key; c.nr_attrs = brw_count_bits(c.key.attrs); @@ -74,6 +72,11 @@ static void compile_sf_prog( struct brw_context *brw, if (c.key.attrs & (1<<i)) { c.attr_to_idx[i] = idx; c.idx_to_attr[idx] = i; + if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) { + c.point_attrs[i].CoordReplace = + brw->attribs.Point->CoordReplace[i - VERT_RESULT_TEX0]; + } else + c.point_attrs[i].CoordReplace = GL_FALSE; idx++; } @@ -82,15 +85,18 @@ static void compile_sf_prog( struct brw_context *brw, switch (key->primitive) { case SF_TRIANGLES: c.nr_verts = 3; - brw_emit_tri_setup( &c ); + brw_emit_tri_setup( &c, GL_TRUE ); break; case SF_LINES: c.nr_verts = 2; - brw_emit_line_setup( &c ); + brw_emit_line_setup( &c, GL_TRUE ); break; case SF_POINTS: c.nr_verts = 1; - brw_emit_point_setup( &c ); + if (key->do_point_sprite) + brw_emit_point_sprite_setup( &c, GL_TRUE ); + else + brw_emit_point_setup( &c, GL_TRUE ); break; case SF_UNFILLED_TRIS: c.nr_verts = 3; @@ -108,29 +114,18 @@ static void compile_sf_prog( struct brw_context *brw, /* Upload */ - brw->sf.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_SF_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->sf.prog_data ); + dri_bo_unreference(brw->sf.prog_bo); + brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->sf.prog_data ); } - -static GLboolean search_cache( struct brw_context *brw, - struct brw_sf_prog_key *key ) -{ - return brw_search_cache(&brw->cache[BRW_SF_PROG], - key, sizeof(*key), - &brw->sf.prog_data, - &brw->sf.prog_gs_offset); -} - - /* Calculate interpolants for triangle and line rasterization. */ -static void upload_sf_prog( struct brw_context *brw ) +static void upload_sf_prog(struct brw_context *brw) { struct brw_sf_prog_key key; @@ -162,7 +157,8 @@ static void upload_sf_prog( struct brw_context *brw ) break; } - + key.do_point_sprite = brw->attribs.Point->PointSprite; + key.SpriteOrigin = brw->attribs.Point->SpriteOrigin; /* _NEW_LIGHT */ key.do_flat_shading = (brw->attribs.Light->ShadeModel == GL_FLAT); key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide); @@ -171,18 +167,22 @@ static void upload_sf_prog( struct brw_context *brw ) if (key.do_twoside_color) key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW); - - if (!search_cache(brw, &key)) + dri_bo_unreference(brw->sf.prog_bo); + brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG, + &key, sizeof(key), + NULL, 0, + &brw->sf.prog_data); + if (brw->sf.prog_bo == NULL) compile_sf_prog( brw, &key ); } const struct brw_tracked_state brw_sf_prog = { .dirty = { - .mesa = (_NEW_LIGHT|_NEW_POLYGON), + .mesa = (_NEW_LIGHT|_NEW_POLYGON|_NEW_POINT), .brw = (BRW_NEW_REDUCED_PRIMITIVE), .cache = CACHE_NEW_VS_PROG }, - .update = upload_sf_prog + .prepare = upload_sf_prog }; diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h index fb72b84ba8a..1c0fb70fe06 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.h +++ b/src/mesa/drivers/dri/i965/brw_sf.h @@ -34,9 +34,9 @@ #define BRW_SF_H +#include "shader/program.h" #include "brw_context.h" #include "brw_eu.h" -#include "program.h" #define SF_POINTS 0 @@ -45,14 +45,19 @@ #define SF_UNFILLED_TRIS 3 struct brw_sf_prog_key { + GLuint attrs:32; GLuint primitive:2; GLuint do_twoside_color:1; GLuint do_flat_shading:1; - GLuint attrs:16; GLuint frontface_ccw:1; - GLuint pad:11; + GLuint do_point_sprite:1; + GLuint pad:10; + GLenum SpriteOrigin; }; +struct brw_sf_point_tex { + GLboolean CoordReplace; +}; struct brw_sf_compile { struct brw_compile func; @@ -94,12 +99,14 @@ struct brw_sf_compile { GLubyte attr_to_idx[VERT_RESULT_MAX]; GLubyte idx_to_attr[VERT_RESULT_MAX]; + struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX]; }; -void brw_emit_tri_setup( struct brw_sf_compile *c ); -void brw_emit_line_setup( struct brw_sf_compile *c ); -void brw_emit_point_setup( struct brw_sf_compile *c ); +void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate ); +void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate ); +void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate ); +void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate ); void brw_emit_anyprim_setup( struct brw_sf_compile *c ); #endif diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index cbaf018c44a..ffdb0ae6df8 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -30,9 +30,9 @@ */ -#include "glheader.h" -#include "macros.h" -#include "enums.h" +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" #include "intel_batchbuffer.h" @@ -59,6 +59,35 @@ static GLboolean have_attr(struct brw_sf_compile *c, return (c->key.attrs & (1<<attr)) ? 1 : 0; } +/** + * Sets VERT_RESULT_FOGC.Y for gl_FrontFacing + * + * This is currently executed if the fragment program uses VERT_RESULT_FOGC + * at all, but this could be eliminated with a scan of the FP contents. + */ +static void +do_front_facing( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + int i; + + if (!have_attr(c, VERT_RESULT_FOGC)) + return; + + brw_push_insn_state(p); + brw_CMP(p, brw_null_reg(), + c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L, + c->det, brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + for (i = 0; i < 3; i++) { + struct brw_reg fogc = get_vert_attr(c, c->vert[i],FRAG_ATTRIB_FOGC); + brw_MOV(p, get_element(fogc, 1), brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, get_element(fogc, 1), brw_imm_f(1)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + brw_pop_insn_state(p); +} /*********************************************************************** @@ -343,15 +372,19 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, -void brw_emit_tri_setup( struct brw_sf_compile *c ) +void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate) { struct brw_compile *p = &c->func; GLuint i; c->nr_verts = 3; - alloc_regs(c); + + if (allocate) + alloc_regs(c); + invert_det(c); copy_z_inv_w(c); + do_front_facing(c); if (c->key.do_twoside_color) do_twoside_color(c); @@ -428,14 +461,17 @@ void brw_emit_tri_setup( struct brw_sf_compile *c ) -void brw_emit_line_setup( struct brw_sf_compile *c ) +void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate) { struct brw_compile *p = &c->func; GLuint i; c->nr_verts = 2; - alloc_regs(c); + + if (allocate) + alloc_regs(c); + invert_det(c); copy_z_inv_w(c); @@ -497,17 +533,103 @@ void brw_emit_line_setup( struct brw_sf_compile *c ) } } +void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) +{ + struct brw_compile *p = &c->func; + GLuint i; + + c->nr_verts = 1; + + if (allocate) + alloc_regs(c); + + copy_z_inv_w(c); + for (i = 0; i < c->nr_setup_regs; i++) + { + struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]]; + struct brw_reg a0 = offset(c->vert[0], i); + GLushort pc, pc_persp, pc_linear; + GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + if (!tex->CoordReplace) { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + } + } + + if (tex->CoordReplace) { + /* Caculate 1.0/PointWidth */ + brw_math(&c->func, + c->tmp, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + c->dx0, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); + + if (c->key.SpriteOrigin == GL_LOWER_LEFT) { + brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); + brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); + brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); + } else { + brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); + } + } else { + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); + } + + { + brw_set_predicate_control_flag_value(p, pc); + if (tex->CoordReplace) { + if (c->key.SpriteOrigin == GL_LOWER_LEFT) { + brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); + brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); + } + else + brw_MOV(p, c->m3C0, brw_imm_f(0.0)); + } else { + brw_MOV(p, c->m3C0, a0); /* constant value */ + } + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} /* Points setup - several simplifications as all attributes are * constant across the face of the point (point sprites excluded!) */ -void brw_emit_point_setup( struct brw_sf_compile *c ) +void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate) { struct brw_compile *p = &c->func; GLuint i; c->nr_verts = 1; - alloc_regs(c); + + if (allocate) + alloc_regs(c); + copy_z_inv_w(c); brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */ @@ -561,10 +683,14 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); + struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); struct brw_reg primmask; struct brw_instruction *jmp; struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); + + GLuint saveflag; + c->nr_verts = 3; alloc_regs(c); primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD); @@ -582,8 +708,15 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) (1<<_3DPRIM_TRIFAN_NOSTIPPLE))); jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); { - brw_emit_tri_setup( c ); - /* note - thread killed in subroutine */ + saveflag = p->flag_value; + brw_push_insn_state(p); + brw_emit_tri_setup( c, GL_FALSE ); + brw_pop_insn_state(p); + p->flag_value = saveflag; + /* note - thread killed in subroutine, so must + * restore the flag which is changed when building + * the subroutine. fix #13240 + */ } brw_land_fwd_jump(p, jmp); @@ -596,12 +729,28 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) (1<<_3DPRIM_LINESTRIP_CONT_BF))); jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); { - brw_emit_line_setup( c ); + saveflag = p->flag_value; + brw_push_insn_state(p); + brw_emit_line_setup( c, GL_FALSE ); + brw_pop_insn_state(p); + p->flag_value = saveflag; /* note - thread killed in subroutine */ } brw_land_fwd_jump(p, jmp); - brw_emit_point_setup( c ); + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE)); + jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + { + saveflag = p->flag_value; + brw_push_insn_state(p); + brw_emit_point_sprite_setup( c, GL_FALSE ); + brw_pop_insn_state(p); + p->flag_value = saveflag; + } + brw_land_fwd_jump(p, jmp); + + brw_emit_point_setup( c, GL_FALSE ); } diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 9a6e5f5f192..4f925d18105 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -34,70 +34,70 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "macros.h" +#include "main/macros.h" +#include "intel_fbo.h" static void upload_sf_vp(struct brw_context *brw) { + GLcontext *ctx = &brw->intel.ctx; + const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; struct brw_sf_viewport sfv; + struct intel_renderbuffer *irb = + intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]); + GLfloat y_scale, y_bias; memset(&sfv, 0, sizeof(sfv)); - - if (brw->intel.driDrawable) - { - /* _NEW_VIEWPORT, BRW_NEW_METAOPS */ - - if (!brw->metaops.active) { - const GLfloat *v = brw->intel.ctx.Viewport._WindowMap.m; - - sfv.viewport.m00 = v[MAT_SX]; - sfv.viewport.m11 = - v[MAT_SY]; - sfv.viewport.m22 = v[MAT_SZ] * brw->intel.depth_scale; - sfv.viewport.m30 = v[MAT_TX]; - sfv.viewport.m31 = - v[MAT_TY] + brw->intel.driDrawable->h; - sfv.viewport.m32 = v[MAT_TZ] * brw->intel.depth_scale; - } - else { - sfv.viewport.m00 = 1; - sfv.viewport.m11 = - 1; - sfv.viewport.m22 = 1; - sfv.viewport.m30 = 0; - sfv.viewport.m31 = brw->intel.driDrawable->h; - sfv.viewport.m32 = 0; + + if (ctx->DrawBuffer->Name) { + /* User-created FBO */ + if (irb && !irb->RenderToTexture) { + y_scale = -1.0; + y_bias = ctx->DrawBuffer->Height; + } else { + y_scale = 1.0; + y_bias = 0; } + } else { + y_scale = -1.0; + y_bias = ctx->DrawBuffer->Height; } - /* XXX: what state for this? */ - if (brw->intel.driDrawable) - { - intelScreenPrivate *screen = brw->intel.intelScreen; - /* _NEW_SCISSOR */ - GLint x = brw->attribs.Scissor->X; - GLint y = brw->attribs.Scissor->Y; - GLuint w = brw->attribs.Scissor->Width; - GLuint h = brw->attribs.Scissor->Height; - - GLint x1 = x; - GLint y1 = brw->intel.driDrawable->h - (y + h); - GLint x2 = x + w - 1; - GLint y2 = y1 + h - 1; - - if (x1 < 0) x1 = 0; - if (y1 < 0) y1 = 0; - if (x2 < 0) x2 = 0; - if (y2 < 0) y2 = 0; - - if (x2 >= screen->width) x2 = screen->width-1; - if (y2 >= screen->height) y2 = screen->height-1; - if (x1 >= screen->width) x1 = screen->width-1; - if (y1 >= screen->height) y1 = screen->height-1; - - sfv.scissor.xmin = x1; - sfv.scissor.xmax = x2; - sfv.scissor.ymin = y1; - sfv.scissor.ymax = y2; + /* _NEW_VIEWPORT, BRW_NEW_METAOPS */ + + if (!brw->metaops.active) { + const GLfloat *v = ctx->Viewport._WindowMap.m; + + sfv.viewport.m00 = v[MAT_SX]; + sfv.viewport.m11 = v[MAT_SY] * y_scale; + sfv.viewport.m22 = v[MAT_SZ] * depth_scale; + sfv.viewport.m30 = v[MAT_TX]; + sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias; + sfv.viewport.m32 = v[MAT_TZ] * depth_scale; + } else { + sfv.viewport.m00 = 1; + sfv.viewport.m11 = - 1; + sfv.viewport.m22 = 1; + sfv.viewport.m30 = 0; + sfv.viewport.m31 = ctx->DrawBuffer->Height; + sfv.viewport.m32 = 0; } - brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv ); + /* _NEW_SCISSOR */ + + /* The scissor only needs to handle the intersection of drawable and + * scissor rect. Clipping to the boundaries of static shared buffers + * for front/back/depth is covered by looping over cliprects in brw_draw.c. + * + * Note that the hardware's coordinates are inclusive, while Mesa's min is + * inclusive but max is exclusive. + */ + sfv.scissor.xmin = ctx->DrawBuffer->_Xmin; + sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1; + sfv.scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax; + sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1; + + dri_bo_unreference(brw->sf.vp_bo); + brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 ); } const struct brw_tracked_state brw_sf_vp = { @@ -107,87 +107,131 @@ const struct brw_tracked_state brw_sf_vp = { .brw = BRW_NEW_METAOPS, .cache = 0 }, - .update = upload_sf_vp + .prepare = upload_sf_vp }; +struct brw_sf_unit_key { + unsigned int total_grf; + unsigned int urb_entry_read_length; + unsigned int nr_urb_entries, urb_size, sfsize; -static void upload_sf_unit( struct brw_context *brw ) + GLenum front_face, cull_face; + GLboolean scissor, line_smooth, point_sprite, point_attenuated; + float line_width; + float point_size; +}; + +static void +sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) +{ + memset(key, 0, sizeof(*key)); + + /* CACHE_NEW_SF_PROG */ + key->total_grf = brw->sf.prog_data->total_grf; + key->urb_entry_read_length = brw->sf.prog_data->urb_read_length; + + /* BRW_NEW_URB_FENCE */ + key->nr_urb_entries = brw->urb.nr_sf_entries; + key->urb_size = brw->urb.vsize; + key->sfsize = brw->urb.sfsize; + + key->scissor = brw->attribs.Scissor->Enabled; + key->front_face = brw->attribs.Polygon->FrontFace; + + if (brw->attribs.Polygon->CullFlag) + key->cull_face = brw->attribs.Polygon->CullFaceMode; + else + key->cull_face = GL_NONE; + + key->line_width = brw->attribs.Line->Width; + key->line_smooth = brw->attribs.Line->SmoothFlag; + + key->point_sprite = brw->attribs.Point->PointSprite; + key->point_size = brw->attribs.Point->Size; + key->point_attenuated = brw->attribs.Point->_Attenuated; +} + +static dri_bo * +sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, + dri_bo **reloc_bufs) { struct brw_sf_unit_state sf; + dri_bo *bo; + memset(&sf, 0, sizeof(sf)); - /* CACHE_NEW_SF_PROG */ - sf.thread0.grf_reg_count = ((brw->sf.prog_data->total_grf-1) & ~15) / 16; - sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6; - sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length; + sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */ sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + sf.thread3.dispatch_grf_start_reg = 3; sf.thread3.urb_entry_read_offset = 1; + sf.thread3.urb_entry_read_length = key->urb_entry_read_length; - /* BRW_NEW_URB_FENCE */ - sf.thread4.nr_urb_entries = brw->urb.nr_sf_entries; - sf.thread4.urb_entry_allocation_size = brw->urb.sfsize - 1; - sf.thread4.max_threads = MIN2(12, brw->urb.nr_sf_entries / 2) - 1; + sf.thread4.nr_urb_entries = key->nr_urb_entries; + sf.thread4.urb_entry_allocation_size = key->sfsize - 1; + sf.thread4.max_threads = MIN2(12, key->nr_urb_entries / 2) - 1; if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) - sf.thread4.max_threads = 0; + sf.thread4.max_threads = 0; if (INTEL_DEBUG & DEBUG_STATS) - sf.thread4.stats_enable = 1; + sf.thread4.stats_enable = 1; /* CACHE_NEW_SF_VP */ - sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5; - + sf.sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset >> 5; /* reloc */ + sf.sf5.viewport_transform = 1; - + /* _NEW_SCISSOR */ - if (brw->attribs.Scissor->Enabled) - sf.sf6.scissor = 1; + if (key->scissor) + sf.sf6.scissor = 1; /* _NEW_POLYGON */ - if (brw->attribs.Polygon->FrontFace == GL_CCW) + if (key->front_face == GL_CCW) sf.sf5.front_winding = BRW_FRONTWINDING_CCW; else sf.sf5.front_winding = BRW_FRONTWINDING_CW; - if (brw->attribs.Polygon->CullFlag) { - switch (brw->attribs.Polygon->CullFaceMode) { - case GL_FRONT: - sf.sf6.cull_mode = BRW_CULLMODE_FRONT; - break; - case GL_BACK: - sf.sf6.cull_mode = BRW_CULLMODE_BACK; - break; - case GL_FRONT_AND_BACK: - sf.sf6.cull_mode = BRW_CULLMODE_BOTH; - break; - default: - assert(0); - break; - } - } - else + switch (key->cull_face) { + case GL_FRONT: + sf.sf6.cull_mode = BRW_CULLMODE_FRONT; + break; + case GL_BACK: + sf.sf6.cull_mode = BRW_CULLMODE_BACK; + break; + case GL_FRONT_AND_BACK: + sf.sf6.cull_mode = BRW_CULLMODE_BOTH; + break; + case GL_NONE: sf.sf6.cull_mode = BRW_CULLMODE_NONE; - + break; + default: + assert(0); + break; + } /* _NEW_LINE */ /* XXX use ctx->Const.Min/MaxLineWidth here */ - sf.sf6.line_width = CLAMP(brw->attribs.Line->Width, 1.0, 5.0) * (1<<1); + sf.sf6.line_width = CLAMP(key->line_width, 1.0, 5.0) * (1<<1); sf.sf6.line_endcap_aa_region_width = 1; - if (brw->attribs.Line->SmoothFlag) + if (key->line_smooth) sf.sf6.aa_enable = 1; - else if (sf.sf6.line_width <= 0x2) - sf.sf6.line_width = 0; + else if (sf.sf6.line_width <= 0x2) + sf.sf6.line_width = 0; /* _NEW_POINT */ - sf.sf6.point_rast_rule = 1; /* opengl conventions */ + sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; /* opengl conventions */ /* XXX clamp max depends on AA vs. non-AA */ - sf.sf7.point_size = CLAMP(brw->attribs.Point->Size, 1.0, 3.0) * (1<<3); - sf.sf7.use_point_size_state = !brw->attribs.Point->_Attenuated; - + + sf.sf7.sprite_point = key->point_sprite; + sf.sf7.point_size = CLAMP(nearbyint(key->point_size), 1, 255) * (1<<3); + sf.sf7.use_point_size_state = !key->point_attenuated; + sf.sf7.aa_line_distance_mode = 0; + /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: */ sf.sf7.trifan_pv = 2; @@ -200,9 +244,48 @@ static void upload_sf_unit( struct brw_context *brw ) sf.sf6.dest_org_vbias = 0x8; sf.sf6.dest_org_hbias = 0x8; - brw->sf.state_gs_offset = brw_cache_data( &brw->cache[BRW_SF_UNIT], &sf ); + bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT, + key, sizeof(*key), + reloc_bufs, 2, + &sf, sizeof(sf), + NULL, NULL); + + /* Emit SF program relocation */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + sf.thread0.grf_reg_count << 1, + offsetof(struct brw_sf_unit_state, thread0), + brw->sf.prog_bo); + + /* Emit SF viewport relocation */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + sf.sf5.front_winding | (sf.sf5.viewport_transform << 1), + offsetof(struct brw_sf_unit_state, sf5), + brw->sf.vp_bo); + + return bo; } +static void upload_sf_unit( struct brw_context *brw ) +{ + struct brw_sf_unit_key key; + dri_bo *reloc_bufs[2]; + + sf_unit_populate_key(brw, &key); + + reloc_bufs[0] = brw->sf.prog_bo; + reloc_bufs[1] = brw->sf.vp_bo; + + dri_bo_unreference(brw->sf.state_bo); + brw->sf.state_bo = brw_search_cache(&brw->cache, BRW_SF_UNIT, + &key, sizeof(key), + reloc_bufs, 2, + NULL); + if (brw->sf.state_bo == NULL) { + brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs); + } +} const struct brw_tracked_state brw_sf_unit = { .dirty = { @@ -215,7 +298,5 @@ const struct brw_tracked_state brw_sf_unit = { .cache = (CACHE_NEW_SF_VP | CACHE_NEW_SF_PROG) }, - .update = upload_sf_unit + .prepare = upload_sf_unit, }; - - diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index b4cbdd7a380..bb22c03eeb6 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -35,6 +35,16 @@ #include "brw_context.h" +static inline void +brw_add_validated_bo(struct brw_context *brw, dri_bo *bo) +{ + assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos)); + + if (bo != NULL) { + dri_bo_reference(bo); + brw->state.validated_bos[brw->state.validated_bo_count++] = bo; + } +}; const struct brw_tracked_state brw_blend_constant_color; const struct brw_tracked_state brw_cc_unit; @@ -48,8 +58,8 @@ const struct brw_tracked_state brw_curbe_offsets; const struct brw_tracked_state brw_invarient_state; const struct brw_tracked_state brw_gs_prog; const struct brw_tracked_state brw_gs_unit; -const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_line_stipple; +const struct brw_tracked_state brw_aa_line_parameters; const struct brw_tracked_state brw_pipelined_state_pointers; const struct brw_tracked_state brw_binding_table_pointers; const struct brw_tracked_state brw_depthbuffer; @@ -74,73 +84,72 @@ const struct brw_tracked_state brw_wm_unit; const struct brw_tracked_state brw_psp_urb_cbs; const struct brw_tracked_state brw_active_vertprog; -const struct brw_tracked_state brw_tnl_vertprog; const struct brw_tracked_state brw_pipe_control; const struct brw_tracked_state brw_clear_surface_cache; const struct brw_tracked_state brw_clear_batch_cache; +const struct brw_tracked_state brw_drawing_rect; +const struct brw_tracked_state brw_indices; +const struct brw_tracked_state brw_vertices; + +/*********************************************************************** + * brw_state.c + */ +void brw_validate_state(struct brw_context *brw); +void brw_upload_state(struct brw_context *brw); +void brw_init_state(struct brw_context *brw); +void brw_destroy_state(struct brw_context *brw); + /*********************************************************************** * brw_state_cache.c */ -GLuint brw_cache_data(struct brw_cache *cache, - const void *data ); - -GLuint brw_cache_data_sz(struct brw_cache *cache, - const void *data, - GLuint data_sz); - -GLuint brw_upload_cache( struct brw_cache *cache, - const void *key, - GLuint key_sz, - const void *data, - GLuint data_sz, - const void *aux, - void *aux_return ); - -GLboolean brw_search_cache( struct brw_cache *cache, - const void *key, - GLuint key_size, - void *aux_return, - GLuint *offset_return); - -void brw_init_caches( struct brw_context *brw ); -void brw_destroy_caches( struct brw_context *brw ); +dri_bo *brw_cache_data(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs); + +dri_bo *brw_cache_data_sz(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + GLuint data_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs); + +dri_bo *brw_upload_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_sz, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_sz, + const void *aux, + void *aux_return ); + +dri_bo *brw_search_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + void *aux_return); +void brw_state_cache_check_size( struct brw_context *brw ); + +void brw_init_cache( struct brw_context *brw ); +void brw_destroy_cache( struct brw_context *brw ); /*********************************************************************** * brw_state_batch.c */ -#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), 0) +#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS) #define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) GLboolean brw_cached_batch_struct( struct brw_context *brw, const void *data, GLuint sz ); - void brw_destroy_batch_cache( struct brw_context *brw ); - - -/*********************************************************************** - * brw_state_pool.c - */ -void brw_init_pools( struct brw_context *brw ); -void brw_destroy_pools( struct brw_context *brw ); - -GLboolean brw_pool_alloc( struct brw_mem_pool *pool, - GLuint size, - GLuint alignment, - GLuint *offset_return); - -void brw_pool_fence( struct brw_context *brw, - struct brw_mem_pool *pool, - GLuint fence ); - - -void brw_pool_check_wrap( struct brw_context *brw, - struct brw_mem_pool *pool ); - -void brw_clear_all_caches( struct brw_context *brw ); -void brw_invalidate_pools( struct brw_context *brw ); void brw_clear_batch_cache_flush( struct brw_context *brw ); #endif diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index 909b0acd121..dc87859f3f5 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -32,9 +32,8 @@ #include "brw_state.h" -#include "brw_aub.h" #include "intel_batchbuffer.h" -#include "imports.h" +#include "main/imports.h" @@ -49,7 +48,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, struct header *newheader = (struct header *)data; if (brw->emit_state_always) { - intel_batchbuffer_data(brw->intel.batch, data, sz, 0); + intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); return GL_TRUE; } @@ -76,7 +75,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, emit: memcpy(item->header, newheader, sz); - intel_batchbuffer_data(brw->intel.batch, data, sz, 0); + intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); return GL_TRUE; } @@ -92,21 +91,12 @@ static void clear_batch_cache( struct brw_context *brw ) } brw->cached_batch_items = NULL; - - - brw_clear_all_caches(brw); - - bmReleaseBuffers(&brw->intel); - - brw_invalidate_pools(brw); } void brw_clear_batch_cache_flush( struct brw_context *brw ) { clear_batch_cache(brw); - brw->wrap = 0; - /* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */ brw->state.dirty.mesa |= ~0; diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 71c6938f9a3..d5b51664066 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -28,12 +28,37 @@ * Authors: * Keith Whitwell <[email protected]> */ - + +/** @file brw_state_cache.c + * + * This file implements a simple static state cache for 965. The consumers + * can query the hash table of state using a cache_id, opaque key data, + * and list of buffers that will be used in relocations, and receive the + * corresponding state buffer object of state (plus associated auxiliary + * data) in return. + * + * The inner workings are a simple hash table based on a CRC of the key data. + * The cache_id and relocation target buffers associated with the state + * buffer are included as auxiliary key data, but are not part of the hash + * value (this should be fixed, but will likely be fixed instead by making + * consumers use structured keys). + * + * Replacement is not implemented. Instead, when the cache gets too big, at + * a safe point (unlock) we throw out all of the cache data and let it + * regenerate for the next rendering operation. + * + * The reloc_buf pointers need to be included as key data, otherwise the + * non-unique values stuffed in the offset in key data through + * brw_cache_data() may result in successful probe for state buffers + * even when the buffer being referenced doesn't match. The result would be + * that the same state cache entry is used twice for different buffers, + * only one of the two buffers referenced gets put into the offset, and the + * incorrect program is run for the other instance. + */ #include "brw_state.h" -#include "brw_aub.h" #include "intel_batchbuffer.h" -#include "imports.h" +#include "main/imports.h" /* XXX: Fixme - have to include these to get the sizes of the prog_key * structs: @@ -44,16 +69,8 @@ #include "brw_sf.h" #include "brw_gs.h" - -/*********************************************************************** - * Check cache for uploaded version of struct, else upload new one. - * Fail when memory is exhausted. - * - * XXX: FIXME: Currently search is so slow it would be quicker to - * regenerate the data every time... - */ - -static GLuint hash_key( const void *key, GLuint key_size ) +static GLuint hash_key( const void *key, GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs) { GLuint *ikey = (GLuint *)key; GLuint hash = 0, i; @@ -62,23 +79,63 @@ static GLuint hash_key( const void *key, GLuint key_size ) /* I'm sure this can be improved on: */ - for (i = 0; i < key_size/4; i++) + for (i = 0; i < key_size/4; i++) { hash ^= ikey[i]; + hash = (hash << 5) | (hash >> 27); + } + + /* Include the BO pointers as key data as well */ + ikey = (GLuint *)reloc_bufs; + key_size = nr_reloc_bufs * sizeof(dri_bo *); + for (i = 0; i < key_size/4; i++) { + hash ^= ikey[i]; + hash = (hash << 5) | (hash >> 27); + } return hash; } -static struct brw_cache_item *search_cache( struct brw_cache *cache, - GLuint hash, - const void *key, - GLuint key_size) +/** + * Marks a new buffer as being chosen for the given cache id. + */ +static void +update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, + dri_bo *bo) +{ + if (bo == cache->last_bo[cache_id]) + return; /* no change */ + + dri_bo_unreference(cache->last_bo[cache_id]); + cache->last_bo[cache_id] = bo; + dri_bo_reference(cache->last_bo[cache_id]); + cache->brw->state.dirty.cache |= 1 << cache_id; +} + +static struct brw_cache_item * +search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, + GLuint hash, const void *key, GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs) { struct brw_cache_item *c; +#if 0 + int bucketcount = 0; + + for (c = cache->items[hash % cache->size]; c; c = c->next) + bucketcount++; + + fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size, + cache->size, bucketcount, cache->n_items); +#endif + for (c = cache->items[hash % cache->size]; c; c = c->next) { - if (c->hash == hash && + if (c->cache_id == cache_id && + c->hash == hash && c->key_size == key_size && - memcmp(c->key, key, key_size) == 0) + memcmp(c->key, key, key_size) == 0 && + c->nr_reloc_bufs == nr_reloc_bufs && + memcmp(c->reloc_bufs, reloc_bufs, + nr_reloc_bufs * sizeof(dri_bo *)) == 0) return c; } @@ -93,8 +150,7 @@ static void rehash( struct brw_cache *cache ) GLuint size, i; size = cache->size * 3; - items = (struct brw_cache_item**) _mesa_malloc(size * sizeof(*items)); - _mesa_memset(items, 0, size * sizeof(*items)); + items = (struct brw_cache_item**) _mesa_calloc(size * sizeof(*items)); for (i = 0; i < cache->size; i++) for (c = cache->items[i]; c; c = next) { @@ -108,142 +164,180 @@ static void rehash( struct brw_cache *cache ) cache->size = size; } - -GLboolean brw_search_cache( struct brw_cache *cache, - const void *key, - GLuint key_size, - void *aux_return, - GLuint *offset_return) +/** + * Returns the buffer object matching cache_id and key, or NULL. + */ +dri_bo *brw_search_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs, + void *aux_return ) { struct brw_cache_item *item; - GLuint addr = 0; - GLuint hash = hash_key(key, key_size); + GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); - item = search_cache(cache, hash, key, key_size); + item = search_cache(cache, cache_id, hash, key, key_size, + reloc_bufs, nr_reloc_bufs); - if (item) { - if (aux_return) - *(void **)aux_return = (void *)((char *)item->key + item->key_size); - - *offset_return = addr = item->offset; - } - - if (item == NULL || addr != cache->last_addr) { - cache->brw->state.dirty.cache |= 1<<cache->id; - cache->last_addr = addr; - } - - return item != NULL; + if (item == NULL) + return NULL; + + if (aux_return) + *(void **)aux_return = (void *)((char *)item->key + item->key_size); + + update_cache_last(cache, cache_id, item->bo); + + dri_bo_reference(item->bo); + return item->bo; } -GLuint brw_upload_cache( struct brw_cache *cache, - const void *key, - GLuint key_size, - const void *data, - GLuint data_size, - const void *aux, - void *aux_return ) -{ - GLuint offset; +dri_bo * +brw_upload_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_size, + const void *aux, + void *aux_return ) +{ struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); - GLuint hash = hash_key(key, key_size); - void *tmp = _mesa_malloc(key_size + cache->aux_size); - - if (!brw_pool_alloc(cache->pool, data_size, 6, &offset)) { - /* Should not be possible: - */ - _mesa_printf("brw_pool_alloc failed\n"); - exit(1); - } + GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); + GLuint relocs_size = nr_reloc_bufs * sizeof(dri_bo *); + GLuint aux_size = cache->aux_size[cache_id]; + void *tmp; + dri_bo *bo; + int i; + + /* Create the buffer object to contain the data */ + bo = dri_bo_alloc(cache->brw->intel.bufmgr, + cache->name[cache_id], data_size, 1 << 6); + + + /* Set up the memory containing the key, aux_data, and reloc_bufs */ + tmp = _mesa_malloc(key_size + aux_size + relocs_size); memcpy(tmp, key, key_size); + memcpy(tmp + key_size, aux, cache->aux_size[cache_id]); + memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size); + for (i = 0; i < nr_reloc_bufs; i++) { + if (reloc_bufs[i] != NULL) + dri_bo_reference(reloc_bufs[i]); + } - if (cache->aux_size) - memcpy(tmp+key_size, aux, cache->aux_size); - + item->cache_id = cache_id; item->key = tmp; item->hash = hash; item->key_size = key_size; - item->offset = offset; + item->reloc_bufs = tmp + key_size + aux_size; + item->nr_reloc_bufs = nr_reloc_bufs; + + item->bo = bo; + dri_bo_reference(bo); item->data_size = data_size; - if (++cache->n_items > cache->size * 1.5) + if (cache->n_items > cache->size * 1.5) rehash(cache); - + hash %= cache->size; item->next = cache->items[hash]; cache->items[hash] = item; - + cache->n_items++; + if (aux_return) { - assert(cache->aux_size); + assert(cache->aux_size[cache_id]); *(void **)aux_return = (void *)((char *)item->key + item->key_size); } if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("upload %s: %d bytes to pool buffer %d offset %x\n", - cache->name, - data_size, - cache->pool->buffer, - offset); + _mesa_printf("upload %s: %d bytes to cache id %d\n", + cache->name[cache_id], + data_size, cache_id); - /* Copy data to the buffer: - */ - bmBufferSubDataAUB(&cache->brw->intel, - cache->pool->buffer, - offset, - data_size, - data, - cache->aub_type, - cache->aub_sub_type); - - - cache->brw->state.dirty.cache |= 1<<cache->id; - cache->last_addr = offset; - - return offset; + /* Copy data to the buffer */ + dri_bo_subdata(bo, 0, data_size, data); + + update_cache_last(cache, cache_id, bo); + + return bo; } /* This doesn't really work with aux data. Use search/upload instead */ -GLuint brw_cache_data_sz(struct brw_cache *cache, - const void *data, - GLuint data_size) +dri_bo * +brw_cache_data_sz(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + GLuint data_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs) { - GLuint addr; + dri_bo *bo; + struct brw_cache_item *item; + GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs); - if (!brw_search_cache(cache, data, data_size, NULL, &addr)) { - addr = brw_upload_cache(cache, - data, data_size, - data, data_size, - NULL, NULL); + item = search_cache(cache, cache_id, hash, data, data_size, + reloc_bufs, nr_reloc_bufs); + if (item) { + update_cache_last(cache, cache_id, item->bo); + dri_bo_reference(item->bo); + return item->bo; } - return addr; + bo = brw_upload_cache(cache, cache_id, + data, data_size, + reloc_bufs, nr_reloc_bufs, + data, data_size, + NULL, NULL); + + return bo; } -GLuint brw_cache_data(struct brw_cache *cache, - const void *data) +/** + * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. + * + * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be + * better to use, as the potentially changing offsets in the data-used-as-key + * will result in excessive cache misses. + */ +dri_bo * +brw_cache_data(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs) { - return brw_cache_data_sz(cache, data, cache->key_size); + return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id], + reloc_bufs, nr_reloc_bufs); } +enum pool_type { + DW_SURFACE_STATE, + DW_GENERAL_STATE +}; + +static void +brw_init_cache_id( struct brw_context *brw, + const char *name, + enum brw_cache_id id, + GLuint key_size, + GLuint aux_size) +{ + struct brw_cache *cache = &brw->cache; + cache->name[id] = strdup(name); + cache->key_size[id] = key_size; + cache->aux_size[id] = aux_size; +} - - -static void brw_init_cache( struct brw_context *brw, - const char *name, - GLuint id, - GLuint key_size, - GLuint aux_size, - GLuint aub_type, - GLuint aub_sub_type ) +void brw_init_cache( struct brw_context *brw ) { - struct brw_cache *cache = &brw->cache[id]; + struct brw_cache *cache = &brw->cache; + cache->brw = brw; - cache->id = id; - cache->name = name; - cache->items = NULL; cache->size = 7; cache->n_items = 0; @@ -251,200 +345,133 @@ static void brw_init_cache( struct brw_context *brw, _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - - cache->key_size = key_size; - cache->aux_size = aux_size; - cache->aub_type = aub_type; - cache->aub_sub_type = aub_sub_type; - switch (aub_type) { - case DW_GENERAL_STATE: cache->pool = &brw->pool[BRW_GS_POOL]; break; - case DW_SURFACE_STATE: cache->pool = &brw->pool[BRW_SS_POOL]; break; - default: assert(0); break; - } -} - -void brw_init_caches( struct brw_context *brw ) -{ - - brw_init_cache(brw, - "CC_VP", - BRW_CC_VP, - sizeof(struct brw_cc_viewport), - 0, - DW_GENERAL_STATE, - DWGS_COLOR_CALC_VIEWPORT_STATE); - - brw_init_cache(brw, - "CC_UNIT", - BRW_CC_UNIT, - sizeof(struct brw_cc_unit_state), - 0, - DW_GENERAL_STATE, - DWGS_COLOR_CALC_STATE); - - brw_init_cache(brw, - "WM_PROG", - BRW_WM_PROG, - sizeof(struct brw_wm_prog_key), - sizeof(struct brw_wm_prog_data), - DW_GENERAL_STATE, - DWGS_KERNEL_INSTRUCTIONS); - - brw_init_cache(brw, - "SAMPLER_DEFAULT_COLOR", - BRW_SAMPLER_DEFAULT_COLOR, - sizeof(struct brw_sampler_default_color), - 0, - DW_GENERAL_STATE, - DWGS_SAMPLER_DEFAULT_COLOR); - - brw_init_cache(brw, - "SAMPLER", - BRW_SAMPLER, - 0, /* variable key/data size */ - 0, - DW_GENERAL_STATE, - DWGS_SAMPLER_STATE); - - brw_init_cache(brw, - "WM_UNIT", - BRW_WM_UNIT, - sizeof(struct brw_wm_unit_state), - 0, - DW_GENERAL_STATE, - DWGS_WINDOWER_IZ_STATE); - - brw_init_cache(brw, - "SF_PROG", - BRW_SF_PROG, - sizeof(struct brw_sf_prog_key), - sizeof(struct brw_sf_prog_data), - DW_GENERAL_STATE, - DWGS_KERNEL_INSTRUCTIONS); - - brw_init_cache(brw, - "SF_VP", - BRW_SF_VP, - sizeof(struct brw_sf_viewport), - 0, - DW_GENERAL_STATE, - DWGS_STRIPS_FANS_VIEWPORT_STATE); - - brw_init_cache(brw, - "SF_UNIT", - BRW_SF_UNIT, - sizeof(struct brw_sf_unit_state), - 0, - DW_GENERAL_STATE, - DWGS_STRIPS_FANS_STATE); - - brw_init_cache(brw, - "VS_UNIT", - BRW_VS_UNIT, - sizeof(struct brw_vs_unit_state), - 0, - DW_GENERAL_STATE, - DWGS_VERTEX_SHADER_STATE); - - brw_init_cache(brw, - "VS_PROG", - BRW_VS_PROG, - sizeof(struct brw_vs_prog_key), - sizeof(struct brw_vs_prog_data), - DW_GENERAL_STATE, - DWGS_KERNEL_INSTRUCTIONS); - - brw_init_cache(brw, - "CLIP_UNIT", - BRW_CLIP_UNIT, - sizeof(struct brw_clip_unit_state), - 0, - DW_GENERAL_STATE, - DWGS_CLIPPER_STATE); - - brw_init_cache(brw, - "CLIP_PROG", - BRW_CLIP_PROG, - sizeof(struct brw_clip_prog_key), - sizeof(struct brw_clip_prog_data), - DW_GENERAL_STATE, - DWGS_KERNEL_INSTRUCTIONS); - - brw_init_cache(brw, - "GS_UNIT", - BRW_GS_UNIT, - sizeof(struct brw_gs_unit_state), - 0, - DW_GENERAL_STATE, - DWGS_GEOMETRY_SHADER_STATE); - - brw_init_cache(brw, - "GS_PROG", - BRW_GS_PROG, - sizeof(struct brw_gs_prog_key), - sizeof(struct brw_gs_prog_data), - DW_GENERAL_STATE, - DWGS_KERNEL_INSTRUCTIONS); - - brw_init_cache(brw, - "SS_SURFACE", - BRW_SS_SURFACE, - sizeof(struct brw_surface_state), - 0, - DW_SURFACE_STATE, - DWSS_SURFACE_STATE); - - brw_init_cache(brw, - "SS_SURF_BIND", - BRW_SS_SURF_BIND, - sizeof(struct brw_surface_binding_table), - 0, - DW_SURFACE_STATE, - DWSS_BINDING_TABLE_STATE); + brw_init_cache_id(brw, + "CC_VP", + BRW_CC_VP, + sizeof(struct brw_cc_viewport), + 0); + + brw_init_cache_id(brw, + "CC_UNIT", + BRW_CC_UNIT, + sizeof(struct brw_cc_unit_state), + 0); + + brw_init_cache_id(brw, + "WM_PROG", + BRW_WM_PROG, + sizeof(struct brw_wm_prog_key), + sizeof(struct brw_wm_prog_data)); + + brw_init_cache_id(brw, + "SAMPLER_DEFAULT_COLOR", + BRW_SAMPLER_DEFAULT_COLOR, + sizeof(struct brw_sampler_default_color), + 0); + + brw_init_cache_id(brw, + "SAMPLER", + BRW_SAMPLER, + 0, /* variable key/data size */ + 0); + + brw_init_cache_id(brw, + "WM_UNIT", + BRW_WM_UNIT, + sizeof(struct brw_wm_unit_state), + 0); + + brw_init_cache_id(brw, + "SF_PROG", + BRW_SF_PROG, + sizeof(struct brw_sf_prog_key), + sizeof(struct brw_sf_prog_data)); + + brw_init_cache_id(brw, + "SF_VP", + BRW_SF_VP, + sizeof(struct brw_sf_viewport), + 0); + + brw_init_cache_id(brw, + "SF_UNIT", + BRW_SF_UNIT, + sizeof(struct brw_sf_unit_state), + 0); + + brw_init_cache_id(brw, + "VS_UNIT", + BRW_VS_UNIT, + sizeof(struct brw_vs_unit_state), + 0); + + brw_init_cache_id(brw, + "VS_PROG", + BRW_VS_PROG, + sizeof(struct brw_vs_prog_key), + sizeof(struct brw_vs_prog_data)); + + brw_init_cache_id(brw, + "CLIP_UNIT", + BRW_CLIP_UNIT, + sizeof(struct brw_clip_unit_state), + 0); + + brw_init_cache_id(brw, + "CLIP_PROG", + BRW_CLIP_PROG, + sizeof(struct brw_clip_prog_key), + sizeof(struct brw_clip_prog_data)); + + brw_init_cache_id(brw, + "GS_UNIT", + BRW_GS_UNIT, + sizeof(struct brw_gs_unit_state), + 0); + + brw_init_cache_id(brw, + "GS_PROG", + BRW_GS_PROG, + sizeof(struct brw_gs_prog_key), + sizeof(struct brw_gs_prog_data)); + + brw_init_cache_id(brw, + "SS_SURFACE", + BRW_SS_SURFACE, + sizeof(struct brw_surface_state), + 0); + + brw_init_cache_id(brw, + "SS_SURF_BIND", + BRW_SS_SURF_BIND, + 0, + 0); } - -/* When we lose hardware context, need to invalidate the surface cache - * as these structs must be explicitly re-uploaded. They are subject - * to fixup by the memory manager as they contain absolute agp - * offsets, so we need to ensure there is a fresh version of the - * struct available to receive the fixup. - * - * XXX: Need to ensure that there aren't two versions of a surface or - * bufferobj with different backing data active in the same buffer at - * once? Otherwise the cache could confuse them. Maybe better not to - * cache at all? - * - * --> Isn't this the same as saying need to ensure batch is flushed - * before new data is uploaded to an existing buffer? We - * already try to make sure of that. - */ -static void clear_cache( struct brw_cache *cache ) +static void +brw_clear_cache( struct brw_context *brw ) { struct brw_cache_item *c, *next; GLuint i; - for (i = 0; i < cache->size; i++) { - for (c = cache->items[i]; c; c = next) { + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("%s\n", __FUNCTION__); + + for (i = 0; i < brw->cache.size; i++) { + for (c = brw->cache.items[i]; c; c = next) { + int j; + next = c->next; + for (j = 0; j < c->nr_reloc_bufs; j++) + dri_bo_unreference(c->reloc_bufs[j]); + dri_bo_unreference(c->bo); free((void *)c->key); free(c); } - cache->items[i] = NULL; + brw->cache.items[i] = NULL; } - cache->n_items = 0; -} - -void brw_clear_all_caches( struct brw_context *brw ) -{ - GLint i; - - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); - - for (i = 0; i < BRW_MAX_CACHE; i++) - clear_cache(&brw->cache[i]); + brw->cache.n_items = 0; if (brw->curbe.last_buf) { _mesa_free(brw->curbe.last_buf); @@ -456,14 +483,25 @@ void brw_clear_all_caches( struct brw_context *brw ) brw->state.dirty.cache |= ~0; } +void brw_state_cache_check_size( struct brw_context *brw ) +{ + /* un-tuned guess. We've got around 20 state objects for a total of around + * 32k, so 1000 of them is around 1.5MB. + */ + if (brw->cache.n_items > 1000) + brw_clear_cache(brw); +} - - - -void brw_destroy_caches( struct brw_context *brw ) +void brw_destroy_cache( struct brw_context *brw ) { GLuint i; - for (i = 0; i < BRW_MAX_CACHE; i++) - clear_cache(&brw->cache[i]); + brw_clear_cache(brw); + for (i = 0; i < BRW_MAX_CACHE; i++) { + dri_bo_unreference(brw->cache.last_bo[i]); + free(brw->cache.name[i]); + } + free(brw->cache.items); + brw->cache.items = NULL; + brw->cache.size = 0; } diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c new file mode 100644 index 00000000000..b28c57c2bcf --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -0,0 +1,200 @@ +/* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +#include "main/mtypes.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +/** + * Prints out a header, the contents, and the message associated with + * the hardware state data given. + * + * \param name Name of the state object + * \param data Pointer to the base of the state object + * \param hw_offset Hardware offset of the base of the state data. + * \param index Index of the DWORD being output. + */ +static void +state_out(const char *name, void *data, uint32_t hw_offset, int index, + char *fmt, ...) +{ + va_list va; + + fprintf(stderr, "%8s: 0x%08x: 0x%08x: ", + name, hw_offset + index * 4, ((uint32_t *)data)[index]); + va_start(va, fmt); + vfprintf(stderr, fmt, va); + va_end(va); +} + +/** Generic, undecoded state buffer debug printout */ +static void +state_struct_out(const char *name, dri_bo *buffer, unsigned int state_size) +{ + int i; + + if (buffer == NULL) + return; + + dri_bo_map(buffer, GL_FALSE); + for (i = 0; i < state_size / 4; i++) { + state_out(name, buffer->virtual, buffer->offset, i, + "dword %d\n", i); + } + dri_bo_unmap(buffer); +} + +static const char * +get_965_surfacetype(unsigned int surfacetype) +{ + switch (surfacetype) { + case 0: return "1D"; + case 1: return "2D"; + case 2: return "3D"; + case 3: return "CUBE"; + case 4: return "BUFFER"; + case 7: return "NULL"; + default: return "unknown"; + } +} + +static void dump_wm_surface_state(struct brw_context *brw) +{ + int i; + + for (i = 0; i < brw->wm.nr_surfaces; i++) { + dri_bo *surf_bo = brw->wm.surf_bo[i]; + unsigned int surfoff; + struct brw_surface_state *surf; + char name[20]; + + if (surf_bo == NULL) { + fprintf(stderr, "WM SS%d: NULL\n", i); + continue; + } + dri_bo_map(surf_bo, GL_FALSE); + surfoff = surf_bo->offset; + surf = (struct brw_surface_state *)(surf_bo->virtual); + + sprintf(name, "WM SS%d", i); + state_out(name, surf, surfoff, 0, "%s\n", + get_965_surfacetype(surf->ss0.surface_type)); + state_out(name, surf, surfoff, 1, "offset\n"); + state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", + surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count); + state_out(name, surf, surfoff, 3, "pitch %d, %stiled\n", + surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not "); + state_out(name, surf, surfoff, 4, "mip base %d\n", + surf->ss4.min_lod); + + dri_bo_unmap(surf_bo); + } +} + +static void dump_sf_viewport_state(struct brw_context *brw) +{ + const char *name = "SF VP"; + struct brw_sf_viewport *vp; + uint32_t vp_off; + + if (brw->sf.vp_bo == NULL) + return; + + dri_bo_map(brw->sf.vp_bo, GL_FALSE); + + vp = brw->sf.vp_bo->virtual; + vp_off = brw->sf.vp_bo->offset; + + state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); + state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11); + state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22); + state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30); + state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31); + state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32); + + state_out(name, vp, vp_off, 6, "top left = %d,%d\n", + vp->scissor.xmin, vp->scissor.ymin); + state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n", + vp->scissor.xmax, vp->scissor.ymax); + + dri_bo_unmap(brw->sf.vp_bo); +} + +static void brw_debug_prog(const char *name, dri_bo *prog) +{ + unsigned int i; + uint32_t *data; + + if (prog == NULL) + return; + + dri_bo_map(prog, GL_FALSE); + + data = prog->virtual; + + for (i = 0; i < prog->size / 4 / 4; i++) { + fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", + name, (unsigned int)prog->offset + i * 4 * 4, + data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); + } + + dri_bo_unmap(prog); +} + + +/** + * Print additional debug information associated with the batchbuffer + * when DEBUG_BATCH is set. + * + * For 965, this means mapping the state buffers that would have been referenced + * by the batchbuffer and dumping them. + * + * The buffer offsets printed rely on the buffer containing the last offset + * it was validated at. + */ +void brw_debug_batch(struct intel_context *intel) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + state_struct_out("WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces); + dump_wm_surface_state(brw); + + state_struct_out("VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state)); + brw_debug_prog("VS prog", brw->vs.prog_bo); + + state_struct_out("GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state)); + brw_debug_prog("GS prog", brw->gs.prog_bo); + + state_struct_out("SF", brw->sf.state_bo, sizeof(struct brw_sf_unit_state)); + dump_sf_viewport_state(brw); + brw_debug_prog("SF prog", brw->sf.prog_bo); + + state_struct_out("WM", brw->wm.state_bo, sizeof(struct brw_wm_unit_state)); + brw_debug_prog("WM prog", brw->wm.prog_bo); +} diff --git a/src/mesa/drivers/dri/i965/brw_state_pool.c b/src/mesa/drivers/dri/i965/brw_state_pool.c deleted file mode 100644 index b9926f2a5d7..00000000000 --- a/src/mesa/drivers/dri/i965/brw_state_pool.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <[email protected]> - */ - - -#include "brw_state.h" -#include "imports.h" - -#include "intel_ioctl.h" -#include "bufmgr.h" - -GLboolean brw_pool_alloc( struct brw_mem_pool *pool, - GLuint size, - GLuint align, - GLuint *offset_return) -{ - GLuint align_mask = (1<<align)-1; - GLuint fixup = ((pool->offset + align_mask) & ~align_mask) - pool->offset; - - size = (size + 3) & ~3; - - if (pool->offset + fixup + size >= pool->size) { - _mesa_printf("%s failed\n", __FUNCTION__); - assert(0); - exit(0); - } - - pool->offset += fixup; - *offset_return = pool->offset; - pool->offset += size; - - return GL_TRUE; -} - -static -void brw_invalidate_pool( struct intel_context *intel, - struct brw_mem_pool *pool ) -{ - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("\n\n\n %s \n\n\n", __FUNCTION__); - - bmBufferData(intel, - pool->buffer, - pool->size, - NULL, - 0); - - pool->offset = 0; - - brw_clear_all_caches(pool->brw); -} - -static void brw_invalidate_pool_cb( struct intel_context *intel, void *ptr ) -{ - struct brw_mem_pool *pool = (struct brw_mem_pool *) ptr; - - pool->offset = 0; - brw_clear_all_caches(pool->brw); -} - - - -static void brw_init_pool( struct brw_context *brw, - GLuint pool_id, - GLuint size ) -{ - struct brw_mem_pool *pool = &brw->pool[pool_id]; - - pool->size = size; - pool->brw = brw; - - bmGenBuffers(&brw->intel, "pool", 1, &pool->buffer, 12); - - /* Also want to say not to wait on fences when data is presented - */ - bmBufferSetInvalidateCB(&brw->intel, pool->buffer, - brw_invalidate_pool_cb, - pool, - GL_TRUE); - - bmBufferData(&brw->intel, - pool->buffer, - pool->size, - NULL, - 0); - -} - -static void brw_destroy_pool( struct brw_context *brw, - GLuint pool_id ) -{ - struct brw_mem_pool *pool = &brw->pool[pool_id]; - - bmDeleteBuffers(&brw->intel, 1, &pool->buffer); -} - - -void brw_pool_check_wrap( struct brw_context *brw, - struct brw_mem_pool *pool ) -{ - if (pool->offset > (pool->size * 3) / 4) { - if (brw->intel.aub_file) - brw->intel.aub_wrap = 1; - else - brw->state.dirty.brw |= BRW_NEW_CONTEXT; - } - -} - -void brw_init_pools( struct brw_context *brw ) -{ - brw_init_pool(brw, BRW_GS_POOL, 0x80000); - brw_init_pool(brw, BRW_SS_POOL, 0x80000); -} - -void brw_destroy_pools( struct brw_context *brw ) -{ - brw_destroy_pool(brw, BRW_GS_POOL); - brw_destroy_pool(brw, BRW_SS_POOL); -} - - -void brw_invalidate_pools( struct brw_context *brw ) -{ - brw_invalidate_pool(&brw->intel, &brw->pool[BRW_GS_POOL]); - brw_invalidate_pool(&brw->intel, &brw->pool[BRW_SS_POOL]); -} diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 92c07c29624..16b0496f479 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -33,7 +33,6 @@ #include "brw_context.h" #include "brw_state.h" -#include "bufmgr.h" #include "intel_batchbuffer.h" /* This is used to initialize brw->state.atoms[]. We could use this @@ -46,7 +45,6 @@ const struct brw_tracked_state *atoms[] = { &brw_check_fallback, - &brw_tnl_vertprog, &brw_active_vertprog, &brw_wm_input_sizes, &brw_vs_prog, @@ -80,19 +78,17 @@ const struct brw_tracked_state *atoms[] = */ &brw_invarient_state, &brw_state_base_address, - &brw_pipe_control, &brw_binding_table_pointers, &brw_blend_constant_color, - &brw_drawing_rect, &brw_depthbuffer, &brw_polygon_stipple, &brw_polygon_stipple_offset, &brw_line_stipple, - + &brw_aa_line_parameters, /* Ordering of the commands below is documented as fixed. */ #if 0 @@ -103,6 +99,9 @@ const struct brw_tracked_state *atoms[] = &brw_psp_urb_cbs, #endif + &brw_drawing_rect, + &brw_indices, + &brw_vertices, NULL, /* brw_constant_buffer */ }; @@ -112,8 +111,7 @@ void brw_init_state( struct brw_context *brw ) { GLuint i; - brw_init_pools(brw); - brw_init_caches(brw); + brw_init_cache(brw); brw->state.atoms = _mesa_malloc(sizeof(atoms)); brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms); @@ -138,9 +136,8 @@ void brw_destroy_state( struct brw_context *brw ) brw->state.atoms = NULL; } - brw_destroy_caches(brw); + brw_destroy_cache(brw); brw_destroy_batch_cache(brw); - brw_destroy_pools(brw); } /*********************************************************************** @@ -172,20 +169,34 @@ static void xor_states( struct brw_state_flags *result, result->cache = a->cache ^ b->cache; } +static void +brw_clear_validated_bos(struct brw_context *brw) +{ + int i; + + /* Clear the last round of validated bos */ + for (i = 0; i < brw->state.validated_bo_count; i++) { + dri_bo_unreference(brw->state.validated_bos[i]); + brw->state.validated_bos[i] = NULL; + } + brw->state.validated_bo_count = 0; +} /*********************************************************************** * Emit all state: */ void brw_validate_state( struct brw_context *brw ) { + struct intel_context *intel = &brw->intel; struct brw_state_flags *state = &brw->state.dirty; GLuint i; + brw_clear_validated_bos(brw); + state->mesa |= brw->intel.NewGLState; brw->intel.NewGLState = 0; - if (brw->wrap) - state->brw |= BRW_NEW_CONTEXT; + brw_add_validated_bo(brw, intel->batch->buf); if (brw->emit_state_always) { state->mesa |= ~0; @@ -210,13 +221,30 @@ void brw_validate_state( struct brw_context *brw ) if (brw->state.dirty.brw & BRW_NEW_CONTEXT) brw_clear_batch_cache_flush(brw); + brw->intel.Fallback = 0; - /* Make an early reference to the state pools, as we don't cope - * well with them being evicted from here down. - */ - (void)bmBufferOffset(&brw->intel, brw->pool[BRW_GS_POOL].buffer); - (void)bmBufferOffset(&brw->intel, brw->pool[BRW_SS_POOL].buffer); - (void)bmBufferOffset(&brw->intel, brw->intel.batch->buffer); + /* do prepare stage for all atoms */ + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = brw->state.atoms[i]; + + if (brw->intel.Fallback) + break; + + if (check_state(state, &atom->dirty)) { + if (atom->prepare) { + atom->prepare(brw); + } + } + } +} + + +void brw_upload_state(struct brw_context *brw) +{ + struct brw_state_flags *state = &brw->state.dirty; + int i; + + brw_clear_validated_bos(brw); if (INTEL_DEBUG) { /* Debug version which enforces various sanity checks on the @@ -234,12 +262,14 @@ void brw_validate_state( struct brw_context *brw ) assert(atom->dirty.mesa || atom->dirty.brw || atom->dirty.cache); - assert(atom->update); + + if (brw->intel.Fallback) + break; if (check_state(state, &atom->dirty)) { - brw->state.atoms[i]->update( brw ); - -/* emit_foo(brw); */ + if (atom->emit) { + atom->emit( brw ); + } } accumulate_state(&examined, &atom->dirty); @@ -255,10 +285,19 @@ void brw_validate_state( struct brw_context *brw ) } else { for (i = 0; i < Elements(atoms); i++) { - if (check_state(state, &brw->state.atoms[i]->dirty)) - brw->state.atoms[i]->update( brw ); + const struct brw_tracked_state *atom = brw->state.atoms[i]; + + if (brw->intel.Fallback) + break; + + if (check_state(state, &atom->dirty)) { + if (atom->emit) { + atom->emit( brw ); + } + } } } - memset(state, 0, sizeof(*state)); + if (!brw->intel.Fallback) + memset(state, 0, sizeof(*state)); } diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 10fee944e8d..4e577d0f6a8 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -141,7 +141,8 @@ struct brw_depthbuffer struct { GLuint pitch:18; GLuint format:3; - GLuint pad:4; + GLuint pad:2; + GLuint software_tiled_rendering_mode:2; GLuint depth_offset_disable:1; GLuint tile_walk:1; GLuint tiled_surface:1; @@ -166,14 +167,64 @@ struct brw_depthbuffer union { struct { - GLuint pad:12; - GLuint min_array_element:9; + GLuint pad:10; + GLuint min_array_element:11; GLuint depth:11; } bits; GLuint dword; } dword4; }; +struct brw_depthbuffer_g4x +{ + union header_union header; + + union { + struct { + GLuint pitch:18; + GLuint format:3; + GLuint pad:2; + GLuint software_tiled_rendering_mode:2; + GLuint depth_offset_disable:1; + GLuint tile_walk:1; + GLuint tiled_surface:1; + GLuint pad2:1; + GLuint surface_type:3; + } bits; + GLuint dword; + } dword1; + + GLuint dword2_base_addr; + + union { + struct { + GLuint pad:1; + GLuint mipmap_layout:1; + GLuint lod:4; + GLuint width:13; + GLuint height:13; + } bits; + GLuint dword; + } dword3; + + union { + struct { + GLuint pad:10; + GLuint min_array_element:11; + GLuint depth:11; + } bits; + GLuint dword; + } dword4; + + union { + struct { + GLuint xoffset:16; + GLuint yoffset:16; + } bits; + GLuint dword; + } dword5; /* NEW in Integrated Graphics Device */ +}; + struct brw_drawrect { struct header header; @@ -213,6 +264,25 @@ struct brw_indexbuffer GLuint buffer_end; }; +/* NEW in Integrated Graphics Device */ +struct brw_aa_line_parameters +{ + struct header header; + + struct { + GLuint aa_coverage_scope:8; + GLuint pad0:8; + GLuint aa_coverage_bias:8; + GLuint pad1:8; + } bits0; + + struct { + GLuint aa_coverage_endcap_slope:8; + GLuint pad0:8; + GLuint aa_coverage_endcap_bias:8; + GLuint pad1:8; + } bits1; +}; struct brw_line_stipple { @@ -239,39 +309,39 @@ struct brw_pipelined_state_pointers struct { GLuint pad:5; - GLuint offset:27; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ } vs; struct { GLuint enable:1; GLuint pad:4; - GLuint offset:27; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ } gs; struct { GLuint enable:1; GLuint pad:4; - GLuint offset:27; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ } clp; struct { GLuint pad:5; - GLuint offset:27; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ } sf; struct { GLuint pad:5; - GLuint offset:27; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ } wm; struct { GLuint pad:5; - GLuint offset:27; /* KW: check me! */ + GLuint offset:27; /* Offset from GENERAL_STATE_BASE. KW: check me! */ } cc; }; @@ -315,7 +385,8 @@ struct brw_pipe_control { GLuint length:8; GLuint notify_enable:1; - GLuint pad:2; + GLuint texture_cache_flush_enable:1; + GLuint indirect_state_pointers_disable:1; GLuint instruction_state_cache_flush_enable:1; GLuint write_cache_flush_enable:1; GLuint depth_stall_enable:1; @@ -473,7 +544,7 @@ struct thread0 GLuint pad0:1; GLuint grf_reg_count:3; GLuint pad1:2; - GLuint kernel_start_pointer:26; + GLuint kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */ }; struct thread1 @@ -547,8 +618,8 @@ struct brw_clip_unit_state GLuint pad1:1; GLuint urb_entry_allocation_size:5; GLuint pad2:1; - GLuint max_threads:1; /* may be less */ - GLuint pad3:6; + GLuint max_threads:5; /* may be less */ + GLuint pad3:2; } thread4; struct @@ -557,7 +628,7 @@ struct brw_clip_unit_state GLuint clip_mode:3; GLuint userclip_enable_flags:8; GLuint userclip_must_clip:1; - GLuint pad1:1; + GLuint negative_w_clip_test:1; GLuint guard_band_enable:1; GLuint viewport_z_clip_enable:1; GLuint viewport_xy_clip_enable:1; @@ -637,7 +708,7 @@ struct brw_cc_unit_state struct { GLuint pad0:5; - GLuint cc_viewport_state_offset:27; + GLuint cc_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */ } cc4; struct @@ -699,7 +770,7 @@ struct brw_sf_unit_state GLuint front_winding:1; GLuint viewport_transform:1; GLuint pad0:3; - GLuint sf_viewport_state_offset:27; + GLuint sf_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */ } sf5; struct @@ -724,7 +795,8 @@ struct brw_sf_unit_state GLuint use_point_size_state:1; GLuint subpixel_precision:1; GLuint sprite_point:1; - GLuint pad0:11; + GLuint pad0:10; + GLuint aa_line_distance_mode:1; GLuint trifan_pv:2; GLuint linestrip_pv:2; GLuint tristrip_pv:2; @@ -749,8 +821,8 @@ struct brw_gs_unit_state GLuint pad1:1; GLuint urb_entry_allocation_size:5; GLuint pad2:1; - GLuint max_threads:1; - GLuint pad3:6; + GLuint max_threads:5; + GLuint pad3:2; } thread4; struct @@ -764,9 +836,14 @@ struct brw_gs_unit_state struct { GLuint max_vp_index:4; - GLuint pad0:26; - GLuint reorder_enable:1; + GLuint pad0:12; + GLuint svbi_post_inc_value:10; GLuint pad1:1; + GLuint svbi_post_inc_enable:1; + GLuint svbi_payload:1; + GLuint discard_adjaceny:1; + GLuint reorder_enable:1; + GLuint pad2:1; } gs6; }; @@ -786,8 +863,8 @@ struct brw_vs_unit_state GLuint pad1:1; GLuint urb_entry_allocation_size:5; GLuint pad2:1; - GLuint max_threads:4; - GLuint pad3:3; + GLuint max_threads:6; + GLuint pad3:1; } thread4; struct @@ -815,7 +892,7 @@ struct brw_wm_unit_state struct { GLuint stats_enable:1; - GLuint pad0:1; + GLuint depth_buffer_clear:1; GLuint sampler_count:3; GLuint sampler_state_pointer:27; } wm4; @@ -825,7 +902,9 @@ struct brw_wm_unit_state GLuint enable_8_pix:1; GLuint enable_16_pix:1; GLuint enable_32_pix:1; - GLuint pad0:7; + GLuint enable_con_32_pix:1; + GLuint enable_con_64_pix:1; + GLuint pad0:5; GLuint legacy_global_depth_bias:1; GLuint line_stipple:1; GLuint depth_offset:1; @@ -838,9 +917,8 @@ struct brw_wm_unit_state GLuint program_computes_depth:1; GLuint program_uses_killpixel:1; GLuint legacy_line_rast: 1; - GLuint pad1:1; - GLuint max_threads:6; - GLuint pad2:1; + GLuint transposed_urb_read_enable:1; + GLuint max_threads:7; } wm5; GLfloat global_depth_offset_constant; @@ -924,6 +1002,7 @@ struct brw_sf_viewport GLfloat m32; } viewport; + /* scissor coordinates are inclusive */ struct { GLshort xmin; GLshort ymin; @@ -978,10 +1057,26 @@ struct brw_surface_state } ss3; struct { - GLuint pad:19; - GLuint min_array_elt:9; + GLuint multisample_position_palette_index:3; + GLuint pad1:1; + GLuint num_multisamples:3; + GLuint pad0:1; + GLuint render_target_view_extent:9; + GLuint min_array_elt:11; GLuint min_lod:4; } ss4; + + struct { + GLuint pad1:16; + GLuint llc_mapping:1; + GLuint mlc_mapping:1; + GLuint gfdt:1; + GLuint gfdt_src:1; + GLuint y_offset:4; + GLuint pad0:1; + GLuint x_offset:7; + } ss5; /* NEW in Integrated Graphics Device */ + }; @@ -1301,6 +1396,17 @@ struct brw_instruction GLuint end_of_thread:1; } sampler; + struct { + GLuint binding_table_index:8; + GLuint sampler:4; + GLuint msg_type:4; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } sampler_g4x; + struct brw_urb_immediate urb; struct { diff --git a/src/mesa/drivers/dri/i965/brw_tex.c b/src/mesa/drivers/dri/i965/brw_tex.c index 9d4b9867d24..0bb6f176a0d 100644 --- a/src/mesa/drivers/dri/i965/brw_tex.c +++ b/src/mesa/drivers/dri/i965/brw_tex.c @@ -30,165 +30,30 @@ */ -#include "glheader.h" -#include "mtypes.h" -#include "imports.h" -#include "simple_list.h" -#include "enums.h" -#include "image.h" -#include "teximage.h" -#include "texstore.h" -#include "texformat.h" +#include "main/glheader.h" +#include "main/mtypes.h" +#include "main/imports.h" +#include "main/simple_list.h" +#include "main/enums.h" +#include "main/image.h" +#include "main/teximage.h" +#include "main/texstore.h" +#include "main/texformat.h" + #include "texmem.h" #include "intel_context.h" -#include "intel_ioctl.h" #include "intel_regions.h" +#include "intel_tex.h" #include "brw_context.h" #include "brw_defines.h" - - -static const struct gl_texture_format * -brwChooseTextureFormat( GLcontext *ctx, GLint internalFormat, - GLenum srcFormat, GLenum srcType ) -{ - switch ( internalFormat ) { - case 4: - case GL_RGBA: - case GL_COMPRESSED_RGBA: - if (srcFormat == GL_BGRA && srcType == GL_UNSIGNED_SHORT_4_4_4_4_REV) - return &_mesa_texformat_argb4444; - else if (srcFormat == GL_BGRA && srcType == GL_UNSIGNED_SHORT_1_5_5_5_REV) - return &_mesa_texformat_argb1555; - else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || - (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE) || - (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8)) - return &_mesa_texformat_rgba8888_rev; - else - return &_mesa_texformat_argb8888; - - case GL_RGBA8: - case GL_RGB10_A2: - case GL_RGBA12: - case GL_RGBA16: - return &_mesa_texformat_argb8888; - - case GL_RGB8: - case GL_RGB10: - case GL_RGB12: - case GL_RGB16: - /* Broadwater doesn't support RGB888 textures, so these must be - * stored as ARGB. - */ - return &_mesa_texformat_argb8888; - - case 3: - case GL_COMPRESSED_RGB: - case GL_RGB: - if (srcFormat == GL_RGB && - srcType == GL_UNSIGNED_SHORT_5_6_5) - return &_mesa_texformat_rgb565; - else - return &_mesa_texformat_argb8888; - - - case GL_RGB5: - case GL_RGB5_A1: - return &_mesa_texformat_argb1555; - - case GL_R3_G3_B2: - case GL_RGBA2: - case GL_RGBA4: - case GL_RGB4: - return &_mesa_texformat_argb4444; - - case GL_ALPHA: - case GL_ALPHA4: - case GL_ALPHA8: - case GL_ALPHA12: - case GL_ALPHA16: - case GL_COMPRESSED_ALPHA: - return &_mesa_texformat_a8; - - case 1: - case GL_LUMINANCE: - case GL_LUMINANCE4: - case GL_LUMINANCE8: - case GL_LUMINANCE12: - case GL_LUMINANCE16: - case GL_COMPRESSED_LUMINANCE: - return &_mesa_texformat_l8; - - case 2: - case GL_LUMINANCE_ALPHA: - case GL_LUMINANCE4_ALPHA4: - case GL_LUMINANCE6_ALPHA2: - case GL_LUMINANCE8_ALPHA8: - case GL_LUMINANCE12_ALPHA4: - case GL_LUMINANCE12_ALPHA12: - case GL_LUMINANCE16_ALPHA16: - case GL_COMPRESSED_LUMINANCE_ALPHA: - return &_mesa_texformat_al88; - - case GL_INTENSITY: - case GL_INTENSITY4: - case GL_INTENSITY8: - case GL_INTENSITY12: - case GL_INTENSITY16: - case GL_COMPRESSED_INTENSITY: - return &_mesa_texformat_i8; - - case GL_YCBCR_MESA: - if (srcType == GL_UNSIGNED_SHORT_8_8_MESA || - srcType == GL_UNSIGNED_BYTE) - return &_mesa_texformat_ycbcr; - else - return &_mesa_texformat_ycbcr_rev; - - case GL_COMPRESSED_RGB_FXT1_3DFX: - return &_mesa_texformat_rgb_fxt1; - case GL_COMPRESSED_RGBA_FXT1_3DFX: - return &_mesa_texformat_rgba_fxt1; - - case GL_RGB_S3TC: - case GL_RGB4_S3TC: - case GL_RGBA_S3TC: - case GL_RGBA4_S3TC: - case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: - case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - return &_mesa_texformat_rgb_dxt1; /* there is no rgba support? */ - - case GL_DEPTH_COMPONENT: - case GL_DEPTH_COMPONENT16: - case GL_DEPTH_COMPONENT24: - case GL_DEPTH_COMPONENT32: - return &_mesa_texformat_z16; - - default: - fprintf(stderr, "unexpected texture format %s in %s\n", - _mesa_lookup_enum_by_nr(internalFormat), - __FUNCTION__); - return NULL; - } - - return NULL; /* never get here */ -} - - -void brwInitTextureFuncs( struct dd_function_table *functions ) -{ - functions->ChooseTextureFormat = brwChooseTextureFormat; -} - -void brw_FrameBufferTexInit( struct brw_context *brw ) +void brw_FrameBufferTexInit( struct brw_context *brw, + struct intel_region *region ) { struct intel_context *intel = &brw->intel; GLcontext *ctx = &intel->ctx; - struct intel_region *region = intel->front_region; struct gl_texture_object *obj; struct gl_texture_image *img; @@ -209,6 +74,26 @@ void brw_FrameBufferTexInit( struct brw_context *brw ) void brw_FrameBufferTexDestroy( struct brw_context *brw ) { - brw->intel.ctx.Driver.DeleteTexture( &brw->intel.ctx, - brw->intel.frame_buffer_texobj ); + if (brw->intel.frame_buffer_texobj != NULL) + brw->intel.ctx.Driver.DeleteTexture( &brw->intel.ctx, + brw->intel.frame_buffer_texobj ); + brw->intel.frame_buffer_texobj = NULL; +} + +/** + * Finalizes all textures, completing any rendering that needs to be done + * to prepare them. + */ +void brw_validate_textures( struct brw_context *brw ) +{ + struct intel_context *intel = &brw->intel; + int i; + + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i]; + + if (texUnit->_ReallyEnabled) { + intel_finalize_mipmap_tree(intel, i); + } + } } diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index af1ad0f1ef1..51a617fcb40 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -35,10 +35,12 @@ #include "intel_mipmap_tree.h" #include "intel_tex_layout.h" -#include "macros.h" +#include "intel_context.h" +#include "main/macros.h" +#define FILE_DEBUG_FLAG DEBUG_MIPTREE -GLboolean brw_miptree_layout( struct intel_mipmap_tree *mt ) +GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_tree *mt ) { /* XXX: these vary depending on image format: */ @@ -53,11 +55,20 @@ GLboolean brw_miptree_layout( struct intel_mipmap_tree *mt ) GLuint pack_x_pitch, pack_x_nr; GLuint pack_y_pitch; GLuint level; + GLuint align_h = 2; + GLuint align_w = 4; - mt->pitch = ((mt->width0 * mt->cpp + 3) & ~3) / mt->cpp; mt->total_height = 0; + + if (mt->compressed) { + align_w = intel_compressed_alignment(mt->internal_format); + mt->pitch = ALIGN(width, align_w); + pack_y_pitch = (height + 3) / 4; + } else { + mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0); + pack_y_pitch = ALIGN(mt->height0, align_h); + } - pack_y_pitch = MAX2(mt->height0, 2); pack_x_pitch = mt->pitch; pack_x_nr = 1; @@ -83,26 +94,36 @@ GLboolean brw_miptree_layout( struct intel_mipmap_tree *mt ) mt->total_height += y; - - if (pack_x_pitch > 4) { - pack_x_pitch >>= 1; - pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr <= mt->pitch); - } - - if (pack_y_pitch > 2) { - pack_y_pitch >>= 1; - } - width = minify(width); height = minify(height); depth = minify(depth); + + if (mt->compressed) { + pack_y_pitch = (height + 3) / 4; + + if (pack_x_pitch > ALIGN(width, align_w)) { + pack_x_pitch = ALIGN(width, align_w); + pack_x_nr <<= 1; + } + } else { + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= mt->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + pack_y_pitch = ALIGN(pack_y_pitch, align_h); + } + } + } break; } default: - i945_miptree_layout_2d(mt); + i945_miptree_layout_2d(intel, mt); break; } DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c index 64f5904ac68..1a004176de1 100644 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ b/src/mesa/drivers/dri/i965/brw_urb.c @@ -35,7 +35,6 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "brw_hal.h" #define VS 0 #define GS 1 @@ -53,7 +52,7 @@ static const struct { GLuint min_entry_size; GLuint max_entry_size; } limits[CS+1] = { - { 8, 32, 1, 5 }, /* vs */ + { 16, 32, 1, 5 }, /* vs */ { 4, 8, 1, 5 }, /* gs */ { 6, 8, 1, 5 }, /* clp */ { 1, 8, 1, 12 }, /* sf */ @@ -69,7 +68,7 @@ static GLboolean check_urb_layout( struct brw_context *brw ) brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize; brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize; - return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= 256; + return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= URB_SIZES(brw); } /* Most minimal update, forces re-emit of URB fence packet after GS @@ -81,20 +80,6 @@ static void recalculate_urb_fence( struct brw_context *brw ) GLuint vsize = brw->vs.prog_data->urb_entry_size; GLuint sfsize = brw->sf.prog_data->urb_entry_size; - static GLboolean (*hal_recalculate_urb_fence) (struct brw_context *brw); - static GLboolean hal_tried; - - if (!hal_tried) - { - hal_recalculate_urb_fence = brw_hal_find_symbol ("intel_hal_recalculate_urb_fence"); - hal_tried = 1; - } - if (hal_recalculate_urb_fence) - { - if ((*hal_recalculate_urb_fence) (brw)) - return; - } - if (csize < limits[CS].min_entry_size) csize = limits[CS].min_entry_size; @@ -107,9 +92,9 @@ static void recalculate_urb_fence( struct brw_context *brw ) if (brw->urb.vsize < vsize || brw->urb.sfsize < sfsize || brw->urb.csize < csize || - (brw->urb.constrained && (brw->urb.vsize > brw->urb.vsize || - brw->urb.sfsize > brw->urb.sfsize || - brw->urb.csize > brw->urb.csize))) { + (brw->urb.constrained && (brw->urb.vsize > vsize || + brw->urb.sfsize > sfsize || + brw->urb.csize > csize))) { brw->urb.csize = csize; @@ -129,6 +114,10 @@ static void recalculate_urb_fence( struct brw_context *brw ) brw->urb.nr_sf_entries = limits[SF].min_nr_entries; brw->urb.nr_cs_entries = limits[CS].min_nr_entries; + /* Mark us as operating with constrained nr_entries, so that next + * time we recalculate we'll resize the fences in the hope of + * escaping constrained mode and getting back to normal performance. + */ brw->urb.constrained = 1; if (!check_urb_layout(brw)) { @@ -153,7 +142,7 @@ static void recalculate_urb_fence( struct brw_context *brw ) brw->urb.clip_start, brw->urb.sf_start, brw->urb.cs_start, - 256); + URB_SIZES(brw)); brw->state.dirty.brw |= BRW_NEW_URB_FENCE; } @@ -167,7 +156,7 @@ const struct brw_tracked_state brw_recalculate_urb_fence = { .cache = (CACHE_NEW_VS_PROG | CACHE_NEW_SF_PROG) }, - .update = recalculate_urb_fence + .prepare = recalculate_urb_fence }; @@ -191,25 +180,13 @@ void brw_upload_urb_fence(struct brw_context *brw) /* The ordering below is correct, not the layout in the * instruction. * - * There are 256 urb reg pairs in total. + * There are 256/384 urb reg pairs in total. */ uf.bits0.vs_fence = brw->urb.gs_start; uf.bits0.gs_fence = brw->urb.clip_start; uf.bits0.clp_fence = brw->urb.sf_start; uf.bits1.sf_fence = brw->urb.cs_start; - uf.bits1.cs_fence = 256; + uf.bits1.cs_fence = URB_SIZES(brw); BRW_BATCH_STRUCT(brw, &uf); } - - -#if 0 -const struct brw_tracked_state brw_urb_fence = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_URB_FENCE | BRW_NEW_PSP, - .cache = 0 - }, - .update = brw_upload_urb_fence -}; -#endif diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c index b6deee23769..ce21aa48695 100644 --- a/src/mesa/drivers/dri/i965/brw_util.c +++ b/src/mesa/drivers/dri/i965/brw_util.c @@ -30,7 +30,7 @@ */ -#include "mtypes.h" +#include "main/mtypes.h" #include "shader/prog_parameter.h" #include "brw_util.h" #include "brw_defines.h" @@ -45,86 +45,6 @@ GLuint brw_count_bits( GLuint val ) } -static GLuint brw_parameter_state_flags(const gl_state_index state[]) -{ - switch (state[0]) { - case STATE_MATERIAL: - case STATE_LIGHT: - case STATE_LIGHTMODEL_AMBIENT: - case STATE_LIGHTMODEL_SCENECOLOR: - case STATE_LIGHTPROD: - return _NEW_LIGHT; - - case STATE_TEXGEN: - case STATE_TEXENV_COLOR: - return _NEW_TEXTURE; - - case STATE_FOG_COLOR: - case STATE_FOG_PARAMS: - return _NEW_FOG; - - case STATE_CLIPPLANE: - return _NEW_TRANSFORM; - - case STATE_POINT_SIZE: - case STATE_POINT_ATTENUATION: - return _NEW_POINT; - - case STATE_MODELVIEW_MATRIX: - return _NEW_MODELVIEW; - case STATE_PROJECTION_MATRIX: - return _NEW_PROJECTION; - case STATE_MVP_MATRIX: - return _NEW_MODELVIEW | _NEW_PROJECTION; - case STATE_TEXTURE_MATRIX: - return _NEW_TEXTURE_MATRIX; - case STATE_PROGRAM_MATRIX: - return _NEW_TRACK_MATRIX; - - case STATE_DEPTH_RANGE: - return _NEW_VIEWPORT; - - case STATE_FRAGMENT_PROGRAM: - case STATE_VERTEX_PROGRAM: - return _NEW_PROGRAM; - - case STATE_INTERNAL: - switch (state[1]) { - case STATE_NORMAL_SCALE: - return _NEW_MODELVIEW; - case STATE_TEXRECT_SCALE: - return _NEW_TEXTURE; - default: - assert(0); - return 0; - } - - default: - assert(0); - return 0; - } -} - - -GLuint -brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList) -{ - GLuint i; - GLuint result = 0; - - if (!paramList) - return 0; - - for (i = 0; i < paramList->NumParameters; i++) { - if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) { - result |= brw_parameter_state_flags(paramList->Parameters[i].StateIndexes); - } - } - - return result; -} - - GLuint brw_translate_blend_equation( GLenum mode ) { switch (mode) { diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h index bd6cc0a2682..33e7cd87e42 100644 --- a/src/mesa/drivers/dri/i965/brw_util.h +++ b/src/mesa/drivers/dri/i965/brw_util.h @@ -33,7 +33,7 @@ #ifndef BRW_UTIL_H #define BRW_UTIL_H -#include "mtypes.h" +#include "main/mtypes.h" extern GLuint brw_count_bits( GLuint val ); extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList); diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index e173f6fce3e..1db7ceebcfb 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -49,7 +49,7 @@ static void do_vs_prog( struct brw_context *brw, memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); - brw_init_compile(&c.func); + brw_init_compile(brw, &c.func); c.vp = vp; c.prog_data.outputs_written = vp->program.Base.OutputsWritten; @@ -73,19 +73,17 @@ static void do_vs_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); - /* - */ - brw->vs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_VS_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->vs.prog_data); + dri_bo_unreference(brw->vs.prog_bo); + brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->vs.prog_data ); } -static void brw_upload_vs_prog( struct brw_context *brw ) +static void brw_upload_vs_prog(struct brw_context *brw) { struct brw_vs_prog_key key; struct brw_vertex_program *vp = @@ -110,13 +108,13 @@ static void brw_upload_vs_prog( struct brw_context *brw ) /* Make an early check for the key. */ - if (brw_search_cache(&brw->cache[BRW_VS_PROG], - &key, sizeof(key), - &brw->vs.prog_data, - &brw->vs.prog_gs_offset)) - return; - - do_vs_prog(brw, vp, &key); + dri_bo_unreference(brw->vs.prog_bo); + brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG, + &key, sizeof(key), + NULL, 0, + &brw->vs.prog_data); + if (brw->vs.prog_bo == NULL) + do_vs_prog(brw, vp, &key); } @@ -128,5 +126,5 @@ const struct brw_tracked_state brw_vs_prog = { .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_METAOPS, .cache = 0 }, - .update = brw_upload_vs_prog + .prepare = brw_upload_vs_prog }; diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index fdb5785d67d..22388ec99d0 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -36,7 +36,7 @@ #include "brw_context.h" #include "brw_eu.h" -#include "program.h" +#include "shader/program.h" struct brw_vs_prog_key { @@ -67,6 +67,12 @@ struct brw_vs_compile { struct brw_reg r1; struct brw_reg regs[PROGRAM_ADDRESS+1][128]; struct brw_reg tmp; + struct brw_reg stack; + + struct { + GLboolean used_in_src; + struct brw_reg reg; + } output_regs[128]; struct brw_reg userplane[6]; @@ -74,8 +80,4 @@ struct brw_vs_compile { void brw_vs_emit( struct brw_vs_compile *c ); - -void brw_ProgramCacheDestroy( GLcontext *ctx ); -void brw_ProgramCacheInit( GLcontext *ctx ); - #endif diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c index caef042f1cd..6fbac02de63 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_constval.c +++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c @@ -30,7 +30,7 @@ */ -#include "macros.h" +#include "main/macros.h" #include "brw_context.h" #include "brw_vs.h" @@ -218,6 +218,6 @@ const struct brw_tracked_state brw_wm_input_sizes = { .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS, .cache = 0 }, - .update = calc_wm_input_sizes + .prepare = calc_wm_input_sizes }; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 6eb11b19ad2..25b4ee85cb0 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -30,8 +30,8 @@ */ -#include "program.h" -#include "macros.h" +#include "main/macros.h" +#include "shader/program.h" #include "shader/prog_parameter.h" #include "shader/prog_print.h" #include "brw_context.h" @@ -134,6 +134,16 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) WRITEMASK_X); reg++; } + + for (i = 0; i < 128; i++) { + if (c->output_regs[i].used_in_src) { + c->output_regs[i].reg = brw_vec8_grf(reg, 0); + reg++; + } + } + + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); + reg += 2; /* Some opcodes need an internal temporary: @@ -201,7 +211,7 @@ static void unalias2( struct brw_vs_compile *c, struct brw_reg, struct brw_reg )) { - if ((dst.file == arg0.file && dst.nr == arg0.nr) && + if ((dst.file == arg0.file && dst.nr == arg0.nr) || (dst.file == arg1.file && dst.nr == arg1.nr)) { struct brw_compile *p = &c->func; struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); @@ -213,57 +223,65 @@ static void unalias2( struct brw_vs_compile *c, } } +static void emit_sop( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + GLuint cond) +{ + brw_MOV(p, dst, brw_imm_f(0.0f)); + brw_CMP(p, brw_null_reg(), cond, arg0, arg1); + brw_MOV(p, dst, brw_imm_f(1.0f)); + brw_set_predicate_control_flag_value(p, 0xff); +} +static void emit_seq( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ); +} - +static void emit_sne( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ); +} static void emit_slt( struct brw_compile *p, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1 ) { - /* Could be done with an if/else/endif, but this method uses half - * the instructions. Note that we are careful to reference the - * arguments before writing the dest. That means we emit the - * instructions in an odd order and have to play with the flag - * values. - */ - brw_push_insn_state(p); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1); - - /* Write all values to 1: - */ - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_MOV(p, dst, brw_imm_f(1.0)); + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L); +} - /* Where the test succeeded, overwite with zero: - */ - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, brw_imm_f(0.0)); - brw_pop_insn_state(p); +static void emit_sle( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE); } +static void emit_sgt( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G); +} static void emit_sge( struct brw_compile *p, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1 ) { - brw_push_insn_state(p); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1); - - /* Write all values to zero: - */ - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_MOV(p, dst, brw_imm_f(0)); - - /* Where the test succeeded, overwite with 1: - */ - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, brw_imm_f(1.0)); - brw_pop_insn_state(p); + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE); } - static void emit_max( struct brw_compile *p, struct brw_reg dst, struct brw_reg arg0, @@ -592,9 +610,13 @@ static struct brw_reg get_reg( struct brw_vs_compile *c, case PROGRAM_TEMPORARY: case PROGRAM_INPUT: case PROGRAM_OUTPUT: - case PROGRAM_STATE_VAR: assert(c->regs[file][index].nr != 0); return c->regs[file][index]; + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); + return c->regs[PROGRAM_STATE_VAR][index]; case PROGRAM_ADDRESS: assert(index == 0); return c->regs[file][index]; @@ -668,28 +690,28 @@ static void emit_arl( struct brw_vs_compile *c, * account. */ static struct brw_reg get_arg( struct brw_vs_compile *c, - struct prog_src_register src ) + struct prog_src_register *src ) { struct brw_reg reg; - if (src.File == PROGRAM_UNDEFINED) + if (src->File == PROGRAM_UNDEFINED) return brw_null_reg(); - if (src.RelAddr) - reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); + if (src->RelAddr) + reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); else - reg = get_reg(c, src.File, src.Index); + reg = get_reg(c, src->File, src->Index); /* Convert 3-bit swizzle to 2-bit. */ - reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src.Swizzle, 0), - GET_SWZ(src.Swizzle, 1), - GET_SWZ(src.Swizzle, 2), - GET_SWZ(src.Swizzle, 3)); + reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0), + GET_SWZ(src->Swizzle, 1), + GET_SWZ(src->Swizzle, 2), + GET_SWZ(src->Swizzle, 3)); /* Note this is ok for non-swizzle instructions: */ - reg.negate = src.NegateBase ? 1 : 0; + reg.negate = src->NegateBase ? 1 : 0; return reg; } @@ -796,8 +818,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) } - /* Build ndc coords? TODO: Shortcircuit when w is known to be one. - */ + /* Build ndc coords */ if (!c->key.know_w_is_one) { ndc = get_tmp(c); emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); @@ -807,12 +828,12 @@ static void emit_vertex_write( struct brw_vs_compile *c) ndc = pos; } - /* This includes the workaround for -ve rhw, so is no longer an - * optional step: + /* Update the header for point size, user clipping flags, and -ve rhw + * workaround. */ if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) || c->key.nr_userclip || - !c->key.know_w_is_one) + (!BRW_IS_G4X(p->brw) && !c->key.know_w_is_one)) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -845,7 +866,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) * Later, clipping will detect ucp[6] and ensure the primitive is * clipped against all fixed planes. */ - if (!c->key.know_w_is_one) { + if (!BRW_IS_G4X(p->brw) && !c->key.know_w_is_one) { brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_L, @@ -891,17 +912,50 @@ static void emit_vertex_write( struct brw_vs_compile *c) } - - +static void +post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst ) +{ + GLuint nr_insns = c->vp->program.Base.NumInstructions; + GLuint insn, target_insn; + struct prog_instruction *inst1, *inst2; + struct brw_instruction *brw_inst1, *brw_inst2; + int offset; + for (insn = 0; insn < nr_insns; insn++) { + inst1 = &c->vp->program.Base.Instructions[insn]; + brw_inst1 = inst1->Data; + switch (inst1->Opcode) { + case OPCODE_CAL: + case OPCODE_BRA: + target_insn = inst1->BranchTarget; + inst2 = &c->vp->program.Base.Instructions[target_insn]; + brw_inst2 = inst2->Data; + offset = brw_inst2 - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + case OPCODE_END: + offset = end_inst - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + default: + break; + } + } +} /* Emit the fragment program instructions here. */ -void brw_vs_emit( struct brw_vs_compile *c ) +void brw_vs_emit(struct brw_vs_compile *c ) { +#define MAX_IFSN 32 struct brw_compile *p = &c->func; GLuint nr_insns = c->vp->program.Base.NumInstructions; - GLuint insn; + GLuint insn, if_insn = 0; + struct brw_instruction *end_inst; + struct brw_instruction *if_inst[MAX_IFSN]; + struct brw_indirect stack_index = brw_indirect(0, 0); + GLuint index; + GLuint file; if (INTEL_DEBUG & DEBUG_VS) { _mesa_printf("\n\n\nvs-emit:\n"); @@ -912,9 +966,24 @@ void brw_vs_emit( struct brw_vs_compile *c ) brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_access_mode(p, BRW_ALIGN_16); + /* Message registers can't be read, so copy the output into GRF register + if they are used in source registers */ + for (insn = 0; insn < nr_insns; insn++) { + GLuint i; + struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; + for (i = 0; i < 3; i++) { + struct prog_src_register *src = &inst->SrcReg[i]; + GLuint index = src->Index; + GLuint file = src->File; + if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS) + c->output_regs[index].used_in_src = GL_TRUE; + } + } + /* Static register allocation */ brw_vs_alloc_regs(c); + brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); for (insn = 0; insn < nr_insns; insn++) { @@ -924,17 +993,29 @@ void brw_vs_emit( struct brw_vs_compile *c ) /* Get argument regs. SWZ is special and does this itself. */ + inst->Data = &p->store[p->nr_insn]; if (inst->Opcode != OPCODE_SWZ) - for (i = 0; i < 3; i++) - args[i] = get_arg(c, inst->SrcReg[i]); + for (i = 0; i < 3; i++) { + struct prog_src_register *src = &inst->SrcReg[i]; + index = src->Index; + file = src->File; + if (file == PROGRAM_OUTPUT&&c->output_regs[index].used_in_src) + args[i] = c->output_regs[index].reg; + else + args[i] = get_arg(c, src); + } /* Get dest regs. Note that it is possible for a reg to be both * dst and arg, given the static allocation of registers. So * care needs to be taken emitting multi-operation instructions. - */ - dst = get_dst(c, inst->DstReg); + */ + index = inst->DstReg.Index; + file = inst->DstReg.File; + if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) + dst = c->output_regs[index].reg; + else + dst = get_dst(c, inst->DstReg); - switch (inst->Opcode) { case OPCODE_ABS: brw_MOV(p, dst, brw_abs(args[0])); @@ -942,6 +1023,9 @@ void brw_vs_emit( struct brw_vs_compile *c ) case OPCODE_ADD: brw_ADD(p, dst, args[0], args[1]); break; + case OPCODE_COS: + emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL); + break; case OPCODE_DP3: brw_DP3(p, dst, args[0], args[1]); break; @@ -1003,12 +1087,28 @@ void brw_vs_emit( struct brw_vs_compile *c ) case OPCODE_RSQ: emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); break; + + case OPCODE_SEQ: + emit_seq(p, dst, args[0], args[1]); + break; + case OPCODE_SIN: + emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case OPCODE_SNE: + emit_sne(p, dst, args[0], args[1]); + break; case OPCODE_SGE: emit_sge(p, dst, args[0], args[1]); break; + case OPCODE_SGT: + emit_sgt(p, dst, args[0], args[1]); + break; case OPCODE_SLT: emit_slt(p, dst, args[0], args[1]); break; + case OPCODE_SLE: + emit_sle(p, dst, args[0], args[1]); + break; case OPCODE_SUB: brw_ADD(p, dst, args[0], negate(args[1])); break; @@ -1021,21 +1121,85 @@ void brw_vs_emit( struct brw_vs_compile *c ) case OPCODE_XPD: emit_xpd(p, dst, args[0], args[1]); break; + case OPCODE_IF: + assert(if_insn < MAX_IFSN); + if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + break; + case OPCODE_ELSE: + if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + break; + case OPCODE_ENDIF: + assert(if_insn > 0); + brw_ENDIF(p, if_inst[--if_insn]); + break; + case OPCODE_BRA: + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_set_predicate_control_flag_value(p, 0xff); + break; + case OPCODE_CAL: + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(4)); + inst->Data = &p->store[p->nr_insn]; + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + break; + case OPCODE_RET: + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); case OPCODE_END: + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + break; case OPCODE_PRINT: + case OPCODE_BGNSUB: + case OPCODE_ENDSUB: break; default: + _mesa_printf("Unsupported opcode %i (%s) in vertex shader\n", + inst->Opcode, inst->Opcode < MAX_OPCODE ? + _mesa_opcode_string(inst->Opcode) : + "unknown"); break; } + if ((inst->DstReg.File == PROGRAM_OUTPUT) + && (inst->DstReg.Index != VERT_RESULT_HPOS) + && c->output_regs[inst->DstReg.Index].used_in_src) { + brw_MOV(p, get_dst(c, inst->DstReg), dst); + } + + /* Result color clamping. + * + * When destination register is an output register and + * it's primary/secondary front/back color, we have to clamp + * the result to [0,1]. This is done by enabling the + * saturation bit for the last instruction. + * + * We don't use brw_set_saturate() as it modifies + * p->current->header.saturate, which affects all the subsequent + * instructions. Instead, we directly modify the header + * of the last (already stored) instruction. + */ + if (inst->DstReg.File == PROGRAM_OUTPUT) { + if ((inst->DstReg.Index == VERT_RESULT_COL0) + || (inst->DstReg.Index == VERT_RESULT_COL1) + || (inst->DstReg.Index == VERT_RESULT_BFC0) + || (inst->DstReg.Index == VERT_RESULT_BFC1)) { + p->store[p->nr_insn-1].header.saturate = 1; + } + } + release_tmps(c); } + end_inst = &p->store[p->nr_insn]; emit_vertex_write(c); - + post_vs_emit(c, end_inst); + for (insn = 0; insn < nr_insns; insn++) + c->vp->program.Base.Instructions[insn].Data = NULL; } - - - - - diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index c225bf8f5c5..6e66f54524b 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -34,62 +34,111 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "macros.h" +#include "main/macros.h" -static void upload_vs_unit( struct brw_context *brw ) -{ - struct brw_vs_unit_state vs; - - memset(&vs, 0, sizeof(vs)); - - /* CACHE_NEW_VS_PROG */ - vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6; - vs.thread0.grf_reg_count = ((brw->vs.prog_data->total_grf-1) & ~15) / 16; - vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; - vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; - vs.thread3.dispatch_grf_start_reg = 1; +struct brw_vs_unit_key { + unsigned int total_grf; + unsigned int urb_entry_read_length; + unsigned int curb_entry_read_length; + unsigned int curbe_offset; - /* BRW_NEW_URB_FENCE */ - vs.thread4.nr_urb_entries = brw->urb.nr_vs_entries; - vs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; - vs.thread4.max_threads = MIN2( - MAX2(0, (brw->urb.nr_vs_entries - 6) / 2 - 1), - 15); + unsigned int nr_urb_entries, urb_size; +}; +static void +vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) +{ + memset(key, 0, sizeof(*key)); + /* CACHE_NEW_VS_PROG */ + key->total_grf = brw->vs.prog_data->total_grf; + key->urb_entry_read_length = brw->vs.prog_data->urb_read_length; + key->curb_entry_read_length = brw->vs.prog_data->curb_read_length; - if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) - vs.thread4.max_threads = 0; + /* BRW_NEW_URB_FENCE */ + key->nr_urb_entries = brw->urb.nr_vs_entries; + key->urb_size = brw->urb.vsize; /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ if (brw->attribs.Transform->ClipPlanesEnabled) { /* Note that we read in the userclip planes as well, hence * clip_start: */ - vs.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + key->curbe_offset = brw->curbe.clip_start; } else { - vs.thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2; + key->curbe_offset = brw->curbe.vs_start; } +} + +static dri_bo * +vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) +{ + struct brw_vs_unit_state vs; + dri_bo *bo; + + memset(&vs, 0, sizeof(vs)); + vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */ + vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + vs.thread3.urb_entry_read_length = key->urb_entry_read_length; + vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length; + vs.thread3.dispatch_grf_start_reg = 1; vs.thread3.urb_entry_read_offset = 0; + vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; + + vs.thread4.nr_urb_entries = key->nr_urb_entries; + vs.thread4.urb_entry_allocation_size = key->urb_size - 1; + vs.thread4.max_threads = MIN2(MAX2(0, (key->nr_urb_entries - 6) / 2 - 1), + 15); + + if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) + vs.thread4.max_threads = 0; /* No samplers for ARB_vp programs: */ vs.vs5.sampler_count = 0; if (INTEL_DEBUG & DEBUG_STATS) - vs.thread4.stats_enable = 1; + vs.thread4.stats_enable = 1; - /* Vertex program always enabled: + /* Vertex program always enabled: */ vs.vs6.vs_enable = 1; - brw->vs.state_gs_offset = brw_cache_data( &brw->cache[BRW_VS_UNIT], &vs ); + bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT, + key, sizeof(*key), + &brw->vs.prog_bo, 1, + &vs, sizeof(vs), + NULL, NULL); + + /* Emit VS program relocation */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + vs.thread0.grf_reg_count << 1, + offsetof(struct brw_vs_unit_state, thread0), + brw->vs.prog_bo); + + return bo; } +static void prepare_vs_unit(struct brw_context *brw) +{ + struct brw_vs_unit_key key; + + vs_unit_populate_key(brw, &key); + + dri_bo_unreference(brw->vs.state_bo); + brw->vs.state_bo = brw_search_cache(&brw->cache, BRW_VS_UNIT, + &key, sizeof(key), + &brw->vs.prog_bo, 1, + NULL); + if (brw->vs.state_bo == NULL) { + brw->vs.state_bo = vs_unit_create_from_key(brw, &key); + } +} const struct brw_tracked_state brw_vs_unit = { .dirty = { @@ -98,5 +147,5 @@ const struct brw_tracked_state brw_vs_unit = { BRW_NEW_URB_FENCE), .cache = CACHE_NEW_VS_PROG }, - .update = upload_vs_unit + .prepare = prepare_vs_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_vs_tnl.c b/src/mesa/drivers/dri/i965/brw_vs_tnl.c index b69be350a92..eacc289f1f1 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_tnl.c +++ b/src/mesa/drivers/dri/i965/brw_vs_tnl.c @@ -30,1620 +30,18 @@ */ -#include "glheader.h" -#include "macros.h" -#include "enums.h" -#include "shader/prog_parameter.h" -#include "shader/prog_print.h" +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" #include "brw_vs.h" #include "brw_state.h" -struct state_key { - unsigned light_global_enabled:1; - unsigned light_local_viewer:1; - unsigned light_twoside:1; - unsigned light_color_material:1; - unsigned light_color_material_mask:12; - unsigned light_material_mask:12; - unsigned normalize:1; - unsigned rescale_normals:1; - unsigned fog_source_is_depth:1; - unsigned tnl_do_vertex_fog:1; - unsigned separate_specular:1; - unsigned fog_option:2; - unsigned point_attenuated:1; - unsigned texture_enabled_global:1; - unsigned fragprog_inputs_read:12; - - struct { - unsigned light_enabled:1; - unsigned light_eyepos3_is_zero:1; - unsigned light_spotcutoff_is_180:1; - unsigned light_attenuated:1; - unsigned texunit_really_enabled:1; - unsigned texmat_enabled:1; - unsigned texgen_enabled:4; - unsigned texgen_mode0:4; - unsigned texgen_mode1:4; - unsigned texgen_mode2:4; - unsigned texgen_mode3:4; - } unit[8]; -}; - - - -#define FOG_NONE 0 -#define FOG_LINEAR 1 -#define FOG_EXP 2 -#define FOG_EXP2 3 - -static GLuint translate_fog_mode( GLenum mode ) -{ - switch (mode) { - case GL_LINEAR: return FOG_LINEAR; - case GL_EXP: return FOG_EXP; - case GL_EXP2: return FOG_EXP2; - default: return FOG_NONE; - } -} - -#define TXG_NONE 0 -#define TXG_OBJ_LINEAR 1 -#define TXG_EYE_LINEAR 2 -#define TXG_SPHERE_MAP 3 -#define TXG_REFLECTION_MAP 4 -#define TXG_NORMAL_MAP 5 - -static GLuint translate_texgen( GLboolean enabled, GLenum mode ) -{ - if (!enabled) - return TXG_NONE; - - switch (mode) { - case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR; - case GL_EYE_LINEAR: return TXG_EYE_LINEAR; - case GL_SPHERE_MAP: return TXG_SPHERE_MAP; - case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP; - case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP; - default: return TXG_NONE; - } -} - -static void make_state_key( GLcontext *ctx, struct state_key *key ) -{ - struct brw_context *brw = brw_context(ctx); - const struct gl_fragment_program *fp = brw->fragment_program; - GLuint i; - - /* This now relies on texenvprogram.c being active: - */ - assert(fp); - - memset(key, 0, sizeof(*key)); - - /* BRW_NEW_FRAGMENT_PROGRAM */ - key->fragprog_inputs_read = fp->Base.InputsRead; - - /* _NEW_LIGHT */ - key->separate_specular = (brw->attribs.Light->Model.ColorControl == - GL_SEPARATE_SPECULAR_COLOR); - - /* _NEW_LIGHT */ - if (brw->attribs.Light->Enabled) { - key->light_global_enabled = 1; - - if (brw->attribs.Light->Model.LocalViewer) - key->light_local_viewer = 1; - - if (brw->attribs.Light->Model.TwoSide) - key->light_twoside = 1; - - if (brw->attribs.Light->ColorMaterialEnabled) { - key->light_color_material = 1; - key->light_color_material_mask = brw->attribs.Light->ColorMaterialBitmask; - } - - /* BRW_NEW_INPUT_VARYING */ - - /* For these programs, material values are stuffed into the - * generic slots: - */ - for (i = 0 ; i < MAT_ATTRIB_MAX ; i++) - if (brw->vb.info.varying & (1<<(VERT_ATTRIB_GENERIC0 + i))) - key->light_material_mask |= 1<<i; - - for (i = 0; i < MAX_LIGHTS; i++) { - struct gl_light *light = &brw->attribs.Light->Light[i]; - - if (light->Enabled) { - key->unit[i].light_enabled = 1; - - if (light->EyePosition[3] == 0.0) - key->unit[i].light_eyepos3_is_zero = 1; - - if (light->SpotCutoff == 180.0) - key->unit[i].light_spotcutoff_is_180 = 1; - - if (light->ConstantAttenuation != 1.0 || - light->LinearAttenuation != 0.0 || - light->QuadraticAttenuation != 0.0) - key->unit[i].light_attenuated = 1; - } - } - } - - /* _NEW_TRANSFORM */ - if (brw->attribs.Transform->Normalize) - key->normalize = 1; - - if (brw->attribs.Transform->RescaleNormals) - key->rescale_normals = 1; - - /* BRW_NEW_FRAGMENT_PROGRAM */ - key->fog_option = translate_fog_mode(fp->FogOption); - if (key->fog_option) - key->fragprog_inputs_read |= FRAG_BIT_FOGC; - - /* _NEW_FOG */ - if (brw->attribs.Fog->FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) - key->fog_source_is_depth = 1; - - /* _NEW_HINT, ??? */ - if (1) - key->tnl_do_vertex_fog = 1; - - /* _NEW_POINT */ - if (brw->attribs.Point->_Attenuated) - key->point_attenuated = 1; - - /* _NEW_TEXTURE */ - if (brw->attribs.Texture->_TexGenEnabled || - brw->attribs.Texture->_TexMatEnabled || - brw->attribs.Texture->_EnabledUnits) - key->texture_enabled_global = 1; - - for (i = 0; i < MAX_TEXTURE_UNITS; i++) { - struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i]; - - if (texUnit->_ReallyEnabled) - key->unit[i].texunit_really_enabled = 1; - - if (brw->attribs.Texture->_TexMatEnabled & ENABLE_TEXMAT(i)) - key->unit[i].texmat_enabled = 1; - - if (texUnit->TexGenEnabled) { - key->unit[i].texgen_enabled = 1; - - key->unit[i].texgen_mode0 = - translate_texgen( texUnit->TexGenEnabled & (1<<0), - texUnit->GenModeS ); - key->unit[i].texgen_mode1 = - translate_texgen( texUnit->TexGenEnabled & (1<<1), - texUnit->GenModeT ); - key->unit[i].texgen_mode2 = - translate_texgen( texUnit->TexGenEnabled & (1<<2), - texUnit->GenModeR ); - key->unit[i].texgen_mode3 = - translate_texgen( texUnit->TexGenEnabled & (1<<3), - texUnit->GenModeQ ); - } - } -} - - - -/* Very useful debugging tool - produces annotated listing of - * generated program with line/function references for each - * instruction back into this file: - */ -#define DISASSEM 0 - -/* Should be tunable by the driver - do we want to do matrix - * multiplications with DP4's or with MUL/MAD's? SSE works better - * with the latter, drivers may differ. - */ -#define PREFER_DP4 1 - - -/* Use uregs to represent registers internally, translate to Mesa's - * expected formats on emit. - * - * NOTE: These are passed by value extensively in this file rather - * than as usual by pointer reference. If this disturbs you, try - * remembering they are just 32bits in size. - * - * GCC is smart enough to deal with these dword-sized structures in - * much the same way as if I had defined them as dwords and was using - * macros to access and set the fields. This is much nicer and easier - * to evolve. - */ -struct ureg { - GLuint file:4; - GLint idx:8; /* relative addressing may be negative */ - GLuint negate:1; - GLuint swz:12; - GLuint pad:7; -}; - - -struct tnl_program { - const struct state_key *state; - struct gl_vertex_program *program; - - GLuint nr_instructions; - GLuint temp_in_use; - GLuint temp_reserved; - - struct ureg eye_position; - struct ureg eye_position_normalized; - struct ureg eye_normal; - struct ureg identity; - - GLuint materials; - GLuint color_materials; -}; - - -const static struct ureg undef = { - PROGRAM_UNDEFINED, - ~0, - 0, - 0, - 0 -}; - -/* Local shorthand: - */ -#define X SWIZZLE_X -#define Y SWIZZLE_Y -#define Z SWIZZLE_Z -#define W SWIZZLE_W - - -/* Construct a ureg: - */ -static struct ureg make_ureg(GLuint file, GLint idx) -{ - struct ureg reg; - reg.file = file; - reg.idx = idx; - reg.negate = 0; - reg.swz = SWIZZLE_NOOP; - reg.pad = 0; - return reg; -} - - - -static struct ureg ureg_negate( struct ureg reg ) -{ - reg.negate ^= 1; - return reg; -} - - -static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w ) -{ - reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x), - GET_SWZ(reg.swz, y), - GET_SWZ(reg.swz, z), - GET_SWZ(reg.swz, w)); - - return reg; -} - -static struct ureg swizzle1( struct ureg reg, int x ) -{ - return swizzle(reg, x, x, x, x); -} - -static struct ureg get_temp( struct tnl_program *p ) -{ - int bit = ffs( ~p->temp_in_use ); - if (!bit) { - fprintf(stderr, "%s: out of temporaries\n", __FILE__); - assert(0); - } - - if (bit > p->program->Base.NumTemporaries) - p->program->Base.NumTemporaries = bit; - - p->temp_in_use |= 1<<(bit-1); - return make_ureg(PROGRAM_TEMPORARY, bit-1); -} - -static struct ureg reserve_temp( struct tnl_program *p ) -{ - struct ureg temp = get_temp( p ); - p->temp_reserved |= 1<<temp.idx; - return temp; -} - -static void release_temp( struct tnl_program *p, struct ureg reg ) -{ - if (reg.file == PROGRAM_TEMPORARY) { - p->temp_in_use &= ~(1<<reg.idx); - p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */ - } -} - -static void release_temps( struct tnl_program *p ) -{ - p->temp_in_use = p->temp_reserved; -} - - - -static struct ureg register_input( struct tnl_program *p, GLuint input ) -{ - assert(input < 32); - - p->program->Base.InputsRead |= (1<<input); - return make_ureg(PROGRAM_INPUT, input); -} - -static struct ureg register_output( struct tnl_program *p, GLuint output ) -{ - p->program->Base.OutputsWritten |= (1<<output); - return make_ureg(PROGRAM_OUTPUT, output); -} - -static struct ureg register_const4f( struct tnl_program *p, - GLfloat s0, - GLfloat s1, - GLfloat s2, - GLfloat s3) -{ - GLfloat values[4]; - GLint idx; - GLuint swizzle; - values[0] = s0; - values[1] = s1; - values[2] = s2; - values[3] = s3; - idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, - &swizzle); - /* XXX what about swizzle? */ - return make_ureg(PROGRAM_STATE_VAR, idx); -} - -#define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1) -#define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0) -#define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1) -#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1) - -static GLboolean is_undef( struct ureg reg ) -{ - return reg.file == PROGRAM_UNDEFINED; -} - -static struct ureg get_identity_param( struct tnl_program *p ) -{ - if (is_undef(p->identity)) - p->identity = register_const4f(p, 0,0,0,1); - - return p->identity; -} - -static struct ureg register_param5( struct tnl_program *p, - GLint s0, - GLint s1, - GLint s2, - GLint s3, - GLint s4) -{ - gl_state_index tokens[STATE_LENGTH]; - GLint idx; - tokens[0] = s0; - tokens[1] = s1; - tokens[2] = s2; - tokens[3] = s3; - tokens[4] = s4; - idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens ); - return make_ureg(PROGRAM_STATE_VAR, idx); -} - - -#define register_param1(p,s0) register_param5(p,s0,0,0,0,0) -#define register_param2(p,s0,s1) register_param5(p,s0,s1,0,0,0) -#define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0) -#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0) - - -static void register_matrix_param5( struct tnl_program *p, - GLint s0, /* matrix name */ - GLint s1, /* texture matrix number */ - GLint s2, /* first row */ - GLint s3, /* last row */ - GLint s4, /* modifier */ - struct ureg *matrix ) -{ - GLint i; - - /* This is a bit sad as the support is there to pull the whole - * matrix out in one go: - */ - for (i = 0; i <= s3 - s2; i++) - matrix[i] = register_param5( p, s0, s1, i, i, s4 ); -} - - -static void emit_arg( struct prog_src_register *src, - struct ureg reg ) -{ - src->File = reg.file; - src->Index = reg.idx; - src->Swizzle = reg.swz; - src->RelAddr = 0; - src->NegateBase = reg.negate; - src->Abs = 0; - src->NegateAbs = 0; -} - -static void emit_dst( struct prog_dst_register *dst, - struct ureg reg, GLuint mask ) -{ - dst->File = reg.file; - dst->Index = reg.idx; - /* allow zero as a shorthand for xyzw */ - dst->WriteMask = mask ? mask : WRITEMASK_XYZW; - dst->CondMask = 0; - dst->CondSwizzle = 0; - dst->CondSrc = 0; - dst->pad = 0; -} - -static void debug_insn( struct prog_instruction *inst, const char *fn, - GLuint line ) -{ - if (DISASSEM) { - static const char *last_fn; - - if (fn != last_fn) { - last_fn = fn; - _mesa_printf("%s:\n", fn); - } - - _mesa_printf("%d:\t", line); - _mesa_print_instruction(inst); - } -} - - -static void emit_op3fn(struct tnl_program *p, - GLuint op, - struct ureg dest, - GLuint mask, - struct ureg src0, - struct ureg src1, - struct ureg src2, - const char *fn, - GLuint line) -{ - GLuint nr = p->program->Base.NumInstructions++; - - if (nr >= p->nr_instructions) { - p->program->Base.Instructions = - _mesa_realloc(p->program->Base.Instructions, - sizeof(struct prog_instruction) * p->nr_instructions, - sizeof(struct prog_instruction) * (p->nr_instructions *= 2)); - } - - { - struct prog_instruction *inst = &p->program->Base.Instructions[nr]; - memset(inst, 0, sizeof(*inst)); - inst->Opcode = op; - inst->StringPos = 0; - inst->Data = 0; - - emit_arg( &inst->SrcReg[0], src0 ); - emit_arg( &inst->SrcReg[1], src1 ); - emit_arg( &inst->SrcReg[2], src2 ); - - emit_dst( &inst->DstReg, dest, mask ); - - debug_insn(inst, fn, line); - } -} - - - -#define emit_op3(p, op, dst, mask, src0, src1, src2) \ - emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__) - -#define emit_op2(p, op, dst, mask, src0, src1) \ - emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__) - -#define emit_op1(p, op, dst, mask, src0) \ - emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__) - - -static struct ureg make_temp( struct tnl_program *p, struct ureg reg ) -{ - if (reg.file == PROGRAM_TEMPORARY && - !(p->temp_reserved & (1<<reg.idx))) - return reg; - else { - struct ureg temp = get_temp(p); - emit_op1(p, OPCODE_MOV, temp, 0, reg); - return temp; - } -} - - -/* Currently no tracking performed of input/output/register size or - * active elements. Could be used to reduce these operations, as - * could the matrix type. - */ -static void emit_matrix_transform_vec4( struct tnl_program *p, - struct ureg dest, - const struct ureg *mat, - struct ureg src) -{ - emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]); - emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]); - emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]); - emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]); -} - -/* This version is much easier to implement if writemasks are not - * supported natively on the target or (like SSE), the target doesn't - * have a clean/obvious dotproduct implementation. - */ -static void emit_transpose_matrix_transform_vec4( struct tnl_program *p, - struct ureg dest, - const struct ureg *mat, - struct ureg src) -{ - struct ureg tmp; - - if (dest.file != PROGRAM_TEMPORARY) - tmp = get_temp(p); - else - tmp = dest; - - emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]); - emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp); - emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp); - emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp); - - if (dest.file != PROGRAM_TEMPORARY) - release_temp(p, tmp); -} - -static void emit_matrix_transform_vec3( struct tnl_program *p, - struct ureg dest, - const struct ureg *mat, - struct ureg src) -{ - emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]); - emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]); - emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]); -} - - -static void emit_normalize_vec3( struct tnl_program *p, - struct ureg dest, - struct ureg src ) -{ - emit_op2(p, OPCODE_DP3, dest, WRITEMASK_W, src, src); - emit_op1(p, OPCODE_RSQ, dest, WRITEMASK_W, swizzle1(dest,W)); - emit_op2(p, OPCODE_MUL, dest, WRITEMASK_XYZ, src, swizzle1(dest,W)); -} - -static void emit_passthrough( struct tnl_program *p, - GLuint input, - GLuint output ) -{ - struct ureg out = register_output(p, output); - emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input)); -} - -static struct ureg get_eye_position( struct tnl_program *p ) -{ - if (is_undef(p->eye_position)) { - struct ureg pos = register_input( p, VERT_ATTRIB_POS ); - struct ureg modelview[4]; - - p->eye_position = reserve_temp(p); - - if (PREFER_DP4) { - register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, - 0, modelview ); - - emit_matrix_transform_vec4(p, p->eye_position, modelview, pos); - } - else { - register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, - STATE_MATRIX_TRANSPOSE, modelview ); - - emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos); - } - } - - return p->eye_position; -} - - -#if 0 -static struct ureg get_eye_z( struct tnl_program *p ) -{ - if (!is_undef(p->eye_position)) { - return swizzle1(p->eye_position, Z); - } - else if (!is_undef(p->eye_z)) { - struct ureg pos = register_input( p, BRW_ATTRIB_POS ); - struct ureg modelview2; - - p->eye_z = reserve_temp(p); - - register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 2, 1, - STATE_MATRIX, &modelview2 ); - - emit_matrix_transform_vec4(p, p->eye_position, modelview, pos); - emit_op2(p, OPCODE_DP4, p->eye_z, WRITEMASK_Z, pos, modelview2); - } - - return swizzle1(p->eye_z, Z) -} -#endif - - - -static struct ureg get_eye_position_normalized( struct tnl_program *p ) -{ - if (is_undef(p->eye_position_normalized)) { - struct ureg eye = get_eye_position(p); - p->eye_position_normalized = reserve_temp(p); - emit_normalize_vec3(p, p->eye_position_normalized, eye); - } - - return p->eye_position_normalized; -} - - -static struct ureg get_eye_normal( struct tnl_program *p ) -{ - if (is_undef(p->eye_normal)) { - struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL ); - struct ureg mvinv[3]; - - register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2, - STATE_MATRIX_INVTRANS, mvinv ); - - p->eye_normal = reserve_temp(p); - - /* Transform to eye space: - */ - emit_matrix_transform_vec3( p, p->eye_normal, mvinv, normal ); - - /* Normalize/Rescale: - */ - if (p->state->normalize) { - emit_normalize_vec3( p, p->eye_normal, p->eye_normal ); - } - else if (p->state->rescale_normals) { - struct ureg rescale = register_param2(p, STATE_INTERNAL, - STATE_NORMAL_SCALE); - - emit_op2( p, OPCODE_MUL, p->eye_normal, 0, p->eye_normal, - swizzle1(rescale, X)); - } - } - - return p->eye_normal; -} - - - -static void build_hpos( struct tnl_program *p ) -{ - struct ureg pos = register_input( p, VERT_ATTRIB_POS ); - struct ureg hpos = register_output( p, VERT_RESULT_HPOS ); - struct ureg mvp[4]; - - if (PREFER_DP4) { - register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, - 0, mvp ); - emit_matrix_transform_vec4( p, hpos, mvp, pos ); - } - else { - register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, - STATE_MATRIX_TRANSPOSE, mvp ); - emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos ); - } -} - - -static GLuint material_attrib( GLuint side, GLuint property ) -{ - return (property - STATE_AMBIENT) * 2 + side; -} - -/* Get a bitmask of which material values vary on a per-vertex basis. - */ -static void set_material_flags( struct tnl_program *p ) -{ - p->color_materials = 0; - p->materials = 0; - - if (p->state->light_color_material) { - p->materials = - p->color_materials = p->state->light_color_material_mask; - } - - p->materials |= p->state->light_material_mask; -} - - -static struct ureg get_material( struct tnl_program *p, GLuint side, - GLuint property ) -{ - GLuint attrib = material_attrib(side, property); - - if (p->color_materials & (1<<attrib)) - return register_input(p, VERT_ATTRIB_COLOR0); - else if (p->materials & (1<<attrib)) - return register_input( p, attrib + _TNL_ATTRIB_MAT_FRONT_AMBIENT ); - else - return register_param3( p, STATE_MATERIAL, side, property ); -} - -#define SCENE_COLOR_BITS(side) ((MAT_BIT_FRONT_EMISSION | \ - MAT_BIT_FRONT_AMBIENT | \ - MAT_BIT_FRONT_DIFFUSE) << (side)) - -/* Either return a precalculated constant value or emit code to - * calculate these values dynamically in the case where material calls - * are present between begin/end pairs. - * - * Probably want to shift this to the program compilation phase - if - * we always emitted the calculation here, a smart compiler could - * detect that it was constant (given a certain set of inputs), and - * lift it out of the main loop. That way the programs created here - * would be independent of the vertex_buffer details. - */ -static struct ureg get_scenecolor( struct tnl_program *p, GLuint side ) -{ - if (p->materials & SCENE_COLOR_BITS(side)) { - struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT); - struct ureg material_emission = get_material(p, side, STATE_EMISSION); - struct ureg material_ambient = get_material(p, side, STATE_AMBIENT); - struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE); - struct ureg tmp = make_temp(p, material_diffuse); - emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient, - material_ambient, material_emission); - return tmp; - } - else - return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side ); -} - - -static struct ureg get_lightprod( struct tnl_program *p, GLuint light, - GLuint side, GLuint property ) -{ - GLuint attrib = material_attrib(side, property); - if (p->materials & (1<<attrib)) { - struct ureg light_value = - register_param3(p, STATE_LIGHT, light, property); - struct ureg material_value = get_material(p, side, property); - struct ureg tmp = get_temp(p); - emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value); - return tmp; - } - else - return register_param4(p, STATE_LIGHTPROD, light, side, property); -} - -static struct ureg calculate_light_attenuation( struct tnl_program *p, - GLuint i, - struct ureg VPpli, - struct ureg dist ) -{ - struct ureg attenuation = register_param3(p, STATE_LIGHT, i, - STATE_ATTENUATION); - struct ureg att = get_temp(p); - - /* Calculate spot attenuation: - */ - if (!p->state->unit[i].light_spotcutoff_is_180) { - struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL, - STATE_SPOT_DIR_NORMALIZED, i); - struct ureg spot = get_temp(p); - struct ureg slt = get_temp(p); - - emit_op2(p, OPCODE_DP3, spot, 0, ureg_negate(VPpli), spot_dir_norm); - emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot); - emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); - emit_op2(p, OPCODE_MUL, att, 0, slt, spot); - - release_temp(p, spot); - release_temp(p, slt); - } - - /* Calculate distance attenuation: - */ - if (p->state->unit[i].light_attenuated) { - - /* 1/d,d,d,1/d */ - emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist); - /* 1,d,d*d,1/d */ - emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y)); - /* 1/dist-atten */ - emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist); - - if (!p->state->unit[i].light_spotcutoff_is_180) { - /* dist-atten */ - emit_op1(p, OPCODE_RCP, dist, 0, dist); - /* spot-atten * dist-atten */ - emit_op2(p, OPCODE_MUL, att, 0, dist, att); - } else { - /* dist-atten */ - emit_op1(p, OPCODE_RCP, att, 0, dist); - } - } - - return att; -} - - - - - -/* Need to add some addtional parameters to allow lighting in object - * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye - * space lighting. - */ -static void build_lighting( struct tnl_program *p ) -{ - const GLboolean twoside = p->state->light_twoside; - const GLboolean separate = p->state->separate_specular; - GLuint nr_lights = 0, count = 0; - struct ureg normal = get_eye_normal(p); - struct ureg lit = get_temp(p); - struct ureg dots = get_temp(p); - struct ureg _col0 = undef, _col1 = undef; - struct ureg _bfc0 = undef, _bfc1 = undef; - GLuint i; - - for (i = 0; i < MAX_LIGHTS; i++) - if (p->state->unit[i].light_enabled) - nr_lights++; - - set_material_flags(p); - - { - struct ureg shininess = get_material(p, 0, STATE_SHININESS); - emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); - release_temp(p, shininess); - - _col0 = make_temp(p, get_scenecolor(p, 0)); - if (separate) - _col1 = make_temp(p, get_identity_param(p)); - else - _col1 = _col0; - - } - - if (twoside) { - struct ureg shininess = get_material(p, 1, STATE_SHININESS); - emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, - ureg_negate(swizzle1(shininess,X))); - release_temp(p, shininess); - - _bfc0 = make_temp(p, get_scenecolor(p, 1)); - if (separate) - _bfc1 = make_temp(p, get_identity_param(p)); - else - _bfc1 = _bfc0; - } - - - /* If no lights, still need to emit the scenecolor. - */ - /* KW: changed to do this always - v1.17 "Fix lighting alpha result"? - */ - if (p->state->fragprog_inputs_read & FRAG_BIT_COL0) - { - struct ureg res0 = register_output( p, VERT_RESULT_COL0 ); - emit_op1(p, OPCODE_MOV, res0, 0, _col0); - - if (twoside) { - struct ureg res0 = register_output( p, VERT_RESULT_BFC0 ); - emit_op1(p, OPCODE_MOV, res0, 0, _bfc0); - } - } - - if (separate && (p->state->fragprog_inputs_read & FRAG_BIT_COL1)) { - - struct ureg res1 = register_output( p, VERT_RESULT_COL1 ); - emit_op1(p, OPCODE_MOV, res1, 0, _col1); - - if (twoside) { - struct ureg res1 = register_output( p, VERT_RESULT_BFC1 ); - emit_op1(p, OPCODE_MOV, res1, 0, _bfc1); - } - } - - if (nr_lights == 0) { - release_temps(p); - return; - } - - - for (i = 0; i < MAX_LIGHTS; i++) { - if (p->state->unit[i].light_enabled) { - struct ureg half = undef; - struct ureg att = undef, VPpli = undef; - - count++; - - if (p->state->unit[i].light_eyepos3_is_zero) { - /* Can used precomputed constants in this case. - * Attenuation never applies to infinite lights. - */ - VPpli = register_param3(p, STATE_LIGHT, i, - STATE_POSITION_NORMALIZED); - if (p->state->light_local_viewer) { - struct ureg eye_hat = get_eye_position_normalized(p); - half = get_temp(p); - emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); - emit_normalize_vec3(p, half, half); - } else { - half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR); - } - } - else { - struct ureg Ppli = register_param3(p, STATE_LIGHT, i, - STATE_POSITION); - struct ureg V = get_eye_position(p); - struct ureg dist = get_temp(p); - - VPpli = get_temp(p); - half = get_temp(p); - - /* Calulate VPpli vector - */ - emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V); - - /* Normalize VPpli. The dist value also used in - * attenuation below. - */ - emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli); - emit_op1(p, OPCODE_RSQ, dist, 0, dist); - emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist); - - - /* Calculate attenuation: - */ - if (!p->state->unit[i].light_spotcutoff_is_180 || - p->state->unit[i].light_attenuated) { - att = calculate_light_attenuation(p, i, VPpli, dist); - } - - - /* Calculate viewer direction, or use infinite viewer: - */ - if (p->state->light_local_viewer) { - struct ureg eye_hat = get_eye_position_normalized(p); - emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); - } - else { - struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); - emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir); - } - - emit_normalize_vec3(p, half, half); - - release_temp(p, dist); - } - - /* Calculate dot products: - */ - emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); - emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half); - - - /* Front face lighting: - */ - { - struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT); - struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE); - struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR); - struct ureg res0, res1; - GLuint mask0, mask1; - - emit_op1(p, OPCODE_LIT, lit, 0, dots); - - if (!is_undef(att)) - emit_op2(p, OPCODE_MUL, lit, 0, lit, att); - - - mask0 = 0; - mask1 = 0; - res0 = _col0; - res1 = _col1; - - if (count == nr_lights) { - if (separate) { - mask0 = WRITEMASK_XYZ; - mask1 = WRITEMASK_XYZ; - - if (p->state->fragprog_inputs_read & FRAG_BIT_COL0) - res0 = register_output( p, VERT_RESULT_COL0 ); - - if (p->state->fragprog_inputs_read & FRAG_BIT_COL1) - res1 = register_output( p, VERT_RESULT_COL1 ); - } - else { - mask1 = WRITEMASK_XYZ; - - if (p->state->fragprog_inputs_read & FRAG_BIT_COL0) - res1 = register_output( p, VERT_RESULT_COL0 ); - } - } - - emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); - emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0); - emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1); - - release_temp(p, ambient); - release_temp(p, diffuse); - release_temp(p, specular); - } - - /* Back face lighting: - */ - if (twoside) { - struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT); - struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE); - struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR); - struct ureg res0, res1; - GLuint mask0, mask1; - - emit_op1(p, OPCODE_LIT, lit, 0, ureg_negate(swizzle(dots,X,Y,W,Z))); - - if (!is_undef(att)) - emit_op2(p, OPCODE_MUL, lit, 0, lit, att); - - mask0 = 0; - mask1 = 0; - res0 = _bfc0; - res1 = _bfc1; - - if (count == nr_lights) { - if (separate) { - mask0 = WRITEMASK_XYZ; - mask1 = WRITEMASK_XYZ; - if (p->state->fragprog_inputs_read & FRAG_BIT_COL0) - res0 = register_output( p, VERT_RESULT_BFC0 ); - - if (p->state->fragprog_inputs_read & FRAG_BIT_COL1) - res1 = register_output( p, VERT_RESULT_BFC1 ); - } - else { - mask1 = WRITEMASK_XYZ; - - if (p->state->fragprog_inputs_read & FRAG_BIT_COL0) - res1 = register_output( p, VERT_RESULT_BFC0 ); - } - } - - emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); - emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0); - emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1); - - release_temp(p, ambient); - release_temp(p, diffuse); - release_temp(p, specular); - } - - release_temp(p, half); - release_temp(p, VPpli); - release_temp(p, att); - } - } - - release_temps( p ); -} - - -static void build_fog( struct tnl_program *p ) -{ - struct ureg fog = register_output(p, VERT_RESULT_FOGC); - struct ureg input; - GLuint useabs = p->state->fog_source_is_depth && p->state->fog_option && - (p->state->fog_option != FOG_EXP2); - - if (p->state->fog_source_is_depth) { - input = swizzle1(get_eye_position(p), Z); - } - else { - input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); - } - - if (p->state->fog_option && - p->state->tnl_do_vertex_fog) { - struct ureg params = register_param2(p, STATE_INTERNAL, - STATE_FOG_PARAMS_OPTIMIZED); - struct ureg tmp = get_temp(p); - struct ureg id = get_identity_param(p); - - emit_op1(p, OPCODE_MOV, fog, 0, id); - - if (useabs) { - emit_op1(p, OPCODE_ABS, tmp, 0, input); - } - - switch (p->state->fog_option) { - case FOG_LINEAR: { - emit_op3(p, OPCODE_MAD, tmp, 0, useabs ? tmp : input, - swizzle1(params,X), swizzle1(params,Y)); - emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */ - emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W)); - break; - } - case FOG_EXP: - emit_op2(p, OPCODE_MUL, tmp, 0, useabs ? tmp : input, - swizzle1(params,Z)); - emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, ureg_negate(tmp)); - break; - case FOG_EXP2: - emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,W)); - emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp); - emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, ureg_negate(tmp)); - break; - } - - release_temp(p, tmp); - } - else { - /* results = incoming fog coords (compute fog per-fragment later) - * - * KW: Is it really necessary to do anything in this case? - */ - emit_op1(p, useabs ? OPCODE_ABS : OPCODE_MOV, fog, 0, input); - } -} - -static void build_reflect_texgen( struct tnl_program *p, - struct ureg dest, - GLuint writemask ) -{ - struct ureg normal = get_eye_normal(p); - struct ureg eye_hat = get_eye_position_normalized(p); - struct ureg tmp = get_temp(p); - - /* n.u */ - emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); - /* 2n.u */ - emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); - /* (-2n.u)n + u */ - emit_op3(p, OPCODE_MAD, dest, writemask, ureg_negate(tmp), normal, eye_hat); - - release_temp(p, tmp); -} - -static void build_sphere_texgen( struct tnl_program *p, - struct ureg dest, - GLuint writemask ) -{ - struct ureg normal = get_eye_normal(p); - struct ureg eye_hat = get_eye_position_normalized(p); - struct ureg tmp = get_temp(p); - struct ureg half = register_scalar_const(p, .5); - struct ureg r = get_temp(p); - struct ureg inv_m = get_temp(p); - struct ureg id = get_identity_param(p); - - /* Could share the above calculations, but it would be - * a fairly odd state for someone to set (both sphere and - * reflection active for different texture coordinate - * components. Of course - if two texture units enable - * reflect and/or sphere, things start to tilt in favour - * of seperating this out: - */ - - /* n.u */ - emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); - /* 2n.u */ - emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); - /* (-2n.u)n + u */ - emit_op3(p, OPCODE_MAD, r, 0, ureg_negate(tmp), normal, eye_hat); - /* r + 0,0,1 */ - emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z)); - /* rx^2 + ry^2 + (rz+1)^2 */ - emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp); - /* 2/m */ - emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); - /* 1/m */ - emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half); - /* r/m + 1/2 */ - emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half); - - release_temp(p, tmp); - release_temp(p, r); - release_temp(p, inv_m); -} - - -static void build_texture_transform( struct tnl_program *p ) -{ - GLuint i, j; - - for (i = 0; i < MAX_TEXTURE_UNITS; i++) { - - if (!(p->state->fragprog_inputs_read & (FRAG_BIT_TEX0<<i))) - continue; - - if (p->state->unit[i].texgen_enabled || - p->state->unit[i].texmat_enabled) { - - GLuint texmat_enabled = p->state->unit[i].texmat_enabled; - struct ureg out = register_output(p, VERT_RESULT_TEX0 + i); - struct ureg out_texgen = undef; - - if (p->state->unit[i].texgen_enabled) { - GLuint copy_mask = 0; - GLuint sphere_mask = 0; - GLuint reflect_mask = 0; - GLuint normal_mask = 0; - GLuint modes[4]; - - if (texmat_enabled) - out_texgen = get_temp(p); - else - out_texgen = out; - - modes[0] = p->state->unit[i].texgen_mode0; - modes[1] = p->state->unit[i].texgen_mode1; - modes[2] = p->state->unit[i].texgen_mode2; - modes[3] = p->state->unit[i].texgen_mode3; - - for (j = 0; j < 4; j++) { - switch (modes[j]) { - case TXG_OBJ_LINEAR: { - struct ureg obj = register_input(p, VERT_ATTRIB_POS); - struct ureg plane = - register_param3(p, STATE_TEXGEN, i, - STATE_TEXGEN_OBJECT_S + j); - - emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, - obj, plane ); - break; - } - case TXG_EYE_LINEAR: { - struct ureg eye = get_eye_position(p); - struct ureg plane = - register_param3(p, STATE_TEXGEN, i, - STATE_TEXGEN_EYE_S + j); - - emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, - eye, plane ); - break; - } - case TXG_SPHERE_MAP: - sphere_mask |= WRITEMASK_X << j; - break; - case TXG_REFLECTION_MAP: - reflect_mask |= WRITEMASK_X << j; - break; - case TXG_NORMAL_MAP: - normal_mask |= WRITEMASK_X << j; - break; - case TXG_NONE: - copy_mask |= WRITEMASK_X << j; - } - - } - - - if (sphere_mask) { - build_sphere_texgen(p, out_texgen, sphere_mask); - } - - if (reflect_mask) { - build_reflect_texgen(p, out_texgen, reflect_mask); - } - - if (normal_mask) { - struct ureg normal = get_eye_normal(p); - emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal ); - } - - if (copy_mask) { - struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i); - emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in ); - } - } - - if (texmat_enabled) { - struct ureg texmat[4]; - struct ureg in = (!is_undef(out_texgen) ? - out_texgen : - register_input(p, VERT_ATTRIB_TEX0+i)); - if (PREFER_DP4) { - register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, - 0, texmat ); - emit_matrix_transform_vec4( p, out, texmat, in ); - } - else { - register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, - STATE_MATRIX_TRANSPOSE, texmat ); - emit_transpose_matrix_transform_vec4( p, out, texmat, in ); - } - } - - release_temps(p); - } - else { - emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i); - } - } -} - - -/* Seems like it could be tighter: - */ -static void build_pointsize( struct tnl_program *p ) -{ - struct ureg eye = get_eye_position(p); - struct ureg state_size = register_param1(p, STATE_POINT_SIZE); - struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION); - struct ureg out = register_output(p, VERT_RESULT_PSIZ); - struct ureg ut = get_temp(p); - - /* 1, Z, Z * Z, 1 */ - emit_op1(p, OPCODE_MOV, ut, WRITEMASK_XW, swizzle1(get_identity_param(p), W)); - emit_op1(p, OPCODE_ABS, ut, WRITEMASK_YZ, swizzle1(eye, Z)); - emit_op2(p, OPCODE_MUL, ut, WRITEMASK_Z, ut, ut); - - - /* p1 + p2 * dist + p3 * dist * dist, 0 */ - emit_op2(p, OPCODE_DP3, ut, WRITEMASK_X, ut, state_attenuation); - - /* 1 / sqrt(factor) */ - emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut ); - - /* ut = pointSize / factor */ - emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size); - - /* Clamp to min/max - state_size.[yz] - */ - emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y)); - emit_op2(p, OPCODE_MIN, out, 0, swizzle1(ut, X), swizzle1(state_size, Z)); - - release_temp(p, ut); -} - -static void build_tnl_program( struct tnl_program *p ) -{ - /* Emit the program, starting with modelviewproject: - */ - build_hpos(p); - - /* Lighting calculations: - */ - if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) { - if (p->state->light_global_enabled) - build_lighting(p); - else { - if (p->state->fragprog_inputs_read & FRAG_BIT_COL0) - emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0); - - if (p->state->fragprog_inputs_read & FRAG_BIT_COL1) - emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1); - } - } - - if ((p->state->fragprog_inputs_read & FRAG_BIT_FOGC) || - p->state->fog_option != FOG_NONE) - build_fog(p); - - if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY) - build_texture_transform(p); - - if (p->state->point_attenuated) - build_pointsize(p); - - /* Finish up: - */ - emit_op1(p, OPCODE_END, undef, 0, undef); - - /* Disassemble: - */ - if (DISASSEM) { - _mesa_printf ("\n"); - } -} - - -static void build_new_tnl_program( const struct state_key *key, - struct gl_vertex_program *program, - GLuint max_temps) -{ - struct tnl_program p; - - _mesa_memset(&p, 0, sizeof(p)); - p.state = key; - p.program = program; - p.eye_position = undef; - p.eye_position_normalized = undef; - p.eye_normal = undef; - p.identity = undef; - p.temp_in_use = 0; - p.nr_instructions = 16; - - if (max_temps >= sizeof(int) * 8) - p.temp_reserved = 0; - else - p.temp_reserved = ~((1<<max_temps)-1); - - p.program->Base.Instructions = - _mesa_malloc(sizeof(struct prog_instruction) * p.nr_instructions); - p.program->Base.String = 0; - p.program->Base.NumInstructions = - p.program->Base.NumTemporaries = - p.program->Base.NumParameters = - p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0; - p.program->Base.Parameters = _mesa_new_parameter_list(); - p.program->Base.InputsRead = 0; - p.program->Base.OutputsWritten = 0; - - build_tnl_program( &p ); -} - -static void *search_cache( struct brw_tnl_cache *cache, - GLuint hash, - const void *key, - GLuint keysize) -{ - struct brw_tnl_cache_item *c; - - for (c = cache->items[hash % cache->size]; c; c = c->next) { - if (c->hash == hash && memcmp(c->key, key, keysize) == 0) - return c->data; - } - - return NULL; -} - -static void rehash( struct brw_tnl_cache *cache ) -{ - struct brw_tnl_cache_item **items; - struct brw_tnl_cache_item *c, *next; - GLuint size, i; - - size = cache->size * 3; - items = (struct brw_tnl_cache_item**) _mesa_malloc(size * sizeof(*items)); - _mesa_memset(items, 0, size * sizeof(*items)); - - for (i = 0; i < cache->size; i++) - for (c = cache->items[i]; c; c = next) { - next = c->next; - c->next = items[c->hash % size]; - items[c->hash % size] = c; - } - - FREE(cache->items); - cache->items = items; - cache->size = size; -} - -static void cache_item( struct brw_tnl_cache *cache, - GLuint hash, - const struct state_key *key, - void *data ) -{ - struct brw_tnl_cache_item *c = MALLOC(sizeof(*c)); - c->hash = hash; - - c->key = malloc(sizeof(*key)); - memcpy(c->key, key, sizeof(*key)); - - c->data = data; - - if (++cache->n_items > cache->size * 1.5) - rehash(cache); - - c->next = cache->items[hash % cache->size]; - cache->items[hash % cache->size] = c; -} - - -static GLuint hash_key( struct state_key *key ) -{ - GLuint *ikey = (GLuint *)key; - GLuint hash = 0, i; - - /* I'm sure this can be improved on, but speed is important: - */ - for (i = 0; i < sizeof(*key)/sizeof(GLuint); i++) - hash += ikey[i]; - - return hash; -} - -static void update_tnl_program( struct brw_context *brw ) -{ - GLcontext *ctx = &brw->intel.ctx; - struct state_key key; - GLuint hash; - struct gl_vertex_program *old = brw->tnl_program; - - /* _NEW_PROGRAM */ - if (brw->attribs.VertexProgram->_Enabled) - return; - - /* Grab all the relevent state and put it in a single structure: - */ - make_state_key(ctx, &key); - hash = hash_key(&key); - - /* Look for an already-prepared program for this state: - */ - brw->tnl_program = (struct gl_vertex_program *) - search_cache( &brw->tnl_program_cache, hash, &key, sizeof(key) ); - - /* OK, we'll have to build a new one: - */ - if (!brw->tnl_program) { - brw->tnl_program = (struct gl_vertex_program *) - ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0); - - build_new_tnl_program( &key, brw->tnl_program, -/* ctx->Const.MaxVertexProgramTemps */ - 32 - ); - - if (ctx->Driver.ProgramStringNotify) - ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB, - &brw->tnl_program->Base ); - - cache_item( &brw->tnl_program_cache, - hash, &key, brw->tnl_program ); - } - - if (old != brw->tnl_program) - brw->state.dirty.brw |= BRW_NEW_TNL_PROGRAM; -} - -/* Note: See brw_draw.c - the vertex program must not rely on - * brw->primitive or brw->reduced_prim. - */ -const struct brw_tracked_state brw_tnl_vertprog = { - .dirty = { - .mesa = (_NEW_PROGRAM | - _NEW_LIGHT | - _NEW_TRANSFORM | - _NEW_FOG | - _NEW_HINT | - _NEW_POINT | - _NEW_TEXTURE), - .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_INPUT_VARYING), - .cache = 0 - }, - .update = update_tnl_program -}; - - - - -static void update_active_vertprog( struct brw_context *brw ) +static void prepare_active_vertprog( struct brw_context *brw ) { const struct gl_vertex_program *prev = brw->vertex_program; - /* NEW_PROGRAM */ - if (brw->attribs.VertexProgram->_Enabled) { - brw->vertex_program = brw->attribs.VertexProgram->Current; - } - else { - /* BRW_NEW_TNL_PROGRAM */ - brw->vertex_program = brw->tnl_program; - } + brw->vertex_program = brw->attribs.VertexProgram->_Current; if (brw->vertex_program != prev) brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; @@ -1654,37 +52,8 @@ static void update_active_vertprog( struct brw_context *brw ) const struct brw_tracked_state brw_active_vertprog = { .dirty = { .mesa = _NEW_PROGRAM, - .brw = BRW_NEW_TNL_PROGRAM, + .brw = 0, .cache = 0 }, - .update = update_active_vertprog + .prepare = prepare_active_vertprog }; - - -void brw_ProgramCacheInit( GLcontext *ctx ) -{ - struct brw_context *brw = brw_context(ctx); - - brw->tnl_program_cache.size = 17; - brw->tnl_program_cache.n_items = 0; - brw->tnl_program_cache.items = (struct brw_tnl_cache_item **) - _mesa_calloc(brw->tnl_program_cache.size * - sizeof(struct brw_tnl_cache_item)); -} - -void brw_ProgramCacheDestroy( GLcontext *ctx ) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_tnl_cache_item *c, *next; - GLuint i; - - for (i = 0; i < brw->tnl_program_cache.size; i++) - for (c = brw->tnl_program_cache.items[i]; c; c = next) { - next = c->next; - FREE(c->key); - FREE(c->data); - FREE(c); - } - - FREE(brw->tnl_program_cache.items); -} diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 786f30e641e..2d4c81274e6 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -32,11 +32,11 @@ -#include "glheader.h" -#include "mtypes.h" -#include "imports.h" -#include "macros.h" -#include "colormac.h" +#include "main/glheader.h" +#include "main/mtypes.h" +#include "main/imports.h" +#include "main/macros.h" +#include "main/colormac.h" #include "intel_batchbuffer.h" #include "intel_regions.h" @@ -47,64 +47,117 @@ #include "brw_draw.h" #include "brw_state.h" -#include "brw_aub.h" #include "brw_fallback.h" #include "brw_vs.h" +#include <stdarg.h> - +static void +dri_bo_release(dri_bo **bo) +{ + dri_bo_unreference(*bo); + *bo = NULL; +} /* called from intelDestroyContext() */ static void brw_destroy_context( struct intel_context *intel ) { - GLcontext *ctx = &intel->ctx; struct brw_context *brw = brw_context(&intel->ctx); - - brw_aub_destroy(brw); + int i; brw_destroy_metaops(brw); brw_destroy_state(brw); brw_draw_destroy( brw ); - brw_ProgramCacheDestroy( ctx ); brw_FrameBufferTexDestroy( brw ); + + for (i = 0; i < brw->state.nr_draw_regions; i++) + intel_region_release(&brw->state.draw_regions[i]); + brw->state.nr_draw_regions = 0; + intel_region_release(&brw->state.depth_region); + + dri_bo_release(&brw->curbe.curbe_bo); + dri_bo_release(&brw->vs.prog_bo); + dri_bo_release(&brw->vs.state_bo); + dri_bo_release(&brw->gs.prog_bo); + dri_bo_release(&brw->gs.state_bo); + dri_bo_release(&brw->clip.prog_bo); + dri_bo_release(&brw->clip.state_bo); + dri_bo_release(&brw->clip.vp_bo); + dri_bo_release(&brw->sf.prog_bo); + dri_bo_release(&brw->sf.state_bo); + dri_bo_release(&brw->sf.vp_bo); + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) + dri_bo_release(&brw->wm.sdc_bo[i]); + dri_bo_release(&brw->wm.bind_bo); + for (i = 0; i < BRW_WM_MAX_SURF; i++) + dri_bo_release(&brw->wm.surf_bo[i]); + dri_bo_release(&brw->wm.prog_bo); + dri_bo_release(&brw->wm.state_bo); + dri_bo_release(&brw->cc.prog_bo); + dri_bo_release(&brw->cc.state_bo); + dri_bo_release(&brw->cc.vp_bo); } /* called from intelDrawBuffer() */ static void brw_set_draw_region( struct intel_context *intel, - struct intel_region *draw_region, - struct intel_region *depth_region) + struct intel_region *draw_regions[], + struct intel_region *depth_region, + GLuint num_regions) { struct brw_context *brw = brw_context(&intel->ctx); - - intel_region_release(intel, &brw->state.draw_region); - intel_region_release(intel, &brw->state.depth_region); - intel_region_reference(&brw->state.draw_region, draw_region); + int i; + if (brw->state.depth_region != depth_region) + brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER; + for (i = 0; i < brw->state.nr_draw_regions; i++) + intel_region_release(&brw->state.draw_regions[i]); + intel_region_release(&brw->state.depth_region); + for (i = 0; i < num_regions; i++) + intel_region_reference(&brw->state.draw_regions[i], draw_regions[i]); intel_region_reference(&brw->state.depth_region, depth_region); + brw->state.nr_draw_regions = num_regions; } +/* called from intel_batchbuffer_flush and children before sending a + * batchbuffer off. + */ +static void brw_finish_batch(struct intel_context *intel) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + brw_emit_query_end(brw); +} /* called from intelFlushBatchLocked */ -static void brw_lost_hardware( struct intel_context *intel ) +static void brw_new_batch( struct intel_context *intel ) { struct brw_context *brw = brw_context(&intel->ctx); - /* Note that we effectively lose the context after this. - * - * Setting this flag provokes a state buffer wrap and also flushes - * the hardware caches. - */ - brw->state.dirty.brw |= BRW_NEW_CONTEXT; + /* Check that we didn't just wrap our batchbuffer at a bad time. */ + assert(!brw->no_batch_wrap); + + brw->curbe.need_new_bo = GL_TRUE; - /* Which means there shouldn't be any commands already queued: + /* Mark all context state as needing to be re-emitted. + * This is probably not as severe as on 915, since almost all of our state + * is just in referenced buffers. */ - assert(intel->batch->ptr == intel->batch->map + intel->batch->offset); + brw->state.dirty.brw |= BRW_NEW_CONTEXT; brw->state.dirty.mesa |= ~0; brw->state.dirty.brw |= ~0; brw->state.dirty.cache |= ~0; + + /* Move to the end of the current upload buffer so that we'll force choosing + * a new buffer next time. + */ + if (brw->vb.upload.bo != NULL) { + dri_bo_unreference(brw->vb.upload.bo); + brw->vb.upload.bo = NULL; + brw->vb.upload.offset = 0; + } } static void brw_note_fence( struct intel_context *intel, @@ -115,10 +168,9 @@ static void brw_note_fence( struct intel_context *intel, static void brw_note_unlock( struct intel_context *intel ) { - struct brw_context *brw = brw_context(&intel->ctx); + struct brw_context *brw = brw_context(&intel->ctx); - brw_pool_check_wrap(brw, &brw->pool[BRW_GS_POOL]); - brw_pool_check_wrap(brw, &brw->pool[BRW_SS_POOL]); + brw_state_cache_check_size(brw); brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_LOCK; } @@ -156,9 +208,6 @@ static GLuint brw_flush_cmd( void ) return *(GLuint *)&flush; } - - - static void brw_invalidate_state( struct intel_context *intel, GLuint new_state ) { /* nothing */ @@ -176,10 +225,12 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.invalidate_state = brw_invalidate_state; brw->intel.vtbl.note_fence = brw_note_fence; brw->intel.vtbl.note_unlock = brw_note_unlock; - brw->intel.vtbl.lost_hardware = brw_lost_hardware; + brw->intel.vtbl.new_batch = brw_new_batch; + brw->intel.vtbl.finish_batch = brw_finish_batch; brw->intel.vtbl.destroy = brw_destroy_context; brw->intel.vtbl.set_draw_region = brw_set_draw_region; brw->intel.vtbl.flush_cmd = brw_flush_cmd; brw->intel.vtbl.emit_flush = brw_emit_flush; + brw->intel.vtbl.debug_batch = brw_debug_batch; } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 1497dc79687..361312c2ca1 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -29,12 +29,11 @@ * Keith Whitwell <[email protected]> */ - +#include "main/texformat.h" #include "brw_context.h" #include "brw_util.h" #include "brw_wm.h" #include "brw_state.h" -#include "brw_hal.h" GLuint brw_wm_nr_args( GLuint opcode ) @@ -66,7 +65,11 @@ GLuint brw_wm_nr_args( GLuint opcode ) case OPCODE_POW: case OPCODE_SUB: case OPCODE_SGE: + case OPCODE_SGT: + case OPCODE_SLE: case OPCODE_SLT: + case OPCODE_SEQ: + case OPCODE_SNE: case OPCODE_ADD: case OPCODE_MAX: case OPCODE_MIN: @@ -116,20 +119,6 @@ GLuint brw_wm_is_scalar_result( GLuint opcode ) } -static void brw_wm_pass_hal (struct brw_wm_compile *c) -{ - static void (*hal_wm_pass) (struct brw_wm_compile *c); - static GLboolean hal_tried; - - if (!hal_tried) - { - hal_wm_pass = brw_hal_find_symbol ("intel_hal_wm_pass"); - hal_tried = 1; - } - if (hal_wm_pass) - (*hal_wm_pass) (c); -} - static void do_wm_prog( struct brw_context *brw, struct brw_fragment_program *fp, struct brw_wm_prog_key *key) @@ -150,60 +139,53 @@ static void do_wm_prog( struct brw_context *brw, c->fp = fp; c->env_param = brw->intel.ctx.FragmentProgram.Parameters; - - /* Augment fragment program. Add instructions for pre- and - * post-fragment-program tasks such as interpolation and fogging. - */ - brw_wm_pass_fp(c); - - /* Translate to intermediate representation. Build register usage - * chains. - */ - brw_wm_pass0(c); - - /* Dead code removal. - */ - brw_wm_pass1(c); - - /* Hal optimization - */ - brw_wm_pass_hal (c); - - /* Register allocation. - */ - c->grf_limit = BRW_WM_MAX_GRF/2; - - /* This is where we start emitting gen4 code: - */ - brw_init_compile(&c->func); - - brw_wm_pass2(c); - - c->prog_data.total_grf = c->max_wm_grf; - if (c->last_scratch) { - c->prog_data.total_scratch = - c->last_scratch + 0x40; + brw_init_compile(brw, &c->func); + if (brw_wm_is_glsl(&c->fp->program)) { + brw_wm_glsl_emit(brw, c); } else { - c->prog_data.total_scratch = 0; + /* Augment fragment program. Add instructions for pre- and + * post-fragment-program tasks such as interpolation and fogging. + */ + brw_wm_pass_fp(c); + + /* Translate to intermediate representation. Build register usage + * chains. + */ + brw_wm_pass0(c); + + /* Dead code removal. + */ + brw_wm_pass1(c); + + /* Register allocation. + */ + c->grf_limit = BRW_WM_MAX_GRF/2; + + brw_wm_pass2(c); + + c->prog_data.total_grf = c->max_wm_grf; + if (c->last_scratch) { + c->prog_data.total_scratch = + c->last_scratch + 0x40; + } else { + c->prog_data.total_scratch = 0; + } + + /* Emit GEN4 code. + */ + brw_wm_emit(c); } - - /* Emit GEN4 code. - */ - brw_wm_emit(c); - /* get the program */ program = brw_get_program(&c->func, &program_size); - /* - */ - brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG], - &c->key, - sizeof(c->key), - program, - program_size, - &c->prog_data, - &brw->wm.prog_data ); + dri_bo_unreference(brw->wm.prog_bo); + brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG, + &c->key, sizeof(c->key), + NULL, 0, + program, program_size, + &c->prog_data, + &brw->wm.prog_data ); } @@ -243,7 +225,8 @@ static void brw_wm_populate_key( struct brw_context *brw, lookup |= IZ_STENCIL_TEST_ENABLE_BIT; if (brw->attribs.Stencil->WriteMask[0] || - (brw->attribs.Stencil->TestTwoSide && brw->attribs.Stencil->WriteMask[1])) + (brw->attribs.Stencil->_TestTwoSide && + brw->attribs.Stencil->WriteMask[1])) lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; } @@ -285,7 +268,7 @@ static void brw_wm_populate_key( struct brw_context *brw, /* BRW_NEW_WM_INPUT_DIMENSIONS */ - key->projtex_mask = brw->wm.input_size_masks[4-1]; + key->projtex_mask = brw->wm.input_size_masks[4-1] >> (FRAG_ATTRIB_TEX0 - FRAG_ATTRIB_WPOS); /* _NEW_LIGHT */ key->flat_shade = (brw->attribs.Light->ShadeModel == GL_FLAT); @@ -296,17 +279,41 @@ static void brw_wm_populate_key( struct brw_context *brw, const struct gl_texture_object *t = unit->_Current; if (unit->_ReallyEnabled) { - - if (t->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB && - t->Image[0][t->BaseLevel]->_BaseFormat == GL_DEPTH_COMPONENT) { - key->shadowtex_mask |= 1<<i; - } - - if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA) + if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA) { key->yuvtex_mask |= 1<<i; + if (t->Image[0][t->BaseLevel]->TexFormat->MesaFormat == + MESA_FORMAT_YCBCR) + key->yuvtex_swap_mask |= 1<< i; + } } } - + + /* Shadow */ + key->shadowtex_mask = fp->program.Base.ShadowSamplers; + + /* _NEW_BUFFERS */ + /* + * Include the draw buffer origin and height so that we can calculate + * fragment position values relative to the bottom left of the drawable, + * from the incoming screen origin relative position we get as part of our + * payload. + * + * We could avoid recompiling by including this as a constant referenced by + * our program, but if we were to do that it would also be nice to handle + * getting that constant updated at batchbuffer submit time (when we + * hold the lock and know where the buffer really is) rather than at emit + * time when we don't hold the lock and are just guessing. We could also + * just avoid using this as key data if the program doesn't use + * fragment.position. + * + * This pretty much becomes moot with DRI2 and redirected buffers anyway, + * as our origins will always be zero then. + */ + if (brw->intel.driDrawable != NULL) { + key->origin_x = brw->intel.driDrawable->x; + key->origin_y = brw->intel.driDrawable->y; + key->drawable_height = brw->intel.driDrawable->h; + } /* Extra info: */ @@ -315,7 +322,7 @@ static void brw_wm_populate_key( struct brw_context *brw, } -static void brw_upload_wm_prog( struct brw_context *brw ) +static void brw_prepare_wm_prog(struct brw_context *brw) { struct brw_wm_prog_key key; struct brw_fragment_program *fp = (struct brw_fragment_program *) @@ -325,13 +332,13 @@ static void brw_upload_wm_prog( struct brw_context *brw ) /* Make an early check for the key. */ - if (brw_search_cache(&brw->cache[BRW_WM_PROG], - &key, sizeof(key), - &brw->wm.prog_data, - &brw->wm.prog_gs_offset)) - return; - - do_wm_prog(brw, fp, &key); + dri_bo_unreference(brw->wm.prog_bo); + brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG, + &key, sizeof(key), + NULL, 0, + &brw->wm.prog_data); + if (brw->wm.prog_bo == NULL) + do_wm_prog(brw, fp, &key); } @@ -345,12 +352,13 @@ const struct brw_tracked_state brw_wm_prog = { _NEW_POLYGON | _NEW_LINE | _NEW_LIGHT | + _NEW_BUFFERS | _NEW_TEXTURE), .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_WM_INPUT_DIMENSIONS | BRW_NEW_REDUCED_PRIMITIVE), .cache = 0 }, - .update = brw_upload_wm_prog + .prepare = brw_prepare_wm_prog }; diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index f5fddfdb68a..896390c17b8 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -34,9 +34,9 @@ #define BRW_WM_H +#include "shader/prog_instruction.h" #include "brw_context.h" #include "brw_eu.h" -#include "prog_instruction.h" /* A big lookup table is used to figure out which and how many * additional regs will inserted before the main payload in the WM @@ -69,9 +69,12 @@ struct brw_wm_prog_key { GLuint runtime_check_aads_emit:1; GLuint yuvtex_mask:8; - GLuint pad1:24; + GLuint yuvtex_swap_mask:8; /* UV swaped */ + GLuint pad1:16; GLuint program_string_id:32; + GLuint origin_x, origin_y; + GLuint drawable_height; }; @@ -140,6 +143,8 @@ struct brw_wm_instruction { GLuint writemask:4; GLuint tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */ GLuint tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */ + GLuint eot:1; /* End of thread indicator for FB_WRITE*/ + GLuint target:10; /* target binding table index for FB_WRITE*/ }; @@ -152,6 +157,7 @@ struct brw_wm_instruction { #define BRW_WM_MAX_PARAM 256 #define BRW_WM_MAX_CONST 256 #define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS +#define BRW_WM_MAX_SUBROUTINE 16 @@ -194,6 +200,8 @@ struct brw_wm_compile { GLuint nr_fp_insns; GLuint fp_temp; GLuint fp_interp_emitted; + GLuint fp_fragcolor_emitted; + GLuint fp_deriv_emitted; struct prog_src_register pixel_xy; struct prog_src_register delta_xy; @@ -231,6 +239,18 @@ struct brw_wm_compile { GLuint grf_limit; GLuint max_wm_grf; GLuint last_scratch; + + struct { + GLboolean inited; + struct brw_reg reg; + } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + struct brw_reg stack; + struct brw_reg emit_mask_reg; + GLuint reg_index; + GLuint tmp_regs[BRW_WM_MAX_GRF]; + GLuint tmp_index; + GLuint tmp_max; + GLuint subroutines[BRW_WM_MAX_SUBROUTINE]; }; @@ -259,4 +279,6 @@ void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, struct brw_wm_prog_key *key ); +GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); +void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); #endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index fd605159727..58c78c4b2c5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -30,7 +30,7 @@ */ -#include "macros.h" +#include "main/macros.h" #include "brw_context.h" #include "brw_wm.h" @@ -39,7 +39,7 @@ /* Not quite sure how correct this is - need to understand horiz * vs. vertical strides a little better. */ -static __inline struct brw_reg sechalf( struct brw_reg reg ) +static INLINE struct brw_reg sechalf( struct brw_reg reg ) { if (reg.vstride) reg.nr++; @@ -122,26 +122,30 @@ static void emit_delta_xy(struct brw_compile *p, } } -static void emit_wpos_xy(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) +static void emit_wpos_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { - /* Calc delta X,Y by subtracting origin in r1 from the pixel - * centers. + struct brw_compile *p = &c->func; + + /* Calculate the pixel offset from window bottom left into destination + * X and Y channels. */ if (mask & WRITEMASK_X) { - brw_MOV(p, + /* X' = X - origin */ + brw_ADD(p, dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_UW)); + retype(arg0[0], BRW_REGISTER_TYPE_W), + brw_imm_d(0 - c->key.origin_x)); } if (mask & WRITEMASK_Y) { - /* TODO -- window_height - Y */ - brw_MOV(p, + /* Y' = height - (Y - origin_y) = height + origin_y - Y */ + brw_ADD(p, dst[1], - negate(retype(arg0[1], BRW_REGISTER_TYPE_UW))); - + negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), + brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); } } @@ -219,6 +223,10 @@ static void emit_pinterp( struct brw_compile *p, if (mask & (1<<i)) { brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + } + } + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { brw_MUL(p, dst[i], dst[i], w[3]); } } @@ -229,20 +237,20 @@ static void emit_cinterp( struct brw_compile *p, GLuint mask, const struct brw_reg *arg0 ) { - struct brw_reg interp[4]; - GLuint nr = arg0[0].nr; - GLuint i; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */ - } - } + struct brw_reg interp[4]; + GLuint nr = arg0[0].nr; + GLuint i; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */ + } + } } @@ -343,11 +351,10 @@ static void emit_lrp( struct brw_compile *p, } } } - - -static void emit_slt( struct brw_compile *p, +static void emit_sop( struct brw_compile *p, const struct brw_reg *dst, GLuint mask, + GLuint cond, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { @@ -356,34 +363,66 @@ static void emit_slt( struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { brw_MOV(p, dst[i], brw_imm_f(0)); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]); + brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]); brw_MOV(p, dst[i], brw_imm_f(1.0)); brw_set_predicate_control_flag_value(p, 0xff); } } } -/* Isn't this just the same as the above with the args swapped? - */ -static void emit_sge( struct brw_compile *p, +static void emit_slt( struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - GLuint i; + emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1); +} - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - brw_MOV(p, dst[i], brw_imm_f(0)); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]); - brw_MOV(p, dst[i], brw_imm_f(1.0)); - brw_set_predicate_control_flag_value(p, 0xff); - } - } +static void emit_sle( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1); } +static void emit_sgt( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1); +} +static void emit_sge( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1); +} + +static void emit_seq( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1); +} + +static void emit_sne( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1 ) +{ + emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1); +} static void emit_cmp( struct brw_compile *p, const struct brw_reg *dst, @@ -465,6 +504,9 @@ static void emit_dp3( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + if (!(mask & WRITEMASK_XYZW)) + return; /* Do not emit dead code*/ + assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); @@ -482,6 +524,9 @@ static void emit_dp4( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + if (!(mask & WRITEMASK_XYZW)) + return; /* Do not emit dead code*/ + assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); @@ -500,6 +545,9 @@ static void emit_dph( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + if (!(mask & WRITEMASK_XYZW)) + return; /* Do not emit dead code*/ + assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); @@ -543,8 +591,11 @@ static void emit_math1( struct brw_compile *p, GLuint mask, const struct brw_reg *arg0 ) { - assert((mask & WRITEMASK_XYZW) == WRITEMASK_X || - function == BRW_MATH_FUNCTION_SINCOS); + if (!(mask & WRITEMASK_XYZW)) + return; /* Do not emit dead code*/ + + //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X || + // function == BRW_MATH_FUNCTION_SINCOS); brw_MOV(p, brw_message_reg(2), arg0[0]); @@ -567,6 +618,9 @@ static void emit_math2( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1) { + if (!(mask & WRITEMASK_XYZW)) + return; /* Do not emit dead code*/ + assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); brw_push_insn_state(p); @@ -661,7 +715,7 @@ static void emit_tex( struct brw_wm_compile *c, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - inst->tex_unit + 1, /* surface */ + inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */ inst->tex_unit, /* sampler */ inst->writemask, (shadow ? @@ -670,7 +724,6 @@ static void emit_tex( struct brw_wm_compile *c, responseLength, msgLength, 0); - } @@ -712,7 +765,7 @@ static void emit_txb( struct brw_wm_compile *c, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - inst->tex_unit + 1, /* surface */ + inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */ inst->tex_unit, /* sampler */ inst->writemask, BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, @@ -777,6 +830,7 @@ static void emit_kil( struct brw_wm_compile *c, brw_push_insn_state(p); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0)); brw_set_predicate_control_flag_value(p, 0xff); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_AND(p, r0uw, brw_flag_reg(), r0uw); brw_pop_insn_state(p); } @@ -784,7 +838,9 @@ static void emit_kil( struct brw_wm_compile *c, static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, - GLuint nr ) + GLuint nr, + GLuint target, + GLuint eot ) { struct brw_compile *p = &c->func; @@ -807,10 +863,10 @@ static void fire_fb_write( struct brw_wm_compile *c, retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW), base_reg, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), - 0, /* render surface always 0 */ + target, nr, 0, - 1); + eot); } static void emit_aa( struct brw_wm_compile *c, @@ -835,7 +891,9 @@ static void emit_aa( struct brw_wm_compile *c, static void emit_fb_write( struct brw_wm_compile *c, struct brw_reg *arg0, struct brw_reg *arg1, - struct brw_reg *arg2) + struct brw_reg *arg2, + GLuint target, + GLuint eot) { struct brw_compile *p = &c->func; GLuint nr = 2; @@ -890,15 +948,16 @@ static void emit_fb_write( struct brw_wm_compile *c, GLuint off = c->key.dest_depth_reg % 2; if (off != 0) { - brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, brw_message_reg(nr), arg1[comp]); - /* 2nd half? */ - brw_MOV(p, brw_message_reg(nr+1), offset(arg1[comp],1)); - brw_pop_insn_state(p); + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1)); + /* 2nd half? */ + brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]); + brw_pop_insn_state(p); } else { - brw_MOV(p, brw_message_reg(nr), arg1[comp]); + brw_MOV(p, brw_message_reg(nr), arg1[comp]); } nr += 2; } @@ -908,7 +967,7 @@ static void emit_fb_write( struct brw_wm_compile *c, if (c->key.aa_dest_stencil_reg) emit_aa(c, arg1, 2); - fire_fb_write(c, 0, nr); + fire_fb_write(c, 0, nr, target, eot); } else { struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); @@ -925,14 +984,14 @@ static void emit_fb_write( struct brw_wm_compile *c, jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); { emit_aa(c, arg1, 2); - fire_fb_write(c, 0, nr); + fire_fb_write(c, 0, nr, target, eot); /* note - thread killed in subroutine */ } brw_land_fwd_jump(p, jmp); /* ELSE: Shuffle up one register to fill in the hole left for AA: */ - fire_fb_write(c, 1, nr-1); + fire_fb_write(c, 1, nr-1, target, eot); } } @@ -1080,7 +1139,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case WM_WPOSXY: - emit_wpos_xy(p, dst, dst_flags, args[0]); + emit_wpos_xy(c, dst, dst_flags, args[0]); break; case WM_PIXELW: @@ -1100,7 +1159,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case WM_FB_WRITE: - emit_fb_write(c, args[0], args[1], args[2]); + emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot); break; /* Straightforward arithmetic: @@ -1208,9 +1267,21 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_slt(p, dst, dst_flags, args[0], args[1]); break; + case OPCODE_SLE: + emit_sle(p, dst, dst_flags, args[0], args[1]); + break; + case OPCODE_SGT: + emit_sgt(p, dst, dst_flags, args[0], args[1]); + break; case OPCODE_SGE: emit_sge(p, dst, dst_flags, args[0], args[1]); break; + case OPCODE_SEQ: + emit_seq(p, dst, dst_flags, args[0], args[1]); + break; + case OPCODE_SNE: + emit_sne(p, dst, dst_flags, args[0], args[1]); + break; case OPCODE_LIT: emit_lit(p, dst, dst_flags, args[0]); @@ -1231,7 +1302,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; default: - assert(0); + _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n", + inst->opcode, inst->opcode < MAX_OPCODE ? + _mesa_opcode_string(inst->opcode) : + "unknown"); } for (i = 0; i < 4; i++) diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index ff97d87dc45..d3e926c328c 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -30,9 +30,9 @@ */ -#include "glheader.h" -#include "macros.h" -#include "enums.h" +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" #include "brw_context.h" #include "brw_wm.h" #include "brw_util.h" @@ -144,7 +144,7 @@ static struct prog_dst_register dst_undef( void ) static struct prog_dst_register get_temp( struct brw_wm_compile *c ) { - int bit = ffs( ~c->fp_temp ); + int bit = _mesa_ffs( ~c->fp_temp ); if (!bit) { _mesa_printf("%s: out of temporaries\n", __FILE__); @@ -158,7 +158,7 @@ static struct prog_dst_register get_temp( struct brw_wm_compile *c ) static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp ) { - c->fp_temp &= ~1<<(temp.Index + 1 - FIRST_INTERNAL_TEMP); + c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP)); } @@ -176,6 +176,7 @@ static struct prog_instruction *emit_insn(struct brw_wm_compile *c, { struct prog_instruction *inst = get_fp_inst(c); *inst = *inst0; + inst->Data = (void *)inst0; return inst; } @@ -201,7 +202,6 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c, inst->SrcReg[0] = src0; inst->SrcReg[1] = src1; inst->SrcReg[2] = src2; - return inst; } @@ -361,6 +361,37 @@ static void emit_interp( struct brw_wm_compile *c, c->fp_interp_emitted |= 1<<idx; } +static void emit_ddx( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + GLuint idx = inst->SrcReg[0].Index; + struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); + + c->fp_deriv_emitted |= 1<<idx; + emit_op(c, + OPCODE_DDX, + inst->DstReg, + 0, 0, 0, + interp, + get_pixel_w(c), + src_undef()); +} + +static void emit_ddy( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + GLuint idx = inst->SrcReg[0].Index; + struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); + + c->fp_deriv_emitted |= 1<<idx; + emit_op(c, + OPCODE_DDY, + inst->DstReg, + 0, 0, 0, + interp, + get_pixel_w(c), + src_undef()); +} /*********************************************************************** * Hacks to extend the program parameter and constant lists. @@ -433,7 +464,7 @@ static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, } idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle ); - /* XXX what about swizzle? */ + assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */ return src_reg(PROGRAM_STATE_VAR, idx); } @@ -463,17 +494,20 @@ static void precalc_dst( struct brw_wm_compile *c, if (dst.WriteMask & WRITEMASK_XZ) { + struct prog_instruction *swz; GLuint z = GET_SWZ(src0.Swizzle, Z); /* dst.xz = swz src0.1zzz */ - emit_op(c, - OPCODE_SWZ, - dst_mask(dst, WRITEMASK_XZ), - inst->SaturateMode, 0, 0, - src_swizzle(src0, SWIZZLE_ONE, z, z, z), - src_undef(), - src_undef()); + swz = emit_op(c, + OPCODE_SWZ, + dst_mask(dst, WRITEMASK_XZ), + inst->SaturateMode, 0, 0, + src_swizzle(src0, SWIZZLE_ONE, z, z, z), + src_undef(), + src_undef()); + /* Avoid letting negation flag of src0 affect our 1 constant. */ + swz->SrcReg[0].NegateBase &= ~NEGATE_X; } if (dst.WriteMask & WRITEMASK_W) { /* dst.w = mov src1.w @@ -496,15 +530,19 @@ static void precalc_lit( struct brw_wm_compile *c, struct prog_dst_register dst = inst->DstReg; if (dst.WriteMask & WRITEMASK_XW) { + struct prog_instruction *swz; + /* dst.xw = swz src0.1111 */ - emit_op(c, - OPCODE_SWZ, - dst_mask(dst, WRITEMASK_XW), - 0, 0, 0, - src_swizzle1(src0, SWIZZLE_ONE), - src_undef(), - src_undef()); + swz = emit_op(c, + OPCODE_SWZ, + dst_mask(dst, WRITEMASK_XW), + 0, 0, 0, + src_swizzle1(src0, SWIZZLE_ONE), + src_undef(), + src_undef()); + /* Avoid letting the negation flag of src0 affect our 1 constant. */ + swz->SrcReg[0].NegateBase = 0; } @@ -524,13 +562,64 @@ static void precalc_tex( struct brw_wm_compile *c, { struct prog_src_register coord; struct prog_dst_register tmpcoord; - - if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) { + GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + + if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) { + struct prog_instruction *out; + struct prog_dst_register tmp0 = get_temp(c); + struct prog_src_register tmp0src = src_reg_from_dst(tmp0); + struct prog_dst_register tmp1 = get_temp(c); + struct prog_src_register tmp1src = src_reg_from_dst(tmp1); + struct prog_src_register src0 = inst->SrcReg[0]; + + tmpcoord = get_temp(c); + coord = src_reg_from_dst(tmpcoord); + + out = emit_op(c, OPCODE_MOV, + tmpcoord, + 0, 0, 0, + src0, + src_undef(), + src_undef()); + out->SrcReg[0].NegateBase = 0; + out->SrcReg[0].Abs = 1; + + emit_op(c, OPCODE_MAX, + tmp0, + 0, 0, 0, + src_swizzle1(coord, X), + src_swizzle1(coord, Y), + src_undef()); + + emit_op(c, OPCODE_MAX, + tmp1, + 0, 0, 0, + tmp0src, + src_swizzle1(coord, Z), + src_undef()); + + emit_op(c, OPCODE_RCP, + tmp0, + 0, 0, 0, + tmp1src, + src_undef(), + src_undef()); + + emit_op(c, OPCODE_MUL, + tmpcoord, + 0, 0, 0, + src0, + tmp0src, + src_undef()); + + release_temp(c, tmp0); + release_temp(c, tmp1); + } else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) { struct prog_src_register scale = search_or_add_param5( c, STATE_INTERNAL, STATE_TEXRECT_SCALE, - inst->TexSrcUnit, + unit, 0,0 ); tmpcoord = get_temp(c); @@ -556,29 +645,33 @@ static void precalc_tex( struct brw_wm_compile *c, * conversion requires allocating a temporary variable which we * don't have the facility to do that late in the compilation. */ - if (!(c->key.yuvtex_mask & (1<<inst->TexSrcUnit))) { + if (!(c->key.yuvtex_mask & (1<<unit))) { emit_op(c, OPCODE_TEX, inst->DstReg, inst->SaturateMode, - inst->TexSrcUnit, + unit, inst->TexSrcTarget, coord, src_undef(), src_undef()); } else { + GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit); + /* CONST C0 = { -.5, -.0625, -.5, 1.164 } CONST C1 = { 1.596, -0.813, 2.018, -.391 } UYV = TEX ... UYV.xyz = ADD UYV, C0 UYV.y = MUL UYV.y, C0.w - RGB.xyz = MAD UYV.xxz, C1, UYV.y + if (UV swaped) + RGB.xyz = MAD UYV.zzx, C1, UYV.y + else + RGB.xyz = MAD UYV.xxz, C1, UYV.y RGB.y = MAD UYV.z, C1.w, RGB.y */ struct prog_dst_register dst = inst->DstReg; - struct prog_src_register src0 = inst->SrcReg[0]; struct prog_dst_register tmp = get_temp(c); struct prog_src_register tmpsrc = src_reg_from_dst(tmp); struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 ); @@ -590,9 +683,9 @@ static void precalc_tex( struct brw_wm_compile *c, OPCODE_TEX, tmp, inst->SaturateMode, - inst->TexSrcUnit, + unit, inst->TexSrcTarget, - src0, + coord, src_undef(), src_undef()); @@ -608,6 +701,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* YUV.y = MUL YUV.y, C0.w */ + emit_op(c, OPCODE_MUL, dst_mask(tmp, WRITEMASK_Y), @@ -616,13 +710,18 @@ static void precalc_tex( struct brw_wm_compile *c, src_swizzle1(C0, W), src_undef()); - /* RGB.xyz = MAD YUV.xxz, C1, YUV.y + /* + * if (UV swaped) + * RGB.xyz = MAD YUV.zzx, C1, YUV.y + * else + * RGB.xyz = MAD YUV.xxz, C1, YUV.y */ + emit_op(c, OPCODE_MAD, dst_mask(dst, WRITEMASK_XYZ), 0, 0, 0, - src_swizzle(tmpsrc, X,X,Z,Z), + swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), C1, src_swizzle1(tmpsrc, Y)); @@ -639,7 +738,8 @@ static void precalc_tex( struct brw_wm_compile *c, release_temp(c, tmp); } - if (inst->TexSrcTarget == GL_TEXTURE_RECTANGLE_NV) + if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) || + (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)) release_temp(c, tmpcoord); } @@ -660,7 +760,7 @@ static GLboolean projtex( struct brw_wm_compile *c, return 0; /* ut2004 gun rendering !?! */ else if (src.File == PROGRAM_INPUT && GET_SWZ(src.Swizzle, W) == W && - (c->key.projtex_mask & (1<<src.Index)) == 0) + (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0) return 0; else return 1; @@ -770,14 +870,34 @@ static void emit_fb_write( struct brw_wm_compile *c ) struct prog_src_register outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR); + GLuint i; - emit_op(c, - WM_FB_WRITE, - dst_mask(dst_undef(),0), - 0, 0, 0, - outcolor, - payload_r0_depth, - outdepth); + struct prog_instruction *inst, *last_inst; + struct brw_context *brw = c->func.brw; + + /* inst->Sampler is not used by backend, + use it for fb write target and eot */ + + if (brw->state.nr_draw_regions > 1) { + for (i = 0 ; i < brw->state.nr_draw_regions; i++) { + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); + last_inst = inst = emit_op(c, + WM_FB_WRITE, dst_mask(dst_undef(),0), 0, 0, 0, + outcolor, payload_r0_depth, outdepth); + inst->Sampler = (i<<1); + if (c->fp_fragcolor_emitted) { + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); + last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), + 0, 0, 0, outcolor, payload_r0_depth, outdepth); + inst->Sampler = (i<<1); + } + } + last_inst->Sampler |= 1; //eot + }else { + inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), + 0, 0, 0, outcolor, payload_r0_depth, outdepth); + inst->Sampler = 1|(0<<1); + } } @@ -803,7 +923,15 @@ static void validate_src_regs( struct brw_wm_compile *c, } } - +static void validate_dst_regs( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + if (inst->DstReg.File == PROGRAM_OUTPUT) { + GLuint idx = inst->DstReg.Index; + if (idx == FRAG_RESULT_COLR) + c->fp_fragcolor_emitted = 1; + } +} static void print_insns( const struct prog_instruction *insn, GLuint nr ) @@ -848,12 +976,16 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; + validate_src_regs(c, inst); + validate_dst_regs(c, inst); + } + for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { + const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; struct prog_instruction *out; /* Check for INPUT values, emit INTERP instructions where * necessary: */ - validate_src_regs(c, inst); switch (inst->Opcode) { @@ -889,11 +1021,20 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) case OPCODE_LIT: precalc_lit(c, inst); break; - + + case OPCODE_TEX: + precalc_tex(c, inst); + break; + case OPCODE_TXP: precalc_txp(c, inst); break; + case OPCODE_TXB: + out = emit_insn(c, inst); + out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + break; + case OPCODE_XPD: out = emit_insn(c, inst); /* This should probably be done in the parser. @@ -907,8 +1048,16 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) */ out->DstReg.WriteMask = 0; break; - + case OPCODE_DDX: + emit_ddx(c, inst); + break; + case OPCODE_DDY: + emit_ddy(c, inst); + break; case OPCODE_END: + emit_fog(c); + emit_fb_write(c); + break; case OPCODE_PRINT: break; @@ -917,15 +1066,11 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) break; } } - - emit_fog(c); - emit_fb_write(c); - if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("\n\n\npass_fp:\n"); - print_insns( c->prog_instructions, c->nr_fp_insns ); - _mesa_printf("\n"); + _mesa_printf("\n\n\npass_fp:\n"); + print_insns( c->prog_instructions, c->nr_fp_insns ); + _mesa_printf("\n"); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c new file mode 100644 index 00000000000..0ea8c3d50e5 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -0,0 +1,1777 @@ +#include "main/macros.h" +#include "shader/prog_parameter.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" + +enum _subroutine { + SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4 +}; + +/* Only guess, need a flag in gl_fragment_program later */ +GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) +{ + int i; + for (i = 0; i < fp->Base.NumInstructions; i++) { + struct prog_instruction *inst = &fp->Base.Instructions[i]; + switch (inst->Opcode) { + case OPCODE_IF: + case OPCODE_INT: + case OPCODE_ENDIF: + case OPCODE_CAL: + case OPCODE_BRK: + case OPCODE_RET: + case OPCODE_DDX: + case OPCODE_DDY: + case OPCODE_NOISE1: + case OPCODE_NOISE2: + case OPCODE_NOISE3: + case OPCODE_NOISE4: + case OPCODE_BGNLOOP: + return GL_TRUE; + default: + break; + } + } + return GL_FALSE; +} + +static void set_reg(struct brw_wm_compile *c, int file, int index, + int component, struct brw_reg reg) +{ + c->wm_regs[file][index][component].reg = reg; + c->wm_regs[file][index][component].inited = GL_TRUE; +} + +static int get_scalar_dst_index(struct prog_instruction *inst) +{ + int i; + for (i = 0; i < 4; i++) + if (inst->DstReg.WriteMask & (1<<i)) + break; + return i; +} + +static struct brw_reg alloc_tmp(struct brw_wm_compile *c) +{ + struct brw_reg reg; + if(c->tmp_index == c->tmp_max) + c->tmp_regs[ c->tmp_max++ ] = c->reg_index++; + + reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); + return reg; +} + +static int mark_tmps(struct brw_wm_compile *c) +{ + return c->tmp_index; +} + +static struct brw_reg lookup_tmp( struct brw_wm_compile *c, int index ) +{ + return brw_vec8_grf( c->tmp_regs[ index ], 0 ); +} + +static void release_tmps(struct brw_wm_compile *c, int mark) +{ + c->tmp_index = mark; +} + +static struct brw_reg +get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GLuint neg, GLuint abs) +{ + struct brw_reg reg; + switch (file) { + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + file = PROGRAM_STATE_VAR; + break; + case PROGRAM_UNDEFINED: + return brw_null_reg(); + default: + break; + } + + if(c->wm_regs[file][index][component].inited) + reg = c->wm_regs[file][index][component].reg; + else + reg = brw_vec8_grf(c->reg_index, 0); + + if(!c->wm_regs[file][index][component].inited) { + set_reg(c, file, index, component, reg); + c->reg_index++; + } + + if (neg & (1<< component)) { + reg = negate(reg); + } + if (abs) + reg = brw_abs(reg); + return reg; +} + +static void prealloc_reg(struct brw_wm_compile *c) +{ + int i, j; + struct brw_reg reg; + int nr_interp_regs = 0; + GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; + + for (i = 0; i < 4; i++) { + reg = (i < c->key.nr_depth_regs) + ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0); + set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); + } + c->reg_index += 2*c->key.nr_depth_regs; + { + int nr_params = c->fp->program.Base.Parameters->NumParameters; + struct gl_program_parameter_list *plist = + c->fp->program.Base.Parameters; + int index = 0; + c->prog_data.nr_params = 4*nr_params; + for (i = 0; i < nr_params; i++) { + for (j = 0; j < 4; j++, index++) { + reg = brw_vec1_grf(c->reg_index + index/8, + index%8); + c->prog_data.param[index] = + &plist->ParameterValues[i][j]; + set_reg(c, PROGRAM_STATE_VAR, i, j, reg); + } + } + c->nr_creg = 2*((4*nr_params+15)/16); + c->reg_index += c->nr_creg; + } + for (i = 0; i < FRAG_ATTRIB_MAX; i++) { + if (inputs & (1<<i)) { + nr_interp_regs++; + reg = brw_vec8_grf(c->reg_index, 0); + for (j = 0; j < 4; j++) + set_reg(c, PROGRAM_PAYLOAD, i, j, reg); + c->reg_index += 2; + + } + } + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; + c->prog_data.urb_read_length = nr_interp_regs * 2; + c->prog_data.curb_read_length = c->nr_creg; + c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); + c->reg_index++; + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); + c->reg_index += 2; +} + +static struct brw_reg get_dst_reg(struct brw_wm_compile *c, + struct prog_instruction *inst, int component, int nr) +{ + return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr, + 0, 0); +} + +static struct brw_reg get_src_reg(struct brw_wm_compile *c, + struct prog_src_register *src, int index, int nr) +{ + int component = GET_SWZ(src->Swizzle, index); + return get_reg(c, src->File, src->Index, component, nr, + src->NegateBase, src->Abs); +} + +/* Subroutines are minimal support for resusable instruction sequences. + They are implemented as simply as possible to minimise overhead: there + is no explicit support for communication between the caller and callee + other than saving the return address in a temporary register, nor is + there any automatic local storage. This implies that great care is + required before attempting reentrancy or any kind of nested + subroutine invocations. */ +static void invoke_subroutine( struct brw_wm_compile *c, + enum _subroutine subroutine, + void (*emit)( struct brw_wm_compile * ) ) +{ + struct brw_compile *p = &c->func; + + assert( subroutine < BRW_WM_MAX_SUBROUTINE ); + + if( c->subroutines[ subroutine ] ) { + /* subroutine previously emitted: reuse existing instructions */ + + int mark = mark_tmps( c ); + struct brw_reg return_address = retype( alloc_tmp( c ), + BRW_REGISTER_TYPE_UD ); + int here = p->nr_insn; + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 2 << 4 ) ); + + brw_ADD( p, brw_ip_reg(), brw_ip_reg(), + brw_imm_d( ( c->subroutines[ subroutine ] - + here - 1 ) << 4 ) ); + brw_pop_insn_state(p); + + release_tmps( c, mark ); + } else { + /* previously unused subroutine: emit, and mark for later reuse */ + + int mark = mark_tmps( c ); + struct brw_reg return_address = retype( alloc_tmp( c ), + BRW_REGISTER_TYPE_UD ); + struct brw_instruction *calc; + int base = p->nr_insn; + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + calc = brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 0 ) ); + brw_pop_insn_state(p); + + c->subroutines[ subroutine ] = p->nr_insn; + + emit( c ); + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV( p, brw_ip_reg(), return_address ); + brw_pop_insn_state(p); + + brw_set_src1( calc, brw_imm_ud( ( p->nr_insn - base ) << 4 ) ); + + release_tmps( c, mark ); + } +} + +static void emit_abs( struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + for (i = 0; i < 4; i++) { + if (inst->DstReg.WriteMask & (1<<i)) { + struct brw_reg src, dst; + dst = get_dst_reg(c, inst, i, 1); + src = get_src_reg(c, &inst->SrcReg[0], i, 1); + brw_MOV(p, dst, brw_abs(src)); + } + } + brw_set_saturate(p, 0); +} + +static void emit_int( struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + struct brw_reg src, dst; + dst = get_dst_reg(c, inst, i, 1) ; + src = get_src_reg(c, &inst->SrcReg[0], i, 1); + brw_RNDD(p, dst, src); + } + } + brw_set_saturate(p, 0); +} + +static void emit_mov( struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + struct brw_reg src, dst; + dst = get_dst_reg(c, inst, i, 1); + src = get_src_reg(c, &inst->SrcReg[0], i, 1); + brw_MOV(p, dst, src); + } + } + brw_set_saturate(p, 0); +} + +static void emit_pixel_xy(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_reg r1 = brw_vec1_grf(1, 0); + struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); + + struct brw_reg dst0, dst1; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + + dst0 = get_dst_reg(c, inst, 0, 1); + dst1 = get_dst_reg(c, inst, 1, 1); + /* Calculate pixel centers by adding 1 or 0 to each of the + * micro-tile coordinates passed in r1. + */ + if (mask & WRITEMASK_X) { + brw_ADD(p, + vec8(retype(dst0, BRW_REGISTER_TYPE_UW)), + stride(suboffset(r1_uw, 4), 2, 4, 0), + brw_imm_v(0x10101010)); + } + + if (mask & WRITEMASK_Y) { + brw_ADD(p, + vec8(retype(dst1, BRW_REGISTER_TYPE_UW)), + stride(suboffset(r1_uw, 5), 2, 4, 0), + brw_imm_v(0x11001100)); + } + +} + +static void emit_delta_xy(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_reg r1 = brw_vec1_grf(1, 0); + struct brw_reg dst0, dst1, src0, src1; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + + dst0 = get_dst_reg(c, inst, 0, 1); + dst1 = get_dst_reg(c, inst, 1, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1); + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers. + */ + if (mask & WRITEMASK_X) { + brw_ADD(p, + dst0, + retype(src0, BRW_REGISTER_TYPE_UW), + negate(r1)); + } + + if (mask & WRITEMASK_Y) { + brw_ADD(p, + dst1, + retype(src1, BRW_REGISTER_TYPE_UW), + negate(suboffset(r1,1))); + + } + +} + + +static void fire_fb_write( struct brw_wm_compile *c, + GLuint base_reg, + GLuint nr, + GLuint target, + GLuint eot) +{ + struct brw_compile *p = &c->func; + /* Pass through control information: + */ + /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ + brw_MOV(p, + brw_message_reg(base_reg + 1), + brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); + } + /* Send framebuffer write message: */ + brw_fb_WRITE(p, + retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), + base_reg, + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + target, + nr, + 0, + eot); +} + +static void emit_fb_write(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + int nr = 2; + int channel; + GLuint target, eot; + struct brw_reg src0; + + /* Reserve a space for AA - may not be needed: + */ + if (c->key.aa_dest_stencil_reg) + nr += 1; + { + brw_push_insn_state(p); + for (channel = 0; channel < 4; channel++) { + src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1); + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + brw_MOV(p, brw_message_reg(nr + channel), src0); + } + /* skip over the regs populated above: */ + nr += 8; + brw_pop_insn_state(p); + } + + if (c->key.source_depth_to_render_target) + { + if (c->key.computes_depth) { + src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1); + brw_MOV(p, brw_message_reg(nr), src0); + } else { + src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + brw_MOV(p, brw_message_reg(nr), src0); + } + + nr += 2; + } + target = inst->Sampler >> 1; + eot = inst->Sampler & 1; + fire_fb_write(c, 0, nr, target, eot); +} + +static void emit_pixel_w( struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + if (mask & WRITEMASK_W) { + struct brw_reg dst, src0, delta0, delta1; + struct brw_reg interp3; + + dst = get_dst_reg(c, inst, 3, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + + interp3 = brw_vec1_grf(src0.nr+1, 4); + /* Calc 1/w - just linterp wpos[3] optimized by putting the + * result straight into a message reg. + */ + brw_LINE(p, brw_null_reg(), interp3, delta0); + brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1); + + /* Calc w */ + brw_math_16( p, dst, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, brw_null_reg(), + BRW_MATH_PRECISION_FULL); + } +} + +static void emit_linterp(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst, delta0, delta1; + struct brw_reg src0; + + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + GLuint nr = src0.nr; + int i; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_LINE(p, brw_null_reg(), interp[i], delta0); + brw_MAC(p, dst, suboffset(interp[i],1), delta1); + } + } +} + +static void emit_cinterp(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + + struct brw_reg interp[4]; + struct brw_reg dst, src0; + + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + GLuint nr = src0.nr; + int i; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV(p, dst, suboffset(interp[i],3)); + } + } +} + +static void emit_pinterp(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + + struct brw_reg interp[4]; + struct brw_reg dst, delta0, delta1; + struct brw_reg src0, w; + + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + w = get_src_reg(c, &inst->SrcReg[2], 3, 1); + GLuint nr = src0.nr; + int i; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_LINE(p, brw_null_reg(), interp[i], delta0); + brw_MAC(p, dst, suboffset(interp[i],1), + delta1); + brw_MUL(p, dst, dst, w); + } + } +} + +static void emit_xpd(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + for (i = 0; i < 4; i++) { + GLuint i2 = (i+2)%3; + GLuint i1 = (i+1)%3; + if (mask & (1<<i)) { + struct brw_reg src0, src1, dst; + dst = get_dst_reg(c, inst, i, 1); + src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1)); + src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1); + brw_MUL(p, brw_null_reg(), src0, src1); + src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1); + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + brw_MAC(p, dst, src0, src1); + brw_set_saturate(p, 0); + } + } + brw_set_saturate(p, 0); +} + +static void emit_dp3(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_reg src0[3], src1[3], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 3; i++) { + src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + } + + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, 0); +} + +static void emit_dp4(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + } + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, brw_null_reg(), src0[2], src1[2]); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MAC(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); +} + +static void emit_dph(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + } + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_ADD(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); +} + +static void emit_math1(struct brw_wm_compile *c, + struct prog_instruction *inst, GLuint func) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + brw_MOV(p, brw_message_reg(2), src0); + brw_math(p, + dst, + func, + (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} + +static void emit_rcp(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_INV); +} + +static void emit_rsq(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); +} + +static void emit_sin(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); +} + +static void emit_cos(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_COS); +} + +static void emit_ex2(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); +} + +static void emit_lg2(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); +} + +static void emit_add(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + brw_ADD(p, dst, src0, src1); + } + } + brw_set_saturate(p, 0); +} + +static void emit_sub(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + brw_ADD(p, dst, src0, negate(src1)); + } + } + brw_set_saturate(p, 0); +} + +static void emit_mul(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + brw_MUL(p, dst, src0, src1); + } + } + brw_set_saturate(p, 0); +} + +static void emit_frc(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + brw_FRC(p, dst, src0); + } + } + if (inst->SaturateMode != SATURATE_OFF) + brw_set_saturate(p, 0); +} + +static void emit_flr(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + brw_RNDD(p, dst, src0); + } + } + brw_set_saturate(p, 0); +} + +static void emit_max(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg src0, src1, dst; + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } + brw_pop_insn_state(p); +} + +static void emit_min(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg src0, src1, dst; + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } + brw_pop_insn_state(p); +} + +static void emit_pow(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst, src0, src1; + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + + brw_MOV(p, brw_message_reg(2), src0); + brw_MOV(p, brw_message_reg(3), src1); + + brw_math(p, + dst, + BRW_MATH_FUNCTION_POW, + (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} + +static void emit_lrp(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg dst, tmp1, tmp2, src0, src1, src2; + int i; + int mark = mark_tmps(c); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + + if (src1.nr == dst.nr) { + tmp1 = alloc_tmp(c); + brw_MOV(p, tmp1, src1); + } else + tmp1 = src1; + + src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); + if (src2.nr == dst.nr) { + tmp2 = alloc_tmp(c); + brw_MOV(p, tmp2, src2); + } else + tmp2 = src2; + + brw_ADD(p, dst, negate(src0), brw_imm_f(1.0)); + brw_MUL(p, brw_null_reg(), dst, tmp2); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MAC(p, dst, src0, tmp1); + brw_set_saturate(p, 0); + } + release_tmps(c, mark); + } +} + +static void emit_kil(struct brw_wm_compile *c) +{ + struct brw_compile *p = &c->func; + struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_AND(p, depth, c->emit_mask_reg, depth); + brw_pop_insn_state(p); +} + +static void emit_mad(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg dst, src0, src1, src2; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); + brw_MUL(p, dst, src0, src1); + + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_ADD(p, dst, dst, src2); + brw_set_saturate(p, 0); + } + } +} + +static void emit_sop(struct brw_wm_compile *c, + struct prog_instruction *inst, GLuint cond) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg dst, src0, src1; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + brw_push_insn_state(p); + brw_CMP(p, brw_null_reg(), cond, src0, src1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, dst, brw_imm_f(0.0)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, brw_imm_f(1.0)); + brw_pop_insn_state(p); + } + } +} + +static void emit_slt(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_L); +} + +static void emit_sle(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_LE); +} + +static void emit_sgt(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_G); +} + +static void emit_sge(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_GE); +} + +static void emit_seq(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_EQ); +} + +static void emit_sne(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + emit_sop(c, inst, BRW_CONDITIONAL_NEQ); +} + +static void emit_ddx(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst; + struct brw_reg src0, w; + GLuint nr, i; + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + w = get_src_reg(c, &inst->SrcReg[1], 3, 1); + nr = src0.nr; + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV(p, dst, interp[i]); + brw_MUL(p, dst, dst, w); + } + } + brw_set_saturate(p, 0); +} + +static void emit_ddy(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst; + struct brw_reg src0, w; + GLuint nr, i; + + src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + nr = src0.nr; + w = get_src_reg(c, &inst->SrcReg[1], 3, 1); + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV(p, dst, suboffset(interp[i], 1)); + brw_MUL(p, dst, dst, w); + } + } + brw_set_saturate(p, 0); +} + +static __inline struct brw_reg high_words( struct brw_reg reg ) +{ + return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ), + 0, 8, 2 ); +} + +static __inline struct brw_reg low_words( struct brw_reg reg ) +{ + return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 ); +} + +/* One- and two-dimensional Perlin noise, similar to the description in + _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */ +static void noise1_sub( struct brw_wm_compile *c ) { + + struct brw_compile *p = &c->func; + struct brw_reg param, + x0, x1, /* gradients at each end */ + t, tmp[ 2 ], /* float temporaries */ + itmp[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */ + int i; + int mark = mark_tmps( c ); + + x0 = alloc_tmp( c ); + x1 = alloc_tmp( c ); + t = alloc_tmp( c ); + tmp[ 0 ] = alloc_tmp( c ); + tmp[ 1 ] = alloc_tmp( c ); + itmp[ 0 ] = retype( tmp[ 0 ], BRW_REGISTER_TYPE_UD ); + itmp[ 1 ] = retype( tmp[ 1 ], BRW_REGISTER_TYPE_UD ); + itmp[ 2 ] = retype( x0, BRW_REGISTER_TYPE_UD ); + itmp[ 3 ] = retype( x1, BRW_REGISTER_TYPE_UD ); + itmp[ 4 ] = retype( t, BRW_REGISTER_TYPE_UD ); + + param = lookup_tmp( c, mark - 2 ); + + brw_set_access_mode( p, BRW_ALIGN_1 ); + + brw_MOV( p, itmp[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */ + + /* Arrange the two end coordinates into scalars (itmp0/itmp1) to + be hashed. Also compute the remainder (offset within the unit + length), interleaved to reduce register dependency penalties. */ + brw_RNDD( p, itmp[ 0 ], param ); + brw_FRC( p, param, param ); + brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 1 ) ); + brw_MOV( p, itmp[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */ + brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */ + + /* We're now ready to perform the hashing. The two hashes are + interleaved for performance. The hash function used is + designed to rapidly achieve avalanche and require only 32x16 + bit multiplication, and 16-bit swizzles (which we get for + free). We can't use immediate operands in the multiplies, + because immediates are permitted only in src1 and the 16-bit + factor is permitted only in src0. */ + for( i = 0; i < 2; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 2 ], itmp[ i ] ); + for( i = 0; i < 2; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + for( i = 0; i < 2; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 3 ], itmp[ i ] ); + for( i = 0; i < 2; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + for( i = 0; i < 2; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] ); + for( i = 0; i < 2; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + + /* Now we want to initialise the two gradients based on the + hashes. Format conversion from signed integer to float leaves + everything scaled too high by a factor of pow( 2, 31 ), but + we correct for that right at the end. */ + brw_ADD( p, t, param, brw_imm_f( -1.0 ) ); + brw_MOV( p, x0, retype( tmp[ 0 ], BRW_REGISTER_TYPE_D ) ); + brw_MOV( p, x1, retype( tmp[ 1 ], BRW_REGISTER_TYPE_D ) ); + + brw_MUL( p, x0, x0, param ); + brw_MUL( p, x1, x1, t ); + + /* We interpolate between the gradients using the polynomial + 6t^5 - 15t^4 + 10t^3 (Perlin). */ + brw_MUL( p, tmp[ 0 ], param, brw_imm_f( 6.0 ) ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); + brw_ADD( p, x1, x1, negate( x0 ) ); /* unrelated work to fill the + pipeline */ + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); + brw_MUL( p, param, tmp[ 0 ], param ); + brw_MUL( p, x1, x1, param ); + brw_ADD( p, x0, x0, x1 ); + /* scale by pow( 2, -30 ), to compensate for the format conversion + above and an extra factor of 2 so that a single gradient covers + the [-1,1] range */ + brw_MUL( p, param, x0, brw_imm_f( 0.000000000931322574615478515625 ) ); + + release_tmps( c, mark ); +} + +static void emit_noise1( struct brw_wm_compile *c, + struct prog_instruction *inst ) +{ + struct brw_compile *p = &c->func; + struct brw_reg src, param, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + int mark = mark_tmps( c ); + + assert( mark == 0 ); + + src = get_src_reg( c, inst->SrcReg, 0, 1 ); + + param = alloc_tmp( c ); + + brw_MOV( p, param, src ); + + invoke_subroutine( c, SUB_NOISE1, noise1_sub ); + + /* Fill in the result: */ + brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV( p, dst, param ); + } + } + if( inst->SaturateMode == SATURATE_ZERO_ONE ) + brw_set_saturate( p, 0 ); + + release_tmps( c, mark ); +} + +static void noise2_sub( struct brw_wm_compile *c ) { + + struct brw_compile *p = &c->func; + struct brw_reg param0, param1, + x0y0, x0y1, x1y0, x1y1, /* gradients at each corner */ + t, tmp[ 4 ], /* float temporaries */ + itmp[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */ + int i; + int mark = mark_tmps( c ); + + x0y0 = alloc_tmp( c ); + x0y1 = alloc_tmp( c ); + x1y0 = alloc_tmp( c ); + x1y1 = alloc_tmp( c ); + t = alloc_tmp( c ); + for( i = 0; i < 4; i++ ) { + tmp[ i ] = alloc_tmp( c ); + itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); + } + itmp[ 4 ] = retype( x0y0, BRW_REGISTER_TYPE_UD ); + itmp[ 5 ] = retype( x0y1, BRW_REGISTER_TYPE_UD ); + itmp[ 6 ] = retype( x1y0, BRW_REGISTER_TYPE_UD ); + + param0 = lookup_tmp( c, mark - 3 ); + param1 = lookup_tmp( c, mark - 2 ); + + brw_set_access_mode( p, BRW_ALIGN_1 ); + + /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to + be hashed. Also compute the remainders (offsets within the unit + square), interleaved to reduce register dependency penalties. */ + brw_RNDD( p, itmp[ 0 ], param0 ); + brw_RNDD( p, itmp[ 1 ], param1 ); + brw_FRC( p, param0, param0 ); + brw_FRC( p, param1, param1 ); + brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */ + brw_ADD( p, high_words( itmp[ 0 ] ), high_words( itmp[ 0 ] ), + low_words( itmp[ 1 ] ) ); + brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */ + brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */ + brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 0x10000 ) ); + brw_ADD( p, itmp[ 2 ], itmp[ 0 ], brw_imm_ud( 0x1 ) ); + brw_ADD( p, itmp[ 3 ], itmp[ 0 ], brw_imm_ud( 0x10001 ) ); + + /* We're now ready to perform the hashing. The four hashes are + interleaved for performance. The hash function used is + designed to rapidly achieve avalanche and require only 32x16 + bit multiplication, and 16-bit swizzles (which we get for + free). We can't use immediate operands in the multiplies, + because immediates are permitted only in src1 and the 16-bit + factor is permitted only in src0. */ + for( i = 0; i < 4; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + for( i = 0; i < 4; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 5 ], itmp[ i ] ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + for( i = 0; i < 4; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 6 ], itmp[ i ] ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + + /* Now we want to initialise the four gradients based on the + hashes. Format conversion from signed integer to float leaves + everything scaled too high by a factor of pow( 2, 15 ), but + we correct for that right at the end. */ + brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); + brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); + brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); + brw_MOV( p, x1y0, low_words( tmp[ 2 ] ) ); + brw_MOV( p, x1y1, low_words( tmp[ 3 ] ) ); + + brw_MOV( p, tmp[ 0 ], high_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 1 ], high_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 2 ], high_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 3 ], high_words( tmp[ 3 ] ) ); + + brw_MUL( p, x1y0, x1y0, t ); + brw_MUL( p, x1y1, x1y1, t ); + brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); + brw_MUL( p, x0y0, x0y0, param0 ); + brw_MUL( p, x0y1, x0y1, param0 ); + + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param1 ); + brw_MUL( p, tmp[ 2 ], tmp[ 2 ], param1 ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], t ); + brw_MUL( p, tmp[ 3 ], tmp[ 3 ], t ); + + brw_ADD( p, x0y0, x0y0, tmp[ 0 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 2 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 1 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 3 ] ); + + /* We interpolate between the gradients using the polynomial + 6t^5 - 15t^4 + 10t^3 (Perlin). */ + brw_MUL( p, tmp[ 0 ], param0, brw_imm_f( 6.0 ) ); + brw_MUL( p, tmp[ 1 ], param1, brw_imm_f( 6.0 ) ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) ); + brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( -15.0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); + brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work to fill the + pipeline */ + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) ); + brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( 10.0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); + brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work to fill the + pipeline */ + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); + brw_MUL( p, param0, tmp[ 0 ], param0 ); + brw_MUL( p, param1, tmp[ 1 ], param1 ); + + /* Here we interpolate in the y dimension... */ + brw_MUL( p, x0y1, x0y1, param1 ); + brw_MUL( p, x1y1, x1y1, param1 ); + brw_ADD( p, x0y0, x0y0, x0y1 ); + brw_ADD( p, x1y0, x1y0, x1y1 ); + + /* And now in x. There are horrible register dependencies here, + but we have nothing else to do. */ + brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); + brw_MUL( p, x1y0, x1y0, param0 ); + brw_ADD( p, x0y0, x0y0, x1y0 ); + + /* scale by pow( 2, -15 ), as described above */ + brw_MUL( p, param0, x0y0, brw_imm_f( 0.000030517578125 ) ); + + release_tmps( c, mark ); +} + +static void emit_noise2( struct brw_wm_compile *c, + struct prog_instruction *inst ) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, param0, param1, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + int mark = mark_tmps( c ); + + assert( mark == 0 ); + + src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); + src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); + + param0 = alloc_tmp( c ); + param1 = alloc_tmp( c ); + + brw_MOV( p, param0, src0 ); + brw_MOV( p, param1, src1 ); + + invoke_subroutine( c, SUB_NOISE2, noise2_sub ); + + /* Fill in the result: */ + brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV( p, dst, param0 ); + } + } + if( inst->SaturateMode == SATURATE_ZERO_ONE ) + brw_set_saturate( p, 0 ); + + release_tmps( c, mark ); +} + +static void emit_wpos_xy(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg src0[2], dst[2]; + + dst[0] = get_dst_reg(c, inst, 0, 1); + dst[1] = get_dst_reg(c, inst, 1, 1); + + src0[0] = get_src_reg(c, &inst->SrcReg[0], 0, 1); + src0[1] = get_src_reg(c, &inst->SrcReg[0], 1, 1); + + /* Calculate the pixel offset from window bottom left into destination + * X and Y channels. + */ + if (mask & WRITEMASK_X) { + /* X' = X - origin_x */ + brw_ADD(p, + dst[0], + retype(src0[0], BRW_REGISTER_TYPE_W), + brw_imm_d(0 - c->key.origin_x)); + } + + if (mask & WRITEMASK_Y) { + /* Y' = height - (Y - origin_y) = height + origin_y - Y */ + brw_ADD(p, + dst[1], + negate(retype(src0[1], BRW_REGISTER_TYPE_W)), + brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); + } +} + +/* TODO + BIAS on SIMD8 not workind yet... + */ +static void emit_txb(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst[4], src[4], payload_reg; + GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + + GLuint i; + payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i, 1); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + + switch (inst->TexSrcTarget) { + case TEXTURE_1D_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + default: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), src[2]); + break; + } + brw_MOV(p, brw_message_reg(5), src[3]); + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(payload_reg, BRW_REGISTER_TYPE_UW), + unit + MAX_DRAW_BUFFERS, /* surface */ + unit, /* sampler */ + inst->DstReg.WriteMask, + BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, + 4, + 4, + 0); +} + +static void emit_tex(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst[4], src[4], payload_reg; + GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + + GLuint msg_len; + GLuint i, nr; + GLuint emit; + GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0; + + payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i, 1); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + + + switch (inst->TexSrcTarget) { + case TEXTURE_1D_INDEX: + emit = WRITEMASK_X; + nr = 1; + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + emit = WRITEMASK_XY; + nr = 2; + break; + default: + emit = WRITEMASK_XYZ; + nr = 3; + break; + } + msg_len = 1; + + for (i = 0; i < nr; i++) { + static const GLuint swz[4] = {0,1,2,2}; + if (emit & (1<<i)) + brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]); + else + brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0)); + msg_len += 1; + } + + if (shadow) { + brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(6), src[2]); + } + + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(payload_reg, BRW_REGISTER_TYPE_UW), + unit + MAX_DRAW_BUFFERS, /* surface */ + unit, /* sampler */ + inst->DstReg.WriteMask, + BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, + 4, + shadow ? 6 : 4, + 0); + + if (shadow) + brw_MOV(p, dst[3], brw_imm_f(1.0)); +} + +static void post_wm_emit( struct brw_wm_compile *c ) +{ + GLuint nr_insns = c->fp->program.Base.NumInstructions; + GLuint insn, target_insn; + struct prog_instruction *inst1, *inst2; + struct brw_instruction *brw_inst1, *brw_inst2; + int offset; + for (insn = 0; insn < nr_insns; insn++) { + inst1 = &c->fp->program.Base.Instructions[insn]; + brw_inst1 = inst1->Data; + switch (inst1->Opcode) { + case OPCODE_CAL: + target_insn = inst1->BranchTarget; + inst2 = &c->fp->program.Base.Instructions[target_insn]; + brw_inst2 = inst2->Data; + offset = brw_inst2 - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + default: + break; + } + } +} + +static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) +{ +#define MAX_IFSN 32 +#define MAX_LOOP_DEPTH 32 + struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH]; + struct brw_instruction *inst0, *inst1; + int i, if_insn = 0, loop_insn = 0; + struct brw_compile *p = &c->func; + struct brw_indirect stack_index = brw_indirect(0, 0); + + c->reg_index = 0; + prealloc_reg(c); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); + + for (i = 0; i < c->nr_fp_insns; i++) { + struct prog_instruction *inst = &c->prog_instructions[i]; + struct prog_instruction *orig_inst; + + if ((orig_inst = inst->Data) != 0) + orig_inst->Data = current_insn(p); + + if (inst->CondUpdate) + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + + switch (inst->Opcode) { + case WM_PIXELXY: + emit_pixel_xy(c, inst); + break; + case WM_DELTAXY: + emit_delta_xy(c, inst); + break; + case WM_PIXELW: + emit_pixel_w(c, inst); + break; + case WM_LINTERP: + emit_linterp(c, inst); + break; + case WM_PINTERP: + emit_pinterp(c, inst); + break; + case WM_CINTERP: + emit_cinterp(c, inst); + break; + case WM_WPOSXY: + emit_wpos_xy(c, inst); + break; + case WM_FB_WRITE: + emit_fb_write(c, inst); + break; + case OPCODE_ABS: + emit_abs(c, inst); + break; + case OPCODE_ADD: + emit_add(c, inst); + break; + case OPCODE_SUB: + emit_sub(c, inst); + break; + case OPCODE_FRC: + emit_frc(c, inst); + break; + case OPCODE_FLR: + emit_flr(c, inst); + break; + case OPCODE_LRP: + emit_lrp(c, inst); + break; + case OPCODE_INT: + emit_int(c, inst); + break; + case OPCODE_MOV: + emit_mov(c, inst); + break; + case OPCODE_DP3: + emit_dp3(c, inst); + break; + case OPCODE_DP4: + emit_dp4(c, inst); + break; + case OPCODE_XPD: + emit_xpd(c, inst); + break; + case OPCODE_DPH: + emit_dph(c, inst); + break; + case OPCODE_RCP: + emit_rcp(c, inst); + break; + case OPCODE_RSQ: + emit_rsq(c, inst); + break; + case OPCODE_SIN: + emit_sin(c, inst); + break; + case OPCODE_COS: + emit_cos(c, inst); + break; + case OPCODE_EX2: + emit_ex2(c, inst); + break; + case OPCODE_LG2: + emit_lg2(c, inst); + break; + case OPCODE_MAX: + emit_max(c, inst); + break; + case OPCODE_MIN: + emit_min(c, inst); + break; + case OPCODE_DDX: + emit_ddx(c, inst); + break; + case OPCODE_DDY: + emit_ddy(c, inst); + break; + case OPCODE_SLT: + emit_slt(c, inst); + break; + case OPCODE_SLE: + emit_sle(c, inst); + break; + case OPCODE_SGT: + emit_sgt(c, inst); + break; + case OPCODE_SGE: + emit_sge(c, inst); + break; + case OPCODE_SEQ: + emit_seq(c, inst); + break; + case OPCODE_SNE: + emit_sne(c, inst); + break; + case OPCODE_MUL: + emit_mul(c, inst); + break; + case OPCODE_POW: + emit_pow(c, inst); + break; + case OPCODE_MAD: + emit_mad(c, inst); + break; + case OPCODE_NOISE1: + emit_noise1(c, inst); + break; + case OPCODE_NOISE2: + emit_noise2(c, inst); + break; + /* case OPCODE_NOISE3: */ + /* case OPCODE_NOISE4: */ + /* not yet implemented */ + case OPCODE_TEX: + emit_tex(c, inst); + break; + case OPCODE_TXB: + emit_txb(c, inst); + break; + case OPCODE_KIL_NV: + emit_kil(c); + break; + case OPCODE_IF: + assert(if_insn < MAX_IFSN); + if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + break; + case OPCODE_ELSE: + if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + break; + case OPCODE_ENDIF: + assert(if_insn > 0); + brw_ENDIF(p, if_inst[--if_insn]); + break; + case OPCODE_BGNSUB: + case OPCODE_ENDSUB: + break; + case OPCODE_CAL: + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(4)); + orig_inst = inst->Data; + orig_inst->Data = &p->store[p->nr_insn]; + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_pop_insn_state(p); + break; + + case OPCODE_RET: + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_pop_insn_state(p); + + break; + case OPCODE_BGNLOOP: + loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8); + break; + case OPCODE_BRK: + brw_BREAK(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_CONT: + brw_CONT(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_ENDLOOP: + loop_insn--; + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]); + /* patch all the BREAK instructions from + last BEGINLOOP */ + while (inst0 > loop_inst[loop_insn]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK) { + inst0->bits3.if_else.jump_count = inst1 - inst0 + 1; + inst0->bits3.if_else.pop_count = 0; + } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + inst0->bits3.if_else.jump_count = inst1 - inst0; + inst0->bits3.if_else.pop_count = 0; + } + } + break; + default: + _mesa_printf("unsupported IR in fragment shader %d\n", + inst->Opcode); + } + if (inst->CondUpdate) + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + else + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + post_wm_emit(c); + for (i = 0; i < c->fp->program.Base.NumInstructions; i++) + c->fp->program.Base.Instructions[i].Data = NULL; +} + +void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) +{ + brw_wm_pass_fp(c); + brw_wm_emit_glsl(brw, c); + c->prog_data.total_grf = c->reg_index; + c->prog_data.total_scratch = 0; +} diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c index ec2b976faa7..0bb5d5ba833 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_iz.c +++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c @@ -30,7 +30,7 @@ */ -#include "mtypes.h" +#include "main/mtypes.h" #include "brw_wm.h" diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 00f6f6b9a4f..205a7160d39 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -168,6 +168,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, case PROGRAM_PAYLOAD: case PROGRAM_TEMPORARY: case PROGRAM_OUTPUT: + case PROGRAM_VARYING: break; case PROGRAM_LOCAL_PARAM: @@ -179,6 +180,8 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, break; case PROGRAM_STATE_VAR: + case PROGRAM_UNIFORM: + case PROGRAM_CONSTANT: case PROGRAM_NAMED_PARAM: { struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters; @@ -197,6 +200,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, break; case PROGRAM_STATE_VAR: + case PROGRAM_UNIFORM: /* These may change from run to run: */ ref = get_param_ref(c, &plist->ParameterValues[idx][component] ); @@ -344,6 +348,8 @@ static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c, out->saturate = (inst->SaturateMode != SATURATE_OFF); out->tex_unit = inst->TexSrcUnit; out->tex_idx = inst->TexSrcTarget; + out->eot = inst->Sampler & 1; + out->target = inst->Sampler>>1; /* Args: */ diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c index d668def7007..f6f3a38e9e0 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c @@ -150,12 +150,17 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case OPCODE_FLR: case OPCODE_FRC: case OPCODE_MOV: + case OPCODE_SWZ: read0 = writemask; break; case OPCODE_SUB: case OPCODE_SLT: + case OPCODE_SLE: case OPCODE_SGE: + case OPCODE_SGT: + case OPCODE_SEQ: + case OPCODE_SNE: case OPCODE_ADD: case OPCODE_MAX: case OPCODE_MIN: @@ -253,11 +258,9 @@ void brw_wm_pass1( struct brw_wm_compile *c ) read0 = WRITEMASK_XYW; break; - case OPCODE_SWZ: case OPCODE_DST: case OPCODE_TXP: default: - assert(0); break; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c index a1edbd6168d..6fca9ad220a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c @@ -69,7 +69,8 @@ static void prealloc_reg(struct brw_wm_compile *c, */ static void init_registers( struct brw_wm_compile *c ) { - GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted; + struct brw_context *brw = c->func.brw; + GLuint inputs = (brw->vs.prog_data->outputs_written & DO_SETUP_BITS); GLuint nr_interp_regs = 0; GLuint i = 0; GLuint j; @@ -85,8 +86,15 @@ static void init_registers( struct brw_wm_compile *c ) for (j = 0; j < FRAG_ATTRIB_MAX; j++) if (inputs & (1<<j)) { + /* index for vs output and ps input are not the same + in shader varying */ + GLuint index; + if (j > FRAG_ATTRIB_VAR0) + index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); + else + index = j; nr_interp_regs++; - prealloc_reg(c, &c->payload.input_interp[j], i++); + prealloc_reg(c, &c->payload.input_interp[index], i++); } assert(nr_interp_regs >= 1); @@ -328,7 +336,7 @@ void brw_wm_pass2( struct brw_wm_compile *c ) c->state = PASS2_DONE; if (INTEL_DEBUG & DEBUG_WM) { - brw_wm_print_program(c, "pass2/done"); + brw_wm_print_program(c, "pass2/done"); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 93d4cfc3a5f..f12ef47a7d7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -34,7 +34,7 @@ #include "brw_state.h" #include "brw_defines.h" -#include "macros.h" +#include "main/macros.h" @@ -54,7 +54,7 @@ static GLuint translate_wrap_mode( GLenum wrap ) case GL_REPEAT: return BRW_TEXCOORDMODE_WRAP; case GL_CLAMP: - return BRW_TEXCOORDMODE_CLAMP_BORDER; /* conform likes it this way */ + return BRW_TEXCOORDMODE_CLAMP; case GL_CLAMP_TO_EDGE: return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */ case GL_CLAMP_TO_BORDER: @@ -79,27 +79,43 @@ static GLint S_FIXED(GLfloat value, GLuint frac_bits) } -static GLuint upload_default_color( struct brw_context *brw, - const GLfloat *color ) +static dri_bo *upload_default_color( struct brw_context *brw, + const GLfloat *color ) { struct brw_sampler_default_color sdc; COPY_4V(sdc.color, color); - return brw_cache_data( &brw->cache[BRW_SAMPLER_DEFAULT_COLOR], &sdc ); + return brw_cache_data( &brw->cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc, + NULL, 0 ); } -/* +struct wm_sampler_key { + int sampler_count; + + struct wm_sampler_entry { + GLenum wrap_r, wrap_s, wrap_t; + float maxlod, minlod; + float lod_bias; + float max_aniso; + GLenum minfilter, magfilter; + GLenum comparemode, comparefunc; + dri_bo *sdc_bo; + } sampler[BRW_MAX_TEX_UNIT]; +}; + +/** + * Sets the sampler state for a single unit based off of the sampler key + * entry. */ -static void brw_update_sampler_state( struct gl_texture_unit *texUnit, - struct gl_texture_object *texObj, - GLuint sdc_gs_offset, - struct brw_sampler_state *sampler) -{ +static void brw_update_sampler_state(struct wm_sampler_entry *key, + dri_bo *sdc_bo, + struct brw_sampler_state *sampler) +{ _mesa_memset(sampler, 0, sizeof(*sampler)); - switch (texObj->MinFilter) { + switch (key->minfilter) { case GL_NEAREST: sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; @@ -130,17 +146,17 @@ static void brw_update_sampler_state( struct gl_texture_unit *texUnit, /* Set Anisotropy: */ - if ( texObj->MaxAnisotropy > 1.0 ) { + if (key->max_aniso > 1.0) { sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; - if (texObj->MaxAnisotropy > 2.0) { - sampler->ss3.max_aniso = MAX2((texObj->MaxAnisotropy - 2) / 2, + if (key->max_aniso > 2.0) { + sampler->ss3.max_aniso = MAX2((key->max_aniso - 2) / 2, BRW_ANISORATIO_16); } } else { - switch (texObj->MagFilter) { + switch (key->magfilter) { case GL_NEAREST: sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; break; @@ -152,9 +168,9 @@ static void brw_update_sampler_state( struct gl_texture_unit *texUnit, } } - sampler->ss1.r_wrap_mode = translate_wrap_mode(texObj->WrapR); - sampler->ss1.s_wrap_mode = translate_wrap_mode(texObj->WrapS); - sampler->ss1.t_wrap_mode = translate_wrap_mode(texObj->WrapT); + sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); + sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); + sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); /* Fulsim complains if I don't do this. Hardware doesn't mind: */ @@ -168,17 +184,18 @@ static void brw_update_sampler_state( struct gl_texture_unit *texUnit, /* Set shadow function: */ - if (texObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) { + if (key->comparemode == GL_COMPARE_R_TO_TEXTURE_ARB) { /* Shadowing is "enabled" by emitting a particular sampler * message (sample_c). So need to recompile WM program when * shadow comparison is enabled on each/any texture unit. */ - sampler->ss0.shadow_function = intel_translate_compare_func(texObj->CompareFunc); + sampler->ss0.shadow_function = + intel_translate_shadow_compare_func(key->comparefunc); } /* Set LOD bias: */ - sampler->ss0.lod_bias = S_FIXED(texUnit->LodBias + texObj->LodBias, 6); + sampler->ss0.lod_bias = S_FIXED(CLAMP(key->lod_bias, -16, 15), 6); sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ @@ -192,13 +209,64 @@ static void brw_update_sampler_state( struct gl_texture_unit *texUnit, */ sampler->ss0.base_level = U_FIXED(0, 1); - sampler->ss1.max_lod = U_FIXED(MAX2(texObj->MaxLod, 0), 6); - sampler->ss1.min_lod = U_FIXED(MAX2(texObj->MinLod, 0), 6); + sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6); + sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6); - sampler->ss2.default_color_pointer = sdc_gs_offset >> 5; + sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */ } +/** Sets up the cache key for sampler state for all texture units */ +static void +brw_wm_sampler_populate_key(struct brw_context *brw, + struct wm_sampler_key *key) +{ + int unit; + memset(key, 0, sizeof(*key)); + + for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { + if (brw->attribs.Texture->Unit[unit]._ReallyEnabled) { + struct wm_sampler_entry *entry = &key->sampler[unit]; + struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit]; + struct gl_texture_object *texObj = texUnit->_Current; + struct intel_texture_object *intelObj = intel_texture_object(texObj); + struct gl_texture_image *firstImage = + texObj->Image[0][intelObj->firstLevel]; + + entry->wrap_r = texObj->WrapR; + entry->wrap_s = texObj->WrapS; + entry->wrap_t = texObj->WrapT; + + entry->maxlod = texObj->MaxLod; + entry->minlod = texObj->MinLod; + entry->lod_bias = texUnit->LodBias + texObj->LodBias; + entry->max_aniso = texObj->MaxAnisotropy; + entry->minfilter = texObj->MinFilter; + entry->magfilter = texObj->MagFilter; + entry->comparemode = texObj->CompareMode; + entry->comparefunc = texObj->CompareFunc; + + dri_bo_unreference(brw->wm.sdc_bo[unit]); + if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { + float bordercolor[4] = { + texObj->BorderColor[0], + texObj->BorderColor[0], + texObj->BorderColor[0], + texObj->BorderColor[0] + }; + /* GL specs that border color for depth textures is taken from the + * R channel, while the hardware uses A. Spam R into all the + * channels for safety. + */ + brw->wm.sdc_bo[unit] = upload_default_color(brw, bordercolor); + } else { + brw->wm.sdc_bo[unit] = upload_default_color(brw, + texObj->BorderColor); + } + key->sampler_count = unit + 1; + } + } +} /* All samplers must be uploaded in a single contiguous array, which * complicates various things. However, this is still too confusing - @@ -206,40 +274,61 @@ static void brw_update_sampler_state( struct gl_texture_unit *texUnit, */ static void upload_wm_samplers( struct brw_context *brw ) { - GLuint unit; - GLuint sampler_count = 0; - - /* _NEW_TEXTURE */ - for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { - if (brw->attribs.Texture->Unit[unit]._ReallyEnabled) { - struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit]; - struct gl_texture_object *texObj = texUnit->_Current; + struct wm_sampler_key key; + int i; - GLuint sdc_gs_offset = upload_default_color(brw, texObj->BorderColor); + brw_wm_sampler_populate_key(brw, &key); - brw_update_sampler_state(texUnit, - texObj, - sdc_gs_offset, - &brw->wm.sampler[unit]); - - sampler_count = unit + 1; - } - } - - if (brw->wm.sampler_count != sampler_count) { - brw->wm.sampler_count = sampler_count; + if (brw->wm.sampler_count != key.sampler_count) { + brw->wm.sampler_count = key.sampler_count; brw->state.dirty.cache |= CACHE_NEW_SAMPLER; } - brw->wm.sampler_gs_offset = 0; + dri_bo_unreference(brw->wm.sampler_bo); + brw->wm.sampler_bo = NULL; + if (brw->wm.sampler_count == 0) + return; - if (brw->wm.sampler_count) - brw->wm.sampler_gs_offset = - brw_cache_data_sz(&brw->cache[BRW_SAMPLER], - brw->wm.sampler, - sizeof(struct brw_sampler_state) * brw->wm.sampler_count); -} + brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER, + &key, sizeof(key), + brw->wm.sdc_bo, key.sampler_count, + NULL); + /* If we didnt find it in the cache, compute the state and put it in the + * cache. + */ + if (brw->wm.sampler_bo == NULL) { + struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; + + memset(sampler, 0, sizeof(sampler)); + for (i = 0; i < key.sampler_count; i++) { + if (brw->wm.sdc_bo[i] == NULL) + continue; + + brw_update_sampler_state(&key.sampler[i], brw->wm.sdc_bo[i], + &sampler[i]); + } + + brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER, + &key, sizeof(key), + brw->wm.sdc_bo, key.sampler_count, + &sampler, sizeof(sampler), + NULL, NULL); + + /* Emit SDC relocations */ + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + if (!brw->attribs.Texture->Unit[i]._ReallyEnabled) + continue; + + dri_bo_emit_reloc(brw->wm.sampler_bo, + I915_GEM_DOMAIN_SAMPLER, 0, + 0, + i * sizeof(struct brw_sampler_state) + + offsetof(struct brw_sampler_state, ss2), + brw->wm.sdc_bo[i]); + } + } +} const struct brw_tracked_state brw_wm_samplers = { .dirty = { @@ -247,7 +336,7 @@ const struct brw_tracked_state brw_wm_samplers = { .brw = 0, .cache = 0 }, - .update = upload_wm_samplers + .prepare = upload_wm_samplers, }; diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index ff5cb31bdd1..61fe97a4634 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -34,109 +34,135 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "bufmgr.h" +#include "brw_wm.h" /*********************************************************************** * WM unit - fragment programs and rasterization */ -static void invalidate_scratch_cb( struct intel_context *intel, - void *unused ) -{ - /* nothing */ -} +struct brw_wm_unit_key { + unsigned int total_grf, total_scratch; + unsigned int urb_entry_read_length; + unsigned int curb_entry_read_length; + unsigned int dispatch_grf_start_reg; + + unsigned int curbe_offset; + unsigned int urb_size; + + unsigned int max_threads; + unsigned int nr_surfaces, sampler_count; + GLboolean uses_depth, computes_depth, uses_kill, is_glsl; + GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable; + GLfloat offset_units, offset_factor; +}; -static void upload_wm_unit(struct brw_context *brw ) +static void +wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) { + const struct gl_fragment_program *fp = brw->fragment_program; struct intel_context *intel = &brw->intel; - struct brw_wm_unit_state wm; - GLuint max_threads; + + memset(key, 0, sizeof(*key)); if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) - max_threads = 0; + key->max_threads = 1; else - max_threads = 31; - - - memset(&wm, 0, sizeof(wm)); + key->max_threads = 32; /* CACHE_NEW_WM_PROG */ - wm.thread0.grf_reg_count = ((brw->wm.prog_data->total_grf-1) & ~15) / 16; - wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6; - wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; - wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length; - wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length; - - wm.wm5.max_threads = max_threads; - - if (brw->wm.prog_data->total_scratch) { - GLuint per_thread = (brw->wm.prog_data->total_scratch + 1023) / 1024; - GLuint total = per_thread * (max_threads + 1); - - /* Scratch space -- just have to make sure there is sufficient - * allocated for the active program and current number of threads. - */ - - if (!brw->wm.scratch_buffer) { - bmGenBuffers(intel, "wm scratch", 1, &brw->wm.scratch_buffer, 12); - bmBufferSetInvalidateCB(intel, - brw->wm.scratch_buffer, - invalidate_scratch_cb, - NULL, - GL_FALSE); - } + key->total_grf = brw->wm.prog_data->total_grf; + key->urb_entry_read_length = brw->wm.prog_data->urb_read_length; + key->curb_entry_read_length = brw->wm.prog_data->curb_read_length; + key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; + key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024); - if (total > brw->wm.scratch_buffer_size) { - brw->wm.scratch_buffer_size = total; - bmBufferData(intel, - brw->wm.scratch_buffer, - brw->wm.scratch_buffer_size, - NULL, - 0); - } - - assert(per_thread <= 12 * 1024); - wm.thread2.per_thread_scratch_space = (per_thread / 1024) - 1; + /* BRW_NEW_URB_FENCE */ + key->urb_size = brw->urb.vsize; - /* XXX: could make this dynamic as this is so rarely active: - */ - /* BRW_NEW_LOCK */ - wm.thread2.scratch_space_base_pointer = - bmBufferOffset(intel, brw->wm.scratch_buffer) >> 10; - } + /* BRW_NEW_CURBE_OFFSETS */ + key->curbe_offset = brw->curbe.wm_start; /* CACHE_NEW_SURFACE */ - wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces; + key->nr_surfaces = brw->wm.nr_surfaces; - /* BRW_NEW_CURBE_OFFSETS */ - wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2; + /* CACHE_NEW_SAMPLER */ + key->sampler_count = brw->wm.sampler_count; - wm.thread3.urb_entry_read_offset = 0; + /* _NEW_POLYGONSTIPPLE */ + key->polygon_stipple = brw->attribs.Polygon->StippleFlag; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; + + /* as far as we can tell */ + key->computes_depth = + (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) != 0; + + /* _NEW_COLOR */ + key->uses_kill = fp->UsesKill || brw->attribs.Color->AlphaEnabled; + key->is_glsl = brw_wm_is_glsl(fp); + + /* XXX: This needs a flag to indicate when it changes. */ + key->stats_wm = intel->stats_wm; + + /* _NEW_LINE */ + key->line_stipple = brw->attribs.Line->StippleFlag; + + /* _NEW_POLYGON */ + key->offset_enable = brw->attribs.Polygon->OffsetFill; + key->offset_units = brw->attribs.Polygon->OffsetUnits; + key->offset_factor = brw->attribs.Polygon->OffsetFactor; +} + +static dri_bo * +wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, + dri_bo **reloc_bufs) +{ + struct brw_wm_unit_state wm; + dri_bo *bo; + + memset(&wm, 0, sizeof(wm)); + + wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ wm.thread1.depth_coef_urb_read_offset = 1; wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + wm.thread1.binding_table_entry_count = key->nr_surfaces; + + if (key->total_scratch != 0) { + wm.thread2.scratch_space_base_pointer = + brw->wm.scratch_buffer->offset >> 10; /* reloc */ + wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1; + } else { + wm.thread2.scratch_space_base_pointer = 0; + wm.thread2.per_thread_scratch_space = 0; + } - /* CACHE_NEW_SAMPLER */ - wm.wm4.sampler_count = brw->wm.sampler_count; - wm.wm4.sampler_state_pointer = brw->wm.sampler_gs_offset >> 5; + wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg; + wm.thread3.urb_entry_read_length = key->urb_entry_read_length; + wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; + wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; + wm.thread3.urb_entry_read_offset = 0; - /* BRW_NEW_FRAGMENT_PROGRAM */ - { - const struct gl_fragment_program *fp = brw->fragment_program; - - if (fp->Base.InputsRead & (1<<FRAG_ATTRIB_WPOS)) - wm.wm5.program_uses_depth = 1; /* as far as we can tell */ - - if (fp->Base.OutputsWritten & (1<<FRAG_RESULT_DEPR)) - wm.wm5.program_computes_depth = 1; - - /* _NEW_COLOR */ - if (fp->UsesKill || - brw->attribs.Color->AlphaEnabled) - wm.wm5.program_uses_killpixel = 1; + wm.wm4.sampler_count = (key->sampler_count + 1) / 4; + if (brw->wm.sampler_bo != NULL) { + /* reloc */ + wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5; + } else { + wm.wm4.sampler_state_pointer = 0; } - wm.wm5.enable_16_pix = 1; + wm.wm5.program_uses_depth = key->uses_depth; + wm.wm5.program_computes_depth = key->computes_depth; + wm.wm5.program_uses_killpixel = key->uses_kill; + + if (key->is_glsl) + wm.wm5.enable_8_pix = 1; + else + wm.wm5.enable_16_pix = 1; + + wm.wm5.max_threads = key->max_threads - 1; wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */ wm.wm5.legacy_line_rast = 0; wm.wm5.legacy_global_depth_bias = 0; @@ -144,34 +170,101 @@ static void upload_wm_unit(struct brw_context *brw ) wm.wm5.line_aa_region_width = 0; wm.wm5.line_endcap_aa_region_width = 1; - /* _NEW_POLYGONSTIPPLE */ - if (brw->attribs.Polygon->StippleFlag) - wm.wm5.polygon_stipple = 1; + wm.wm5.polygon_stipple = key->polygon_stipple; - /* _NEW_POLYGON */ - if (brw->attribs.Polygon->OffsetFill) { + if (key->offset_enable) { wm.wm5.depth_offset = 1; /* Something wierd going on with legacy_global_depth_bias, * offset_constant, scaling and MRD. This value passes glean * but gives some odd results elsewere (eg. the * quad-offset-units test). */ - wm.global_depth_offset_constant = brw->attribs.Polygon->OffsetUnits * 2; + wm.global_depth_offset_constant = key->offset_units * 2; /* This is the only value that passes glean: */ - wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor; + wm.global_depth_offset_scale = key->offset_factor; } - /* _NEW_LINE */ - if (brw->attribs.Line->StippleFlag) { - wm.wm5.line_stipple = 1; - } + wm.wm5.line_stipple = key->line_stipple; - if (INTEL_DEBUG & DEBUG_STATS || intel->stats_wm) + if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm) wm.wm4.stats_enable = 1; - brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm ); + bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT, + key, sizeof(*key), + reloc_bufs, 3, + &wm, sizeof(wm), + NULL, NULL); + + /* Emit WM program relocation */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + wm.thread0.grf_reg_count << 1, + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.prog_bo); + + /* Emit scratch space relocation */ + if (key->total_scratch != 0) { + dri_bo_emit_reloc(bo, + 0, 0, + wm.thread2.per_thread_scratch_space, + offsetof(struct brw_wm_unit_state, thread2), + brw->wm.scratch_buffer); + } + + /* Emit sampler state relocation */ + if (key->sampler_count != 0) { + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + wm.wm4.stats_enable | (wm.wm4.sampler_count << 2), + offsetof(struct brw_wm_unit_state, wm4), + brw->wm.sampler_bo); + } + + return bo; +} + + +static void upload_wm_unit( struct brw_context *brw ) +{ + struct intel_context *intel = &brw->intel; + struct brw_wm_unit_key key; + dri_bo *reloc_bufs[3]; + wm_unit_populate_key(brw, &key); + + /* Allocate the necessary scratch space if we haven't already. Don't + * bother reducing the allocation later, since we use scratch so + * rarely. + */ + assert(key.total_scratch <= 12 * 1024); + if (key.total_scratch) { + GLuint total = key.total_scratch * key.max_threads; + + if (brw->wm.scratch_buffer && total > brw->wm.scratch_buffer->size) { + dri_bo_unreference(brw->wm.scratch_buffer); + brw->wm.scratch_buffer = NULL; + } + if (brw->wm.scratch_buffer == NULL) { + brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr, + "wm scratch", + total, + 4096); + } + } + + reloc_bufs[0] = brw->wm.prog_bo; + reloc_bufs[1] = brw->wm.scratch_buffer; + reloc_bufs[2] = brw->wm.sampler_bo; + + dri_bo_unreference(brw->wm.state_bo); + brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT, + &key, sizeof(key), + reloc_bufs, 3, + NULL); + if (brw->wm.state_bo == NULL) { + brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs); + } } const struct brw_tracked_state brw_wm_unit = { @@ -189,6 +282,6 @@ const struct brw_tracked_state brw_wm_unit = { CACHE_NEW_WM_PROG | CACHE_NEW_SAMPLER) }, - .update = upload_wm_unit + .prepare = upload_wm_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index d24c618a668..3790b50c976 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -30,9 +30,9 @@ */ -#include "mtypes.h" -#include "texformat.h" -#include "texstore.h" +#include "main/mtypes.h" +#include "main/texformat.h" +#include "main/texstore.h" #include "intel_mipmap_tree.h" #include "intel_batchbuffer.h" @@ -69,7 +69,7 @@ static GLuint translate_tex_target( GLenum target ) } -static GLuint translate_tex_format( GLuint mesa_format ) +static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) { switch( mesa_format ) { case MESA_FORMAT_L8: @@ -114,11 +114,32 @@ static GLuint translate_tex_format( GLuint mesa_format ) return BRW_SURFACEFORMAT_FXT1; case MESA_FORMAT_Z16: - return BRW_SURFACEFORMAT_L16_UNORM; + if (depth_mode == GL_INTENSITY) + return BRW_SURFACEFORMAT_I16_UNORM; + else if (depth_mode == GL_ALPHA) + return BRW_SURFACEFORMAT_A16_UNORM; + else + return BRW_SURFACEFORMAT_L16_UNORM; - case MESA_FORMAT_RGBA_DXT1: case MESA_FORMAT_RGB_DXT1: - return BRW_SURFACEFORMAT_DXT1_RGB; + return BRW_SURFACEFORMAT_DXT1_RGB; + + case MESA_FORMAT_RGBA_DXT1: + return BRW_SURFACEFORMAT_BC1_UNORM; + + case MESA_FORMAT_RGBA_DXT3: + return BRW_SURFACEFORMAT_BC2_UNORM; + + case MESA_FORMAT_RGBA_DXT5: + return BRW_SURFACEFORMAT_BC3_UNORM; + + case MESA_FORMAT_SRGBA8: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB; + case MESA_FORMAT_SRGB_DXT1: + return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; + + case MESA_FORMAT_S8_Z24: + return BRW_SURFACEFORMAT_I24X8_UNORM; default: assert(0); @@ -126,142 +147,342 @@ static GLuint translate_tex_format( GLuint mesa_format ) } } -static -void brw_update_texture_surface( GLcontext *ctx, - GLuint unit, - struct brw_surface_state *surf ) -{ - struct intel_context *intel = intel_context(ctx); - struct brw_context *brw = brw_context(ctx); - struct gl_texture_object *tObj = brw->attribs.Texture->Unit[unit]._Current; - struct intel_texture_object *intelObj = intel_texture_object(tObj); - struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; +struct brw_wm_surface_key { + GLenum target, depthmode; + dri_bo *bo; + GLint format; + GLint first_level, last_level; + GLint width, height, depth; + GLint pitch, cpp; + uint32_t tiling; + GLuint offset; +}; - memset(surf, 0, sizeof(*surf)); +static void +brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) +{ + switch (tiling) { + case I915_TILING_NONE: + surf->ss3.tiled_surface = 0; + surf->ss3.tile_walk = 0; + break; + case I915_TILING_X: + surf->ss3.tiled_surface = 1; + surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + surf->ss3.tiled_surface = 1; + surf->ss3.tile_walk = BRW_TILEWALK_YMAJOR; + break; + } +} - surf->ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - surf->ss0.surface_type = translate_tex_target(tObj->Target); - surf->ss0.surface_format = translate_tex_format(firstImage->TexFormat->MesaFormat); +static dri_bo * +brw_create_texture_surface( struct brw_context *brw, + struct brw_wm_surface_key *key ) +{ + struct brw_surface_state surf; + dri_bo *bo; + + memset(&surf, 0, sizeof(surf)); + + surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf.ss0.surface_type = translate_tex_target(key->target); + + if (key->bo) + surf.ss0.surface_format = translate_tex_format(key->format, key->depthmode); + else { + switch(key->depth) { + case 32: surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; break; + default: + case 24: surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM; break; + case 16: surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; break; + } + } /* This is ok for all textures with channel width 8bit or less: */ -/* surf->ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ - - /* BRW_NEW_LOCK */ - surf->ss1.base_addr = bmBufferOffset(intel, - intelObj->mt->region->buffer); - - surf->ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel; - surf->ss2.width = firstImage->Width - 1; - surf->ss2.height = firstImage->Height - 1; - - surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR; - surf->ss3.tiled_surface = intelObj->mt->region->tiled; /* always zero */ - surf->ss3.pitch = (intelObj->mt->pitch * intelObj->mt->cpp) - 1; - surf->ss3.depth = firstImage->Depth - 1; - - surf->ss4.min_lod = 0; +/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ + if (key->bo) + surf.ss1.base_addr = key->bo->offset; /* reloc */ + else + surf.ss1.base_addr = key->offset; + + surf.ss2.mip_count = key->last_level - key->first_level; + surf.ss2.width = key->width - 1; + surf.ss2.height = key->height - 1; + brw_set_surface_tiling(&surf, key->tiling); + surf.ss3.pitch = (key->pitch * key->cpp) - 1; + surf.ss3.depth = key->depth - 1; + + surf.ss4.min_lod = 0; - if (tObj->Target == GL_TEXTURE_CUBE_MAP) { - surf->ss0.cube_pos_x = 1; - surf->ss0.cube_pos_y = 1; - surf->ss0.cube_pos_z = 1; - surf->ss0.cube_neg_x = 1; - surf->ss0.cube_neg_y = 1; - surf->ss0.cube_neg_z = 1; + if (key->target == GL_TEXTURE_CUBE_MAP) { + surf.ss0.cube_pos_x = 1; + surf.ss0.cube_pos_y = 1; + surf.ss0.cube_pos_z = 1; + surf.ss0.cube_neg_x = 1; + surf.ss0.cube_neg_y = 1; + surf.ss0.cube_neg_z = 1; } -} - + bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + key, sizeof(*key), + &key->bo, key->bo ? 1 : 0, + &surf, sizeof(surf), + NULL, NULL); + + if (key->bo) { + /* Emit relocation to surface contents */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_SAMPLER, 0, + 0, + offsetof(struct brw_surface_state, ss1), + key->bo); + } + return bo; +} -#define OFFSET(TYPE, FIELD) ( (GLuint)&(((TYPE *)0)->FIELD) ) +static void +brw_update_texture_surface( GLcontext *ctx, GLuint unit ) +{ + struct brw_context *brw = brw_context(ctx); + struct gl_texture_object *tObj = brw->attribs.Texture->Unit[unit]._Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; + struct brw_wm_surface_key key; + + memset(&key, 0, sizeof(key)); + + if (intelObj->imageOverride) { + key.pitch = intelObj->pitchOverride / intelObj->mt->cpp; + key.depth = intelObj->depthOverride; + key.bo = NULL; + key.offset = intelObj->textureOffset; + } else { + key.format = firstImage->TexFormat->MesaFormat; + key.pitch = intelObj->mt->pitch; + key.depth = firstImage->Depth; + key.bo = intelObj->mt->region->buffer; + key.offset = 0; + } + key.target = tObj->Target; + key.depthmode = tObj->DepthMode; + key.first_level = intelObj->firstLevel; + key.last_level = intelObj->lastLevel; + key.width = firstImage->Width; + key.height = firstImage->Height; + key.cpp = intelObj->mt->cpp; + key.tiling = intelObj->mt->region->tiling; + + dri_bo_unreference(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]); + brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] == NULL) { + brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_create_texture_surface(brw, &key); + } +} -static void upload_wm_surfaces(struct brw_context *brw ) +/** + * Sets up a surface state structure to point at the given region. + * While it is only used for the front/back buffer currently, it should be + * usable for further buffers when doing ARB_draw_buffer support. + */ +static void +brw_update_region_surface(struct brw_context *brw, struct intel_region *region, + unsigned int unit, GLboolean cached) { - GLcontext *ctx = &brw->intel.ctx; - struct intel_context *intel = &brw->intel; - struct brw_surface_binding_table bind; - GLuint i; - - memcpy(&bind, &brw->wm.bind, sizeof(bind)); - - { + dri_bo *region_bo = NULL; + struct { + unsigned int surface_type; + unsigned int surface_format; + unsigned int width, height, cpp; + GLubyte color_mask[4]; + GLboolean color_blend; + uint32_t tiling; + } key; + + memset(&key, 0, sizeof(key)); + + if (region != NULL) { + region_bo = region->buffer; + + key.surface_type = BRW_SURFACE_2D; + if (region->cpp == 4) + key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + else + key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + key.tiling = region->tiling; + key.width = region->pitch; /* XXX: not really! */ + key.height = region->height; + key.cpp = region->cpp; + } else { + key.surface_type = BRW_SURFACE_NULL; + key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + key.tiling = 0; + key.width = 1; + key.height = 1; + key.cpp = 4; + } + memcpy(key.color_mask, brw->attribs.Color->ColorMask, + sizeof(key.color_mask)); + key.color_blend = (!brw->attribs.Color->_LogicOpEnabled && + brw->attribs.Color->BlendEnabled); + + dri_bo_unreference(brw->wm.surf_bo[unit]); + brw->wm.surf_bo[unit] = NULL; + if (cached) + brw->wm.surf_bo[unit] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + ®ion_bo, 1, + NULL); + + if (brw->wm.surf_bo[unit] == NULL) { struct brw_surface_state surf; - struct intel_region *region = brw->state.draw_region; memset(&surf, 0, sizeof(surf)); - if (region->cpp == 4) - surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - else - surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + surf.ss0.surface_format = key.surface_format; + surf.ss0.surface_type = key.surface_type; + if (region_bo != NULL) + surf.ss1.base_addr = region_bo->offset; /* reloc */ - surf.ss0.surface_type = BRW_SURFACE_2D; + surf.ss2.width = key.width - 1; + surf.ss2.height = key.height - 1; + brw_set_surface_tiling(&surf, key.tiling); + surf.ss3.pitch = (key.width * key.cpp) - 1; /* _NEW_COLOR */ - surf.ss0.color_blend = (!brw->attribs.Color->_LogicOpEnabled && - brw->attribs.Color->BlendEnabled); + surf.ss0.color_blend = key.color_blend; + surf.ss0.writedisable_red = !key.color_mask[0]; + surf.ss0.writedisable_green = !key.color_mask[1]; + surf.ss0.writedisable_blue = !key.color_mask[2]; + surf.ss0.writedisable_alpha = !key.color_mask[3]; + + /* Key size will never match key size for textures, so we're safe. */ + brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + ®ion_bo, 1, + &surf, sizeof(surf), + NULL, NULL); + if (region_bo != NULL) { + /* We might sample from it, and we might render to it, so flag + * them both. We might be able to figure out from other state + * a more restrictive relocation to emit. + */ + dri_bo_emit_reloc(brw->wm.surf_bo[unit], + I915_GEM_DOMAIN_RENDER | + I915_GEM_DOMAIN_SAMPLER, + I915_GEM_DOMAIN_RENDER, + 0, + offsetof(struct brw_surface_state, ss1), + region_bo); + } + } +} - surf.ss0.writedisable_red = !brw->attribs.Color->ColorMask[0]; - surf.ss0.writedisable_green = !brw->attribs.Color->ColorMask[1]; - surf.ss0.writedisable_blue = !brw->attribs.Color->ColorMask[2]; - surf.ss0.writedisable_alpha = !brw->attribs.Color->ColorMask[3]; +/** + * Constructs the binding table for the WM surface state, which maps unit + * numbers to surface state objects. + */ +static dri_bo * +brw_wm_get_binding_table(struct brw_context *brw) +{ + dri_bo *bind_bo; + + bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->wm.surf_bo, brw->wm.nr_surfaces, + NULL); + + if (bind_bo == NULL) { + GLuint data_size = brw->wm.nr_surfaces * sizeof(GLuint); + uint32_t *data = malloc(data_size); + int i; + + for (i = 0; i < brw->wm.nr_surfaces; i++) + if (brw->wm.surf_bo[i]) + data[i] = brw->wm.surf_bo[i]->offset; + else + data[i] = 0; + + bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->wm.surf_bo, brw->wm.nr_surfaces, + data, data_size, + NULL, NULL); + + /* Emit binding table relocations to surface state */ + for (i = 0; i < BRW_WM_MAX_SURF; i++) { + if (brw->wm.surf_bo[i] != NULL) { + dri_bo_emit_reloc(bind_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0, + i * sizeof(GLuint), + brw->wm.surf_bo[i]); + } + } - /* BRW_NEW_LOCK */ - surf.ss1.base_addr = bmBufferOffset(&brw->intel, region->buffer); + free(data); + } + return bind_bo; +} - surf.ss2.width = region->pitch - 1; /* XXX: not really! */ - surf.ss2.height = region->height - 1; - surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; - surf.ss3.tiled_surface = region->tiled; - surf.ss3.pitch = (region->pitch * region->cpp) - 1; +static void prepare_wm_surfaces(struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + GLuint i; - brw->wm.bind.surf_ss_offset[0] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); - brw->wm.nr_surfaces = 1; + if (brw->state.nr_draw_regions > 1) { + for (i = 0; i < brw->state.nr_draw_regions; i++) { + brw_update_region_surface(brw, brw->state.draw_regions[i], i, + GL_FALSE); + } + }else { + brw_update_region_surface(brw, brw->state.draw_regions[0], 0, GL_TRUE); } + brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i]; - /* _NEW_TEXTURE, BRW_NEW_TEXDATA - */ - if (texUnit->_ReallyEnabled && - intel_finalize_mipmap_tree(intel,texUnit->_Current)) { - - struct brw_surface_state surf; - - brw_update_texture_surface(ctx, i, &surf); - - brw->wm.bind.surf_ss_offset[i+1] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); - brw->wm.nr_surfaces = i+2; - } - else if( texUnit->_ReallyEnabled && - texUnit->_Current == intel->frame_buffer_texobj ) - { - brw->wm.bind.surf_ss_offset[i+1] = brw->wm.bind.surf_ss_offset[0]; - brw->wm.nr_surfaces = i+2; - } - else { - brw->wm.bind.surf_ss_offset[i+1] = 0; + /* _NEW_TEXTURE, BRW_NEW_TEXDATA */ + if(texUnit->_ReallyEnabled) { + if (texUnit->_Current == intel->frame_buffer_texobj) { + dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); + brw->wm.surf_bo[i+MAX_DRAW_BUFFERS] = brw->wm.surf_bo[0]; + dri_bo_reference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); + brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1; + } else { + brw_update_texture_surface(ctx, i); + brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1; + } + } else { + dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); + brw->wm.surf_bo[i+MAX_DRAW_BUFFERS] = NULL; } + } - brw->wm.bind_ss_offset = brw_cache_data( &brw->cache[BRW_SS_SURF_BIND], - &brw->wm.bind ); + dri_bo_unreference(brw->wm.bind_bo); + brw->wm.bind_bo = brw_wm_get_binding_table(brw); } + const struct brw_tracked_state brw_wm_surfaces = { .dirty = { .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS, - .brw = (BRW_NEW_CONTEXT | - BRW_NEW_LOCK), /* required for bmBufferOffset */ + .brw = BRW_NEW_CONTEXT, .cache = 0 }, - .update = upload_wm_surfaces + .prepare = prepare_wm_surfaces, }; diff --git a/src/mesa/drivers/dri/i965/bufmgr.h b/src/mesa/drivers/dri/i965/bufmgr.h deleted file mode 100644 index e748c0d6d0b..00000000000 --- a/src/mesa/drivers/dri/i965/bufmgr.h +++ /dev/null @@ -1,215 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef BUFMGR_H -#define BUFMGR_H - -#include "intel_context.h" - - -/* The buffer manager context. Opaque. - */ -struct bufmgr; -struct buffer; - - -struct bufmgr *bm_fake_intel_Attach( struct intel_context *intel ); - -/* Flags for validate and other calls. If both NO_UPLOAD and NO_EVICT - * are specified, ValidateBuffers is essentially a query. - */ -#define BM_MEM_LOCAL 0x1 -#define BM_MEM_AGP 0x2 -#define BM_MEM_VRAM 0x4 /* not yet used */ -#define BM_WRITE 0x8 /* not yet used */ -#define BM_READ 0x10 /* not yet used */ -#define BM_NO_UPLOAD 0x20 -#define BM_NO_EVICT 0x40 -#define BM_NO_MOVE 0x80 /* not yet used */ -#define BM_NO_ALLOC 0x100 /* legacy "fixed" buffers only */ -#define BM_CLIENT 0x200 /* for map - pointer will be accessed - * without dri lock */ - -#define BM_MEM_MASK (BM_MEM_LOCAL|BM_MEM_AGP|BM_MEM_VRAM) - - - - -/* Create a pool of a given memory type, from a certain offset and a - * certain size. - * - * Also passed in is a virtual pointer to the start of the pool. This - * is useful in the faked-out version in i915 so that MapBuffer can - * return a pointer to a buffer residing in AGP space. - * - * Flags passed into a pool are inherited by all buffers allocated in - * that pool. So pools representing the static front,back,depth - * buffer allocations should have MEM_AGP|NO_UPLOAD|NO_EVICT|NO_MOVE to match - * the behaviour of the legacy allocations. - * - * Returns -1 for failure, pool number for success. - */ -int bmInitPool( struct intel_context *, - unsigned long low_offset, - void *low_virtual, - unsigned long size, - unsigned flags); - - -/* Stick closely to ARB_vbo semantics - they're well defined and - * understood, and drivers can just pass the calls through without too - * much thunking. - */ -void bmGenBuffers(struct intel_context *, const char *, unsigned n, struct buffer **buffers, - int align ); -void bmDeleteBuffers(struct intel_context *, unsigned n, struct buffer **buffers); - - -/* Hook to inform faked buffer manager about fixed-position - * front,depth,back buffers. These may move to a fully memory-managed - * scheme, or they may continue to be managed as is. - */ -struct buffer *bmGenBufferStatic(struct intel_context *, - unsigned pool); - -/* On evict, buffer manager will call invalidate_cb() to note that the - * buffer needs to be reloaded. - * - * Buffer is uploaded by calling bmMapBuffer() and copying data into - * the returned pointer. - * - * This is basically a big hack to get some more performance by - * turning off backing store for buffers where we either have it - * already (textures) or don't need it (batch buffers, temporary - * vbo's). - */ -void bmBufferSetInvalidateCB(struct intel_context *, - struct buffer *buf, - void (*invalidate_cb)( struct intel_context *, void *ptr ), - void *ptr, - GLboolean dont_fence_subdata); - - -/* The driver has more intimate knowledge of the hardare than a GL - * client would, so flags here is more proscriptive than the usage - * values in the ARB_vbo interface: - */ -int bmBufferData(struct intel_context *, - struct buffer *buf, - unsigned size, - const void *data, - unsigned flags ); - -int bmBufferSubData(struct intel_context *, - struct buffer *buf, - unsigned offset, - unsigned size, - const void *data ); - - -int bmBufferDataAUB(struct intel_context *, - struct buffer *buf, - unsigned size, - const void *data, - unsigned flags, - unsigned aubtype, - unsigned aubsubtype ); - -int bmBufferSubDataAUB(struct intel_context *, - struct buffer *buf, - unsigned offset, - unsigned size, - const void *data, - unsigned aubtype, - unsigned aubsubtype ); - - -/* In this version, taking the offset will provoke an upload on - * buffers not already resident in AGP: - */ -unsigned bmBufferOffset(struct intel_context *, - struct buffer *buf); - - -/* Extract data from the buffer: - */ -void bmBufferGetSubData(struct intel_context *, - struct buffer *buf, - unsigned offset, - unsigned size, - void *data ); - -void *bmMapBuffer( struct intel_context *, - struct buffer *buf, - unsigned access ); - -void bmUnmapBuffer( struct intel_context *, - struct buffer *buf ); - -void bmUnmapBufferAUB( struct intel_context *, - struct buffer *buf, - unsigned aubtype, - unsigned aubsubtype ); - - -/* Pertains to all buffers who's offset has been taken since the last - * fence or release. - */ -int bmValidateBuffers( struct intel_context * ); -void bmReleaseBuffers( struct intel_context * ); - -GLuint bmCtxId( struct intel_context *intel ); - - -GLboolean bmError( struct intel_context * ); -void bmEvictAll( struct intel_context * ); - -void *bmFindVirtual( struct intel_context *intel, - unsigned int offset, - size_t sz ); - -/* This functionality is used by the buffer manager, not really sure - * if we need to be exposing it in this way, probably libdrm will - * offer equivalent calls. - * - * For now they can stay, but will likely change/move before final: - */ -unsigned bmSetFence( struct intel_context * ); -unsigned bmSetFenceLock( struct intel_context * ); -unsigned bmLockAndFence( struct intel_context *intel ); -int bmTestFence( struct intel_context *, unsigned fence ); -void bmFinishFence( struct intel_context *, unsigned fence ); -void bmFinishFenceLock( struct intel_context *, unsigned fence ); - -void bm_fake_NotifyContendedLockTake( struct intel_context * ); - -extern int INTEL_DEBUG; -#define DEBUG_BUFMGR 0x10000000 - -#define DBG(...) do { if (INTEL_DEBUG & DEBUG_BUFMGR) _mesa_printf(__VA_ARGS__); } while(0) - -#endif diff --git a/src/mesa/drivers/dri/i965/bufmgr_fake.c b/src/mesa/drivers/dri/i965/bufmgr_fake.c deleted file mode 100644 index 24ee11edd8c..00000000000 --- a/src/mesa/drivers/dri/i965/bufmgr_fake.c +++ /dev/null @@ -1,1460 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Originally a fake version of the buffer manager so that we can - * prototype the changes in a driver fairly quickly, has been fleshed - * out to a fully functional interim solution. - * - * Basically wraps the old style memory management in the new - * programming interface, but is more expressive and avoids many of - * the bugs in the old texture manager. - */ -#include "bufmgr.h" - -#include "intel_context.h" -#include "intel_ioctl.h" -#include "intel_batchbuffer.h" - -#include "simple_list.h" -#include "mm.h" -#include "imports.h" - -#define BM_POOL_MAX 8 - -/* Internal flags: - */ -#define BM_NO_BACKING_STORE 0x2000 -#define BM_NO_FENCE_SUBDATA 0x4000 - - -static int check_fenced( struct intel_context *intel ); - -static int nr_attach = 0; - -/* Wrapper around mm.c's mem_block, which understands that you must - * wait for fences to expire before memory can be freed. This is - * specific to our use of memcpy for uploads - an upload that was - * processed through the command queue wouldn't need to care about - * fences. - */ -struct block { - struct block *next, *prev; - struct pool *pool; /* BM_MEM_AGP */ - struct mem_block *mem; /* BM_MEM_AGP */ - - unsigned referenced:1; - unsigned on_hardware:1; - unsigned fenced:1; - - - unsigned fence; /* BM_MEM_AGP, Split to read_fence, write_fence */ - - struct buffer *buf; - void *virtual; -}; - - -struct buffer { - unsigned id; /* debug only */ - const char *name; - unsigned size; - - unsigned mapped:1; - unsigned dirty:1; - unsigned aub_dirty:1; - unsigned alignment:13; - unsigned flags:16; - - struct block *block; - void *backing_store; - void (*invalidate_cb)( struct intel_context *, void * ); - void *invalidate_ptr; -}; - -struct pool { - unsigned size; - unsigned low_offset; - struct buffer *static_buffer; - unsigned flags; - struct mem_block *heap; - void *virtual; - struct block lru; /* only allocated, non-fence-pending blocks here */ -}; - -struct bufmgr { - _glthread_Mutex mutex; /**< for thread safety */ - struct pool pool[BM_POOL_MAX]; - unsigned nr_pools; - - unsigned buf_nr; /* for generating ids */ - - struct block referenced; /* after bmBufferOffset */ - struct block on_hardware; /* after bmValidateBuffers */ - struct block fenced; /* after bmFenceBuffers (mi_flush, emit irq, write dword) */ - /* then to pool->lru or free() */ - - unsigned ctxId; - unsigned last_fence; - unsigned free_on_hardware; - - unsigned fail:1; - unsigned need_fence:1; -}; - -#define MAXFENCE 0x7fffffff - -static GLboolean FENCE_LTE( unsigned a, unsigned b ) -{ - if (a == b) - return GL_TRUE; - - if (a < b && b - a < (1<<24)) - return GL_TRUE; - - if (a > b && MAXFENCE - a + b < (1<<24)) - return GL_TRUE; - - return GL_FALSE; -} - -int bmTestFence( struct intel_context *intel, unsigned fence ) -{ - /* Slight problem with wrap-around: - */ - return fence == 0 || FENCE_LTE(fence, intel->sarea->last_dispatch); -} - -#define LOCK(bm) \ - int dolock = nr_attach > 1; \ - if (dolock) _glthread_LOCK_MUTEX(bm->mutex) - -#define UNLOCK(bm) \ - if (dolock) _glthread_UNLOCK_MUTEX(bm->mutex) - - - -static GLboolean alloc_from_pool( struct intel_context *intel, - unsigned pool_nr, - struct buffer *buf ) -{ - struct bufmgr *bm = intel->bm; - struct pool *pool = &bm->pool[pool_nr]; - struct block *block = (struct block *)calloc(sizeof *block, 1); - GLuint sz, align = (1<<buf->alignment); - - if (!block) - return GL_FALSE; - - sz = (buf->size + align-1) & ~(align-1); - - block->mem = mmAllocMem(pool->heap, - sz, - buf->alignment, 0); - if (!block->mem) { - free(block); - return GL_FALSE; - } - - make_empty_list(block); - - /* Insert at head or at tail??? - */ - insert_at_tail(&pool->lru, block); - - block->pool = pool; - block->virtual = pool->virtual + block->mem->ofs; - block->buf = buf; - - buf->block = block; - - return GL_TRUE; -} - - - - - - - - -/* Release the card storage associated with buf: - */ -static void free_block( struct intel_context *intel, struct block *block ) -{ - DBG("free block %p\n", block); - - if (!block) - return; - - check_fenced(intel); - - if (block->referenced) { - _mesa_printf("tried to free block on referenced list\n"); - assert(0); - } - else if (block->on_hardware) { - block->buf = NULL; - intel->bm->free_on_hardware += block->mem->size; - } - else if (block->fenced) { - block->buf = NULL; - } - else { - DBG(" - free immediately\n"); - remove_from_list(block); - - mmFreeMem(block->mem); - free(block); - } -} - - -static void alloc_backing_store( struct intel_context *intel, struct buffer *buf ) -{ - assert(!buf->backing_store); - assert(!(buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE))); - - buf->backing_store = ALIGN_MALLOC(buf->size, 64); -} - -static void free_backing_store( struct intel_context *intel, struct buffer *buf ) -{ - assert(!(buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE))); - - if (buf->backing_store) { - ALIGN_FREE(buf->backing_store); - buf->backing_store = NULL; - } -} - - - - - - -static void set_dirty( struct intel_context *intel, - struct buffer *buf ) -{ - if (buf->flags & BM_NO_BACKING_STORE) - buf->invalidate_cb(intel, buf->invalidate_ptr); - - assert(!(buf->flags & BM_NO_EVICT)); - - DBG("set_dirty - buf %d\n", buf->id); - buf->dirty = 1; -} - - -static int evict_lru( struct intel_context *intel, GLuint max_fence, GLuint *pool ) -{ - struct bufmgr *bm = intel->bm; - struct block *block, *tmp; - int i; - - DBG("%s\n", __FUNCTION__); - - for (i = 0; i < bm->nr_pools; i++) { - if (!(bm->pool[i].flags & BM_NO_EVICT)) { - foreach_s(block, tmp, &bm->pool[i].lru) { - - if (block->buf && - (block->buf->flags & BM_NO_FENCE_SUBDATA)) - continue; - - if (block->fence && max_fence && - !FENCE_LTE(block->fence, max_fence)) - return 0; - - set_dirty(intel, block->buf); - block->buf->block = NULL; - - free_block(intel, block); - *pool = i; - return 1; - } - } - } - - - return 0; -} - - -#define foreach_s_rev(ptr, t, list) \ - for(ptr=(list)->prev,t=(ptr)->prev; list != ptr; ptr=t, t=(t)->prev) - -static int evict_mru( struct intel_context *intel, GLuint *pool ) -{ - struct bufmgr *bm = intel->bm; - struct block *block, *tmp; - int i; - - DBG("%s\n", __FUNCTION__); - - for (i = 0; i < bm->nr_pools; i++) { - if (!(bm->pool[i].flags & BM_NO_EVICT)) { - foreach_s_rev(block, tmp, &bm->pool[i].lru) { - - if (block->buf && - (block->buf->flags & BM_NO_FENCE_SUBDATA)) - continue; - - set_dirty(intel, block->buf); - block->buf->block = NULL; - - free_block(intel, block); - *pool = i; - return 1; - } - } - } - - - return 0; -} - - -static int check_fenced( struct intel_context *intel ) -{ - struct bufmgr *bm = intel->bm; - struct block *block, *tmp; - int ret = 0; - - foreach_s(block, tmp, &bm->fenced ) { - assert(block->fenced); - - if (bmTestFence(intel, block->fence)) { - - block->fenced = 0; - - if (!block->buf) { - DBG("delayed free: offset %x sz %x\n", block->mem->ofs, block->mem->size); - remove_from_list(block); - mmFreeMem(block->mem); - free(block); - } - else { - DBG("return to lru: offset %x sz %x\n", block->mem->ofs, block->mem->size); - move_to_tail(&block->pool->lru, block); - } - - ret = 1; - } - else { - /* Blocks are ordered by fence, so if one fails, all from - * here will fail also: - */ - break; - } - } - - /* Also check the referenced list: - */ - foreach_s(block, tmp, &bm->referenced ) { - if (block->fenced && - bmTestFence(intel, block->fence)) { - block->fenced = 0; - } - } - - - DBG("%s: %d\n", __FUNCTION__, ret); - return ret; -} - - - -static void fence_blocks( struct intel_context *intel, - unsigned fence ) -{ - struct bufmgr *bm = intel->bm; - struct block *block, *tmp; - - foreach_s (block, tmp, &bm->on_hardware) { - DBG("Fence block %p (sz 0x%x buf %p) with fence %d\n", block, - block->mem->size, block->buf, fence); - block->fence = fence; - - block->on_hardware = 0; - block->fenced = 1; - - /* Move to tail of pending list here - */ - move_to_tail(&bm->fenced, block); - } - - /* Also check the referenced list: - */ - foreach_s (block, tmp, &bm->referenced) { - if (block->on_hardware) { - DBG("Fence block %p (sz 0x%x buf %p) with fence %d\n", block, - block->mem->size, block->buf, fence); - - block->fence = fence; - block->on_hardware = 0; - block->fenced = 1; - } - } - - - bm->last_fence = fence; - assert(is_empty_list(&bm->on_hardware)); -} - - - - -static GLboolean alloc_block( struct intel_context *intel, - struct buffer *buf ) -{ - struct bufmgr *bm = intel->bm; - int i; - - assert(intel->locked); - - DBG("%s 0x%x bytes (%s)\n", __FUNCTION__, buf->size, buf->name); - - for (i = 0; i < bm->nr_pools; i++) { - if (!(bm->pool[i].flags & BM_NO_ALLOC) && - alloc_from_pool(intel, i, buf)) { - - DBG("%s --> 0x%x (sz %x)\n", __FUNCTION__, - buf->block->mem->ofs, buf->block->mem->size); - - return GL_TRUE; - } - } - - DBG("%s --> fail\n", __FUNCTION__); - return GL_FALSE; -} - - -static GLboolean evict_and_alloc_block( struct intel_context *intel, - struct buffer *buf ) -{ - GLuint pool; - struct bufmgr *bm = intel->bm; - - assert(buf->block == NULL); - - /* Put a cap on the amount of free memory we'll allow to accumulate - * before emitting a fence. - */ - if (bm->free_on_hardware > 1 * 1024 * 1024) { - DBG("fence for free space: %x\n", bm->free_on_hardware); - bmSetFence(intel); - } - - /* Search for already free memory: - */ - if (alloc_block(intel, buf)) - return GL_TRUE; - - /* Look for memory that may have become free: - */ - if (check_fenced(intel) && - alloc_block(intel, buf)) - return GL_TRUE; - - /* Look for memory blocks not used for >1 frame: - */ - while (evict_lru(intel, intel->second_last_swap_fence, &pool)) - if (alloc_from_pool(intel, pool, buf)) - return GL_TRUE; - - /* If we're not thrashing, allow lru eviction to dig deeper into - * recently used textures. We'll probably be thrashing soon: - */ - if (!intel->thrashing) { - while (evict_lru(intel, 0, &pool)) - if (alloc_from_pool(intel, pool, buf)) - return GL_TRUE; - } - - /* Keep thrashing counter alive? - */ - if (intel->thrashing) - intel->thrashing = 20; - - /* Wait on any already pending fences - here we are waiting for any - * freed memory that has been submitted to hardware and fenced to - * become available: - */ - while (!is_empty_list(&bm->fenced)) { - GLuint fence = bm->fenced.next->fence; - bmFinishFence(intel, fence); - - if (alloc_block(intel, buf)) - return GL_TRUE; - } - - - /* - */ - if (!is_empty_list(&bm->on_hardware)) { - bmSetFence(intel); - - while (!is_empty_list(&bm->fenced)) { - GLuint fence = bm->fenced.next->fence; - bmFinishFence(intel, fence); - } - - if (!intel->thrashing) { - DBG("thrashing\n"); - } - intel->thrashing = 20; - - if (alloc_block(intel, buf)) - return GL_TRUE; - } - - while (evict_mru(intel, &pool)) - if (alloc_from_pool(intel, pool, buf)) - return GL_TRUE; - - DBG("%s 0x%x bytes failed\n", __FUNCTION__, buf->size); - - assert(is_empty_list(&bm->on_hardware)); - assert(is_empty_list(&bm->fenced)); - - return GL_FALSE; -} - - - - - - - - - - -/*********************************************************************** - * Public functions - */ - - -/* The initialization functions are skewed in the fake implementation. - * This call would be to attach to an existing manager, rather than to - * create a local one. - */ -struct bufmgr *bm_fake_intel_Attach( struct intel_context *intel ) -{ - _glthread_DECLARE_STATIC_MUTEX(initMutex); - static struct bufmgr bm; - - /* This function needs a mutex of its own... - */ - _glthread_LOCK_MUTEX(initMutex); - - if (nr_attach == 0) { - _glthread_INIT_MUTEX(bm.mutex); - - make_empty_list(&bm.referenced); - make_empty_list(&bm.fenced); - make_empty_list(&bm.on_hardware); - - /* The context id of any of the share group. This won't be used - * in communication with the kernel, so it doesn't matter if - * this context is eventually deleted. - */ - bm.ctxId = intel->hHWContext; - } - - nr_attach++; - - _glthread_UNLOCK_MUTEX(initMutex); - - return &bm; -} - - - -/* The virtual pointer would go away in a true implementation. - */ -int bmInitPool( struct intel_context *intel, - unsigned long low_offset, - void *low_virtual, - unsigned long size, - unsigned flags) -{ - struct bufmgr *bm = intel->bm; - int retval = 0; - - LOCK(bm); - { - GLuint i; - - for (i = 0; i < bm->nr_pools; i++) { - if (bm->pool[i].low_offset == low_offset && - bm->pool[i].size == size) { - retval = i; - goto out; - } - } - - - if (bm->nr_pools >= BM_POOL_MAX) - retval = -1; - else { - i = bm->nr_pools++; - - DBG("bmInitPool %d low_offset %x sz %x\n", - i, low_offset, size); - - bm->pool[i].low_offset = low_offset; - bm->pool[i].size = size; - bm->pool[i].heap = mmInit( low_offset, size ); - bm->pool[i].virtual = low_virtual - low_offset; - bm->pool[i].flags = flags; - - make_empty_list(&bm->pool[i].lru); - - retval = i; - } - } - out: - UNLOCK(bm); - return retval; -} - -static struct buffer *do_GenBuffer(struct intel_context *intel, const char *name, int align) -{ - struct bufmgr *bm = intel->bm; - struct buffer *buf = calloc(sizeof(*buf), 1); - - buf->id = ++bm->buf_nr; - buf->name = name; - buf->alignment = align; - buf->flags = BM_MEM_AGP|BM_MEM_VRAM|BM_MEM_LOCAL; - - return buf; -} - - -void *bmFindVirtual( struct intel_context *intel, - unsigned int offset, - size_t sz ) -{ - struct bufmgr *bm = intel->bm; - int i; - - for (i = 0; i < bm->nr_pools; i++) - if (offset >= bm->pool[i].low_offset && - offset + sz <= bm->pool[i].low_offset + bm->pool[i].size) - return bm->pool[i].virtual + offset; - - return NULL; -} - - -void bmGenBuffers(struct intel_context *intel, - const char *name, unsigned n, - struct buffer **buffers, - int align ) -{ - struct bufmgr *bm = intel->bm; - LOCK(bm); - { - int i; - - for (i = 0; i < n; i++) - buffers[i] = do_GenBuffer(intel, name, align); - } - UNLOCK(bm); -} - - -void bmDeleteBuffers(struct intel_context *intel, unsigned n, struct buffer **buffers) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - unsigned i; - - for (i = 0; i < n; i++) { - struct buffer *buf = buffers[i]; - - if (buf && buf->block) - free_block(intel, buf->block); - - if (buf) - free(buf); - } - } - UNLOCK(bm); -} - - - - -/* Hook to inform faked buffer manager about fixed-position - * front,depth,back buffers. These may move to a fully memory-managed - * scheme, or they may continue to be managed as is. It will probably - * be useful to pass a fixed offset here one day. - */ -struct buffer *bmGenBufferStatic(struct intel_context *intel, - unsigned pool ) -{ - struct bufmgr *bm = intel->bm; - struct buffer *buf; - LOCK(bm); - { - assert(bm->pool[pool].flags & BM_NO_EVICT); - assert(bm->pool[pool].flags & BM_NO_MOVE); - - if (bm->pool[pool].static_buffer) - buf = bm->pool[pool].static_buffer; - else { - buf = do_GenBuffer(intel, "static", 12); - - bm->pool[pool].static_buffer = buf; - assert(!buf->block); - - buf->size = bm->pool[pool].size; - buf->flags = bm->pool[pool].flags; - buf->alignment = 12; - - if (!alloc_from_pool(intel, pool, buf)) - assert(0); - } - } - UNLOCK(bm); - return buf; -} - - -static void wait_quiescent(struct intel_context *intel, - struct block *block) -{ - if (block->on_hardware) { - assert(intel->bm->need_fence); - bmSetFence(intel); - assert(!block->on_hardware); - } - - - if (block->fenced) { - bmFinishFence(intel, block->fence); - } - - assert(!block->on_hardware); - assert(!block->fenced); -} - - - -/* If buffer size changes, free and reallocate. Otherwise update in - * place. - */ -int bmBufferData(struct intel_context *intel, - struct buffer *buf, - unsigned size, - const void *data, - unsigned flags ) -{ - struct bufmgr *bm = intel->bm; - int retval = 0; - - LOCK(bm); - { - DBG("bmBufferData %d sz 0x%x data: %p\n", buf->id, size, data); - - assert(!buf->mapped); - - if (buf->block) { - struct block *block = buf->block; - - /* Optimistic check to see if we can reuse the block -- not - * required for correctness: - */ - if (block->fenced) - check_fenced(intel); - - if (block->on_hardware || - block->fenced || - (buf->size && buf->size != size) || - (data == NULL)) { - - assert(!block->referenced); - - free_block(intel, block); - buf->block = NULL; - buf->dirty = 1; - } - } - - buf->size = size; - if (buf->block) { - assert (buf->block->mem->size >= size); - } - - if (buf->flags & (BM_NO_BACKING_STORE|BM_NO_EVICT)) { - - assert(intel->locked || data == NULL); - - if (data != NULL) { - if (!buf->block && !evict_and_alloc_block(intel, buf)) { - bm->fail = 1; - retval = -1; - goto out; - } - - wait_quiescent(intel, buf->block); - - DBG("bmBufferData %d offset 0x%x sz 0x%x\n", - buf->id, buf->block->mem->ofs, size); - - assert(buf->block->virtual == buf->block->pool->virtual + buf->block->mem->ofs); - - do_memcpy(buf->block->virtual, data, size); - } - buf->dirty = 0; - } - else { - DBG("%s - set buf %d dirty\n", __FUNCTION__, buf->id); - set_dirty(intel, buf); - free_backing_store(intel, buf); - - if (data != NULL) { - alloc_backing_store(intel, buf); - do_memcpy(buf->backing_store, data, size); - } - } - } - out: - UNLOCK(bm); - return retval; -} - - -/* Update the buffer in place, in whatever space it is currently resident: - */ -int bmBufferSubData(struct intel_context *intel, - struct buffer *buf, - unsigned offset, - unsigned size, - const void *data ) -{ - struct bufmgr *bm = intel->bm; - int retval = 0; - - if (size == 0) - return 0; - - LOCK(bm); - { - DBG("bmBufferSubdata %d offset 0x%x sz 0x%x\n", buf->id, offset, size); - - assert(offset+size <= buf->size); - - if (buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE)) { - - assert(intel->locked); - - if (!buf->block && !evict_and_alloc_block(intel, buf)) { - bm->fail = 1; - retval = -1; - goto out; - } - - if (!(buf->flags & BM_NO_FENCE_SUBDATA)) - wait_quiescent(intel, buf->block); - - buf->dirty = 0; - - do_memcpy(buf->block->virtual + offset, data, size); - } - else { - DBG("%s - set buf %d dirty\n", __FUNCTION__, buf->id); - set_dirty(intel, buf); - - if (buf->backing_store == NULL) - alloc_backing_store(intel, buf); - - do_memcpy(buf->backing_store + offset, data, size); - } - } - out: - UNLOCK(bm); - return retval; -} - - - -int bmBufferDataAUB(struct intel_context *intel, - struct buffer *buf, - unsigned size, - const void *data, - unsigned flags, - unsigned aubtype, - unsigned aubsubtype ) -{ - int retval = bmBufferData(intel, buf, size, data, flags); - - - /* This only works because in this version of the buffer manager we - * allocate all buffers statically in agp space and so can emit the - * uploads to the aub file with the correct offsets as they happen. - */ - if (retval == 0 && data && intel->aub_file) { - - if (buf->block && !buf->dirty) { - intel->vtbl.aub_gtt_data(intel, - buf->block->mem->ofs, - buf->block->virtual, - size, - aubtype, - aubsubtype); - buf->aub_dirty = 0; - } - } - - return retval; -} - - -int bmBufferSubDataAUB(struct intel_context *intel, - struct buffer *buf, - unsigned offset, - unsigned size, - const void *data, - unsigned aubtype, - unsigned aubsubtype ) -{ - int retval = bmBufferSubData(intel, buf, offset, size, data); - - - /* This only works because in this version of the buffer manager we - * allocate all buffers statically in agp space and so can emit the - * uploads to the aub file with the correct offsets as they happen. - */ - if (intel->aub_file) { - if (retval == 0 && buf->block && !buf->dirty) - intel->vtbl.aub_gtt_data(intel, - buf->block->mem->ofs + offset, - ((const char *)buf->block->virtual) + offset, - size, - aubtype, - aubsubtype); - } - - return retval; -} - -void bmUnmapBufferAUB( struct intel_context *intel, - struct buffer *buf, - unsigned aubtype, - unsigned aubsubtype ) -{ - bmUnmapBuffer(intel, buf); - - if (intel->aub_file) { - /* Hack - exclude the framebuffer mappings. If you removed - * this, you'd get very big aubfiles, but you *would* be able to - * see fallback rendering. - */ - if (buf->block && !buf->dirty && buf->block->pool == &intel->bm->pool[0]) { - buf->aub_dirty = 1; - } - } -} - -unsigned bmBufferOffset(struct intel_context *intel, - struct buffer *buf) -{ - struct bufmgr *bm = intel->bm; - unsigned retval = 0; - - LOCK(bm); - { - assert(intel->locked); - - if (!buf->block && - !evict_and_alloc_block(intel, buf)) { - bm->fail = 1; - retval = ~0; - } - else { - assert(buf->block); - assert(buf->block->buf == buf); - - DBG("Add buf %d (block %p, dirty %d) to referenced list\n", buf->id, buf->block, - buf->dirty); - - move_to_tail(&bm->referenced, buf->block); - buf->block->referenced = 1; - - retval = buf->block->mem->ofs; - } - } - UNLOCK(bm); - - return retval; -} - - - -/* Extract data from the buffer: - */ -void bmBufferGetSubData(struct intel_context *intel, - struct buffer *buf, - unsigned offset, - unsigned size, - void *data ) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - DBG("bmBufferSubdata %d offset 0x%x sz 0x%x\n", buf->id, offset, size); - - if (buf->flags & (BM_NO_EVICT|BM_NO_BACKING_STORE)) { - if (buf->block && size) { - wait_quiescent(intel, buf->block); - do_memcpy(data, buf->block->virtual + offset, size); - } - } - else { - if (buf->backing_store && size) { - do_memcpy(data, buf->backing_store + offset, size); - } - } - } - UNLOCK(bm); -} - - -/* Return a pointer to whatever space the buffer is currently resident in: - */ -void *bmMapBuffer( struct intel_context *intel, - struct buffer *buf, - unsigned flags ) -{ - struct bufmgr *bm = intel->bm; - void *retval = NULL; - - LOCK(bm); - { - DBG("bmMapBuffer %d\n", buf->id); - - if (buf->mapped) { - _mesa_printf("%s: already mapped\n", __FUNCTION__); - retval = NULL; - } - else if (buf->flags & (BM_NO_BACKING_STORE|BM_NO_EVICT)) { - - assert(intel->locked); - - if (!buf->block && !evict_and_alloc_block(intel, buf)) { - DBG("%s: alloc failed\n", __FUNCTION__); - bm->fail = 1; - retval = NULL; - } - else { - assert(buf->block); - buf->dirty = 0; - - if (!(buf->flags & BM_NO_FENCE_SUBDATA)) - wait_quiescent(intel, buf->block); - - buf->mapped = 1; - retval = buf->block->virtual; - } - } - else { - DBG("%s - set buf %d dirty\n", __FUNCTION__, buf->id); - set_dirty(intel, buf); - - if (buf->backing_store == 0) - alloc_backing_store(intel, buf); - - buf->mapped = 1; - retval = buf->backing_store; - } - } - UNLOCK(bm); - return retval; -} - -void bmUnmapBuffer( struct intel_context *intel, struct buffer *buf ) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - DBG("bmUnmapBuffer %d\n", buf->id); - buf->mapped = 0; - } - UNLOCK(bm); -} - - - - -/* This is the big hack that turns on BM_NO_BACKING_STORE. Basically - * says that an external party will maintain the backing store, eg - * Mesa's local copy of texture data. - */ -void bmBufferSetInvalidateCB(struct intel_context *intel, - struct buffer *buf, - void (*invalidate_cb)( struct intel_context *, void *ptr ), - void *ptr, - GLboolean dont_fence_subdata) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - if (buf->backing_store) - free_backing_store(intel, buf); - - buf->flags |= BM_NO_BACKING_STORE; - - if (dont_fence_subdata) - buf->flags |= BM_NO_FENCE_SUBDATA; - - DBG("bmBufferSetInvalidateCB set buf %d dirty\n", buf->id); - buf->dirty = 1; - buf->invalidate_cb = invalidate_cb; - buf->invalidate_ptr = ptr; - - /* Note that it is invalid right from the start. Also note - * invalidate_cb is called with the bufmgr locked, so cannot - * itself make bufmgr calls. - */ - invalidate_cb( intel, ptr ); - } - UNLOCK(bm); -} - - - - - - - -/* This is only protected against thread interactions by the DRI lock - * and the policy of ensuring that all dma is flushed prior to - * releasing that lock. Otherwise you might have two threads building - * up a list of buffers to validate at once. - */ -int bmValidateBuffers( struct intel_context *intel ) -{ - struct bufmgr *bm = intel->bm; - int retval = 0; - - LOCK(bm); - { - DBG("%s fail %d\n", __FUNCTION__, bm->fail); - assert(intel->locked); - - if (!bm->fail) { - struct block *block, *tmp; - - foreach_s(block, tmp, &bm->referenced) { - struct buffer *buf = block->buf; - - DBG("Validate buf %d / block %p / dirty %d\n", buf->id, block, buf->dirty); - - /* Upload the buffer contents if necessary: - */ - if (buf->dirty) { - DBG("Upload dirty buf %d (%s) sz %d offset 0x%x\n", buf->id, - buf->name, buf->size, block->mem->ofs); - - assert(!(buf->flags & (BM_NO_BACKING_STORE|BM_NO_EVICT))); - - wait_quiescent(intel, buf->block); - - do_memcpy(buf->block->virtual, - buf->backing_store, - buf->size); - - if (intel->aub_file) { - intel->vtbl.aub_gtt_data(intel, - buf->block->mem->ofs, - buf->backing_store, - buf->size, - 0, - 0); - } - - buf->dirty = 0; - buf->aub_dirty = 0; - } - else if (buf->aub_dirty) { - intel->vtbl.aub_gtt_data(intel, - buf->block->mem->ofs, - buf->block->virtual, - buf->size, - 0, - 0); - buf->aub_dirty = 0; - } - - block->referenced = 0; - block->on_hardware = 1; - move_to_tail(&bm->on_hardware, block); - } - - bm->need_fence = 1; - } - - retval = bm->fail ? -1 : 0; - } - UNLOCK(bm); - - - if (retval != 0) - DBG("%s failed\n", __FUNCTION__); - - return retval; -} - - - - -void bmReleaseBuffers( struct intel_context *intel ) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - struct block *block, *tmp; - - foreach_s (block, tmp, &bm->referenced) { - - DBG("remove block %p from referenced list\n", block); - - if (block->on_hardware) { - /* Return to the on-hardware list. - */ - move_to_tail(&bm->on_hardware, block); - } - else if (block->fenced) { - struct block *s; - - /* Hmm - have to scan the fenced list to insert the - * buffers in order. This is O(nm), but rare and the - * numbers are low. - */ - foreach (s, &bm->fenced) { - if (FENCE_LTE(block->fence, s->fence)) - break; - } - - move_to_tail(s, block); - } - else { - /* Return to the lru list: - */ - move_to_tail(&block->pool->lru, block); - } - - block->referenced = 0; - } - } - UNLOCK(bm); -} - - -/* This functionality is used by the buffer manager, not really sure - * if we need to be exposing it in this way, probably libdrm will - * offer equivalent calls. - * - * For now they can stay, but will likely change/move before final: - */ -unsigned bmSetFence( struct intel_context *intel ) -{ - assert(intel->locked); - - /* Emit MI_FLUSH here: - */ - if (intel->bm->need_fence) { - - /* Emit a flush without using a batchbuffer. Can't rely on the - * batchbuffer at this level really. Would really prefer that - * the IRQ ioctly emitted the flush at the same time. - */ - GLuint dword[2]; - dword[0] = intel->vtbl.flush_cmd(); - dword[1] = 0; - intel_cmd_ioctl(intel, (char *)&dword, sizeof(dword)); - - intel->bm->last_fence = intelEmitIrqLocked( intel ); - - fence_blocks(intel, intel->bm->last_fence); - - intel->vtbl.note_fence(intel, intel->bm->last_fence); - intel->bm->need_fence = 0; - - if (intel->thrashing) { - intel->thrashing--; - if (!intel->thrashing) - DBG("not thrashing\n"); - } - - intel->bm->free_on_hardware = 0; - } - - return intel->bm->last_fence; -} - -unsigned bmSetFenceLock( struct intel_context *intel ) -{ - unsigned last; - LOCK(intel->bm); - last = bmSetFence(intel); - UNLOCK(intel->bm); - return last; -} -unsigned bmLockAndFence( struct intel_context *intel ) -{ - if (intel->bm->need_fence) { - LOCK_HARDWARE(intel); - LOCK(intel->bm); - bmSetFence(intel); - UNLOCK(intel->bm); - UNLOCK_HARDWARE(intel); - } - - return intel->bm->last_fence; -} - - -void bmFinishFence( struct intel_context *intel, unsigned fence ) -{ - if (!bmTestFence(intel, fence)) { - DBG("...wait on fence %d\n", fence); - intelWaitIrq( intel, fence ); - } - assert(bmTestFence(intel, fence)); - check_fenced(intel); -} - -void bmFinishFenceLock( struct intel_context *intel, unsigned fence ) -{ - LOCK(intel->bm); - bmFinishFence(intel, fence); - UNLOCK(intel->bm); -} - - -/* Specifically ignore texture memory sharing. - * -- just evict everything - * -- and wait for idle - */ -void bm_fake_NotifyContendedLockTake( struct intel_context *intel ) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - struct block *block, *tmp; - GLuint i; - - assert(is_empty_list(&bm->referenced)); - - bm->need_fence = 1; - bm->fail = 0; - bmFinishFence(intel, bmSetFence(intel)); - - assert(is_empty_list(&bm->fenced)); - assert(is_empty_list(&bm->on_hardware)); - - for (i = 0; i < bm->nr_pools; i++) { - if (!(bm->pool[i].flags & BM_NO_EVICT)) { - foreach_s(block, tmp, &bm->pool[i].lru) { - assert(bmTestFence(intel, block->fence)); - set_dirty(intel, block->buf); - } - } - } - } - UNLOCK(bm); -} - - - -void bmEvictAll( struct intel_context *intel ) -{ - struct bufmgr *bm = intel->bm; - - LOCK(bm); - { - struct block *block, *tmp; - GLuint i; - - DBG("%s\n", __FUNCTION__); - - assert(is_empty_list(&bm->referenced)); - - bm->need_fence = 1; - bm->fail = 0; - bmFinishFence(intel, bmSetFence(intel)); - - assert(is_empty_list(&bm->fenced)); - assert(is_empty_list(&bm->on_hardware)); - - for (i = 0; i < bm->nr_pools; i++) { - if (!(bm->pool[i].flags & BM_NO_EVICT)) { - foreach_s(block, tmp, &bm->pool[i].lru) { - assert(bmTestFence(intel, block->fence)); - set_dirty(intel, block->buf); - block->buf->block = NULL; - - free_block(intel, block); - } - } - } - } - UNLOCK(bm); -} - - -GLboolean bmError( struct intel_context *intel ) -{ - struct bufmgr *bm = intel->bm; - GLboolean retval; - - LOCK(bm); - { - retval = bm->fail; - } - UNLOCK(bm); - - return retval; -} - - -GLuint bmCtxId( struct intel_context *intel ) -{ - return intel->bm->ctxId; -} diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index 64885ed9b4b..d38cdf31cc6 100644..120000 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -1,243 +1 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "imports.h" -#include "intel_batchbuffer.h" -#include "intel_ioctl.h" -#include "bufmgr.h" - - -static void intel_batchbuffer_reset( struct intel_batchbuffer *batch ) -{ - assert(batch->map == NULL); - - batch->offset = (unsigned long)batch->ptr; - batch->offset = (batch->offset + 63) & ~63; - batch->ptr = (unsigned char *) batch->offset; - - if (BATCH_SZ - batch->offset < BATCH_REFILL) { - bmBufferData(batch->intel, - batch->buffer, - BATCH_SZ, - NULL, - 0); - batch->offset = 0; - batch->ptr = NULL; - } - - batch->flags = 0; -} - -static void intel_batchbuffer_reset_cb( struct intel_context *intel, - void *ptr ) -{ - struct intel_batchbuffer *batch = (struct intel_batchbuffer *)ptr; - assert(batch->map == NULL); - batch->flags = 0; - batch->offset = 0; - batch->ptr = NULL; -} - -GLubyte *intel_batchbuffer_map( struct intel_batchbuffer *batch ) -{ - if (!batch->map) { - batch->map = bmMapBuffer(batch->intel, batch->buffer, - BM_MEM_AGP|BM_MEM_LOCAL|BM_CLIENT|BM_WRITE); - batch->ptr += (unsigned long)batch->map; - } - - return batch->map; -} - -void intel_batchbuffer_unmap( struct intel_batchbuffer *batch ) -{ - if (batch->map) { - batch->ptr -= (unsigned long)batch->map; - batch->map = NULL; - bmUnmapBuffer(batch->intel, batch->buffer); - } -} - - - -/*====================================================================== - * Public functions - */ -struct intel_batchbuffer *intel_batchbuffer_alloc( struct intel_context *intel ) -{ - struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1); - - batch->intel = intel; - - bmGenBuffers(intel, "batch", 1, &batch->buffer, 12); - - bmBufferSetInvalidateCB(intel, batch->buffer, - intel_batchbuffer_reset_cb, - batch, - GL_TRUE); - - bmBufferData(batch->intel, - batch->buffer, - BATCH_SZ, - NULL, - 0); - - - return batch; -} - -void intel_batchbuffer_free( struct intel_batchbuffer *batch ) -{ - if (batch->map) - bmUnmapBuffer(batch->intel, batch->buffer); - - bmDeleteBuffers(batch->intel, 1, &batch->buffer); - free(batch); -} - - -#define MI_BATCH_BUFFER_END (0xA<<23) - - -GLboolean intel_batchbuffer_flush( struct intel_batchbuffer *batch ) -{ - struct intel_context *intel = batch->intel; - GLuint used = batch->ptr - (batch->map + batch->offset); - GLuint offset; - GLint retval = GL_TRUE; - - assert(intel->locked); - - if (used == 0) { - bmReleaseBuffers( batch->intel ); - return GL_TRUE; - } - - /* Add the MI_BATCH_BUFFER_END. Always add an MI_FLUSH - this is a - * performance drain that we would like to avoid. - */ - if (used & 4) { - ((int *)batch->ptr)[0] = MI_BATCH_BUFFER_END; - batch->ptr += 4; - used += 4; - } - else { - ((int *)batch->ptr)[0] = 0; - ((int *)batch->ptr)[1] = MI_BATCH_BUFFER_END; - - batch->ptr += 8; - used += 8; - } - - intel_batchbuffer_unmap(batch); - - /* Get the batch buffer offset: Must call bmBufferOffset() before - * bmValidateBuffers(), otherwise the buffer won't be on the inuse - * list. - */ - offset = bmBufferOffset(batch->intel, batch->buffer); - - if (bmValidateBuffers( batch->intel ) != 0) { - assert(intel->locked); - bmReleaseBuffers( batch->intel ); - retval = GL_FALSE; - goto out; - } - - - if (intel->aub_file) { - /* Send buffered commands to aubfile as a single packet. - */ - intel_batchbuffer_map(batch); - ((int *)batch->ptr)[-1] = intel->vtbl.flush_cmd(); - intel->vtbl.aub_commands(intel, - offset, /* Fulsim wierdness - don't adjust */ - batch->map + batch->offset, - used); - ((int *)batch->ptr)[-1] = MI_BATCH_BUFFER_END; - intel_batchbuffer_unmap(batch); - } - - - /* Fire the batch buffer, which was uploaded above: - */ - intel_batch_ioctl(batch->intel, - offset + batch->offset, - used); - - if (intel->aub_file && - intel->ctx.DrawBuffer->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) - intel->vtbl.aub_dump_bmp( intel, 0 ); - - /* Reset the buffer: - */ - out: - intel_batchbuffer_reset( batch ); - intel_batchbuffer_map( batch ); - - if (!retval) - DBG("%s failed\n", __FUNCTION__); - - return retval; -} - - - - - - - -void intel_batchbuffer_align( struct intel_batchbuffer *batch, - GLuint align, - GLuint sz ) -{ - unsigned long ptr = (unsigned long) batch->ptr; - unsigned long aptr = (ptr + align) & ~((unsigned long)align-1); - GLuint fixup = aptr - ptr; - - if (intel_batchbuffer_space(batch) < fixup + sz) - intel_batchbuffer_flush(batch); - else { - memset(batch->ptr, 0, fixup); - batch->ptr += fixup; - } -} - - - - -void intel_batchbuffer_data(struct intel_batchbuffer *batch, - const void *data, - GLuint bytes, - GLuint flags) -{ - assert((bytes & 3) == 0); - intel_batchbuffer_require_space(batch, bytes, flags); - __memcpy(batch->ptr, data, bytes); - batch->ptr += bytes; -} - +../intel/intel_batchbuffer.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h deleted file mode 100644 index 25e0a65e99f..00000000000 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ /dev/null @@ -1,127 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INTEL_BATCHBUFFER_H -#define INTEL_BATCHBUFFER_H - -#include "mtypes.h" -#include "bufmgr.h" - -struct intel_context; - -#define BATCH_SZ (16 * 1024) -#define BATCH_REFILL 4096 -#define BATCH_RESERVED 16 - -#define INTEL_BATCH_NO_CLIPRECTS 0x1 -#define INTEL_BATCH_CLIPRECTS 0x2 - -struct intel_batchbuffer { - struct intel_context *intel; - - struct buffer *buffer; - - GLuint flags; - unsigned long offset; - - GLubyte *map; - GLubyte *ptr; -}; - -struct intel_batchbuffer *intel_batchbuffer_alloc( struct intel_context *intel ); - -void intel_batchbuffer_free( struct intel_batchbuffer *batch ); - - -GLboolean intel_batchbuffer_flush( struct intel_batchbuffer *batch ); - -void intel_batchbuffer_unmap( struct intel_batchbuffer *batch ); -GLubyte *intel_batchbuffer_map( struct intel_batchbuffer *batch ); - - -/* Unlike bmBufferData, this currently requires the buffer be mapped. - * Consider it a convenience function wrapping multple - * intel_buffer_dword() calls. - */ -void intel_batchbuffer_data(struct intel_batchbuffer *batch, - const void *data, - GLuint bytes, - GLuint flags); - -void intel_batchbuffer_release_space(struct intel_batchbuffer *batch, - GLuint bytes); - - -/* Inline functions - might actually be better off with these - * non-inlined. Certainly better off switching all command packets to - * be passed as structs rather than dwords, but that's a little bit of - * work... - */ -static inline GLuint -intel_batchbuffer_space( struct intel_batchbuffer *batch ) -{ - return (BATCH_SZ - BATCH_RESERVED) - (batch->ptr - (batch->map + batch->offset)); -} - - -static inline void -intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, - GLuint dword) -{ - assert(batch->map); - assert(intel_batchbuffer_space(batch) >= 4); - *(GLuint *)(batch->ptr) = dword; - batch->ptr += 4; -} - -static inline void -intel_batchbuffer_require_space(struct intel_batchbuffer *batch, - GLuint sz, - GLuint flags) -{ - assert(sz < BATCH_SZ - 8); - if (intel_batchbuffer_space(batch) < sz || - (batch->flags != 0 && flags != 0 && batch->flags != flags)) - intel_batchbuffer_flush(batch); - - batch->flags |= flags; -} - -void intel_batchbuffer_align( struct intel_batchbuffer *batch, - GLuint align, - GLuint sz ); - - -/* Here are the crusty old macros, to be removed: - */ -#define BATCH_LOCALS -#define BEGIN_BATCH(n, flags) intel_batchbuffer_require_space(intel->batch, n*4, flags) -#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) -#define ADVANCE_BATCH() do { } while(0) - - -#endif diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index f88cbb2328d..dd6c8d17c28 100644..120000 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -1,617 +1 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include <stdio.h> -#include <errno.h> - -#include "mtypes.h" -#include "context.h" -#include "enums.h" -#include "vblank.h" - -#include "intel_reg.h" -#include "intel_batchbuffer.h" -#include "intel_context.h" -#include "intel_blit.h" -#include "intel_regions.h" -#include "intel_structs.h" - -#include "bufmgr.h" - - - - -/* - * Copy the back buffer to the front buffer. - */ -void intelCopyBuffer( const __DRIdrawablePrivate *dPriv, - const drm_clip_rect_t *rect ) -{ - struct intel_context *intel; - GLboolean missed_target; - int64_t ust; - - DBG("%s\n", __FUNCTION__); - - assert(dPriv); - assert(dPriv->driContextPriv); - assert(dPriv->driContextPriv->driverPrivate); - - intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate; - intelFlush( &intel->ctx ); - - - bmFinishFenceLock(intel, intel->last_swap_fence); - - /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets - * should work regardless. - */ - LOCK_HARDWARE( intel ); - - if (!rect) - { - UNLOCK_HARDWARE( intel ); - driWaitForVBlank( dPriv, &intel->vbl_seq, intel->vblank_flags, & missed_target ); - LOCK_HARDWARE( intel ); - } - - { - intelScreenPrivate *intelScreen = intel->intelScreen; - __DRIdrawablePrivate *dPriv = intel->driDrawable; - int nbox = dPriv->numClipRects; - drm_clip_rect_t *pbox = dPriv->pClipRects; - int cpp = intelScreen->cpp; - struct intel_region *src, *dst; - int BR13, CMD; - int i; - int src_pitch, dst_pitch; - - if (intel->sarea->pf_current_page == 0) { - dst = intel->front_region; - src = intel->back_region; - } - else { - assert(0); - src = intel->front_region; - dst = intel->back_region; - } - - src_pitch = src->pitch * src->cpp; - dst_pitch = dst->pitch * dst->cpp; - - if (cpp == 2) { - BR13 = (0xCC << 16) | (1<<24); - CMD = XY_SRC_COPY_BLT_CMD; - } - else { - BR13 = (0xCC << 16) | (1<<24) | (1<<25); - CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | - XY_SRC_COPY_BLT_WRITE_RGB); - } - - if (src->tiled) { - CMD |= XY_SRC_TILED; - src_pitch /= 4; - } - - if (dst->tiled) { - CMD |= XY_DST_TILED; - dst_pitch /= 4; - } - - for (i = 0 ; i < nbox; i++, pbox++) - { - drm_clip_rect_t tmp = *pbox; - - if (rect) { - if (!intel_intersect_cliprects(&tmp, &tmp, rect)) - continue; - } - - - if (tmp.x1 > tmp.x2 || - tmp.y1 > tmp.y2 || - tmp.x2 > intelScreen->width || - tmp.y2 > intelScreen->height) - continue; - - BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( CMD ); - OUT_BATCH( dst_pitch | BR13 ); - OUT_BATCH( (tmp.y1 << 16) | tmp.x1 ); - OUT_BATCH( (tmp.y2 << 16) | tmp.x2 ); - OUT_BATCH( bmBufferOffset(intel, dst->buffer) ); - OUT_BATCH( (tmp.y1 << 16) | tmp.x1 ); - OUT_BATCH( src_pitch ); - OUT_BATCH( bmBufferOffset(intel, src->buffer) ); - ADVANCE_BATCH(); - } - } - - intel_batchbuffer_flush( intel->batch ); - intel->second_last_swap_fence = intel->last_swap_fence; - intel->last_swap_fence = bmSetFenceLock( intel ); - UNLOCK_HARDWARE( intel ); - - if (!rect) - { - intel->swap_count++; - (*dri_interface->getUST)(&ust); - if (missed_target) { - intel->swap_missed_count++; - intel->swap_missed_ust = ust - intel->swap_ust; - } - - intel->swap_ust = ust; - } - -} - - - - -void intelEmitFillBlit( struct intel_context *intel, - GLuint cpp, - GLshort dst_pitch, - struct buffer *dst_buffer, - GLuint dst_offset, - GLboolean dst_tiled, - GLshort x, GLshort y, - GLshort w, GLshort h, - GLuint color ) -{ - GLuint BR13, CMD; - BATCH_LOCALS; - - dst_pitch *= cpp; - - switch(cpp) { - case 1: - case 2: - case 3: - BR13 = (0xF0 << 16) | (1<<24); - CMD = XY_COLOR_BLT_CMD; - break; - case 4: - BR13 = (0xF0 << 16) | (1<<24) | (1<<25); - CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA | - XY_COLOR_BLT_WRITE_RGB); - break; - default: - return; - } - - if (dst_tiled) { - CMD |= XY_DST_TILED; - dst_pitch /= 4; - } - - BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( CMD ); - OUT_BATCH( dst_pitch | BR13 ); - OUT_BATCH( (y << 16) | x ); - OUT_BATCH( ((y+h) << 16) | (x+w) ); - OUT_BATCH( bmBufferOffset(intel, dst_buffer) + dst_offset ); - OUT_BATCH( color ); - ADVANCE_BATCH(); -} - -static GLuint translate_raster_op(GLenum logicop) -{ - switch(logicop) { - case GL_CLEAR: return 0x00; - case GL_AND: return 0x88; - case GL_AND_REVERSE: return 0x44; - case GL_COPY: return 0xCC; - case GL_AND_INVERTED: return 0x22; - case GL_NOOP: return 0xAA; - case GL_XOR: return 0x66; - case GL_OR: return 0xEE; - case GL_NOR: return 0x11; - case GL_EQUIV: return 0x99; - case GL_INVERT: return 0x55; - case GL_OR_REVERSE: return 0xDD; - case GL_COPY_INVERTED: return 0x33; - case GL_OR_INVERTED: return 0xBB; - case GL_NAND: return 0x77; - case GL_SET: return 0xFF; - default: return 0; - } -} - - -/* Copy BitBlt - */ -void intelEmitCopyBlit( struct intel_context *intel, - GLuint cpp, - GLshort src_pitch, - struct buffer *src_buffer, - GLuint src_offset, - GLboolean src_tiled, - GLshort dst_pitch, - struct buffer *dst_buffer, - GLuint dst_offset, - GLboolean dst_tiled, - GLshort src_x, GLshort src_y, - GLshort dst_x, GLshort dst_y, - GLshort w, GLshort h, - GLenum logic_op ) -{ - GLuint CMD, BR13; - int dst_y2 = dst_y + h; - int dst_x2 = dst_x + w; - BATCH_LOCALS; - - - DBG("%s src:buf(%d)/%d %d,%d dst:buf(%d)/%d %d,%d sz:%dx%d op:%d\n", - __FUNCTION__, - src_buffer, src_pitch, src_x, src_y, - dst_buffer, dst_pitch, dst_x, dst_y, - w,h,logic_op); - - assert( logic_op - GL_CLEAR >= 0 ); - assert( logic_op - GL_CLEAR < 0x10 ); - - src_pitch *= cpp; - dst_pitch *= cpp; - - switch(cpp) { - case 1: - case 2: - case 3: - BR13 = (translate_raster_op(logic_op) << 16) | (1<<24); - CMD = XY_SRC_COPY_BLT_CMD; - break; - case 4: - BR13 = (translate_raster_op(logic_op) << 16) | (1<<24) | - (1<<25); - CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | - XY_SRC_COPY_BLT_WRITE_RGB); - break; - default: - return; - } - - if (src_tiled) { - CMD |= XY_SRC_TILED; - src_pitch /= 4; - } - - if (dst_tiled) { - CMD |= XY_DST_TILED; - dst_pitch /= 4; - } - - if (dst_y2 < dst_y || - dst_x2 < dst_x) { - return; - } - - dst_pitch &= 0xffff; - src_pitch &= 0xffff; - - /* Initial y values don't seem to work with negative pitches. If - * we adjust the offsets manually (below), it seems to work fine. - * - * On the other hand, if we always adjust, the hardware doesn't - * know which blit directions to use, so overlapping copypixels get - * the wrong result. - */ - if (dst_pitch > 0 && src_pitch > 0) { - BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( CMD ); - OUT_BATCH( dst_pitch | BR13 ); - OUT_BATCH( (dst_y << 16) | dst_x ); - OUT_BATCH( (dst_y2 << 16) | dst_x2 ); - OUT_BATCH( bmBufferOffset(intel, dst_buffer) + dst_offset ); - OUT_BATCH( (src_y << 16) | src_x ); - OUT_BATCH( src_pitch ); - OUT_BATCH( bmBufferOffset(intel, src_buffer) + src_offset ); - ADVANCE_BATCH(); - } - else { - BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( CMD ); - OUT_BATCH( (dst_pitch & 0xffff) | BR13 ); - OUT_BATCH( (0 << 16) | dst_x ); - OUT_BATCH( (h << 16) | dst_x2 ); - OUT_BATCH( bmBufferOffset(intel, dst_buffer) + dst_offset + dst_y * dst_pitch ); - OUT_BATCH( (0 << 16) | src_x ); - OUT_BATCH( (src_pitch & 0xffff) ); - OUT_BATCH( bmBufferOffset(intel, src_buffer) + src_offset + src_y * src_pitch ); - ADVANCE_BATCH(); - } -} - - - -void intelClearWithBlit(GLcontext *ctx, GLbitfield flags) -{ - struct intel_context *intel = intel_context( ctx ); - intelScreenPrivate *intelScreen = intel->intelScreen; - GLuint clear_depth, clear_color; - GLint cx, cy, cw, ch; - GLint cpp = intelScreen->cpp; - GLboolean all; - GLint i; - struct intel_region *front = intel->front_region; - struct intel_region *back = intel->back_region; - struct intel_region *depth = intel->depth_region; - GLuint BR13, FRONT_CMD, BACK_CMD, DEPTH_CMD; - GLuint front_pitch; - GLuint back_pitch; - GLuint depth_pitch; - BATCH_LOCALS; - - - clear_color = intel->ClearColor; - clear_depth = 0; - - if (flags & BUFFER_BIT_DEPTH) { - clear_depth = (GLuint)(ctx->Depth.Clear * intel->ClearDepth); - } - - if (flags & BUFFER_BIT_STENCIL) { - clear_depth |= (ctx->Stencil.Clear & 0xff) << 24; - } - - switch(cpp) { - case 2: - BR13 = (0xF0 << 16) | (1<<24); - BACK_CMD = FRONT_CMD = XY_COLOR_BLT_CMD; - DEPTH_CMD = XY_COLOR_BLT_CMD; - break; - case 4: - BR13 = (0xF0 << 16) | (1<<24) | (1<<25); - BACK_CMD = FRONT_CMD = (XY_COLOR_BLT_CMD | - XY_COLOR_BLT_WRITE_ALPHA | - XY_COLOR_BLT_WRITE_RGB); - DEPTH_CMD = XY_COLOR_BLT_CMD; - if (flags & BUFFER_BIT_DEPTH) DEPTH_CMD |= XY_COLOR_BLT_WRITE_RGB; - if (flags & BUFFER_BIT_STENCIL) DEPTH_CMD |= XY_COLOR_BLT_WRITE_ALPHA; - break; - default: - return; - } - - - - intelFlush( &intel->ctx ); - LOCK_HARDWARE( intel ); - { - /* get clear bounds after locking */ - cx = ctx->DrawBuffer->_Xmin; - cy = ctx->DrawBuffer->_Ymin; - ch = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin; - cw = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin; - all = (cw == ctx->DrawBuffer->Width && ch == ctx->DrawBuffer->Height); - - /* flip top to bottom */ - cy = intel->driDrawable->h - cy - ch; - cx = cx + intel->drawX; - cy += intel->drawY; - - /* adjust for page flipping */ - if ( intel->sarea->pf_current_page == 0 ) { - front = intel->front_region; - back = intel->back_region; - } - else { - back = intel->front_region; - front = intel->back_region; - } - - front_pitch = front->pitch * front->cpp; - back_pitch = back->pitch * back->cpp; - depth_pitch = depth->pitch * depth->cpp; - - if (front->tiled) { - FRONT_CMD |= XY_DST_TILED; - front_pitch /= 4; - } - - if (back->tiled) { - BACK_CMD |= XY_DST_TILED; - back_pitch /= 4; - } - - if (depth->tiled) { - DEPTH_CMD |= XY_DST_TILED; - depth_pitch /= 4; - } - - for (i = 0 ; i < intel->numClipRects ; i++) - { - drm_clip_rect_t *box = &intel->pClipRects[i]; - drm_clip_rect_t b; - - if (!all) { - GLint x = box->x1; - GLint y = box->y1; - GLint w = box->x2 - x; - GLint h = box->y2 - y; - - if (x < cx) w -= cx - x, x = cx; - if (y < cy) h -= cy - y, y = cy; - if (x + w > cx + cw) w = cx + cw - x; - if (y + h > cy + ch) h = cy + ch - y; - if (w <= 0) continue; - if (h <= 0) continue; - - b.x1 = x; - b.y1 = y; - b.x2 = x + w; - b.y2 = y + h; - } else { - b = *box; - } - - - if (b.x1 > b.x2 || - b.y1 > b.y2 || - b.x2 > intelScreen->width || - b.y2 > intelScreen->height) - continue; - - if ( flags & BUFFER_BIT_FRONT_LEFT ) { - BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( FRONT_CMD ); - OUT_BATCH( front_pitch | BR13 ); - OUT_BATCH( (b.y1 << 16) | b.x1 ); - OUT_BATCH( (b.y2 << 16) | b.x2 ); - OUT_BATCH( bmBufferOffset(intel, front->buffer) ); - OUT_BATCH( clear_color ); - ADVANCE_BATCH(); - } - - if ( flags & BUFFER_BIT_BACK_LEFT ) { - BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( BACK_CMD ); - OUT_BATCH( back_pitch | BR13 ); - OUT_BATCH( (b.y1 << 16) | b.x1 ); - OUT_BATCH( (b.y2 << 16) | b.x2 ); - OUT_BATCH( bmBufferOffset(intel, back->buffer) ); - OUT_BATCH( clear_color ); - ADVANCE_BATCH(); - } - - if ( flags & (BUFFER_BIT_STENCIL | BUFFER_BIT_DEPTH) ) { - BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); - OUT_BATCH( DEPTH_CMD ); - OUT_BATCH( depth_pitch | BR13 ); - OUT_BATCH( (b.y1 << 16) | b.x1 ); - OUT_BATCH( (b.y2 << 16) | b.x2 ); - OUT_BATCH( bmBufferOffset(intel, depth->buffer) ); - OUT_BATCH( clear_depth ); - ADVANCE_BATCH(); - } - } - } - intel_batchbuffer_flush( intel->batch ); - UNLOCK_HARDWARE( intel ); -} - - - -#define BR13_565 0x1 -#define BR13_8888 0x3 - - -void -intelEmitImmediateColorExpandBlit(struct intel_context *intel, - GLuint cpp, - GLubyte *src_bits, GLuint src_size, - GLuint fg_color, - GLshort dst_pitch, - struct buffer *dst_buffer, - GLuint dst_offset, - GLboolean dst_tiled, - GLshort x, GLshort y, - GLshort w, GLshort h, - GLenum logic_op) -{ - struct xy_setup_blit setup; - struct xy_text_immediate_blit text; - int dwords = ((src_size + 7) & ~7) / 4; - - assert( logic_op - GL_CLEAR >= 0 ); - assert( logic_op - GL_CLEAR < 0x10 ); - - if (w < 0 || h < 0) - return; - - dst_pitch *= cpp; - - if (dst_tiled) - dst_pitch /= 4; - - DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n", - __FUNCTION__, - dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords); - - memset(&setup, 0, sizeof(setup)); - - setup.br0.client = CLIENT_2D; - setup.br0.opcode = OPCODE_XY_SETUP_BLT; - setup.br0.write_alpha = (cpp == 4); - setup.br0.write_rgb = (cpp == 4); - setup.br0.dst_tiled = dst_tiled; - setup.br0.length = (sizeof(setup) / sizeof(int)) - 2; - - setup.br13.dest_pitch = dst_pitch; - setup.br13.rop = translate_raster_op(logic_op); - setup.br13.color_depth = (cpp == 4) ? BR13_8888 : BR13_565; - setup.br13.clipping_enable = 0; - setup.br13.mono_source_transparency = 1; - - setup.dw2.clip_y1 = 0; - setup.dw2.clip_x1 = 0; - setup.dw3.clip_y2 = 100; - setup.dw3.clip_x2 = 100; - - setup.dest_base_addr = bmBufferOffset(intel, dst_buffer) + dst_offset; - setup.background_color = 0; - setup.foreground_color = fg_color; - setup.pattern_base_addr = 0; - - memset(&text, 0, sizeof(text)); - text.dw0.client = CLIENT_2D; - text.dw0.opcode = OPCODE_XY_TEXT_IMMEDIATE_BLT; - text.dw0.pad0 = 0; - text.dw0.byte_packed = 1; /* ?maybe? */ - text.dw0.pad1 = 0; - text.dw0.dst_tiled = dst_tiled; - text.dw0.pad2 = 0; - text.dw0.length = (sizeof(text)/sizeof(int)) - 2 + dwords; - text.dw1.dest_y1 = y; /* duplicates info in setup blit */ - text.dw1.dest_x1 = x; - text.dw2.dest_y2 = y + h; - text.dw2.dest_x2 = x + w; - - intel_batchbuffer_require_space( intel->batch, - sizeof(setup) + - sizeof(text) + - dwords, - INTEL_BATCH_NO_CLIPRECTS ); - - intel_batchbuffer_data( intel->batch, - &setup, - sizeof(setup), - INTEL_BATCH_NO_CLIPRECTS ); - - intel_batchbuffer_data( intel->batch, - &text, - sizeof(text), - INTEL_BATCH_NO_CLIPRECTS ); - - intel_batchbuffer_data( intel->batch, - src_bits, - dwords * 4, - INTEL_BATCH_NO_CLIPRECTS ); -} - +../intel/intel_blit.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_blit.h b/src/mesa/drivers/dri/i965/intel_blit.h deleted file mode 100644 index e361545c8fa..00000000000 --- a/src/mesa/drivers/dri/i965/intel_blit.h +++ /dev/null @@ -1,78 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INTEL_BLIT_H -#define INTEL_BLIT_H - -#include "intel_context.h" -#include "intel_ioctl.h" - -struct buffer; - -extern void intelCopyBuffer( const __DRIdrawablePrivate *dpriv, - const drm_clip_rect_t *rect ); -extern void intelClearWithBlit(GLcontext *ctx, GLbitfield mask); - -extern void intelEmitCopyBlit( struct intel_context *intel, - GLuint cpp, - GLshort src_pitch, - struct buffer *src_buffer, - GLuint src_offset, - GLboolean src_tiled, - GLshort dst_pitch, - struct buffer *dst_buffer, - GLuint dst_offset, - GLboolean dst_tiled, - GLshort srcx, GLshort srcy, - GLshort dstx, GLshort dsty, - GLshort w, GLshort h, - GLenum logic_op ); - -extern void intelEmitFillBlit( struct intel_context *intel, - GLuint cpp, - GLshort dst_pitch, - struct buffer *dst_buffer, - GLuint dst_offset, - GLboolean dst_tiled, - GLshort x, GLshort y, - GLshort w, GLshort h, - GLuint color ); - -void -intelEmitImmediateColorExpandBlit(struct intel_context *intel, - GLuint cpp, - GLubyte *src_bits, GLuint src_size, - GLuint fg_color, - GLshort dst_pitch, - struct buffer *dst_buffer, - GLuint dst_offset, - GLboolean dst_tiled, - GLshort dst_x, GLshort dst_y, - GLshort w, GLshort h, - GLenum logic_op ); - -#endif diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c index 015e433fd7a..e06dd3c8d3c 100644..120000 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c @@ -1,207 +1 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "imports.h" -#include "mtypes.h" -#include "bufferobj.h" - -#include "intel_context.h" -#include "intel_buffer_objects.h" -#include "bufmgr.h" - - -/** - * There is some duplication between mesa's bufferobjects and our - * bufmgr buffers. Both have an integer handle and a hashtable to - * lookup an opaque structure. It would be nice if the handles and - * internal structure where somehow shared. - */ -static struct gl_buffer_object *intel_bufferobj_alloc( GLcontext *ctx, - GLuint name, - GLenum target ) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_buffer_object *obj = MALLOC_STRUCT(intel_buffer_object); - - _mesa_initialize_buffer_object(&obj->Base, name, target); - - /* XXX: We generate our own handle, which is different to 'name' above. - */ - bmGenBuffers(intel, "bufferobj", 1, &obj->buffer, 6); - assert(obj->buffer); - - return &obj->Base; -} - - -/** - * Deallocate/free a vertex/pixel buffer object. - * Called via glDeleteBuffersARB(). - */ -static void intel_bufferobj_free( GLcontext *ctx, - struct gl_buffer_object *obj ) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - - assert(intel_obj); - - if (intel_obj->buffer) - bmDeleteBuffers( intel, 1, &intel_obj->buffer ); - - _mesa_free(intel_obj); -} - - - -/** - * Allocate space for and store data in a buffer object. Any data that was - * previously stored in the buffer object is lost. If data is NULL, - * memory will be allocated, but no copy will occur. - * Called via glBufferDataARB(). - */ -static void intel_bufferobj_data( GLcontext *ctx, - GLenum target, - GLsizeiptrARB size, - const GLvoid *data, - GLenum usage, - struct gl_buffer_object *obj ) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - - /* XXX: do something useful with 'usage' (eg. populate flags - * argument below) - */ - assert(intel_obj); - - obj->Size = size; - obj->Usage = usage; - - bmBufferDataAUB(intel, intel_obj->buffer, size, data, 0, - 0, 0); -} - - -/** - * Replace data in a subrange of buffer object. If the data range - * specified by size + offset extends beyond the end of the buffer or - * if data is NULL, no copy is performed. - * Called via glBufferSubDataARB(). - */ -static void intel_bufferobj_subdata( GLcontext *ctx, - GLenum target, - GLintptrARB offset, - GLsizeiptrARB size, - const GLvoid * data, - struct gl_buffer_object * obj ) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - - assert(intel_obj); - bmBufferSubDataAUB(intel, intel_obj->buffer, offset, size, data, 0, 0); -} - - -/** - * Called via glGetBufferSubDataARB(). - */ -static void intel_bufferobj_get_subdata( GLcontext *ctx, - GLenum target, - GLintptrARB offset, - GLsizeiptrARB size, - GLvoid * data, - struct gl_buffer_object * obj ) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - - assert(intel_obj); - bmBufferGetSubData(intel, intel_obj->buffer, offset, size, data); -} - - - -/** - * Called via glMapBufferARB(). - */ -static void *intel_bufferobj_map( GLcontext *ctx, - GLenum target, - GLenum access, - struct gl_buffer_object *obj ) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - - /* XXX: Translate access to flags arg below: - */ - assert(intel_obj); - assert(intel_obj->buffer); - obj->Pointer = bmMapBuffer(intel, intel_obj->buffer, 0); - return obj->Pointer; -} - - -/** - * Called via glMapBufferARB(). - */ -static GLboolean intel_bufferobj_unmap( GLcontext *ctx, - GLenum target, - struct gl_buffer_object *obj ) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - - assert(intel_obj); - assert(intel_obj->buffer); - assert(obj->Pointer); - bmUnmapBufferAUB(intel, intel_obj->buffer, 0, 0); - obj->Pointer = NULL; - return GL_TRUE; -} - -struct buffer *intel_bufferobj_buffer( const struct intel_buffer_object *intel_obj ) -{ - assert(intel_obj->Base.Name); - assert(intel_obj->buffer); - return intel_obj->buffer; -} - -void intel_bufferobj_init( struct intel_context *intel ) -{ - GLcontext *ctx = &intel->ctx; - - ctx->Driver.NewBufferObject = intel_bufferobj_alloc; - ctx->Driver.DeleteBuffer = intel_bufferobj_free; - ctx->Driver.BufferData = intel_bufferobj_data; - ctx->Driver.BufferSubData = intel_bufferobj_subdata; - ctx->Driver.GetBufferSubData = intel_bufferobj_get_subdata; - ctx->Driver.MapBuffer = intel_bufferobj_map; - ctx->Driver.UnmapBuffer = intel_bufferobj_unmap; -} +../intel/intel_buffer_objects.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.h b/src/mesa/drivers/dri/i965/intel_buffer_objects.h deleted file mode 100644 index 4b38803e576..00000000000 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.h +++ /dev/null @@ -1,70 +0,0 @@ - /************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INTEL_BUFFEROBJ_H -#define INTEL_BUFFEROBJ_H - -#include "mtypes.h" - -struct intel_context; -struct gl_buffer_object; - - -/** - * Intel vertex/pixel buffer object, derived from Mesa's gl_buffer_object. - */ -struct intel_buffer_object { - struct gl_buffer_object Base; - struct buffer *buffer; /* the low-level buffer manager's buffer handle */ -}; - - -/* Get the bm buffer associated with a GL bufferobject: - */ -struct buffer *intel_bufferobj_buffer( const struct intel_buffer_object *obj ); - -/* Hook the bufferobject implementation into mesa: - */ -void intel_bufferobj_init( struct intel_context *intel ); - - - -/* Are the obj->Name tests necessary? Unfortunately yes, mesa - * allocates a couple of gl_buffer_object structs statically, and - * the Name == 0 test is the only way to identify them and avoid - * casting them erroneously to our structs. - */ -static inline struct intel_buffer_object * -intel_buffer_object( struct gl_buffer_object *obj ) -{ - if (obj->Name) - return (struct intel_buffer_object *)obj; - else - return NULL; -} - -#endif diff --git a/src/mesa/drivers/dri/i965/intel_buffers.c b/src/mesa/drivers/dri/i965/intel_buffers.c index d155c039d77..c86daa49f47 100644..120000 --- a/src/mesa/drivers/dri/i965/intel_buffers.c +++ b/src/mesa/drivers/dri/i965/intel_buffers.c @@ -1,547 +1 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "intel_screen.h" -#include "intel_context.h" -#include "intel_blit.h" -#include "intel_regions.h" -#include "intel_batchbuffer.h" -#include "context.h" -#include "framebuffer.h" -#include "macros.h" -#include "swrast/swrast.h" - -GLboolean intel_intersect_cliprects( drm_clip_rect_t *dst, - const drm_clip_rect_t *a, - const drm_clip_rect_t *b ) -{ - dst->x1 = MAX2(a->x1, b->x1); - dst->x2 = MIN2(a->x2, b->x2); - dst->y1 = MAX2(a->y1, b->y1); - dst->y2 = MIN2(a->y2, b->y2); - - return (dst->x1 <= dst->x2 && - dst->y1 <= dst->y2); -} - -struct intel_region *intel_drawbuf_region( struct intel_context *intel ) -{ - switch (intel->ctx.DrawBuffer->_ColorDrawBufferMask[0]) { - case BUFFER_BIT_FRONT_LEFT: - return intel->front_region; - case BUFFER_BIT_BACK_LEFT: - return intel->back_region; - default: - /* Not necessary to fallback - could handle either NONE or - * FRONT_AND_BACK cases below. - */ - return NULL; - } -} - -struct intel_region *intel_readbuf_region( struct intel_context *intel ) -{ - GLcontext *ctx = &intel->ctx; - - /* This will have to change to support EXT_fbo's, but is correct - * for now: - */ - switch (ctx->ReadBuffer->_ColorReadBufferIndex) { - case BUFFER_FRONT_LEFT: - return intel->front_region; - case BUFFER_BACK_LEFT: - return intel->back_region; - default: - assert(0); - return NULL; - } -} - - - -static void intelBufferSize(GLframebuffer *buffer, - GLuint *width, - GLuint *height) -{ - GET_CURRENT_CONTEXT(ctx); - struct intel_context *intel = intel_context(ctx); - /* Need to lock to make sure the driDrawable is uptodate. This - * information is used to resize Mesa's software buffers, so it has - * to be correct. - */ - LOCK_HARDWARE(intel); - if (intel->driDrawable) { - *width = intel->driDrawable->w; - *height = intel->driDrawable->h; - } - else { - *width = 0; - *height = 0; - } - UNLOCK_HARDWARE(intel); -} - - -static void intelSetFrontClipRects( struct intel_context *intel ) -{ - __DRIdrawablePrivate *dPriv = intel->driDrawable; - - if (!dPriv) return; - - intel->numClipRects = dPriv->numClipRects; - intel->pClipRects = dPriv->pClipRects; - intel->drawX = dPriv->x; - intel->drawY = dPriv->y; -} - - -static void intelSetBackClipRects( struct intel_context *intel ) -{ - __DRIdrawablePrivate *dPriv = intel->driDrawable; - - if (!dPriv) return; - - if (intel->sarea->pf_enabled == 0 && dPriv->numBackClipRects == 0) { - intel->numClipRects = dPriv->numClipRects; - intel->pClipRects = dPriv->pClipRects; - intel->drawX = dPriv->x; - intel->drawY = dPriv->y; - } else { - intel->numClipRects = dPriv->numBackClipRects; - intel->pClipRects = dPriv->pBackClipRects; - intel->drawX = dPriv->backX; - intel->drawY = dPriv->backY; - - if (dPriv->numBackClipRects == 1 && - dPriv->x == dPriv->backX && - dPriv->y == dPriv->backY) { - - /* Repeat the calculation of the back cliprect dimensions here - * as early versions of dri.a in the Xserver are incorrect. Try - * very hard not to restrict future versions of dri.a which - * might eg. allocate truly private back buffers. - */ - int x1, y1; - int x2, y2; - - x1 = dPriv->x; - y1 = dPriv->y; - x2 = dPriv->x + dPriv->w; - y2 = dPriv->y + dPriv->h; - - if (x1 < 0) x1 = 0; - if (y1 < 0) y1 = 0; - if (x2 > intel->intelScreen->width) x2 = intel->intelScreen->width; - if (y2 > intel->intelScreen->height) y2 = intel->intelScreen->height; - - if (x1 == dPriv->pBackClipRects[0].x1 && - y1 == dPriv->pBackClipRects[0].y1) { - - dPriv->pBackClipRects[0].x2 = x2; - dPriv->pBackClipRects[0].y2 = y2; - } - } - } -} - - -void intelWindowMoved( struct intel_context *intel ) -{ - __DRIdrawablePrivate *dPriv = intel->driDrawable; - - if (!intel->ctx.DrawBuffer) { - intelSetFrontClipRects( intel ); - } - else { - switch (intel->ctx.DrawBuffer->_ColorDrawBufferMask[0]) { - case BUFFER_BIT_FRONT_LEFT: - intelSetFrontClipRects( intel ); - break; - case BUFFER_BIT_BACK_LEFT: - intelSetBackClipRects( intel ); - break; - default: - /* glDrawBuffer(GL_NONE or GL_FRONT_AND_BACK): software fallback */ - intelSetFrontClipRects( intel ); - } - } - - _mesa_resize_framebuffer(&intel->ctx, - (GLframebuffer*)dPriv->driverPrivate, - dPriv->w, dPriv->h); - - /* Set state we know depends on drawable parameters: - */ - { - GLcontext *ctx = &intel->ctx; - - if (ctx->Driver.Scissor) - ctx->Driver.Scissor( ctx, ctx->Scissor.X, ctx->Scissor.Y, - ctx->Scissor.Width, ctx->Scissor.Height ); - - if (ctx->Driver.DepthRange) - ctx->Driver.DepthRange( ctx, - ctx->Viewport.Near, - ctx->Viewport.Far ); - - intel->NewGLState |= _NEW_SCISSOR; - } - - /* This works because the lock is always grabbed before emitting - * commands and commands are always flushed prior to releasing - * the lock. - */ - intel->NewGLState |= _NEW_WINDOW_POS; -} - - - -/* A true meta version of this would be very simple and additionally - * machine independent. Maybe we'll get there one day. - */ -static void intelClearWithTris(struct intel_context *intel, - GLbitfield mask) -{ - GLcontext *ctx = &intel->ctx; - drm_clip_rect_t clear; - GLint cx, cy, cw, ch; - - if (INTEL_DEBUG & DEBUG_DRI) - _mesa_printf("%s %x\n", __FUNCTION__, mask); - - { - - intel->vtbl.install_meta_state(intel); - - /* Get clear bounds after locking */ - cx = ctx->DrawBuffer->_Xmin; - cy = ctx->DrawBuffer->_Ymin; - cw = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin; - ch = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin; - - clear.x1 = cx; - clear.y1 = cy; - clear.x2 = cx + cw; - clear.y2 = cy + ch; - - /* Back and stencil cliprects are the same. Try and do both - * buffers at once: - */ - if (mask & (BUFFER_BIT_BACK_LEFT|BUFFER_BIT_STENCIL|BUFFER_BIT_DEPTH)) { - intel->vtbl.meta_draw_region(intel, - intel->back_region, - intel->depth_region ); - - if (mask & BUFFER_BIT_BACK_LEFT) - intel->vtbl.meta_color_mask(intel, GL_TRUE ); - else - intel->vtbl.meta_color_mask(intel, GL_FALSE ); - - if (mask & BUFFER_BIT_STENCIL) - intel->vtbl.meta_stencil_replace( intel, - intel->ctx.Stencil.WriteMask[0], - intel->ctx.Stencil.Clear); - else - intel->vtbl.meta_no_stencil_write(intel); - - if (mask & BUFFER_BIT_DEPTH) - intel->vtbl.meta_depth_replace( intel ); - else - intel->vtbl.meta_no_depth_write(intel); - - /* XXX: Using INTEL_BATCH_NO_CLIPRECTS here is dangerous as the - * drawing origin may not be correctly emitted. - */ - intel->vtbl.meta_draw_quad(intel, - clear.x1, clear.x2, - clear.y1, clear.y2, - intel->ctx.Depth.Clear, - intel->clear_chan[0], - intel->clear_chan[1], - intel->clear_chan[2], - intel->clear_chan[3], - 0, 0, 0, 0); - } - - /* Front may have different cliprects: - */ - if (mask & BUFFER_BIT_FRONT_LEFT) { - intel->vtbl.meta_no_depth_write(intel); - intel->vtbl.meta_no_stencil_write(intel); - intel->vtbl.meta_color_mask(intel, GL_TRUE ); - intel->vtbl.meta_draw_region(intel, - intel->front_region, - intel->depth_region); - - /* XXX: Using INTEL_BATCH_NO_CLIPRECTS here is dangerous as the - * drawing origin may not be correctly emitted. - */ - intel->vtbl.meta_draw_quad(intel, - clear.x1, clear.x2, - clear.y1, clear.y2, - 0, - intel->clear_chan[0], - intel->clear_chan[1], - intel->clear_chan[2], - intel->clear_chan[3], - 0, 0, 0, 0); - } - - intel->vtbl.leave_meta_state( intel ); - } -} - - - - - -static void intelClear(GLcontext *ctx, GLbitfield mask) -{ - struct intel_context *intel = intel_context( ctx ); - const GLuint colorMask = *((GLuint *) &ctx->Color.ColorMask); - GLbitfield tri_mask = 0; - GLbitfield blit_mask = 0; - GLbitfield swrast_mask = 0; - - if (INTEL_DEBUG & DEBUG_DRI) - fprintf(stderr, "%s %x\n", __FUNCTION__, mask); - - - if (mask & BUFFER_BIT_FRONT_LEFT) { - if (colorMask == ~0) { - blit_mask |= BUFFER_BIT_FRONT_LEFT; - } - else { - tri_mask |= BUFFER_BIT_FRONT_LEFT; - } - } - - if (mask & BUFFER_BIT_BACK_LEFT) { - if (colorMask == ~0) { - blit_mask |= BUFFER_BIT_BACK_LEFT; - } - else { - tri_mask |= BUFFER_BIT_BACK_LEFT; - } - } - - - if (mask & BUFFER_BIT_STENCIL) { - if (!intel->hw_stencil) { - swrast_mask |= BUFFER_BIT_STENCIL; - } - else if ((ctx->Stencil.WriteMask[0] & 0xff) != 0xff || - intel->depth_region->tiled) { - tri_mask |= BUFFER_BIT_STENCIL; - } - else { - blit_mask |= BUFFER_BIT_STENCIL; - } - } - - /* Do depth with stencil if possible to avoid 2nd pass over the - * same buffer. - */ - if (mask & BUFFER_BIT_DEPTH) { - if ((tri_mask & BUFFER_BIT_STENCIL) || - intel->depth_region->tiled) - tri_mask |= BUFFER_BIT_DEPTH; - else - blit_mask |= BUFFER_BIT_DEPTH; - } - - swrast_mask |= (mask & BUFFER_BIT_ACCUM); - - intelFlush( ctx ); - - if (blit_mask) - intelClearWithBlit( ctx, blit_mask ); - - if (tri_mask) - intelClearWithTris( intel, tri_mask ); - - if (swrast_mask) - _swrast_Clear( ctx, swrast_mask ); -} - - - - - - - -/* Flip the front & back buffers - */ -static void intelPageFlip( const __DRIdrawablePrivate *dPriv ) -{ -#if 0 - struct intel_context *intel; - int tmp, ret; - - if (INTEL_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s\n", __FUNCTION__); - - assert(dPriv); - assert(dPriv->driContextPriv); - assert(dPriv->driContextPriv->driverPrivate); - - intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate; - - intelFlush( &intel->ctx ); - LOCK_HARDWARE( intel ); - - if (dPriv->pClipRects) { - *(drm_clip_rect_t *)intel->sarea->boxes = dPriv->pClipRects[0]; - intel->sarea->nbox = 1; - } - - ret = drmCommandNone(intel->driFd, DRM_I830_FLIP); - if (ret) { - fprintf(stderr, "%s: %d\n", __FUNCTION__, ret); - UNLOCK_HARDWARE( intel ); - exit(1); - } - - tmp = intel->sarea->last_enqueue; - intelRefillBatchLocked( intel ); - UNLOCK_HARDWARE( intel ); - - - intelSetDrawBuffer( &intel->ctx, intel->ctx.Color.DriverDrawBuffer ); -#endif -} - - -void intelSwapBuffers( __DRIdrawablePrivate *dPriv ) -{ - if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { - struct intel_context *intel; - GLcontext *ctx; - intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate; - ctx = &intel->ctx; - if (ctx->Visual.doubleBufferMode) { - _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */ - if ( 0 /*intel->doPageFlip*/ ) { /* doPageFlip is never set !!! */ - intelPageFlip( dPriv ); - } else { - intelCopyBuffer( dPriv, NULL ); - } - if (intel->aub_file) { - intelFlush(ctx); - intel->vtbl.aub_dump_bmp( intel, 1 ); - - intel->aub_wrap = 1; - } - } - } else { - /* XXX this shouldn't be an error but we can't handle it for now */ - fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__); - } -} - -void intelCopySubBuffer( __DRIdrawablePrivate *dPriv, - int x, int y, int w, int h ) -{ - if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { - struct intel_context *intel = dPriv->driContextPriv->driverPrivate; - GLcontext *ctx = &intel->ctx; - - if (ctx->Visual.doubleBufferMode) { - drm_clip_rect_t rect; - rect.x1 = x + dPriv->x; - rect.y1 = (dPriv->h - y - h) + dPriv->y; - rect.x2 = rect.x1 + w; - rect.y2 = rect.y1 + h; - _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */ - intelCopyBuffer( dPriv, &rect ); - } - } else { - /* XXX this shouldn't be an error but we can't handle it for now */ - fprintf(stderr, "%s: drawable has no context!\n", __FUNCTION__); - } -} - - -static void intelDrawBuffer(GLcontext *ctx, GLenum mode ) -{ - struct intel_context *intel = intel_context(ctx); - int front = 0; - - if (!ctx->DrawBuffer) - return; - - switch ( ctx->DrawBuffer->_ColorDrawBufferMask[0] ) { - case BUFFER_BIT_FRONT_LEFT: - front = 1; - FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE ); - break; - case BUFFER_BIT_BACK_LEFT: - front = 0; - FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE ); - break; - default: - FALLBACK( intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE ); - return; - } - - if ( intel->sarea->pf_current_page == 1 ) - front ^= 1; - - intelSetFrontClipRects( intel ); - - - if (front) { - if (intel->draw_region != intel->front_region) { - intel_region_release(intel, &intel->draw_region); - intel_region_reference(&intel->draw_region, intel->front_region); - } - } else { - if (intel->draw_region != intel->back_region) { - intel_region_release(intel, &intel->draw_region); - intel_region_reference(&intel->draw_region, intel->back_region); - } - } - - intel->vtbl.set_draw_region( intel, - intel->draw_region, - intel->depth_region); -} - -static void intelReadBuffer( GLcontext *ctx, GLenum mode ) -{ - /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */ -} - - - -void intelInitBufferFuncs( struct dd_function_table *functions ) -{ - functions->Clear = intelClear; - functions->GetBufferSize = intelBufferSize; - functions->DrawBuffer = intelDrawBuffer; - functions->ReadBuffer = intelReadBuffer; -} +../intel/intel_buffers.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_context.c b/src/mesa/drivers/dri/i965/intel_context.c index 4f51fefe0f6..27a1cbb255e 100644..120000 --- a/src/mesa/drivers/dri/i965/intel_context.c +++ b/src/mesa/drivers/dri/i965/intel_context.c @@ -1,712 +1 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "glheader.h" -#include "context.h" -#include "matrix.h" -#include "simple_list.h" -#include "extensions.h" -#include "framebuffer.h" -#include "imports.h" -#include "points.h" - -#include "swrast/swrast.h" -#include "swrast_setup/swrast_setup.h" -#include "tnl/tnl.h" -#include "vbo/vbo.h" - -#include "tnl/t_pipeline.h" -#include "tnl/t_vertex.h" - -#include "drivers/common/driverfuncs.h" - -#include "intel_screen.h" - -#include "i830_dri.h" -#include "i830_common.h" - -#include "intel_tex.h" -#include "intel_span.h" -#include "intel_ioctl.h" -#include "intel_batchbuffer.h" -#include "intel_blit.h" -#include "intel_regions.h" -#include "intel_buffer_objects.h" - -#include "bufmgr.h" - -#include "utils.h" -#include "vblank.h" -#ifndef INTEL_DEBUG -int INTEL_DEBUG = (0); -#endif - -#define need_GL_ARB_multisample -#define need_GL_ARB_point_parameters -#define need_GL_ARB_texture_compression -#define need_GL_ARB_vertex_buffer_object -#define need_GL_ARB_vertex_program -#define need_GL_ARB_window_pos -#define need_GL_ARB_occlusion_query -#define need_GL_EXT_blend_color -#define need_GL_EXT_blend_equation_separate -#define need_GL_EXT_blend_func_separate -#define need_GL_EXT_blend_minmax -#define need_GL_EXT_cull_vertex -#define need_GL_EXT_fog_coord -#define need_GL_EXT_multi_draw_arrays -#define need_GL_EXT_secondary_color -#include "extension_helper.h" - -#ifndef VERBOSE -int VERBOSE = 0; -#endif - -/*************************************** - * Mesa's Driver Functions - ***************************************/ - -#define DRIVER_VERSION "4.1.3002" - -static const GLubyte *intelGetString( GLcontext *ctx, GLenum name ) -{ - const char * chipset; - static char buffer[128]; - - switch (name) { - case GL_VENDOR: - return (GLubyte *)"Tungsten Graphics, Inc"; - break; - - case GL_RENDERER: - switch (intel_context(ctx)->intelScreen->deviceID) { - case PCI_CHIP_I965_Q: - chipset = "Intel(R) 965Q"; - break; - case PCI_CHIP_I965_G: - case PCI_CHIP_I965_G_1: - chipset = "Intel(R) 965G"; - break; - case PCI_CHIP_I946_GZ: - chipset = "Intel(R) 946GZ"; - break; - case PCI_CHIP_I965_GM: - chipset = "Intel(R) 965GM"; - break; - case PCI_CHIP_I965_GME: - chipset = "Intel(R) 965GME/GLE"; - break; - default: - chipset = "Unknown Intel Chipset"; - } - - (void) driGetRendererString( buffer, chipset, DRIVER_VERSION, 0 ); - return (GLubyte *) buffer; - - default: - return NULL; - } -} - - -/** - * Extension strings exported by the intel driver. - * - * \note - * It appears that ARB_texture_env_crossbar has "disappeared" compared to the - * old i830-specific driver. - */ -const struct dri_extension card_extensions[] = -{ - { "GL_ARB_multisample", GL_ARB_multisample_functions }, - { "GL_ARB_multitexture", NULL }, - { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, - { "GL_ARB_texture_border_clamp", NULL }, - { "GL_ARB_texture_compression", GL_ARB_texture_compression_functions }, - { "GL_ARB_texture_cube_map", NULL }, - { "GL_ARB_texture_env_add", NULL }, - { "GL_ARB_texture_env_combine", NULL }, - { "GL_ARB_texture_env_dot3", NULL }, - { "GL_ARB_texture_mirrored_repeat", NULL }, - { "GL_ARB_texture_non_power_of_two", NULL }, - { "GL_ARB_texture_rectangle", NULL }, - { "GL_NV_texture_rectangle", NULL }, - { "GL_EXT_texture_rectangle", NULL }, - { "GL_ARB_texture_rectangle", NULL }, - { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, - { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, - { "GL_ARB_window_pos", GL_ARB_window_pos_functions }, - { "GL_EXT_blend_color", GL_EXT_blend_color_functions }, - { "GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions }, - { "GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions }, - { "GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions }, - { "GL_EXT_blend_logic_op", NULL }, - { "GL_EXT_blend_subtract", NULL }, - { "GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions }, - { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, - { "GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions }, - { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, - { "GL_EXT_stencil_wrap", NULL }, - { "GL_EXT_texture_edge_clamp", NULL }, - { "GL_EXT_texture_env_combine", NULL }, - { "GL_EXT_texture_env_dot3", NULL }, - { "GL_EXT_texture_filter_anisotropic", NULL }, - { "GL_EXT_texture_lod_bias", NULL }, - { "GL_3DFX_texture_compression_FXT1", NULL }, - { "GL_APPLE_client_storage", NULL }, - { "GL_MESA_pack_invert", NULL }, - { "GL_MESA_ycbcr_texture", NULL }, - { "GL_NV_blend_square", NULL }, - { "GL_SGIS_generate_mipmap", NULL }, - { NULL, NULL } -}; - -const struct dri_extension arb_oc_extension = - { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions}; - -void intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging) -{ - struct intel_context *intel = ctx?intel_context(ctx):NULL; - driInitExtensions(ctx, card_extensions, enable_imaging); - if (!ctx || intel->intelScreen->drmMinor >= 8) - driInitSingleExtension (ctx, &arb_oc_extension); -} - -static const struct dri_debug_control debug_control[] = -{ - { "fall", DEBUG_FALLBACKS }, - { "tex", DEBUG_TEXTURE }, - { "ioctl", DEBUG_IOCTL }, - { "prim", DEBUG_PRIMS }, - { "vert", DEBUG_VERTS }, - { "state", DEBUG_STATE }, - { "verb", DEBUG_VERBOSE }, - { "dri", DEBUG_DRI }, - { "dma", DEBUG_DMA }, - { "san", DEBUG_SANITY }, - { "sync", DEBUG_SYNC }, - { "sleep", DEBUG_SLEEP }, - { "pix", DEBUG_PIXEL }, - { "buf", DEBUG_BUFMGR }, - { "stats", DEBUG_STATS }, - { "tile", DEBUG_TILE }, - { "sing", DEBUG_SINGLE_THREAD }, - { "thre", DEBUG_SINGLE_THREAD }, - { "wm", DEBUG_WM }, - { "vs", DEBUG_VS }, - { NULL, 0 } -}; - - -static void intelInvalidateState( GLcontext *ctx, GLuint new_state ) -{ - struct intel_context *intel = intel_context(ctx); - - _swrast_InvalidateState( ctx, new_state ); - _swsetup_InvalidateState( ctx, new_state ); - _vbo_InvalidateState( ctx, new_state ); - _tnl_InvalidateState( ctx, new_state ); - _tnl_invalidate_vertex_state( ctx, new_state ); - - intel->NewGLState |= new_state; - - if (intel->vtbl.invalidate_state) - intel->vtbl.invalidate_state( intel, new_state ); -} - - -void intelFlush( GLcontext *ctx ) -{ - struct intel_context *intel = intel_context( ctx ); - - bmLockAndFence(intel); -} - -void intelFinish( GLcontext *ctx ) -{ - struct intel_context *intel = intel_context( ctx ); - - bmFinishFence(intel, bmLockAndFence(intel)); -} - -static void -intelBeginQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q) -{ - struct intel_context *intel = intel_context( ctx ); - drmI830MMIO io = { - .read_write = MMIO_READ, - .reg = MMIO_REGS_PS_DEPTH_COUNT, - .data = &q->Result - }; - intel->stats_wm++; - intelFinish(&intel->ctx); - drmCommandRead(intel->driFd, DRM_I830_MMIO, &io, sizeof(io)); -} - -static void -intelEndQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q) -{ - struct intel_context *intel = intel_context( ctx ); - GLuint64EXT tmp; - drmI830MMIO io = { - .read_write = MMIO_READ, - .reg = MMIO_REGS_PS_DEPTH_COUNT, - .data = &tmp - }; - intelFinish(&intel->ctx); - drmCommandRead(intel->driFd, DRM_I830_MMIO, &io, sizeof(io)); - q->Result = tmp - q->Result; - q->Ready = GL_TRUE; - intel->stats_wm--; -} - - -void intelInitDriverFunctions( struct dd_function_table *functions ) -{ - _mesa_init_driver_functions( functions ); - - functions->Flush = intelFlush; - functions->Finish = intelFinish; - functions->GetString = intelGetString; - functions->UpdateState = intelInvalidateState; - functions->BeginQuery = intelBeginQuery; - functions->EndQuery = intelEndQuery; - - /* CopyPixels can be accelerated even with the current memory - * manager: - */ - if (!getenv("INTEL_NO_BLIT")) { - functions->CopyPixels = intelCopyPixels; - functions->Bitmap = intelBitmap; - } - - intelInitTextureFuncs( functions ); - intelInitStateFuncs( functions ); - intelInitBufferFuncs( functions ); -} - - - -GLboolean intelInitContext( struct intel_context *intel, - const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate, - struct dd_function_table *functions ) -{ - GLcontext *ctx = &intel->ctx; - GLcontext *shareCtx = (GLcontext *) sharedContextPrivate; - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; - intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private; - volatile drmI830Sarea *saPriv = (volatile drmI830Sarea *) - (((GLubyte *)sPriv->pSAREA)+intelScreen->sarea_priv_offset); - - if (!_mesa_initialize_context(&intel->ctx, - mesaVis, shareCtx, - functions, - (void*) intel)) { - _mesa_printf("%s: failed to init mesa context\n", __FUNCTION__); - return GL_FALSE; - } - - driContextPriv->driverPrivate = intel; - intel->intelScreen = intelScreen; - intel->driScreen = sPriv; - intel->sarea = saPriv; - - driParseConfigFiles (&intel->optionCache, &intelScreen->optionCache, - intel->driScreen->myNum, "i965"); - - intel->vblank_flags = (intel->intelScreen->irq_active != 0) - ? driGetDefaultVBlankFlags(&intel->optionCache) : VBLANK_FLAG_NO_IRQ; - - ctx->Const.MaxTextureMaxAnisotropy = 2.0; - - if (getenv("INTEL_STRICT_CONFORMANCE")) { - intel->strict_conformance = 1; - } - - if (intel->strict_conformance) { - ctx->Const.MinLineWidth = 1.0; - ctx->Const.MinLineWidthAA = 1.0; - ctx->Const.MaxLineWidth = 1.0; - ctx->Const.MaxLineWidthAA = 1.0; - ctx->Const.LineWidthGranularity = 1.0; - } - else { - ctx->Const.MinLineWidth = 1.0; - ctx->Const.MinLineWidthAA = 1.0; - ctx->Const.MaxLineWidth = 5.0; - ctx->Const.MaxLineWidthAA = 5.0; - ctx->Const.LineWidthGranularity = 0.5; - } - - ctx->Const.MinPointSize = 1.0; - ctx->Const.MinPointSizeAA = 1.0; - ctx->Const.MaxPointSize = 255.0; - ctx->Const.MaxPointSizeAA = 3.0; - ctx->Const.PointSizeGranularity = 1.0; - - /* reinitialize the context point state. - * It depend on constants in __GLcontextRec::Const - */ - _mesa_init_point(ctx); - - /* Initialize the software rasterizer and helper modules. */ - _swrast_CreateContext( ctx ); - _vbo_CreateContext( ctx ); - _tnl_CreateContext( ctx ); - _swsetup_CreateContext( ctx ); - - TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; - - /* Configure swrast to match hardware characteristics: */ - _swrast_allow_pixel_fog( ctx, GL_FALSE ); - _swrast_allow_vertex_fog( ctx, GL_TRUE ); - - /* Dri stuff */ - intel->hHWContext = driContextPriv->hHWContext; - intel->driFd = sPriv->fd; - intel->driHwLock = (drmLock *) &sPriv->pSAREA->lock; - - intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24; - intel->hw_stipple = 1; - - switch(mesaVis->depthBits) { - case 0: /* what to do in this case? */ - case 16: - intel->depth_scale = 1.0/0xffff; - intel->polygon_offset_scale = 1.0/0xffff; - intel->depth_clear_mask = ~0; - intel->ClearDepth = 0xffff; - break; - case 24: - intel->depth_scale = 1.0/0xffffff; - intel->polygon_offset_scale = 2.0/0xffffff; /* req'd to pass glean */ - intel->depth_clear_mask = 0x00ffffff; - intel->stencil_clear_mask = 0xff000000; - intel->ClearDepth = 0x00ffffff; - break; - default: - assert(0); - break; - } - - /* Initialize swrast, tnl driver tables: */ - intelInitSpanFuncs( ctx ); - - intel->no_hw = getenv("INTEL_NO_HW") != NULL; - - if (!intel->intelScreen->irq_active) { - _mesa_printf("IRQs not active. Exiting\n"); - exit(1); - } - intelInitExtensions(ctx, GL_TRUE); - - INTEL_DEBUG = driParseDebugString( getenv( "INTEL_DEBUG" ), - debug_control ); - - - /* Buffer manager: - */ - intel->bm = bm_fake_intel_Attach( intel ); - - - bmInitPool(intel, - intel->intelScreen->tex.offset, /* low offset */ - intel->intelScreen->tex.map, /* low virtual */ - intel->intelScreen->tex.size, - BM_MEM_AGP); - - /* These are still static, but create regions for them. - */ - intel->front_region = - intel_region_create_static(intel, - BM_MEM_AGP, - intelScreen->front.offset, - intelScreen->front.map, - intelScreen->cpp, - intelScreen->front.pitch / intelScreen->cpp, - intelScreen->height, - intelScreen->front.size, - intelScreen->front.tiled != 0); - - intel->back_region = - intel_region_create_static(intel, - BM_MEM_AGP, - intelScreen->back.offset, - intelScreen->back.map, - intelScreen->cpp, - intelScreen->back.pitch / intelScreen->cpp, - intelScreen->height, - intelScreen->back.size, - intelScreen->back.tiled != 0); - - /* Still assuming front.cpp == depth.cpp - * - * XXX: Setting tiling to false because Depth tiling only supports - * YMAJOR but the blitter only supports XMAJOR tiling. Have to - * resolve later. - */ - intel->depth_region = - intel_region_create_static(intel, - BM_MEM_AGP, - intelScreen->depth.offset, - intelScreen->depth.map, - intelScreen->cpp, - intelScreen->depth.pitch / intelScreen->cpp, - intelScreen->height, - intelScreen->depth.size, - intelScreen->depth.tiled != 0); - - intel_bufferobj_init( intel ); - intel->batch = intel_batchbuffer_alloc( intel ); - - if (intel->ctx.Mesa_DXTn) { - _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); - _mesa_enable_extension( ctx, "GL_S3_s3tc" ); - } - else if (driQueryOptionb (&intelScreen->optionCache, "force_s3tc_enable")) { - _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); - } - -/* driInitTextureObjects( ctx, & intel->swapped, */ -/* DRI_TEXMGR_DO_TEXTURE_1D | */ -/* DRI_TEXMGR_DO_TEXTURE_2D | */ -/* DRI_TEXMGR_DO_TEXTURE_RECT ); */ - - - if (getenv("INTEL_NO_RAST")) { - fprintf(stderr, "disabling 3D rasterization\n"); - intel->no_rast = 1; - } - - - return GL_TRUE; -} - -void intelDestroyContext(__DRIcontextPrivate *driContextPriv) -{ - struct intel_context *intel = (struct intel_context *) driContextPriv->driverPrivate; - - assert(intel); /* should never be null */ - if (intel) { - GLboolean release_texture_heaps; - - - intel->vtbl.destroy( intel ); - - release_texture_heaps = (intel->ctx.Shared->RefCount == 1); - _swsetup_DestroyContext (&intel->ctx); - _tnl_DestroyContext (&intel->ctx); - _vbo_DestroyContext (&intel->ctx); - - _swrast_DestroyContext (&intel->ctx); - intel->Fallback = 0; /* don't call _swrast_Flush later */ - intel_batchbuffer_free(intel->batch); - intel->batch = NULL; - - - if ( release_texture_heaps ) { - /* This share group is about to go away, free our private - * texture object data. - */ - - /* XXX: destroy the shared bufmgr struct here? - */ - } - - /* Free the regions created to describe front/back/depth - * buffers: - */ -#if 0 - intel_region_release(intel, &intel->front_region); - intel_region_release(intel, &intel->back_region); - intel_region_release(intel, &intel->depth_region); - intel_region_release(intel, &intel->draw_region); -#endif - - /* free the Mesa context */ - _mesa_destroy_context(&intel->ctx); - } - - driContextPriv->driverPrivate = NULL; -} - -GLboolean intelUnbindContext(__DRIcontextPrivate *driContextPriv) -{ - return GL_TRUE; -} - -GLboolean intelMakeCurrent(__DRIcontextPrivate *driContextPriv, - __DRIdrawablePrivate *driDrawPriv, - __DRIdrawablePrivate *driReadPriv) -{ - - if (driContextPriv) { - struct intel_context *intel = (struct intel_context *) driContextPriv->driverPrivate; - - if ( intel->driDrawable != driDrawPriv ) { - /* Shouldn't the readbuffer be stored also? */ - driDrawableInitVBlank( driDrawPriv, intel->vblank_flags, - &intel->vbl_seq ); - - intel->driDrawable = driDrawPriv; - intelWindowMoved( intel ); - } - - _mesa_make_current(&intel->ctx, - (GLframebuffer *) driDrawPriv->driverPrivate, - (GLframebuffer *) driReadPriv->driverPrivate); - - intel->ctx.Driver.DrawBuffer( &intel->ctx, intel->ctx.Color.DrawBuffer[0] ); - } else { - _mesa_make_current(NULL, NULL, NULL); - } - - return GL_TRUE; -} - - -static void intelContendedLock( struct intel_context *intel, GLuint flags ) -{ - __DRIdrawablePrivate *dPriv = intel->driDrawable; - __DRIscreenPrivate *sPriv = intel->driScreen; - volatile drmI830Sarea * sarea = intel->sarea; - int me = intel->hHWContext; - int my_bufmgr = bmCtxId(intel); - - drmGetLock(intel->driFd, intel->hHWContext, flags); - - /* If the window moved, may need to set a new cliprect now. - * - * NOTE: This releases and regains the hw lock, so all state - * checking must be done *after* this call: - */ - if (dPriv) - DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv); - - - intel->locked = 1; - intel->need_flush = 1; - - /* Lost context? - */ - if (sarea->ctxOwner != me) { - DBG("Lost Context: sarea->ctxOwner %x me %x\n", sarea->ctxOwner, me); - sarea->ctxOwner = me; - intel->vtbl.lost_hardware( intel ); - } - - /* As above, but don't evict the texture data on transitions - * between contexts which all share a local buffer manager. - */ - if (sarea->texAge != my_bufmgr) { - DBG("Lost Textures: sarea->texAge %x my_bufmgr %x\n", sarea->ctxOwner, my_bufmgr); - sarea->texAge = my_bufmgr; - bm_fake_NotifyContendedLockTake( intel ); - } - - /* Drawable changed? - */ - if (dPriv && intel->lastStamp != dPriv->lastStamp) { - intelWindowMoved( intel ); - intel->lastStamp = dPriv->lastStamp; - } -} - -_glthread_DECLARE_STATIC_MUTEX(lockMutex); - -/* Lock the hardware and validate our state. - */ -void LOCK_HARDWARE( struct intel_context *intel ) -{ - char __ret=0; - - _glthread_LOCK_MUTEX(lockMutex); - assert(!intel->locked); - - - DRM_CAS(intel->driHwLock, intel->hHWContext, - (DRM_LOCK_HELD|intel->hHWContext), __ret); - if (__ret) - intelContendedLock( intel, 0 ); - - intel->locked = 1; - - if (intel->aub_wrap) { - bm_fake_NotifyContendedLockTake( intel ); - intel->vtbl.lost_hardware( intel ); - intel->vtbl.aub_wrap(intel); - intel->aub_wrap = 0; - } - - if (bmError(intel)) { - bmEvictAll(intel); - intel->vtbl.lost_hardware( intel ); - } - - /* Make sure nothing has been emitted prior to getting the lock: - */ - assert(intel->batch->map == 0); - - /* XXX: postpone, may not be needed: - */ - if (!intel_batchbuffer_map(intel->batch)) { - bmEvictAll(intel); - intel->vtbl.lost_hardware( intel ); - - /* This could only fail if the batchbuffer was greater in size - * than the available texture memory: - */ - if (!intel_batchbuffer_map(intel->batch)) { - _mesa_printf("double failure to map batchbuffer\n"); - assert(0); - } - } -} - - -/* Unlock the hardware using the global current context - */ -void UNLOCK_HARDWARE( struct intel_context *intel ) -{ - /* Make sure everything has been released: - */ - assert(intel->batch->ptr == intel->batch->map + intel->batch->offset); - - intel_batchbuffer_unmap(intel->batch); - intel->vtbl.note_unlock( intel ); - intel->locked = 0; - - - - DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext); - _glthread_UNLOCK_MUTEX(lockMutex); -} - - +../intel/intel_context.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_context.h b/src/mesa/drivers/dri/i965/intel_context.h deleted file mode 100644 index 406f8483dce..00000000000 --- a/src/mesa/drivers/dri/i965/intel_context.h +++ /dev/null @@ -1,528 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INTELCONTEXT_INC -#define INTELCONTEXT_INC - - - -#include "mtypes.h" -#include "drm.h" -#include "texmem.h" - -#include "intel_screen.h" -#include "i830_common.h" -#include "tnl/t_vertex.h" - -#define TAG(x) intel##x -#include "tnl_dd/t_dd_vertex.h" -#undef TAG - -#define DV_PF_555 (1<<8) -#define DV_PF_565 (2<<8) -#define DV_PF_8888 (3<<8) - -struct intel_region; -struct intel_context; - -typedef void (*intel_tri_func)(struct intel_context *, intelVertex *, intelVertex *, - intelVertex *); -typedef void (*intel_line_func)(struct intel_context *, intelVertex *, intelVertex *); -typedef void (*intel_point_func)(struct intel_context *, intelVertex *); - -#define INTEL_FALLBACK_DRAW_BUFFER 0x1 -#define INTEL_FALLBACK_READ_BUFFER 0x2 -#define INTEL_FALLBACK_USER 0x4 -#define INTEL_FALLBACK_RENDERMODE 0x8 -#define INTEL_FALLBACK_TEXTURE 0x10 - -extern void intelFallback( struct intel_context *intel, GLuint bit, GLboolean mode ); -#define FALLBACK( intel, bit, mode ) intelFallback( intel, bit, mode ) - - - -struct intel_texture_object -{ - struct gl_texture_object base; /* The "parent" object */ - - /* The mipmap tree must include at least these levels once - * validated: - */ - GLuint firstLevel; - GLuint lastLevel; - - GLuint dirty_images[6]; - GLuint dirty; - - /* On validation any active images held in main memory or in other - * regions will be copied to this region and the old storage freed. - */ - struct intel_mipmap_tree *mt; -}; - - - -struct intel_context -{ - GLcontext ctx; /* the parent class */ - - struct { - void (*destroy)( struct intel_context *intel ); - void (*emit_state)( struct intel_context *intel ); - void (*emit_invarient_state)( struct intel_context *intel ); - void (*lost_hardware)( struct intel_context *intel ); - void (*note_fence)( struct intel_context *intel, GLuint fence ); - void (*note_unlock)( struct intel_context *intel ); - void (*update_texture_state)( struct intel_context *intel ); - - void (*render_start)( struct intel_context *intel ); - void (*set_draw_region)( struct intel_context *intel, - struct intel_region *draw_region, - struct intel_region *depth_region ); - - GLuint (*flush_cmd)( void ); - - void (*emit_flush)( struct intel_context *intel, - GLuint unused ); - - void (*aub_commands)( struct intel_context *intel, - GLuint offset, - const void *buf, - GLuint sz ); - void (*aub_dump_bmp)( struct intel_context *intel, GLuint buffer ); - void (*aub_wrap)( struct intel_context *intel ); - void (*aub_gtt_data)( struct intel_context *intel, - GLuint offset, - const void *src, - GLuint size, - GLuint aubtype, - GLuint aubsubtype); - - - void (*reduced_primitive_state)( struct intel_context *intel, GLenum rprim ); - - GLboolean (*check_vertex_size)( struct intel_context *intel, GLuint expected ); - - void (*invalidate_state)( struct intel_context *intel, GLuint new_state ); - - /* Metaops: - */ - void (*install_meta_state)( struct intel_context *intel ); - void (*leave_meta_state)( struct intel_context *intel ); - - void (*meta_draw_region)( struct intel_context *intel, - struct intel_region *draw_region, - struct intel_region *depth_region ); - - void (*meta_color_mask)( struct intel_context *intel, - GLboolean ); - - void (*meta_stencil_replace)( struct intel_context *intel, - GLuint mask, - GLuint clear ); - - void (*meta_depth_replace)( struct intel_context *intel ); - - void (*meta_texture_blend_replace) (struct intel_context * intel); - - void (*meta_no_stencil_write)( struct intel_context *intel ); - void (*meta_no_depth_write)( struct intel_context *intel ); - void (*meta_no_texture)( struct intel_context *intel ); - void (*meta_import_pixel_state) (struct intel_context * intel); - void (*meta_frame_buffer_texture)( struct intel_context *intel, - GLint xoff, GLint yoff ); - - void (*meta_draw_quad)(struct intel_context *intel, - GLfloat x0, GLfloat x1, - GLfloat y0, GLfloat y1, - GLfloat z, - GLubyte red, GLubyte green, - GLubyte blue, GLubyte alpha, - GLfloat s0, GLfloat s1, - GLfloat t0, GLfloat t1); - - - - } vtbl; - - GLint refcount; - GLuint Fallback; - GLuint NewGLState; - - GLuint last_swap_fence; - GLuint second_last_swap_fence; - - GLboolean aub_wrap; - GLuint stats_wm; - - struct intel_batchbuffer *batch; - - GLubyte clear_chan[4]; - GLuint ClearColor; - GLuint ClearDepth; - - GLfloat depth_scale; - GLfloat polygon_offset_scale; /* dependent on depth_scale, bpp */ - GLuint depth_clear_mask; - GLuint stencil_clear_mask; - - GLboolean hw_stencil; - GLboolean hw_stipple; - GLboolean depth_buffer_is_float; - GLboolean no_hw; - GLboolean no_rast; - GLboolean thrashing; - GLboolean locked; - GLboolean strict_conformance; - GLboolean need_flush; - - - - /* AGP memory buffer manager: - */ - struct bufmgr *bm; - - - /* State for intelvb.c and inteltris.c. - */ - GLenum render_primitive; - GLenum reduced_primitive; - - struct intel_region *front_region; - struct intel_region *back_region; - struct intel_region *draw_region; - struct intel_region *depth_region; - - /* These refer to the current draw (front vs. back) buffer: - */ - int drawX; /* origin of drawable in draw buffer */ - int drawY; - GLuint numClipRects; /* cliprects for that buffer */ - drm_clip_rect_t *pClipRects; - struct gl_texture_object *frame_buffer_texobj; - - GLboolean scissor; - drm_clip_rect_t draw_rect; - drm_clip_rect_t scissor_rect; - - drm_context_t hHWContext; - drmLock *driHwLock; - int driFd; - - __DRIdrawablePrivate *driDrawable; - __DRIscreenPrivate *driScreen; - intelScreenPrivate *intelScreen; - volatile drmI830Sarea *sarea; - - FILE *aub_file; - - GLuint lastStamp; - - /** - * Configuration cache - */ - driOptionCache optionCache; - - /* VBI - */ - GLuint vbl_seq; - GLuint vblank_flags; - - int64_t swap_ust; - int64_t swap_missed_ust; - - GLuint swap_count; - GLuint swap_missed_count; -}; - -/* These are functions now: - */ -void LOCK_HARDWARE( struct intel_context *intel ); -void UNLOCK_HARDWARE( struct intel_context *intel ); - - -#define SUBPIXEL_X 0.125 -#define SUBPIXEL_Y 0.125 - -/* ================================================================ - * Color packing: - */ - -#define INTEL_PACKCOLOR4444(r,g,b,a) \ - ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4)) - -#define INTEL_PACKCOLOR1555(r,g,b,a) \ - ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \ - ((a) ? 0x8000 : 0)) - -#define INTEL_PACKCOLOR565(r,g,b) \ - ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3)) - -#define INTEL_PACKCOLOR8888(r,g,b,a) \ - ((a<<24) | (r<<16) | (g<<8) | b) - - -#define INTEL_PACKCOLOR(format, r, g, b, a) \ -(format == DV_PF_555 ? INTEL_PACKCOLOR1555(r,g,b,a) : \ - (format == DV_PF_565 ? INTEL_PACKCOLOR565(r,g,b) : \ - (format == DV_PF_8888 ? INTEL_PACKCOLOR8888(r,g,b,a) : \ - 0))) - - - -/* ================================================================ - * From linux kernel i386 header files, copes with odd sizes better - * than COPY_DWORDS would: - */ -#if defined(i386) || defined(__i386__) -static inline void * __memcpy(void * to, const void * from, size_t n) -{ - int d0, d1, d2; - __asm__ __volatile__( - "rep ; movsl\n\t" - "testb $2,%b4\n\t" - "je 1f\n\t" - "movsw\n" - "1:\ttestb $1,%b4\n\t" - "je 2f\n\t" - "movsb\n" - "2:" - : "=&c" (d0), "=&D" (d1), "=&S" (d2) - :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from) - : "memory"); - return (to); -} -#else -#define __memcpy(a,b,c) memcpy(a,b,c) -#endif - - -/* The system memcpy (at least on ubuntu 5.10) has problems copying - * to agp (writecombined) memory from a source which isn't 64-byte - * aligned - there is a 4x performance falloff. - * - * The x86 __memcpy is immune to this but is slightly slower - * (10%-ish) than the system memcpy. - * - * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but - * isn't much faster than x86_memcpy for agp copies. - * - * TODO: switch dynamically. - */ -static inline void *do_memcpy( void *dest, const void *src, size_t n ) -{ - if ( (((unsigned long)src) & 63) || - (((unsigned long)dest) & 63)) { - return __memcpy(dest, src, n); - } - else - return memcpy(dest, src, n); -} - - - - - -/* ================================================================ - * Debugging: - */ -extern int INTEL_DEBUG; - -#define DEBUG_TEXTURE 0x1 -#define DEBUG_STATE 0x2 -#define DEBUG_IOCTL 0x4 -#define DEBUG_PRIMS 0x8 -#define DEBUG_VERTS 0x10 -#define DEBUG_FALLBACKS 0x20 -#define DEBUG_VERBOSE 0x40 -#define DEBUG_DRI 0x80 -#define DEBUG_DMA 0x100 -#define DEBUG_SANITY 0x200 -#define DEBUG_SYNC 0x400 -#define DEBUG_SLEEP 0x800 -#define DEBUG_PIXEL 0x1000 -#define DEBUG_STATS 0x2000 -#define DEBUG_TILE 0x4000 -#define DEBUG_SINGLE_THREAD 0x8000 -#define DEBUG_WM 0x10000 -#define DEBUG_URB 0x20000 -#define DEBUG_VS 0x40000 - - -#define PCI_CHIP_845_G 0x2562 -#define PCI_CHIP_I830_M 0x3577 -#define PCI_CHIP_I855_GM 0x3582 -#define PCI_CHIP_I865_G 0x2572 -#define PCI_CHIP_I915_G 0x2582 -#define PCI_CHIP_I915_GM 0x2592 -#define PCI_CHIP_I945_G 0x2772 -#define PCI_CHIP_I965_G 0x29A2 -#define PCI_CHIP_I965_Q 0x2992 -#define PCI_CHIP_I965_G_1 0x2982 -#define PCI_CHIP_I946_GZ 0x2972 -#define PCI_CHIP_I965_GM 0x2A02 -#define PCI_CHIP_I965_GME 0x2A12 - - -/* ================================================================ - * intel_context.c: - */ - -extern GLboolean intelInitContext( struct intel_context *intel, - const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate, - struct dd_function_table *functions ); - -extern void intelGetLock(struct intel_context *intel, GLuint flags); - -extern void intelFinish( GLcontext *ctx ); -extern void intelFlush( GLcontext *ctx ); - -extern void intelInitDriverFunctions( struct dd_function_table *functions ); - - -/* ================================================================ - * intel_state.c: - */ -extern void intelInitStateFuncs( struct dd_function_table *functions ); - -#define COMPAREFUNC_ALWAYS 0 -#define COMPAREFUNC_NEVER 0x1 -#define COMPAREFUNC_LESS 0x2 -#define COMPAREFUNC_EQUAL 0x3 -#define COMPAREFUNC_LEQUAL 0x4 -#define COMPAREFUNC_GREATER 0x5 -#define COMPAREFUNC_NOTEQUAL 0x6 -#define COMPAREFUNC_GEQUAL 0x7 - -#define STENCILOP_KEEP 0 -#define STENCILOP_ZERO 0x1 -#define STENCILOP_REPLACE 0x2 -#define STENCILOP_INCRSAT 0x3 -#define STENCILOP_DECRSAT 0x4 -#define STENCILOP_INCR 0x5 -#define STENCILOP_DECR 0x6 -#define STENCILOP_INVERT 0x7 - -#define LOGICOP_CLEAR 0 -#define LOGICOP_NOR 0x1 -#define LOGICOP_AND_INV 0x2 -#define LOGICOP_COPY_INV 0x3 -#define LOGICOP_AND_RVRSE 0x4 -#define LOGICOP_INV 0x5 -#define LOGICOP_XOR 0x6 -#define LOGICOP_NAND 0x7 -#define LOGICOP_AND 0x8 -#define LOGICOP_EQUIV 0x9 -#define LOGICOP_NOOP 0xa -#define LOGICOP_OR_INV 0xb -#define LOGICOP_COPY 0xc -#define LOGICOP_OR_RVRSE 0xd -#define LOGICOP_OR 0xe -#define LOGICOP_SET 0xf - -#define BLENDFACT_ZERO 0x01 -#define BLENDFACT_ONE 0x02 -#define BLENDFACT_SRC_COLR 0x03 -#define BLENDFACT_INV_SRC_COLR 0x04 -#define BLENDFACT_SRC_ALPHA 0x05 -#define BLENDFACT_INV_SRC_ALPHA 0x06 -#define BLENDFACT_DST_ALPHA 0x07 -#define BLENDFACT_INV_DST_ALPHA 0x08 -#define BLENDFACT_DST_COLR 0x09 -#define BLENDFACT_INV_DST_COLR 0x0a -#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b -#define BLENDFACT_CONST_COLOR 0x0c -#define BLENDFACT_INV_CONST_COLOR 0x0d -#define BLENDFACT_CONST_ALPHA 0x0e -#define BLENDFACT_INV_CONST_ALPHA 0x0f -#define BLENDFACT_MASK 0x0f - - -extern int intel_translate_compare_func( GLenum func ); -extern int intel_translate_stencil_op( GLenum op ); -extern int intel_translate_blend_factor( GLenum factor ); -extern int intel_translate_logic_op( GLenum opcode ); - - -/* ================================================================ - * intel_buffers.c: - */ -void intelInitBufferFuncs( struct dd_function_table *functions ); - -struct intel_region *intel_readbuf_region( struct intel_context *intel ); -struct intel_region *intel_drawbuf_region( struct intel_context *intel ); - -extern void intelWindowMoved( struct intel_context *intel ); - -extern GLboolean intel_intersect_cliprects( drm_clip_rect_t *dest, - const drm_clip_rect_t *a, - const drm_clip_rect_t *b ); - - -/* ================================================================ - * intel_pixel_copy.c: - */ -void intelCopyPixels(GLcontext * ctx, - GLint srcx, GLint srcy, - GLsizei width, GLsizei height, - GLint destx, GLint desty, GLenum type); - -GLboolean intel_check_blit_fragment_ops(GLcontext * ctx); - -void intelBitmap(GLcontext * ctx, - GLint x, GLint y, - GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte * pixels); - -void intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging); -#define _NEW_WINDOW_POS 0x40000000 - - -/*====================================================================== - * Inline conversion functions. - * These are better-typed than the macros used previously: - */ -static inline struct intel_context *intel_context( GLcontext *ctx ) -{ - return (struct intel_context *)ctx; -} - -static inline struct intel_texture_object *intel_texture_object( struct gl_texture_object *obj ) -{ - return (struct intel_texture_object *)obj; -} - -static inline struct intel_texture_image *intel_texture_image( struct gl_texture_image *img ) -{ - return (struct intel_texture_image *)img; -} - -#endif - diff --git a/src/mesa/drivers/dri/i965/intel_decode.c b/src/mesa/drivers/dri/i965/intel_decode.c new file mode 120000 index 00000000000..f671b6cbb13 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_decode.c @@ -0,0 +1 @@ +../intel/intel_decode.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_depthstencil.c b/src/mesa/drivers/dri/i965/intel_depthstencil.c new file mode 120000 index 00000000000..4ac4ae690a3 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_depthstencil.c @@ -0,0 +1 @@ +../intel/intel_depthstencil.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c new file mode 120000 index 00000000000..a19f86dcc57 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -0,0 +1 @@ +../intel/intel_fbo.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_ioctl.c b/src/mesa/drivers/dri/i965/intel_ioctl.c deleted file mode 100644 index 0a8e976f706..00000000000 --- a/src/mesa/drivers/dri/i965/intel_ioctl.c +++ /dev/null @@ -1,205 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include <stdio.h> -#include <unistd.h> -#include <errno.h> -#include <sched.h> - -#include "mtypes.h" -#include "context.h" -#include "swrast/swrast.h" - -#include "intel_context.h" -#include "intel_ioctl.h" -#include "intel_batchbuffer.h" -#include "intel_blit.h" -#include "intel_regions.h" -#include "drm.h" -#include "bufmgr.h" - -static int intelWaitIdleLocked( struct intel_context *intel ) -{ - static int in_wait_idle = 0; - unsigned int fence; - - if (!in_wait_idle) { - if (INTEL_DEBUG & DEBUG_SYNC) { - fprintf(stderr, "waiting for idle\n"); - } - - in_wait_idle = 1; - fence = bmSetFence(intel); - intelWaitIrq(intel, fence); - in_wait_idle = 0; - - return bmTestFence(intel, fence); - } else { - return 1; - } -} - -int intelEmitIrqLocked( struct intel_context *intel ) -{ - int seq = 1; - - if (!intel->no_hw) { - drmI830IrqEmit ie; - int ret; - - assert(((*(int *)intel->driHwLock) & ~DRM_LOCK_CONT) == - (DRM_LOCK_HELD|intel->hHWContext)); - - ie.irq_seq = &seq; - - ret = drmCommandWriteRead( intel->driFd, DRM_I830_IRQ_EMIT, - &ie, sizeof(ie) ); - if ( ret ) { - fprintf( stderr, "%s: drmI830IrqEmit: %d\n", __FUNCTION__, ret ); - exit(1); - } - - if (0) - fprintf(stderr, "%s --> %d\n", __FUNCTION__, seq ); - } - - return seq; -} - -void intelWaitIrq( struct intel_context *intel, int seq ) -{ - if (!intel->no_hw) { - drmI830IrqWait iw; - int ret, lastdispatch; - - if (0) - fprintf(stderr, "%s %d\n", __FUNCTION__, seq ); - - iw.irq_seq = seq; - - do { - lastdispatch = intel->sarea->last_dispatch; - ret = drmCommandWrite( intel->driFd, DRM_I830_IRQ_WAIT, &iw, sizeof(iw) ); - - /* This seems quite often to return before it should!?! - */ - } while (ret == -EAGAIN || ret == -EINTR || (ret == -EBUSY && lastdispatch != intel->sarea->last_dispatch) || (ret == 0 && seq > intel->sarea->last_dispatch) - || (ret == 0 && intel->sarea->last_dispatch - seq >= (1 << 24))); - - - if ( ret ) { - fprintf( stderr, "%s: drmI830IrqWait: %d\n", __FUNCTION__, ret ); - - if (intel->aub_file) { - intel->vtbl.aub_dump_bmp( intel, intel->ctx.Visual.doubleBufferMode ? 1 : 0 ); - } - - exit(1); - } - } -} - - -void intel_batch_ioctl( struct intel_context *intel, - GLuint start_offset, - GLuint used) -{ - drmI830BatchBuffer batch; - - assert(intel->locked); - assert(used); - - if (0) - fprintf(stderr, "%s used %d offset %x..%x\n", - __FUNCTION__, - used, - start_offset, - start_offset + used); - - batch.start = start_offset; - batch.used = used; - batch.cliprects = NULL; - batch.num_cliprects = 0; - batch.DR1 = 0; - batch.DR4 = 0; - - if (INTEL_DEBUG & DEBUG_DMA) - fprintf(stderr, "%s: 0x%x..0x%x\n", - __FUNCTION__, - batch.start, - batch.start + batch.used * 4); - - if (!intel->no_hw) { - if (drmCommandWrite (intel->driFd, DRM_I830_BATCHBUFFER, &batch, - sizeof(batch))) { - fprintf(stderr, "DRM_I830_BATCHBUFFER: %d\n", -errno); - UNLOCK_HARDWARE(intel); - exit(1); - } - - if (INTEL_DEBUG & DEBUG_SYNC) { - intelWaitIdleLocked(intel); - } - } -} - -void intel_cmd_ioctl( struct intel_context *intel, - char *buf, - GLuint used) -{ - drmI830CmdBuffer cmd; - - assert(intel->locked); - assert(used); - - cmd.buf = buf; - cmd.sz = used; - cmd.cliprects = intel->pClipRects; - cmd.num_cliprects = 0; - cmd.DR1 = 0; - cmd.DR4 = 0; - - if (INTEL_DEBUG & DEBUG_DMA) - fprintf(stderr, "%s: 0x%x..0x%x\n", - __FUNCTION__, - 0, - 0 + cmd.sz); - - if (!intel->no_hw) { - if (drmCommandWrite (intel->driFd, DRM_I830_CMDBUFFER, &cmd, - sizeof(cmd))) { - fprintf(stderr, "DRM_I830_CMDBUFFER: %d\n", -errno); - UNLOCK_HARDWARE(intel); - exit(1); - } - - if (INTEL_DEBUG & DEBUG_SYNC) { - intelWaitIdleLocked(intel); - } - } -} diff --git a/src/mesa/drivers/dri/i965/intel_ioctl.h b/src/mesa/drivers/dri/i965/intel_ioctl.h deleted file mode 100644 index df276593626..00000000000 --- a/src/mesa/drivers/dri/i965/intel_ioctl.h +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INTEL_IOCTL_H -#define INTEL_IOCTL_H - -#include "intel_context.h" - -void intelWaitIrq( struct intel_context *intel, int seq ); -int intelEmitIrqLocked( struct intel_context *intel ); - -void intel_batch_ioctl( struct intel_context *intel, - GLuint start_offset, - GLuint used); - -void intel_cmd_ioctl( struct intel_context *intel, - char *buf, - GLuint used); - -#endif diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 8486086b274..242fed0b6ae 100644..120000 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -1,247 +1 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "intel_context.h" -#include "intel_mipmap_tree.h" -#include "intel_regions.h" -#include "bufmgr.h" -#include "enums.h" -#include "imports.h" - -static GLenum target_to_target( GLenum target ) -{ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: - return GL_TEXTURE_CUBE_MAP_ARB; - default: - return target; - } -} - -struct intel_mipmap_tree *intel_miptree_create( struct intel_context *intel, - GLenum target, - GLenum internal_format, - GLuint first_level, - GLuint last_level, - GLuint width0, - GLuint height0, - GLuint depth0, - GLuint cpp, - GLboolean compressed) -{ - GLboolean ok; - struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); - - if (INTEL_DEBUG & DEBUG_TEXTURE) - _mesa_printf("%s target %s format %s level %d..%d\n", __FUNCTION__, - _mesa_lookup_enum_by_nr(target), - _mesa_lookup_enum_by_nr(internal_format), - first_level, - last_level); - - mt->target = target_to_target(target); - mt->internal_format = internal_format; - mt->first_level = first_level; - mt->last_level = last_level; - mt->width0 = width0; - mt->height0 = height0; - mt->depth0 = depth0; - mt->cpp = compressed ? 2 : cpp; - mt->compressed = compressed; - - switch (intel->intelScreen->deviceID) { -#if 0 - case PCI_CHIP_I945_G: - ok = i945_miptree_layout( mt ); - break; - case PCI_CHIP_I915_G: - case PCI_CHIP_I915_GM: - ok = i915_miptree_layout( mt ); - break; -#endif - default: - if (INTEL_DEBUG & DEBUG_TEXTURE) - _mesa_printf("assuming BRW texture layouts\n"); - ok = brw_miptree_layout( mt ); - break; - } - - if (ok) - mt->region = intel_region_alloc( intel, - mt->cpp, - mt->pitch, - mt->total_height ); - - if (!mt->region) { - free(mt); - return NULL; - } - - return mt; -} - - - -void intel_miptree_destroy( struct intel_context *intel, - struct intel_mipmap_tree *mt ) -{ - if (mt) { - GLuint i; - - intel_region_release(intel, &(mt->region)); - - for (i = 0; i < MAX_TEXTURE_LEVELS; i++) - if (mt->level[i].image_offset) - free(mt->level[i].image_offset); - - free(mt); - } -} - - - - -void intel_miptree_set_level_info(struct intel_mipmap_tree *mt, - GLuint level, - GLuint nr_images, - GLuint x, GLuint y, - GLuint w, GLuint h, GLuint d) -{ - mt->level[level].width = w; - mt->level[level].height = h; - mt->level[level].depth = d; - mt->level[level].level_offset = (x + y * mt->pitch) * mt->cpp; - mt->level[level].nr_images = nr_images; - - if (INTEL_DEBUG & DEBUG_TEXTURE) - _mesa_printf("%s level %d img size: %d,%d level_offset 0x%x\n", __FUNCTION__, level, w, h, - mt->level[level].level_offset); - - /* Not sure when this would happen, but anyway: - */ - if (mt->level[level].image_offset) { - free(mt->level[level].image_offset); - mt->level[level].image_offset = NULL; - } - - if (nr_images > 1) { - mt->level[level].image_offset = malloc(nr_images * sizeof(GLuint)); - mt->level[level].image_offset[0] = 0; - } -} - - - -void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt, - GLuint level, - GLuint img, - GLuint x, GLuint y) -{ - if (INTEL_DEBUG & DEBUG_TEXTURE) - _mesa_printf("%s level %d img %d pos %d,%d\n", __FUNCTION__, level, img, x, y); - - if (img == 0) - assert(x == 0 && y == 0); - - if (img > 0) - mt->level[level].image_offset[img] = (x + y * mt->pitch) * mt->cpp; -} - - -/* Although we use the image_offset[] array to store relative offsets - * to cube faces, Mesa doesn't know anything about this and expects - * each cube face to be treated as a separate image. - * - * These functions present that view to mesa: - */ -const GLuint *intel_miptree_depth_offsets(struct intel_mipmap_tree *mt, - GLuint level) -{ - static const GLuint zero = 0; - - if (mt->target != GL_TEXTURE_3D || - mt->level[level].nr_images == 1) - return &zero; - else - return mt->level[level].image_offset; -} - - -GLuint intel_miptree_image_offset(struct intel_mipmap_tree *mt, - GLuint face, - GLuint level) -{ - if (mt->target == GL_TEXTURE_CUBE_MAP_ARB) - return (mt->level[level].level_offset + - mt->level[level].image_offset[face]); - else - return mt->level[level].level_offset; -} - - - - - - -/* Upload data for a particular image. - */ -GLboolean intel_miptree_image_data(struct intel_context *intel, - struct intel_mipmap_tree *dst, - GLuint face, - GLuint level, - const void *src, - GLuint src_row_pitch, - GLuint src_image_pitch) -{ - GLuint depth = dst->level[level].depth; - GLuint dst_offset = intel_miptree_image_offset(dst, face, level); - const GLuint *dst_depth_offset = intel_miptree_depth_offsets(dst, level); - GLuint i; - - DBG("%s\n", __FUNCTION__); - for (i = 0; i < depth; i++) { - if (!intel_region_data(intel, - dst->region, - dst_offset + dst_depth_offset[i], - 0, - 0, - src, - src_row_pitch, - 0, 0, /* source x,y */ - dst->level[level].width, - dst->level[level].height)) - return GL_FALSE; - src += src_image_pitch; - } - return GL_TRUE; -} - +../intel/intel_mipmap_tree.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h deleted file mode 100644 index dbd7167b778..00000000000 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ /dev/null @@ -1,166 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INTEL_MIPMAP_TREE_H -#define INTEL_MIPMAP_TREE_H - -#include "intel_regions.h" - -/* A layer on top of the intel_regions code which adds: - * - * - Code to size and layout a region to hold a set of mipmaps. - * - Query to determine if a new image fits in an existing tree. - * - * The fixed mipmap layout of intel hardware where one offset - * specifies the position of all images in a mipmap hierachy - * complicates the implementation of GL texture image commands, - * compared to hardware where each image is specified with an - * independent offset. - * - * In an ideal world, each texture object would be associated with a - * single bufmgr buffer or 2d intel_region, and all the images within - * the texture object would slot into the tree as they arrive. The - * reality can be a little messier, as images can arrive from the user - * with sizes that don't fit in the existing tree, or in an order - * where the tree layout cannot be guessed immediately. - * - * This structure encodes an idealized mipmap tree. The GL image - * commands build these where possible, otherwise store the images in - * temporary system buffers. - */ - - -struct intel_mipmap_level { - GLuint level_offset; - GLuint width; - GLuint height; - GLuint depth; - GLuint nr_images; - - /* Explicitly store the offset of each image for each cube face or - * depth value. Pretty much have to accept that hardware formats - * are going to be so diverse that there is no unified way to - * compute the offsets of depth/cube images within a mipmap level, - * so have to store them as a lookup table: - */ - GLuint *image_offset; -}; - -struct intel_mipmap_tree { - /* Effectively the key: - */ - GLenum target; - GLenum internal_format; - - GLuint first_level; - GLuint last_level; - - GLuint width0, height0, depth0; - GLuint cpp; - GLboolean compressed; - - /* Derived from the above: - */ - GLuint pitch; - GLuint depth_pitch; /* per-image on i945? */ - GLuint total_height; - - /* Includes image offset tables: - */ - struct intel_mipmap_level level[MAX_TEXTURE_LEVELS]; - - /* The data is held here: - */ - struct intel_region *region; - - /* These are also refcounted: - */ - GLuint refcount; -}; - - - -struct intel_mipmap_tree *intel_miptree_create( struct intel_context *intel, - GLenum target, - GLenum internal_format, - GLuint first_level, - GLuint last_level, - GLuint width0, - GLuint height0, - GLuint depth0, - GLuint cpp, - GLboolean compressed); - -void intel_miptree_destroy( struct intel_context *intel, - struct intel_mipmap_tree *mt ); - - -/* Return the linear offset of an image relative to the start of the - * tree: - */ -GLuint intel_miptree_image_offset( struct intel_mipmap_tree *mt, - GLuint face, - GLuint level ); - -/* Return pointers to each 2d slice within an image. Indexed by depth - * value. - */ -const GLuint *intel_miptree_depth_offsets(struct intel_mipmap_tree *mt, - GLuint level); - - -void intel_miptree_set_level_info(struct intel_mipmap_tree *mt, - GLuint level, - GLuint nr_images, - GLuint x, GLuint y, - GLuint w, GLuint h, GLuint d); - -void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt, - GLuint level, - GLuint img, - GLuint x, GLuint y); - - -/* Upload an image into a tree - */ -GLboolean intel_miptree_image_data(struct intel_context *intel, - struct intel_mipmap_tree *dst, - GLuint face, - GLuint level, - const void *src, - GLuint src_row_pitch, - GLuint src_image_pitch); - -/* i915_mipmap_tree.c: - */ -GLboolean i915_miptree_layout( struct intel_mipmap_tree *mt ); -GLboolean i945_miptree_layout( struct intel_mipmap_tree *mt ); -GLboolean brw_miptree_layout( struct intel_mipmap_tree *mt ); - - - -#endif diff --git a/src/mesa/drivers/dri/i965/intel_pixel.c b/src/mesa/drivers/dri/i965/intel_pixel.c new file mode 120000 index 00000000000..d733c5e8745 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_pixel.c @@ -0,0 +1 @@ +../intel/intel_pixel.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c index 421fcc5e511..9085c7b0397 100644..120000 --- a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c @@ -1,353 +1 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portionsalloc - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "glheader.h" -#include "enums.h" -#include "image.h" -#include "colormac.h" -#include "mtypes.h" -#include "macros.h" -#include "bufferobj.h" -#include "swrast/swrast.h" - -#include "intel_screen.h" -#include "intel_context.h" -#include "intel_ioctl.h" -#include "intel_batchbuffer.h" -#include "intel_blit.h" -#include "intel_regions.h" -#include "intel_buffer_objects.h" - - - -#define FILE_DEBUG_FLAG DEBUG_PIXEL - - -/* Unlike the other intel_pixel_* functions, the expectation here is - * that the incoming data is not in a PBO. With the XY_TEXT blit - * method, there's no benefit haveing it in a PBO, but we could - * implement a path based on XY_MONO_SRC_COPY_BLIT which might benefit - * PBO bitmaps. I think they are probably pretty rare though - I - * wonder if Xgl uses them? - */ -static const GLubyte *map_pbo( GLcontext *ctx, - GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte *bitmap ) -{ - GLubyte *buf; - - if (!_mesa_validate_pbo_access(2, unpack, width, height, 1, - GL_COLOR_INDEX, GL_BITMAP, - (GLvoid *) bitmap)) { - _mesa_error(ctx, GL_INVALID_OPERATION,"glBitmap(invalid PBO access)"); - return NULL; - } - - buf = (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - GL_READ_ONLY_ARB, - unpack->BufferObj); - if (!buf) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)"); - return NULL; - } - - return ADD_POINTERS(buf, bitmap); -} - -static GLboolean test_bit( const GLubyte *src, - GLuint bit ) -{ - return (src[bit/8] & (1<<(bit % 8))) ? 1 : 0; -} - -static void set_bit( GLubyte *dest, - GLuint bit ) -{ - dest[bit/8] |= 1 << (bit % 8); -} - -static int align(int x, int align) -{ - return (x + align - 1) & ~(align - 1); -} - -/* Extract a rectangle's worth of data from the bitmap. Called - * per-cliprect. - */ -static GLuint get_bitmap_rect(GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte *bitmap, - GLuint x, GLuint y, - GLuint w, GLuint h, - GLubyte *dest, - GLuint row_align, - GLboolean invert) -{ - GLuint src_offset = (x + unpack->SkipPixels) & 0x7; - GLuint mask = unpack->LsbFirst ? 0 : 7; - GLuint bit = 0; - GLint row, col; - GLint first, last; - GLint incr; - GLuint count = 0; - - if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n", - __FUNCTION__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask); - - if (invert) { - first = h-1; - last = 0; - incr = -1; - } - else { - first = 0; - last = h-1; - incr = 1; - } - - /* Require that dest be pre-zero'd. - */ - for (row = first; row != (last+incr); row += incr) { - const GLubyte *rowsrc = _mesa_image_address2d(unpack, bitmap, - width, height, - GL_COLOR_INDEX, GL_BITMAP, - y + row, x); - - for (col = 0; col < w; col++, bit++) { - if (test_bit(rowsrc, (col + src_offset) ^ mask)) { - set_bit(dest, bit ^ 7); - count++; - } - } - - if (row_align) - bit = (bit + row_align - 1) & ~(row_align - 1); - } - - return count; -} - - - - -/* - * Render a bitmap. - */ -static GLboolean -do_blit_bitmap( GLcontext *ctx, - GLint dstx, GLint dsty, - GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte *bitmap ) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_region *dst = intel_drawbuf_region(intel); - - union { - GLuint ui; - GLubyte ub[4]; - } color; - - - if (unpack->BufferObj->Name) { - bitmap = map_pbo(ctx, width, height, unpack, bitmap); - if (bitmap == NULL) - return GL_TRUE; /* even though this is an error, we're done */ - } - - UNCLAMPED_FLOAT_TO_CHAN(color.ub[0], ctx->Current.RasterColor[2]); - UNCLAMPED_FLOAT_TO_CHAN(color.ub[1], ctx->Current.RasterColor[1]); - UNCLAMPED_FLOAT_TO_CHAN(color.ub[2], ctx->Current.RasterColor[0]); - UNCLAMPED_FLOAT_TO_CHAN(color.ub[3], ctx->Current.RasterColor[3]); - - /* Does zoom apply to bitmaps? - */ - if (!intel_check_blit_fragment_ops(ctx) || - ctx->Pixel.ZoomX != 1.0F || - ctx->Pixel.ZoomY != 1.0F) - return GL_FALSE; - - LOCK_HARDWARE(intel); - - if (intel->driDrawable->numClipRects) { - __DRIdrawablePrivate *dPriv = intel->driDrawable; - drm_clip_rect_t *box = dPriv->pClipRects; - drm_clip_rect_t dest_rect; - GLint nbox = dPriv->numClipRects; - GLint srcx = 0, srcy = 0; - GLint orig_screen_x1, orig_screen_y2; - GLuint i; - - - orig_screen_x1 = dPriv->x + dstx; - orig_screen_y2 = dPriv->y + (dPriv->h - dsty); - - /* Do scissoring in GL coordinates: - */ - if (ctx->Scissor.Enabled) - { - GLint x = ctx->Scissor.X; - GLint y = ctx->Scissor.Y; - GLuint w = ctx->Scissor.Width; - GLuint h = ctx->Scissor.Height; - - if (!_mesa_clip_to_region(x, y, x+w-1, y+h-1, &dstx, &dsty, &width, &height)) - goto out; - } - - /* Convert from GL to hardware coordinates: - */ - dsty = dPriv->y + (dPriv->h - dsty - height); - dstx = dPriv->x + dstx; - - dest_rect.x1 = dstx; - dest_rect.y1 = dsty; - dest_rect.x2 = dstx + width; - dest_rect.y2 = dsty + height; - - for (i = 0; i < nbox; i++) { - drm_clip_rect_t rect; - int box_w, box_h; - GLint px, py; - GLuint stipple[32]; - - if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i])) - continue; - - /* Now go back to GL coordinates to figure out what subset of - * the bitmap we are uploading for this cliprect: - */ - box_w = rect.x2 - rect.x1; - box_h = rect.y2 - rect.y1; - srcx = rect.x1 - orig_screen_x1; - srcy = orig_screen_y2 - rect.y2; - - -#define DY 32 -#define DX 32 - - /* Then, finally, chop it all into chunks that can be - * digested by hardware: - */ - for (py = 0; py < box_h; py += DY) { - for (px = 0; px < box_w; px += DX) { - int h = MIN2(DY, box_h - py); - int w = MIN2(DX, box_w - px); - GLuint sz = align(align(w,8) * h, 64)/8; - GLenum logic_op = ctx->Color.ColorLogicOpEnabled ? - ctx->Color.LogicOp : GL_COPY; - - assert(sz <= sizeof(stipple)); - memset(stipple, 0, sz); - - /* May need to adjust this when padding has been introduced in - * sz above: - */ - if (get_bitmap_rect(width, height, unpack, - bitmap, - srcx + px, srcy + py, w, h, - (GLubyte *)stipple, - 8, - GL_TRUE) == 0) - continue; - - /* - */ - intelEmitImmediateColorExpandBlit( intel, - dst->cpp, - (GLubyte *)stipple, - sz, - color.ui, - dst->pitch, - dst->buffer, - 0, - dst->tiled, - rect.x1 + px, - rect.y2 - (py + h), - w, h, - logic_op); - } - } - } - intel->need_flush = GL_TRUE; - out: - intel_batchbuffer_flush(intel->batch); - } - UNLOCK_HARDWARE(intel); - - - if (unpack->BufferObj->Name) { - /* done with PBO so unmap it now */ - ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, - unpack->BufferObj); - } - - return GL_TRUE; -} - - - - - -/* There are a large number of possible ways to implement bitmap on - * this hardware, most of them have some sort of drawback. Here are a - * few that spring to mind: - * - * Blit: - * - XY_MONO_SRC_BLT_CMD - * - use XY_SETUP_CLIP_BLT for cliprect clipping. - * - XY_TEXT_BLT - * - XY_TEXT_IMMEDIATE_BLT - * - blit per cliprect, subject to maximum immediate data size. - * - XY_COLOR_BLT - * - per pixel or run of pixels - * - XY_PIXEL_BLT - * - good for sparse bitmaps - * - * 3D engine: - * - Point per pixel - * - Translate bitmap to an alpha texture and render as a quad - * - Chop bitmap up into 32x32 squares and render w/polygon stipple. - */ -void -intelBitmap(GLcontext * ctx, - GLint x, GLint y, - GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte * pixels) -{ - if (do_blit_bitmap(ctx, x, y, width, height, - unpack, pixels)) - return; - - if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s: fallback to swrast\n", __FUNCTION__); - - _swrast_Bitmap(ctx, x, y, width, height, unpack, pixels); -} +../intel/intel_pixel_bitmap.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_pixel_copy.c b/src/mesa/drivers/dri/i965/intel_pixel_copy.c index 58dc49505fe..ee433605904 100644..120000 --- a/src/mesa/drivers/dri/i965/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_copy.c @@ -1,343 +1 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "glheader.h" -#include "enums.h" -#include "image.h" -#include "mtypes.h" -#include "macros.h" -#include "state.h" -#include "swrast/swrast.h" - -#include "intel_screen.h" -#include "intel_context.h" -#include "intel_ioctl.h" -#include "intel_batchbuffer.h" -#include "intel_blit.h" -#include "intel_regions.h" - - -static struct intel_region * -copypix_src_region(struct intel_context *intel, GLenum type) -{ - switch (type) { - case GL_COLOR: - return intel_readbuf_region(intel); - case GL_DEPTH: - /* Don't think this is really possible execpt at 16bpp, when we have no stencil. - */ - if (intel->depth_region && intel->depth_region->cpp == 2) - return intel->depth_region; - case GL_STENCIL: - /* Don't think this is really possible. - */ - break; - case GL_DEPTH_STENCIL_EXT: - /* Does it matter whether it is stencil/depth or depth/stencil? - */ - return intel->depth_region; - default: - break; - } - - return NULL; -} - - - - -/** - * Check if any fragment operations are in effect which might effect - * glDraw/CopyPixels. - */ -GLboolean -intel_check_blit_fragment_ops(GLcontext * ctx) -{ - if (ctx->NewState) - _mesa_update_state(ctx); - - return !(ctx->_ImageTransferState || - ctx->RenderMode != GL_RENDER || - ctx->Color.AlphaEnabled || - ctx->Depth.Test || - ctx->Fog.Enabled || - ctx->Stencil.Enabled || - !ctx->Color.ColorMask[0] || - !ctx->Color.ColorMask[1] || - !ctx->Color.ColorMask[2] || - !ctx->Color.ColorMask[3] || /* can do this! */ - ctx->Texture._EnabledUnits || - ctx->FragmentProgram._Enabled || - ctx->Color.BlendEnabled); -} - -/* Doesn't work for overlapping regions. Could do a double copy or - * just fallback. - */ -static GLboolean -do_texture_copypixels(GLcontext * ctx, - GLint srcx, GLint srcy, - GLsizei width, GLsizei height, - GLint dstx, GLint dsty, GLenum type) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_region *dst = intel_drawbuf_region(intel); - struct intel_region *src = copypix_src_region(intel, type); - GLenum src_format; - GLenum src_type; - - DBG("%s %d,%d %dx%d --> %d,%d\n", __FUNCTION__, - srcx, srcy, width, height, dstx, dsty); - - if (!src || !dst || type != GL_COLOR || - ctx->_ImageTransferState || - ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F || - ctx->RenderMode != GL_RENDER || - ctx->Texture._EnabledUnits || - ctx->FragmentProgram._Enabled || - src != dst ) - return GL_FALSE; - - /* Can't handle overlapping regions. Don't have sufficient control - * over rasterization to pull it off in-place. Punt on these for - * now. - * - * XXX: do a copy to a temporary. - */ - if (src->buffer == dst->buffer) { - drm_clip_rect_t srcbox; - drm_clip_rect_t dstbox; - drm_clip_rect_t tmp; - - srcbox.x1 = srcx; - srcbox.y1 = srcy; - srcbox.x2 = srcx + width - 1; - srcbox.y2 = srcy + height - 1; - - dstbox.x1 = dstx; - dstbox.y1 = dsty; - dstbox.x2 = dstx + width - 1; - dstbox.y2 = dsty + height - 1; - - DBG("src %d,%d %d,%d\n", srcbox.x1, srcbox.y1, srcbox.x2, srcbox.y2); - DBG("dst %d,%d %d,%d (%dx%d) (%f,%f)\n", dstbox.x1, dstbox.y1, dstbox.x2, dstbox.y2, - width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY); - - if (intel_intersect_cliprects(&tmp, &srcbox, &dstbox)) { - DBG("%s: regions overlap\n", __FUNCTION__); - return GL_FALSE; - } - } - - intelFlush(&intel->ctx); - - intel->vtbl.install_meta_state(intel); - - /* Is this true? Also will need to turn depth testing on according - * to state: - */ - intel->vtbl.meta_no_stencil_write(intel); - intel->vtbl.meta_no_depth_write(intel); - - /* Set the 3d engine to draw into the destination region: - */ - intel->vtbl.meta_draw_region(intel, dst, intel->depth_region); - - intel->vtbl.meta_import_pixel_state(intel); - - if (src->cpp == 2) { - src_format = GL_RGB; - src_type = GL_UNSIGNED_SHORT_5_6_5; - } - else { - src_format = GL_BGRA; - src_type = GL_UNSIGNED_BYTE; - } - - /* Set the frontbuffer up as a large rectangular texture. - */ - intel->vtbl.meta_frame_buffer_texture( intel, srcx - dstx, srcy - dsty ); - - intel->vtbl.meta_texture_blend_replace(intel); - - if (intel->driDrawable->numClipRects) - intel->vtbl.meta_draw_quad( intel, - dstx, dstx + width, - dsty, dsty + height, - ctx->Current.RasterPos[ 2 ], - 0, 0, 0, 0, 0.0, 0.0, 0.0, 0.0 ); - - intel->vtbl.leave_meta_state( intel ); - - DBG("%s: success\n", __FUNCTION__); - return GL_TRUE; -} - -/** - * CopyPixels with the blitter. Don't support zooming, pixel transfer, etc. - */ -static GLboolean -do_blit_copypixels(GLcontext * ctx, - GLint srcx, GLint srcy, - GLsizei width, GLsizei height, - GLint dstx, GLint dsty, GLenum type) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_region *dst = intel_drawbuf_region(intel); - struct intel_region *src = copypix_src_region(intel, type); - - /* Copypixels can be more than a straight copy. Ensure all the - * extra operations are disabled: - */ - if (!intel_check_blit_fragment_ops(ctx) || - ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) - return GL_FALSE; - - if (!src || !dst) - return GL_FALSE; - - - - intelFlush(&intel->ctx); - -/* intel->vtbl.render_start(intel); */ -/* intel->vtbl.emit_state(intel); */ - - LOCK_HARDWARE(intel); - - if (intel->driDrawable->numClipRects) { - __DRIdrawablePrivate *dPriv = intel->driDrawable; - drm_clip_rect_t *box = dPriv->pClipRects; - drm_clip_rect_t dest_rect; - GLint nbox = dPriv->numClipRects; - GLint delta_x = 0; - GLint delta_y = 0; - GLuint i; - - /* Do scissoring in GL coordinates: - */ - if (ctx->Scissor.Enabled) - { - GLint x = ctx->Scissor.X; - GLint y = ctx->Scissor.Y; - GLuint w = ctx->Scissor.Width; - GLuint h = ctx->Scissor.Height; - GLint dx = dstx - srcx; - GLint dy = dsty - srcy; - - if (!_mesa_clip_to_region(x, y, x+w-1, y+h-1, &dstx, &dsty, &width, &height)) - goto out; - - srcx = dstx - dx; - srcy = dsty - dy; - } - - /* Convert from GL to hardware coordinates: - */ - dsty = dPriv->h - dsty - height; - srcy = dPriv->h - srcy - height; - dstx += dPriv->x; - dsty += dPriv->y; - srcx += dPriv->x; - srcy += dPriv->y; - - /* Clip against the source region. This is the only source - * clipping we do. Dst is clipped with cliprects below. - */ - { - delta_x = srcx - dstx; - delta_y = srcy - dsty; - - if (!_mesa_clip_to_region(0, 0, src->pitch, src->height, - &srcx, &srcy, &width, &height)) - goto out; - - dstx = srcx - delta_x; - dsty = srcy - delta_y; - } - - dest_rect.x1 = dstx; - dest_rect.y1 = dsty; - dest_rect.x2 = dstx + width; - dest_rect.y2 = dsty + height; - -/* intel->vtbl.emit_flush(intel, 0); */ - - /* Could do slightly more clipping: Eg, take the intersection of - * the existing set of cliprects and those cliprects translated - * by delta_x, delta_y: - * - * This code will not overwrite other windows, but will - * introduce garbage when copying from obscured window regions. - */ - for (i = 0; i < nbox; i++) { - drm_clip_rect_t rect; - - if (!intel_intersect_cliprects(&rect, &dest_rect, &box[i])) - continue; - - - intelEmitCopyBlit(intel, - dst->cpp, - src->pitch, src->buffer, 0, src->tiled, - dst->pitch, dst->buffer, 0, dst->tiled, - rect.x1 + delta_x, - rect.y1 + delta_y, /* srcx, srcy */ - rect.x1, rect.y1, /* dstx, dsty */ - rect.x2 - rect.x1, rect.y2 - rect.y1, - ctx->Color.ColorLogicOpEnabled ? - ctx->Color.LogicOp : GL_COPY); - } - - intel->need_flush = GL_TRUE; - out: - intel_batchbuffer_flush(intel->batch); - } - UNLOCK_HARDWARE(intel); - return GL_TRUE; -} - -void -intelCopyPixels(GLcontext * ctx, - GLint srcx, GLint srcy, - GLsizei width, GLsizei height, - GLint destx, GLint desty, GLenum type) -{ - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s\n", __FUNCTION__); - - if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type)) - return; - - if (do_texture_copypixels(ctx, srcx, srcy, width, height, destx, desty, type)) - return; - - if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("fallback to _swrast_CopyPixels\n"); - - _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type); -} +../intel/intel_pixel_copy.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_pixel_draw.c b/src/mesa/drivers/dri/i965/intel_pixel_draw.c new file mode 120000 index 00000000000..8431a24edfc --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_pixel_draw.c @@ -0,0 +1 @@ +../intel/intel_pixel_draw.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h deleted file mode 100644 index 3c448b3559a..00000000000 --- a/src/mesa/drivers/dri/i965/intel_reg.h +++ /dev/null @@ -1,91 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef _INTEL_REG_H_ -#define _INTEL_REG_H_ - - - -#define CMD_3D (0x3<<29) - - -#define _3DPRIMITIVE ((0x3<<29)|(0x1f<<24)) -#define PRIM_INDIRECT (1<<23) -#define PRIM_INLINE (0<<23) -#define PRIM_INDIRECT_SEQUENTIAL (0<<17) -#define PRIM_INDIRECT_ELTS (1<<17) - -#define PRIM3D_TRILIST (0x0<<18) -#define PRIM3D_TRISTRIP (0x1<<18) -#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) -#define PRIM3D_TRIFAN (0x3<<18) -#define PRIM3D_POLY (0x4<<18) -#define PRIM3D_LINELIST (0x5<<18) -#define PRIM3D_LINESTRIP (0x6<<18) -#define PRIM3D_RECTLIST (0x7<<18) -#define PRIM3D_POINTLIST (0x8<<18) -#define PRIM3D_DIB (0x9<<18) -#define PRIM3D_MASK (0x1f<<18) - -#define I915PACKCOLOR4444(r,g,b,a) \ - ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4)) - -#define I915PACKCOLOR1555(r,g,b,a) \ - ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \ - ((a) ? 0x8000 : 0)) - -#define I915PACKCOLOR565(r,g,b) \ - ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3)) - -#define I915PACKCOLOR8888(r,g,b,a) \ - ((a<<24) | (r<<16) | (g<<8) | b) - - - - -#define BR00_BITBLT_CLIENT 0x40000000 -#define BR00_OP_COLOR_BLT 0x10000000 -#define BR00_OP_SRC_COPY_BLT 0x10C00000 -#define BR13_SOLID_PATTERN 0x80000000 - -#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|0x4) -#define XY_COLOR_BLT_WRITE_ALPHA (1<<21) -#define XY_COLOR_BLT_WRITE_RGB (1<<20) - -#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) -#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21) -#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20) - -#define XY_SRC_TILED (1<<15) -#define XY_DST_TILED (1<<11) - -#define FENCE_LINEAR 0 -#define FENCE_XMAJOR 1 -#define FENCE_YMAJOR 2 - -#endif diff --git a/src/mesa/drivers/dri/i965/intel_regions.c b/src/mesa/drivers/dri/i965/intel_regions.c index 835ecdd7257..89b2f15c10f 100644..120000 --- a/src/mesa/drivers/dri/i965/intel_regions.c +++ b/src/mesa/drivers/dri/i965/intel_regions.c @@ -1,295 +1 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Provide additional functionality on top of bufmgr buffers: - * - 2d semantics and blit operations - * - refcounting of buffers for multiple images in a buffer. - * - refcounting of buffer mappings. - * - some logic for moving the buffers to the best memory pools for - * given operations. - * - * Most of this is to make it easier to implement the fixed-layout - * mipmap tree required by intel hardware in the face of GL's - * programming interface where each image can be specifed in random - * order and it isn't clear what layout the tree should have until the - * last moment. - */ - -#include "intel_context.h" -#include "intel_regions.h" -#include "intel_blit.h" -#include "bufmgr.h" -#include "imports.h" - -/* XXX: Thread safety? - */ -GLubyte *intel_region_map(struct intel_context *intel, struct intel_region *region) -{ - DBG("%s\n", __FUNCTION__); - if (!region->map_refcount++) { - region->map = bmMapBuffer(intel, region->buffer, 0); - if (!region->map) - region->map_refcount--; - } - - return region->map; -} - -void intel_region_unmap(struct intel_context *intel, - struct intel_region *region) -{ - DBG("%s\n", __FUNCTION__); - if (!--region->map_refcount) { - bmUnmapBufferAUB(intel, region->buffer, 0, 0); - region->map = NULL; - } -} - -struct intel_region *intel_region_alloc( struct intel_context *intel, - GLuint cpp, - GLuint pitch, - GLuint height ) -{ - struct intel_region *region = calloc(sizeof(*region), 1); - - DBG("%s %dx%dx%d == 0x%x bytes\n", __FUNCTION__, - cpp, pitch, height, cpp*pitch*height); - - region->cpp = cpp; - region->pitch = pitch; - region->height = height; /* needed? */ - region->refcount = 1; - - bmGenBuffers(intel, "tex", 1, ®ion->buffer, 6); - bmBufferData(intel, region->buffer, pitch * cpp * height, NULL, 0); - - return region; -} - -void intel_region_reference( struct intel_region **dst, - struct intel_region *src) -{ - src->refcount++; - assert(*dst == NULL); - *dst = src; -} - -void intel_region_release( struct intel_context *intel, - struct intel_region **region ) -{ - if (!*region) - return; - - DBG("%s %d\n", __FUNCTION__, (*region)->refcount-1); - - if (--(*region)->refcount == 0) { - assert((*region)->map_refcount == 0); - bmDeleteBuffers(intel, 1, &(*region)->buffer); - free(*region); - } - *region = NULL; -} - - -struct intel_region *intel_region_create_static( struct intel_context *intel, - GLuint mem_type, - GLuint offset, - void *virtual, - GLuint cpp, - GLuint pitch, - GLuint height, - GLuint size, - GLboolean tiled ) -{ - struct intel_region *region = calloc(sizeof(*region), 1); - GLint pool; - - DBG("%s\n", __FUNCTION__); - - region->cpp = cpp; - region->pitch = pitch; - region->height = height; /* needed? */ - region->refcount = 1; - region->tiled = tiled; - - /* Recipe for creating a static buffer - create a static pool with - * the right offset and size, generate a buffer and use a special - * call to bind it to all of the memory in that pool. - */ - pool = bmInitPool(intel, offset, virtual, size, - (BM_MEM_AGP | - BM_NO_UPLOAD | - BM_NO_EVICT | - BM_NO_MOVE)); - if (pool < 0) { - _mesa_printf("bmInitPool failed for static region\n"); - exit(1); - } - - region->buffer = bmGenBufferStatic(intel, pool); - - return region; -} - - - - -void _mesa_copy_rect( GLubyte *dst, - GLuint cpp, - GLuint dst_pitch, - GLuint dst_x, - GLuint dst_y, - GLuint width, - GLuint height, - const GLubyte *src, - GLuint src_pitch, - GLuint src_x, - GLuint src_y ) -{ - GLuint i; - - dst_pitch *= cpp; - src_pitch *= cpp; - dst += dst_x * cpp; - src += src_x * cpp; - dst += dst_y * dst_pitch; - src += src_y * dst_pitch; - width *= cpp; - - if (width == dst_pitch && - width == src_pitch) - do_memcpy(dst, src, height * width); - else { - for (i = 0; i < height; i++) { - do_memcpy(dst, src, width); - dst += dst_pitch; - src += src_pitch; - } - } -} - - -/* Upload data to a rectangular sub-region. Lots of choices how to do this: - * - * - memcpy by span to current destination - * - upload data as new buffer and blit - * - * Currently always memcpy. - */ -GLboolean intel_region_data(struct intel_context *intel, - struct intel_region *dst, - GLuint dst_offset, - GLuint dstx, GLuint dsty, - const void *src, GLuint src_pitch, - GLuint srcx, GLuint srcy, - GLuint width, GLuint height) -{ - DBG("%s\n", __FUNCTION__); - - if (width == dst->pitch && - width == src_pitch && - dst_offset == 0 && - height == dst->height && - srcx == 0 && - srcy == 0) - { - return (bmBufferDataAUB(intel, - dst->buffer, - dst->cpp * width * dst->height, - src, 0, 0, 0) == 0); - } - else { - GLubyte *map = intel_region_map(intel, dst); - - if (map) { - assert (dst_offset + dstx + width + - (dsty + height - 1) * dst->pitch * dst->cpp <= - dst->pitch * dst->cpp * dst->height); - - _mesa_copy_rect(map + dst_offset, - dst->cpp, - dst->pitch, - dstx, dsty, - width, height, - src, - src_pitch, - srcx, srcy); - - intel_region_unmap(intel, dst); - return GL_TRUE; - } - else - return GL_FALSE; - } -} - -/* Copy rectangular sub-regions. Need better logic about when to - * push buffers into AGP - will currently do so whenever possible. - */ -void intel_region_copy( struct intel_context *intel, - struct intel_region *dst, - GLuint dst_offset, - GLuint dstx, GLuint dsty, - struct intel_region *src, - GLuint src_offset, - GLuint srcx, GLuint srcy, - GLuint width, GLuint height ) -{ - DBG("%s\n", __FUNCTION__); - - assert(src->cpp == dst->cpp); - - intelEmitCopyBlit(intel, - dst->cpp, - src->pitch, src->buffer, src_offset, src->tiled, - dst->pitch, dst->buffer, dst_offset, dst->tiled, - srcx, srcy, - dstx, dsty, - width, height, - GL_COPY ); -} - -/* Fill a rectangular sub-region. Need better logic about when to - * push buffers into AGP - will currently do so whenever possible. - */ -void intel_region_fill( struct intel_context *intel, - struct intel_region *dst, - GLuint dst_offset, - GLuint dstx, GLuint dsty, - GLuint width, GLuint height, - GLuint color ) -{ - DBG("%s\n", __FUNCTION__); - - intelEmitFillBlit(intel, - dst->cpp, - dst->pitch, dst->buffer, dst_offset, dst->tiled, - dstx, dsty, - width, height, - color ); -} - +../intel/intel_regions.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_regions.h b/src/mesa/drivers/dri/i965/intel_regions.h deleted file mode 100644 index d2235f1275b..00000000000 --- a/src/mesa/drivers/dri/i965/intel_regions.h +++ /dev/null @@ -1,140 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INTEL_REGIONS_H -#define INTEL_REGIONS_H - -#include "mtypes.h" -#include "bufmgr.h" /* for DBG! */ -struct intel_context; - -/* A layer on top of the bufmgr buffers that adds a few useful things: - * - * - Refcounting for local buffer references. - * - Refcounting for buffer maps - * - Buffer dimensions - pitch and height. - * - Blitter commands for copying 2D regions between buffers. - */ -struct intel_region { - struct buffer *buffer; - GLuint refcount; - GLuint cpp; - GLuint pitch; - GLuint height; - GLboolean tiled; - GLubyte *map; - GLuint map_refcount; -}; - -/* Allocate a refcounted region. Pointers to regions should only be - * copied by calling intel_reference_region(). - * - * No support for dynamically allocating tiled regions at this point. - */ -struct intel_region *intel_region_alloc( struct intel_context *intel, - GLuint cpp, - GLuint pitch, - GLuint height ); - -void intel_region_reference( struct intel_region **dst, - struct intel_region *src ); - -void intel_region_release(struct intel_context *intel, - struct intel_region **ib ); - -/* Static regions may be tiled. The assumption is that the X server - * has set up fence registers to define tiled zones in agp and these - * buffers are within those zones. Tiling regions without fence - * registers is more work. - */ -struct intel_region *intel_region_create_static( struct intel_context *intel, - GLuint mem_type, - GLuint offset, - void *virtual, - GLuint cpp, - GLuint pitch, - GLuint height, - GLuint size, - GLboolean tiled ); - -/* Map/unmap regions. This is refcounted also: - */ -GLubyte *intel_region_map(struct intel_context *intel, - struct intel_region *ib); - -void intel_region_unmap(struct intel_context *intel, - struct intel_region *ib); - - -/* Upload data to a rectangular sub-region - */ -GLboolean intel_region_data(struct intel_context *intel, - struct intel_region *dest, - GLuint dest_offset, - GLuint destx, GLuint desty, - const void *src, GLuint src_stride, - GLuint srcx, GLuint srcy, - GLuint width, GLuint height); - -/* Copy rectangular sub-regions - */ -void intel_region_copy( struct intel_context *intel, - struct intel_region *dest, - GLuint dest_offset, - GLuint destx, GLuint desty, - struct intel_region *src, - GLuint src_offset, - GLuint srcx, GLuint srcy, - GLuint width, GLuint height ); - -/* Fill a rectangular sub-region - */ -void intel_region_fill( struct intel_context *intel, - struct intel_region *dest, - GLuint dest_offset, - GLuint destx, GLuint desty, - GLuint width, GLuint height, - GLuint color ); - - -/*********************************************************************** - * Misc utilities: move to somewhere generic - */ -void _mesa_copy_rect( GLubyte *dst, - GLuint cpp, - GLuint dst_pitch, - GLuint dst_x, - GLuint dst_y, - GLuint width, - GLuint height, - const GLubyte *src, - GLuint src_pitch, - GLuint src_x, - GLuint src_y ); - - -#endif diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index bc3dd30b7ed..f2db48272b9 100644..120000 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1,701 +1 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "glheader.h" -#include "context.h" -#include "framebuffer.h" -#include "matrix.h" -#include "renderbuffer.h" -#include "simple_list.h" -#include "utils.h" -#include "vblank.h" -#include "xmlpool.h" - - -#include "intel_screen.h" - -#include "intel_context.h" -#include "intel_tex.h" -#include "intel_span.h" -#include "intel_ioctl.h" - -#include "i830_dri.h" - -PUBLIC const char __driConfigOptions[] = -DRI_CONF_BEGIN - DRI_CONF_SECTION_PERFORMANCE - DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) - DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) - DRI_CONF_SECTION_END - DRI_CONF_SECTION_QUALITY - DRI_CONF_FORCE_S3TC_ENABLE(false) - DRI_CONF_ALLOW_LARGE_TEXTURES(1) - DRI_CONF_SECTION_END -DRI_CONF_END; -const GLuint __driNConfigOptions = 4; - -#ifdef USE_NEW_INTERFACE -static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; -#endif /*USE_NEW_INTERFACE*/ - -/** - * Map all the memory regions described by the screen. - * \return GL_TRUE if success, GL_FALSE if error. - */ -GLboolean -intelMapScreenRegions(__DRIscreenPrivate *sPriv) -{ - intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private; - - if (intelScreen->front.handle) { - if (drmMap(sPriv->fd, - intelScreen->front.handle, - intelScreen->front.size, - (drmAddress *)&intelScreen->front.map) != 0) { - _mesa_problem(NULL, "drmMap(frontbuffer) failed!"); - return GL_FALSE; - } - } else { - /* Use the old static allocation method if the server isn't setting up - * a movable handle for us. Add in the front buffer offset from - * framebuffer start, as our span routines (unlike other drivers) expect - * the renderbuffer address to point to the beginning of the - * renderbuffer. - */ - intelScreen->front.map = (char *)sPriv->pFB; - if (intelScreen->front.map == NULL) { - fprintf(stderr, "Failed to find framebuffer mapping\n"); - return GL_FALSE; - } - } - - if (drmMap(sPriv->fd, - intelScreen->back.handle, - intelScreen->back.size, - (drmAddress *)&intelScreen->back.map) != 0) { - intelUnmapScreenRegions(intelScreen); - return GL_FALSE; - } - - if (drmMap(sPriv->fd, - intelScreen->depth.handle, - intelScreen->depth.size, - (drmAddress *)&intelScreen->depth.map) != 0) { - intelUnmapScreenRegions(intelScreen); - return GL_FALSE; - } - - if (drmMap(sPriv->fd, - intelScreen->tex.handle, - intelScreen->tex.size, - (drmAddress *)&intelScreen->tex.map) != 0) { - intelUnmapScreenRegions(intelScreen); - return GL_FALSE; - } - - if (0) - printf("Mappings: front: %p back: %p depth: %p tex: %p\n", - intelScreen->front.map, - intelScreen->back.map, - intelScreen->depth.map, - intelScreen->tex.map); - return GL_TRUE; -} - - -void -intelUnmapScreenRegions(intelScreenPrivate *intelScreen) -{ -#define REALLY_UNMAP 1 - /* If front.handle is present, we're doing the dynamic front buffer mapping, - * but if we've fallen back to static allocation then we shouldn't try to - * unmap here. - */ - if (intelScreen->front.handle) { -#if REALLY_UNMAP - if (drmUnmap(intelScreen->front.map, intelScreen->front.size) != 0) - printf("drmUnmap front failed!\n"); -#endif - intelScreen->front.map = NULL; - } - if (intelScreen->back.map) { -#if REALLY_UNMAP - if (drmUnmap(intelScreen->back.map, intelScreen->back.size) != 0) - printf("drmUnmap back failed!\n"); -#endif - intelScreen->back.map = NULL; - } - if (intelScreen->depth.map) { -#if REALLY_UNMAP - drmUnmap(intelScreen->depth.map, intelScreen->depth.size); - intelScreen->depth.map = NULL; -#endif - } - if (intelScreen->tex.map) { -#if REALLY_UNMAP - drmUnmap(intelScreen->tex.map, intelScreen->tex.size); - intelScreen->tex.map = NULL; -#endif - } -} - - -static void -intelPrintDRIInfo(intelScreenPrivate *intelScreen, - __DRIscreenPrivate *sPriv, - I830DRIPtr gDRIPriv) -{ - fprintf(stderr, "*** Front size: 0x%x offset: 0x%x pitch: %d\n", - intelScreen->front.size, intelScreen->front.offset, - intelScreen->front.pitch); - fprintf(stderr, "*** Back size: 0x%x offset: 0x%x pitch: %d\n", - intelScreen->back.size, intelScreen->back.offset, - intelScreen->back.pitch); - fprintf(stderr, "*** Depth size: 0x%x offset: 0x%x pitch: %d\n", - intelScreen->depth.size, intelScreen->depth.offset, - intelScreen->depth.pitch); - fprintf(stderr, "*** Rotated size: 0x%x offset: 0x%x pitch: %d\n", - intelScreen->rotated.size, intelScreen->rotated.offset, - intelScreen->rotated.pitch); - fprintf(stderr, "*** Texture size: 0x%x offset: 0x%x\n", - intelScreen->tex.size, intelScreen->tex.offset); - fprintf(stderr, "*** Memory : 0x%x\n", gDRIPriv->mem); -} - - -static void -intelPrintSAREA(volatile drmI830Sarea *sarea) -{ - fprintf(stderr, "SAREA: sarea width %d height %d\n", sarea->width, sarea->height); - fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch); - fprintf(stderr, - "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x\n", - sarea->front_offset, sarea->front_size, - (unsigned) sarea->front_handle); - fprintf(stderr, - "SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x\n", - sarea->back_offset, sarea->back_size, - (unsigned) sarea->back_handle); - fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x\n", - sarea->depth_offset, sarea->depth_size, - (unsigned) sarea->depth_handle); - fprintf(stderr, "SAREA: tex offset: 0x%08x size: 0x%x handle: 0x%x\n", - sarea->tex_offset, sarea->tex_size, - (unsigned) sarea->tex_handle); - fprintf(stderr, "SAREA: rotation: %d\n", sarea->rotation); - fprintf(stderr, - "SAREA: rotated offset: 0x%08x size: 0x%x\n", - sarea->rotated_offset, sarea->rotated_size); - fprintf(stderr, "SAREA: rotated pitch: %d\n", sarea->rotated_pitch); -} - - -/** - * A number of the screen parameters are obtained/computed from - * information in the SAREA. This function updates those parameters. - */ -void -intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen, - volatile drmI830Sarea *sarea) -{ - intelScreen->width = sarea->width; - intelScreen->height = sarea->height; - - intelScreen->front.offset = sarea->front_offset; - intelScreen->front.pitch = sarea->pitch * intelScreen->cpp; - intelScreen->front.handle = sarea->front_handle; - intelScreen->front.size = sarea->front_size; - intelScreen->front.tiled = sarea->front_tiled; - - intelScreen->back.offset = sarea->back_offset; - intelScreen->back.pitch = sarea->pitch * intelScreen->cpp; - intelScreen->back.handle = sarea->back_handle; - intelScreen->back.size = sarea->back_size; - intelScreen->back.tiled = sarea->back_tiled; - - intelScreen->depth.offset = sarea->depth_offset; - intelScreen->depth.pitch = sarea->pitch * intelScreen->cpp; - intelScreen->depth.handle = sarea->depth_handle; - intelScreen->depth.size = sarea->depth_size; - intelScreen->depth.tiled = sarea->depth_tiled; - - intelScreen->tex.offset = sarea->tex_offset; - intelScreen->logTextureGranularity = sarea->log_tex_granularity; - intelScreen->tex.handle = sarea->tex_handle; - intelScreen->tex.size = sarea->tex_size; - - intelScreen->rotated.offset = sarea->rotated_offset; - intelScreen->rotated.pitch = sarea->rotated_pitch * intelScreen->cpp; - intelScreen->rotated.size = sarea->rotated_size; - intelScreen->rotated.tiled = sarea->rotated_tiled; - intelScreen->current_rotation = sarea->rotation; -#if 0 - matrix23Rotate(&intelScreen->rotMatrix, - sarea->width, sarea->height, sarea->rotation); -#endif - intelScreen->rotatedWidth = sarea->virtualX; - intelScreen->rotatedHeight = sarea->virtualY; - - if (0) - intelPrintSAREA(sarea); -} - - -static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv) -{ - intelScreenPrivate *intelScreen; - I830DRIPtr gDRIPriv = (I830DRIPtr)sPriv->pDevPriv; - PFNGLXSCRENABLEEXTENSIONPROC glx_enable_extension = - (PFNGLXSCRENABLEEXTENSIONPROC) (*dri_interface->getProcAddress("glxEnableExtension")); - void * const psc = sPriv->psc->screenConfigs; - volatile drmI830Sarea *sarea; - - if (sPriv->devPrivSize != sizeof(I830DRIRec)) { - fprintf(stderr,"\nERROR! sizeof(I830DRIRec) (%ld) does not match passed size from device driver (%d)\n", (unsigned long)sizeof(I830DRIRec), sPriv->devPrivSize); - return GL_FALSE; - } - - /* Allocate the private area */ - intelScreen = (intelScreenPrivate *)CALLOC(sizeof(intelScreenPrivate)); - if (!intelScreen) { - fprintf(stderr,"\nERROR! Allocating private area failed\n"); - return GL_FALSE; - } - /* parse information in __driConfigOptions */ - driParseOptionInfo (&intelScreen->optionCache, - __driConfigOptions, __driNConfigOptions); - - intelScreen->driScrnPriv = sPriv; - sPriv->private = (void *)intelScreen; - intelScreen->sarea_priv_offset = gDRIPriv->sarea_priv_offset; - sarea = (volatile drmI830Sarea *) - (((GLubyte *)sPriv->pSAREA)+intelScreen->sarea_priv_offset); - - intelScreen->deviceID = gDRIPriv->deviceID; - intelScreen->mem = gDRIPriv->mem; - intelScreen->cpp = gDRIPriv->cpp; - - switch (gDRIPriv->bitsPerPixel) { - case 15: intelScreen->fbFormat = DV_PF_555; break; - case 16: intelScreen->fbFormat = DV_PF_565; break; - case 32: intelScreen->fbFormat = DV_PF_8888; break; - } - - intelUpdateScreenFromSAREA(intelScreen, sarea); - - if (0) - intelPrintDRIInfo(intelScreen, sPriv, gDRIPriv); - - if (!intelMapScreenRegions(sPriv)) { - fprintf(stderr,"\nERROR! mapping regions\n"); - _mesa_free(intelScreen); - sPriv->private = NULL; - return GL_FALSE; - } - - intelScreen->drmMinor = sPriv->drmMinor; - - /* Determine if IRQs are active? */ - { - int ret; - drmI830GetParam gp; - - gp.param = I830_PARAM_IRQ_ACTIVE; - gp.value = &intelScreen->irq_active; - - ret = drmCommandWriteRead( sPriv->fd, DRM_I830_GETPARAM, - &gp, sizeof(gp)); - if (ret) { - fprintf(stderr, "drmI830GetParam: %d\n", ret); - return GL_FALSE; - } - } - - /* Determine if batchbuffers are allowed */ - { - int ret; - drmI830GetParam gp; - - gp.param = I830_PARAM_ALLOW_BATCHBUFFER; - gp.value = &intelScreen->allow_batchbuffer; - - ret = drmCommandWriteRead( sPriv->fd, DRM_I830_GETPARAM, - &gp, sizeof(gp)); - if (ret) { - fprintf(stderr, "drmI830GetParam: (%d) %d\n", gp.param, ret); - return GL_FALSE; - } - } - - if (glx_enable_extension != NULL) { - (*glx_enable_extension)( psc, "GLX_SGI_swap_control" ); - (*glx_enable_extension)( psc, "GLX_SGI_video_sync" ); - (*glx_enable_extension)( psc, "GLX_MESA_swap_control" ); - (*glx_enable_extension)( psc, "GLX_MESA_swap_frame_usage" ); - (*glx_enable_extension)( psc, "GLX_SGI_make_current_read" ); - (*glx_enable_extension)( psc, "GLX_MESA_copy_sub_buffer" ); - } - - return GL_TRUE; -} - - -static void intelDestroyScreen(__DRIscreenPrivate *sPriv) -{ - intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private; - - intelUnmapScreenRegions(intelScreen); - FREE(intelScreen); - sPriv->private = NULL; -} - -static GLboolean intelCreateBuffer( __DRIscreenPrivate *driScrnPriv, - __DRIdrawablePrivate *driDrawPriv, - const __GLcontextModes *mesaVis, - GLboolean isPixmap ) -{ - intelScreenPrivate *screen = (intelScreenPrivate *) driScrnPriv->private; - - if (isPixmap) { - return GL_FALSE; /* not implemented */ - } else { - GLboolean swStencil = (mesaVis->stencilBits > 0 && - mesaVis->depthBits != 24); - - struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis); - - { - driRenderbuffer *frontRb - = driNewRenderbuffer(GL_RGBA, - screen->front.map, - screen->cpp, - screen->front.offset, screen->front.pitch, - driDrawPriv); - intelSetSpanFunctions(frontRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base); - } - - if (mesaVis->doubleBufferMode) { - driRenderbuffer *backRb - = driNewRenderbuffer(GL_RGBA, - screen->back.map, - screen->cpp, - screen->back.offset, screen->back.pitch, - driDrawPriv); - intelSetSpanFunctions(backRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base); - } - - if (mesaVis->depthBits == 16) { - driRenderbuffer *depthRb - = driNewRenderbuffer(GL_DEPTH_COMPONENT16, - screen->depth.map, - screen->cpp, - screen->depth.offset, screen->depth.pitch, - driDrawPriv); - intelSetSpanFunctions(depthRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base); - } - else if (mesaVis->depthBits == 24) { - driRenderbuffer *depthRb - = driNewRenderbuffer(GL_DEPTH_COMPONENT24, - screen->depth.map, - screen->cpp, - screen->depth.offset, screen->depth.pitch, - driDrawPriv); - intelSetSpanFunctions(depthRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base); - } - - if (mesaVis->stencilBits > 0 && !swStencil) { - driRenderbuffer *stencilRb - = driNewRenderbuffer(GL_STENCIL_INDEX8_EXT, - screen->depth.map, - screen->cpp, - screen->depth.offset, screen->depth.pitch, - driDrawPriv); - intelSetSpanFunctions(stencilRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base); - } - - _mesa_add_soft_renderbuffers(fb, - GL_FALSE, /* color */ - GL_FALSE, /* depth */ - swStencil, - mesaVis->accumRedBits > 0, - GL_FALSE, /* alpha */ - GL_FALSE /* aux */); - driDrawPriv->driverPrivate = (void *) fb; - - return (driDrawPriv->driverPrivate != NULL); - } -} - -static void intelDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) -{ - _mesa_unreference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate))); -} - - -/** - * Get information about previous buffer swaps. - */ -static int -intelGetSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo ) -{ - struct intel_context *intel; - - if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL) - || (dPriv->driContextPriv->driverPrivate == NULL) - || (sInfo == NULL) ) { - return -1; - } - - intel = dPriv->driContextPriv->driverPrivate; - sInfo->swap_count = intel->swap_count; - sInfo->swap_ust = intel->swap_ust; - sInfo->swap_missed_count = intel->swap_missed_count; - - sInfo->swap_missed_usage = (sInfo->swap_missed_count != 0) - ? driCalculateSwapUsage( dPriv, 0, intel->swap_missed_ust ) - : 0.0; - - return 0; -} - - -/* There are probably better ways to do this, such as an - * init-designated function to register chipids and createcontext - * functions. - */ -extern GLboolean i830CreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate); - -extern GLboolean i915CreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate); - -extern GLboolean brwCreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate); - - - - -static GLboolean intelCreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate) -{ -#if 0 - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; - intelScreenPrivate *intelScreen = (intelScreenPrivate *)sPriv->private; - switch (intelScreen->deviceID) { - case PCI_CHIP_845_G: - case PCI_CHIP_I830_M: - case PCI_CHIP_I855_GM: - case PCI_CHIP_I865_G: - return i830CreateContext( mesaVis, driContextPriv, - sharedContextPrivate ); - - case PCI_CHIP_I915_G: - case PCI_CHIP_I915_GM: - case PCI_CHIP_I945_G: - case PCI_CHIP_I945_GM: - return i915CreateContext( mesaVis, driContextPriv, - sharedContextPrivate ); - - default: - fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID); - return GL_FALSE; - } -#else - return brwCreateContext( mesaVis, driContextPriv, - sharedContextPrivate ); -#endif -} - - -static const struct __DriverAPIRec intelAPI = { - .InitDriver = intelInitDriver, - .DestroyScreen = intelDestroyScreen, - .CreateContext = intelCreateContext, - .DestroyContext = intelDestroyContext, - .CreateBuffer = intelCreateBuffer, - .DestroyBuffer = intelDestroyBuffer, - .SwapBuffers = intelSwapBuffers, - .MakeCurrent = intelMakeCurrent, - .UnbindContext = intelUnbindContext, - .GetSwapInfo = intelGetSwapInfo, - .GetMSC = driGetMSC32, - .WaitForMSC = driWaitForMSC32, - .WaitForSBC = NULL, - .SwapBuffersMSC = NULL, - .CopySubBuffer = intelCopySubBuffer -}; - - -static __GLcontextModes * -intelFillInModes( unsigned pixel_bits, unsigned depth_bits, - unsigned stencil_bits, GLboolean have_back_buffer ) -{ - __GLcontextModes * modes; - __GLcontextModes * m; - unsigned num_modes; - unsigned depth_buffer_factor; - unsigned back_buffer_factor; - GLenum fb_format; - GLenum fb_type; - - /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't - * support pageflipping at all. - */ - static const GLenum back_buffer_modes[] = { - GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML - }; - - uint8_t depth_bits_array[3]; - uint8_t stencil_bits_array[3]; - - - depth_bits_array[0] = 0; - depth_bits_array[1] = depth_bits; - depth_bits_array[2] = depth_bits; - - /* Just like with the accumulation buffer, always provide some modes - * with a stencil buffer. It will be a sw fallback, but some apps won't - * care about that. - */ - stencil_bits_array[0] = 0; - stencil_bits_array[1] = 0; - stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits; - - depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1; - back_buffer_factor = (have_back_buffer) ? 3 : 1; - - num_modes = depth_buffer_factor * back_buffer_factor * 4; - - if ( pixel_bits == 16 ) { - fb_format = GL_RGB; - fb_type = GL_UNSIGNED_SHORT_5_6_5; - } - else { - fb_format = GL_BGRA; - fb_type = GL_UNSIGNED_INT_8_8_8_8_REV; - } - - modes = (*dri_interface->createContextModes)( num_modes, sizeof( __GLcontextModes ) ); - m = modes; - if ( ! driFillInModes( & m, fb_format, fb_type, - depth_bits_array, stencil_bits_array, depth_buffer_factor, - back_buffer_modes, back_buffer_factor, - GLX_TRUE_COLOR ) ) { - fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", - __func__, __LINE__ ); - return NULL; - } - if ( ! driFillInModes( & m, fb_format, fb_type, - depth_bits_array, stencil_bits_array, depth_buffer_factor, - back_buffer_modes, back_buffer_factor, - GLX_DIRECT_COLOR ) ) { - fprintf( stderr, "[%s:%u] Error creating FBConfig!\n", - __func__, __LINE__ ); - return NULL; - } - - /* Mark the visual as slow if there are "fake" stencil bits. - */ - for ( m = modes ; m != NULL ; m = m->next ) { - if ( (m->stencilBits != 0) && (m->stencilBits != stencil_bits) ) { - m->visualRating = GLX_SLOW_CONFIG; - } - } - - return modes; -} - - -/** - * This is the bootstrap function for the driver. libGL supplies all of the - * requisite information about the system, and the driver initializes itself. - * This routine also fills in the linked list pointed to by \c driver_modes - * with the \c __GLcontextModes that the driver can support for windows or - * pbuffers. - * - * \return A pointer to a \c __DRIscreenPrivate on success, or \c NULL on - * failure. - */ -PUBLIC -void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, __DRIscreen *psc, - const __GLcontextModes * modes, - const __DRIversion * ddx_version, - const __DRIversion * dri_version, - const __DRIversion * drm_version, - const __DRIframebuffer * frame_buffer, - drmAddress pSAREA, int fd, - int internal_api_version, - const __DRIinterfaceMethods * interface, - __GLcontextModes ** driver_modes ) - -{ - __DRIscreenPrivate *psp; - static const __DRIversion ddx_expected = { 1, 6, 0 }; - static const __DRIversion dri_expected = { 4, 0, 0 }; - static const __DRIversion drm_expected = { 1, 3, 0 }; - - dri_interface = interface; - - if ( ! driCheckDriDdxDrmVersions2( "i915", - dri_version, & dri_expected, - ddx_version, & ddx_expected, - drm_version, & drm_expected ) ) { - return NULL; - } - - psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL, - ddx_version, dri_version, drm_version, - frame_buffer, pSAREA, fd, - internal_api_version, &intelAPI); - if ( psp != NULL ) { - I830DRIPtr dri_priv = (I830DRIPtr) psp->pDevPriv; - *driver_modes = intelFillInModes( dri_priv->cpp * 8, - (dri_priv->cpp == 2) ? 16 : 24, - (dri_priv->cpp == 2) ? 0 : 8, - GL_TRUE ); - /* Calling driInitExtensions here, with a NULL context pointer, does not actually - * enable the extensions. It just makes sure that all the dispatch offsets for all - * the extensions that *might* be enables are known. This is needed because the - * dispatch offsets need to be known when _mesa_context_create is called, but we can't - * enable the extensions until we have a context pointer. - * - * Hello chicken. Hello egg. How are you two today? - */ - intelInitExtensions(NULL, GL_FALSE); - } - - return (void *) psp; -} +../intel/intel_screen.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h deleted file mode 100644 index bf9a716082d..00000000000 --- a/src/mesa/drivers/dri/i965/intel_screen.h +++ /dev/null @@ -1,114 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef _INTEL_INIT_H_ -#define _INTEL_INIT_H_ - -#include <sys/time.h> -#include "dri_util.h" -#include "xmlconfig.h" -#include "i830_common.h" - -/* XXX: change name or eliminate to avoid conflict with "struct - * intel_region"!!! - */ -typedef struct { - drm_handle_t handle; - drmSize size; /* region size in bytes */ - char *map; /* memory map */ - int offset; /* from start of video mem, in bytes */ - int pitch; /* row stride, in pixels */ - unsigned int tiled; -} intelRegion; - -typedef struct -{ - intelRegion front; - intelRegion back; - intelRegion rotated; - intelRegion depth; - intelRegion tex; - - int deviceID; - int width; - int height; - int mem; /* unused */ - - int cpp; /* for front and back buffers */ - int fbFormat; - - int logTextureGranularity; - - __DRIscreenPrivate *driScrnPriv; - unsigned int sarea_priv_offset; - - int drmMinor; - - int irq_active; - int allow_batchbuffer; - -/* struct matrix23 rotMatrix; */ - - int current_rotation; /* 0, 90, 180 or 270 */ - int rotatedWidth, rotatedHeight; - - /** - * Configuration cache with default values for all contexts - */ - driOptionCache optionCache; -} intelScreenPrivate; - - -extern GLboolean -intelMapScreenRegions(__DRIscreenPrivate *sPriv); - -extern void -intelUnmapScreenRegions(intelScreenPrivate *intelScreen); - -extern void -intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen, - volatile drmI830Sarea *sarea); - -extern void -intelDestroyContext(__DRIcontextPrivate *driContextPriv); - -extern GLboolean -intelUnbindContext(__DRIcontextPrivate *driContextPriv); - -extern GLboolean -intelMakeCurrent(__DRIcontextPrivate *driContextPriv, - __DRIdrawablePrivate *driDrawPriv, - __DRIdrawablePrivate *driReadPriv); - -extern void -intelSwapBuffers(__DRIdrawablePrivate *dPriv); - -extern void -intelCopySubBuffer( __DRIdrawablePrivate *dPriv, - int x, int y, int w, int h ); - -#endif diff --git a/src/mesa/drivers/dri/i965/intel_span.c b/src/mesa/drivers/dri/i965/intel_span.c index 60fbeccdc54..05e5e8e5833 100644..120000 --- a/src/mesa/drivers/dri/i965/intel_span.c +++ b/src/mesa/drivers/dri/i965/intel_span.c @@ -1,283 +1 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "glheader.h" -#include "macros.h" -#include "mtypes.h" -#include "colormac.h" - -#include "intel_screen.h" -#include "intel_regions.h" -#include "intel_span.h" -#include "intel_ioctl.h" -#include "intel_tex.h" -#include "intel_batchbuffer.h" -#include "swrast/swrast.h" - -#undef DBG -#define DBG 0 - -#define LOCAL_VARS \ - struct intel_context *intel = intel_context(ctx); \ - __DRIdrawablePrivate *dPriv = intel->driDrawable; \ - driRenderbuffer *drb = (driRenderbuffer *) rb; \ - GLuint pitch = drb->pitch; \ - GLuint height = dPriv->h; \ - char *buf = (char *) drb->Base.Data + \ - dPriv->x * drb->cpp + \ - dPriv->y * pitch; \ - GLushort p; \ - (void) buf; (void) p - -#define LOCAL_DEPTH_VARS \ - struct intel_context *intel = intel_context(ctx); \ - __DRIdrawablePrivate *dPriv = intel->driDrawable; \ - driRenderbuffer *drb = (driRenderbuffer *) rb; \ - GLuint pitch = drb->pitch; \ - GLuint height = dPriv->h; \ - char *buf = (char *) drb->Base.Data + \ - dPriv->x * drb->cpp + \ - dPriv->y * pitch - -#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS - -#define INIT_MONO_PIXEL(p,color)\ - p = INTEL_PACKCOLOR565(color[0],color[1],color[2]) - -#define Y_FLIP(_y) (height - _y - 1) - -#define HW_LOCK() - -#define HW_UNLOCK() - -/* 16 bit, 565 rgb color spanline and pixel functions - */ -#define WRITE_RGBA( _x, _y, r, g, b, a ) \ - *(GLushort *)(buf + _x*2 + _y*pitch) = ( (((int)r & 0xf8) << 8) | \ - (((int)g & 0xfc) << 3) | \ - (((int)b & 0xf8) >> 3)) -#define WRITE_PIXEL( _x, _y, p ) \ - *(GLushort *)(buf + _x*2 + _y*pitch) = p - -#define READ_RGBA( rgba, _x, _y ) \ -do { \ - GLushort p = *(GLushort *)(buf + _x*2 + _y*pitch); \ - rgba[0] = (((p >> 11) & 0x1f) * 255) / 31; \ - rgba[1] = (((p >> 5) & 0x3f) * 255) / 63; \ - rgba[2] = (((p >> 0) & 0x1f) * 255) / 31; \ - rgba[3] = 255; \ -} while(0) - -#define TAG(x) intel##x##_565 -#include "spantmp.h" - -/* 15 bit, 555 rgb color spanline and pixel functions - */ -#define WRITE_RGBA( _x, _y, r, g, b, a ) \ - *(GLushort *)(buf + _x*2 + _y*pitch) = (((r & 0xf8) << 7) | \ - ((g & 0xf8) << 3) | \ - ((b & 0xf8) >> 3)) - -#define WRITE_PIXEL( _x, _y, p ) \ - *(GLushort *)(buf + _x*2 + _y*pitch) = p - -#define READ_RGBA( rgba, _x, _y ) \ -do { \ - GLushort p = *(GLushort *)(buf + _x*2 + _y*pitch); \ - rgba[0] = (p >> 7) & 0xf8; \ - rgba[1] = (p >> 3) & 0xf8; \ - rgba[2] = (p << 3) & 0xf8; \ - rgba[3] = 255; \ -} while(0) - -#define TAG(x) intel##x##_555 -#include "spantmp.h" - -/* 16 bit depthbuffer functions. - */ -#define WRITE_DEPTH( _x, _y, d ) \ - *(GLushort *)(buf + (_x)*2 + (_y)*pitch) = d; - -#define READ_DEPTH( d, _x, _y ) \ - d = *(GLushort *)(buf + (_x)*2 + (_y)*pitch); - - -#define TAG(x) intel##x##_z16 -#include "depthtmp.h" - - -#undef LOCAL_VARS -#define LOCAL_VARS \ - struct intel_context *intel = intel_context(ctx); \ - __DRIdrawablePrivate *dPriv = intel->driDrawable; \ - driRenderbuffer *drb = (driRenderbuffer *) rb; \ - GLuint pitch = drb->pitch; \ - GLuint height = dPriv->h; \ - char *buf = (char *)drb->Base.Data + \ - dPriv->x * drb->cpp + \ - dPriv->y * pitch; \ - GLuint p; \ - (void) buf; (void) p - -#undef INIT_MONO_PIXEL -#define INIT_MONO_PIXEL(p,color)\ - p = INTEL_PACKCOLOR8888(color[0],color[1],color[2],color[3]) - -/* 32 bit, 8888 argb color spanline and pixel functions - */ -#define WRITE_RGBA(_x, _y, r, g, b, a) \ - *(GLuint *)(buf + _x*4 + _y*pitch) = ((r << 16) | \ - (g << 8) | \ - (b << 0) | \ - (a << 24) ) - -#define WRITE_PIXEL(_x, _y, p) \ - *(GLuint *)(buf + _x*4 + _y*pitch) = p - - -#define READ_RGBA(rgba, _x, _y) \ - do { \ - GLuint p = *(GLuint *)(buf + _x*4 + _y*pitch); \ - rgba[0] = (p >> 16) & 0xff; \ - rgba[1] = (p >> 8) & 0xff; \ - rgba[2] = (p >> 0) & 0xff; \ - rgba[3] = (p >> 24) & 0xff; \ - } while (0) - -#define TAG(x) intel##x##_8888 -#include "spantmp.h" - - -/* 24/8 bit interleaved depth/stencil functions - */ -#define WRITE_DEPTH( _x, _y, d ) { \ - GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*pitch); \ - tmp &= 0xff000000; \ - tmp |= (d) & 0xffffff; \ - *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = tmp; \ -} - -#define READ_DEPTH( d, _x, _y ) \ - d = *(GLuint *)(buf + (_x)*4 + (_y)*pitch) & 0xffffff; - - -#define TAG(x) intel##x##_z24_s8 -#include "depthtmp.h" - -#define WRITE_STENCIL( _x, _y, d ) { \ - GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*pitch); \ - tmp &= 0xffffff; \ - tmp |= ((d)<<24); \ - *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = tmp; \ -} - -#define READ_STENCIL( d, _x, _y ) \ - d = *(GLuint *)(buf + (_x)*4 + (_y)*pitch) >> 24; - -#define TAG(x) intel##x##_z24_s8 -#include "stenciltmp.h" - - -/* Move locking out to get reasonable span performance. - */ -void intelSpanRenderStart( GLcontext *ctx ) -{ - struct intel_context *intel = intel_context(ctx); - - if (intel->need_flush) { - LOCK_HARDWARE(intel); - intel->vtbl.emit_flush(intel, 0); - intel_batchbuffer_flush(intel->batch); - intel->need_flush = 0; - UNLOCK_HARDWARE(intel); - intelFinish(&intel->ctx); - } - - - LOCK_HARDWARE(intel); - - /* Just map the framebuffer and all textures. Bufmgr code will - * take care of waiting on the necessary fences: - */ - intel_region_map(intel, intel->front_region); - intel_region_map(intel, intel->back_region); - intel_region_map(intel, intel->depth_region); -} - -void intelSpanRenderFinish( GLcontext *ctx ) -{ - struct intel_context *intel = intel_context( ctx ); - - _swrast_flush( ctx ); - - /* Now unmap the framebuffer: - */ - intel_region_unmap(intel, intel->front_region); - intel_region_unmap(intel, intel->back_region); - intel_region_unmap(intel, intel->depth_region); - - UNLOCK_HARDWARE( intel ); -} - -void intelInitSpanFuncs( GLcontext *ctx ) -{ - struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx); - swdd->SpanRenderStart = intelSpanRenderStart; - swdd->SpanRenderFinish = intelSpanRenderFinish; -} - - -/** - * Plug in the Get/Put routines for the given driRenderbuffer. - */ -void -intelSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis) -{ - if (drb->Base.InternalFormat == GL_RGBA) { - if (vis->redBits == 5 && vis->greenBits == 5 && vis->blueBits == 5) { - intelInitPointers_555(&drb->Base); - } - else if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) { - intelInitPointers_565(&drb->Base); - } - else { - assert(vis->redBits == 8); - assert(vis->greenBits == 8); - assert(vis->blueBits == 8); - intelInitPointers_8888(&drb->Base); - } - } - else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) { - intelInitDepthPointers_z16(&drb->Base); - } - else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) { - intelInitDepthPointers_z24_s8(&drb->Base); - } - else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) { - intelInitStencilPointers_z24_s8(&drb->Base); - } -} +../intel/intel_span.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_span.h b/src/mesa/drivers/dri/i965/intel_span.h deleted file mode 100644 index 2d4f8589d0f..00000000000 --- a/src/mesa/drivers/dri/i965/intel_span.h +++ /dev/null @@ -1,41 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef _INTEL_SPAN_H -#define _INTEL_SPAN_H - -#include "drirenderbuffer.h" - -extern void intelInitSpanFuncs( GLcontext *ctx ); - -extern void intelSpanRenderFinish( GLcontext *ctx ); -extern void intelSpanRenderStart( GLcontext *ctx ); - -extern void -intelSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis); - -#endif diff --git a/src/mesa/drivers/dri/i965/intel_state.c b/src/mesa/drivers/dri/i965/intel_state.c index 2e442db6198..67ef5f78c1a 100644 --- a/src/mesa/drivers/dri/i965/intel_state.c +++ b/src/mesa/drivers/dri/i965/intel_state.c @@ -26,18 +26,43 @@ **************************************************************************/ -#include "glheader.h" -#include "context.h" -#include "macros.h" -#include "enums.h" -#include "colormac.h" -#include "dd.h" +#include "main/glheader.h" +#include "main/context.h" +#include "main/macros.h" +#include "main/enums.h" +#include "main/colormac.h" +#include "main/dd.h" #include "intel_screen.h" #include "intel_context.h" #include "intel_regions.h" #include "swrast/swrast.h" +int intel_translate_shadow_compare_func( GLenum func ) +{ + switch(func) { + case GL_NEVER: + return COMPAREFUNC_ALWAYS; + case GL_LESS: + return COMPAREFUNC_LEQUAL; + case GL_LEQUAL: + return COMPAREFUNC_LESS; + case GL_GREATER: + return COMPAREFUNC_GEQUAL; + case GL_GEQUAL: + return COMPAREFUNC_GREATER; + case GL_NOTEQUAL: + return COMPAREFUNC_EQUAL; + case GL_EQUAL: + return COMPAREFUNC_NOTEQUAL; + case GL_ALWAYS: + return COMPAREFUNC_NEVER; + } + + fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func); + return COMPAREFUNC_NEVER; +} + int intel_translate_compare_func( GLenum func ) { switch(func) { @@ -170,15 +195,16 @@ int intel_translate_logic_op( GLenum opcode ) static void intelClearColor(GLcontext *ctx, const GLfloat color[4]) { struct intel_context *intel = intel_context(ctx); - intelScreenPrivate *screen = intel->intelScreen; UNCLAMPED_FLOAT_TO_RGBA_CHAN(intel->clear_chan, color); - intel->ClearColor = INTEL_PACKCOLOR(screen->fbFormat, - intel->clear_chan[0], - intel->clear_chan[1], - intel->clear_chan[2], - intel->clear_chan[3]); + intel->ClearColor8888 = INTEL_PACKCOLOR8888(intel->clear_chan[0], + intel->clear_chan[1], + intel->clear_chan[2], + intel->clear_chan[3]); + intel->ClearColor565 = INTEL_PACKCOLOR565(intel->clear_chan[0], + intel->clear_chan[1], + intel->clear_chan[2]); } diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c index 4523969bfa5..d77ce749a3e 100644..120000 --- a/src/mesa/drivers/dri/i965/intel_tex.c +++ b/src/mesa/drivers/dri/i965/intel_tex.c @@ -1,315 +1 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "mtypes.h" -#include "image.h" -#include "texstore.h" -#include "texformat.h" -#include "teximage.h" -#include "texobj.h" -#include "swrast/swrast.h" - - -#include "intel_context.h" -#include "intel_tex.h" -#include "intel_mipmap_tree.h" - - -static GLuint target_to_face( GLenum target ) -{ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: - return ((GLuint) target - - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X); - default: - return 0; - } -} - -static void intelTexImage1D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint border, - GLenum format, GLenum type, const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - struct intel_texture_object *intelObj = intel_texture_object(texObj); - - _mesa_store_teximage1d( ctx, target, level, internalFormat, - width, border, format, type, - pixels, packing, texObj, texImage ); - - intelObj->dirty_images[0] |= (1 << level); - intelObj->dirty |= 1; -} - -static void intelTexSubImage1D( GLcontext *ctx, - GLenum target, - GLint level, - GLint xoffset, - GLsizei width, - GLenum format, GLenum type, - const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - struct intel_texture_object *intelObj = intel_texture_object(texObj); - - _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, - format, type, pixels, packing, texObj, - texImage); - - intelObj->dirty_images[0] |= (1 << level); - intelObj->dirty |= 1; -} - - -/* Handles 2D, CUBE, RECT: - */ -static void intelTexImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLenum format, GLenum type, const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - struct intel_texture_object *intelObj = intel_texture_object(texObj); - GLuint face = target_to_face(target); - - _mesa_store_teximage2d( ctx, target, level, internalFormat, - width, height, border, format, type, - pixels, packing, texObj, texImage ); - - intelObj->dirty_images[face] |= (1 << level); - intelObj->dirty |= 1 << face; -} - -static void intelTexSubImage2D( GLcontext *ctx, - GLenum target, - GLint level, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, GLenum type, - const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - struct intel_texture_object *intelObj = intel_texture_object(texObj); - GLuint face = target_to_face(target); - - _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, - height, format, type, pixels, packing, texObj, - texImage); - - intelObj->dirty_images[face] |= (1 << level); - intelObj->dirty |= 1 << face; -} - -static void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLsizei imageSize, const GLvoid *data, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - struct intel_texture_object *intelObj = intel_texture_object(texObj); - GLuint face = target_to_face(target); - - _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width, - height, border, imageSize, data, texObj, texImage); - - intelObj->dirty_images[face] |= (1 << level); - intelObj->dirty |= 1 << face; -} - - -static void intelCompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, - GLsizei imageSize, const GLvoid *data, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - struct intel_texture_object *intelObj = intel_texture_object(texObj); - GLuint face = target_to_face(target); - - _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width, - height, format, imageSize, data, texObj, texImage); - - intelObj->dirty_images[face] |= (1 << level); - intelObj->dirty |= 1 << face; -} - - -static void intelTexImage3D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint depth, - GLint border, - GLenum format, GLenum type, const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - struct intel_texture_object *intelObj = intel_texture_object(texObj); - - _mesa_store_teximage3d(ctx, target, level, internalFormat, - width, height, depth, border, - format, type, pixels, - &ctx->Unpack, texObj, texImage); - - intelObj->dirty_images[0] |= (1 << level); - intelObj->dirty |= 1 << 0; -} - - -static void -intelTexSubImage3D( GLcontext *ctx, GLenum target, GLint level, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, - const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - struct intel_texture_object *intelObj = intel_texture_object(texObj); - - _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset, - width, height, depth, - format, type, pixels, packing, texObj, texImage); - - intelObj->dirty_images[0] |= (1 << level); - intelObj->dirty |= 1 << 0; -} - - - - -static struct gl_texture_object *intelNewTextureObject( GLcontext *ctx, - GLuint name, - GLenum target ) -{ - struct intel_texture_object *obj = CALLOC_STRUCT(intel_texture_object); - - _mesa_initialize_texture_object(&obj->base, name, target); - - return &obj->base; -} - -static GLboolean intelIsTextureResident(GLcontext *ctx, - struct gl_texture_object *texObj) -{ -#if 0 - struct intel_context *intel = intel_context(ctx); - struct intel_texture_object *intelObj = intel_texture_object(texObj); - - return - intelObj->mt && - intelObj->mt->region && - intel_is_region_resident(intel, intelObj->mt->region); -#endif - return 1; -} - - - -static void intelTexParameter( GLcontext *ctx, - GLenum target, - struct gl_texture_object *texObj, - GLenum pname, - const GLfloat *params ) -{ - struct intel_texture_object *intelObj = intel_texture_object(texObj); - - switch (pname) { - /* Anything which can affect the calculation of firstLevel and - * lastLevel, as changes to these may invalidate the miptree. - */ - case GL_TEXTURE_MIN_FILTER: - case GL_TEXTURE_MAG_FILTER: - case GL_TEXTURE_BASE_LEVEL: - case GL_TEXTURE_MAX_LEVEL: - case GL_TEXTURE_MIN_LOD: - case GL_TEXTURE_MAX_LOD: - intelObj->dirty |= 1; - break; - - default: - break; - } -} - - -static void -intel_delete_texture_object( GLcontext *ctx, struct gl_texture_object *texObj ) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_texture_object *intelObj = intel_texture_object(texObj); - - if (intelObj->mt) - intel_miptree_destroy(intel, intelObj->mt); - - _mesa_delete_texture_object( ctx, texObj ); -} - -void intelInitTextureFuncs( struct dd_function_table *functions ) -{ - functions->NewTextureObject = intelNewTextureObject; - functions->TexImage1D = intelTexImage1D; - functions->TexImage2D = intelTexImage2D; - functions->TexImage3D = intelTexImage3D; - functions->TexSubImage1D = intelTexSubImage1D; - functions->TexSubImage2D = intelTexSubImage2D; - functions->TexSubImage3D = intelTexSubImage3D; - functions->CopyTexImage1D = _swrast_copy_teximage1d; - functions->CopyTexImage2D = _swrast_copy_teximage2d; - functions->CopyTexSubImage1D = _swrast_copy_texsubimage1d; - functions->CopyTexSubImage2D = _swrast_copy_texsubimage2d; - functions->CopyTexSubImage3D = _swrast_copy_texsubimage3d; - functions->DeleteTexture = intel_delete_texture_object; - functions->UpdateTexturePalette = NULL; - functions->IsTextureResident = intelIsTextureResident; - functions->TestProxyTexImage = _mesa_test_proxy_teximage; - functions->CompressedTexImage2D = intelCompressedTexImage2D; - functions->CompressedTexSubImage2D = intelCompressedTexSubImage2D; - functions->TexParameter = intelTexParameter; -} - - - - - +../intel/intel_tex.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_tex.h b/src/mesa/drivers/dri/i965/intel_tex.h deleted file mode 100644 index e389d521461..00000000000 --- a/src/mesa/drivers/dri/i965/intel_tex.h +++ /dev/null @@ -1,42 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INTELTEX_INC -#define INTELTEX_INC - -#include "mtypes.h" -#include "intel_context.h" - - -void intelInitTextureFuncs( struct dd_function_table *functions ); - - -GLuint intel_finalize_mipmap_tree( struct intel_context *intel, - struct gl_texture_object *tObj ); - - -#endif diff --git a/src/mesa/drivers/dri/i965/intel_tex_copy.c b/src/mesa/drivers/dri/i965/intel_tex_copy.c new file mode 120000 index 00000000000..87196c5d1ed --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_tex_copy.c @@ -0,0 +1 @@ +../intel/intel_tex_copy.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_tex_format.c b/src/mesa/drivers/dri/i965/intel_tex_format.c new file mode 120000 index 00000000000..3415f754705 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_tex_format.c @@ -0,0 +1 @@ +../intel/intel_tex_format.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c new file mode 120000 index 00000000000..567abe4974e --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -0,0 +1 @@ +../intel/intel_tex_image.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c new file mode 120000 index 00000000000..b3a8a3d7ca7 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c @@ -0,0 +1 @@ +../intel/intel_tex_subimage.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c index 44ee94614d5..41a75674c27 100644..120000 --- a/src/mesa/drivers/dri/i965/intel_tex_validate.c +++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c @@ -1,256 +1 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "mtypes.h" -#include "macros.h" - -#include "intel_context.h" -#include "intel_mipmap_tree.h" -#include "intel_tex.h" -#include "bufmgr.h" - -/** - * Compute which mipmap levels that really need to be sent to the hardware. - * This depends on the base image size, GL_TEXTURE_MIN_LOD, - * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL. - */ -static void intel_calculate_first_last_level( struct intel_texture_object *intelObj ) -{ - struct gl_texture_object *tObj = &intelObj->base; - const struct gl_texture_image * const baseImage = - tObj->Image[0][tObj->BaseLevel]; - - /* These must be signed values. MinLod and MaxLod can be negative numbers, - * and having firstLevel and lastLevel as signed prevents the need for - * extra sign checks. - */ - int firstLevel; - int lastLevel; - - /* Yes, this looks overly complicated, but it's all needed. - */ - switch (tObj->Target) { - case GL_TEXTURE_1D: - case GL_TEXTURE_2D: - case GL_TEXTURE_3D: - case GL_TEXTURE_CUBE_MAP: - if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) { - /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL. - */ - firstLevel = lastLevel = tObj->BaseLevel; - } - else { - /* Currently not taking min/max lod into account here, those - * values are programmed as sampler state elsewhere and we - * upload the same mipmap levels regardless. Not sure if - * this makes sense as it means it isn't possible for the app - * to use min/max lod to reduce texture memory pressure: - */ - firstLevel = tObj->BaseLevel; - lastLevel = MIN2(tObj->BaseLevel + baseImage->MaxLog2, - tObj->MaxLevel); - lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */ - } - break; - case GL_TEXTURE_RECTANGLE_NV: - case GL_TEXTURE_4D_SGIS: - firstLevel = lastLevel = 0; - break; - default: - return; - } - - /* save these values */ - intelObj->firstLevel = firstLevel; - intelObj->lastLevel = lastLevel; -} - -static GLboolean copy_image_data_to_tree( struct intel_context *intel, - struct intel_texture_object *intelObj, - struct gl_texture_image *texImage, - GLuint face, - GLuint level) -{ - return intel_miptree_image_data(intel, - intelObj->mt, - face, - level, - texImage->Data, - texImage->RowStride, - (texImage->RowStride * - texImage->Height * - texImage->TexFormat->TexelBytes)); -} - -static void intel_texture_invalidate( struct intel_texture_object *intelObj ) -{ - GLint nr_faces, face; - intelObj->dirty = ~0; - - nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; - for (face = 0; face < nr_faces; face++) - intelObj->dirty_images[face] = ~0; -} - -static void intel_texture_invalidate_cb( struct intel_context *intel, - void *ptr ) -{ - intel_texture_invalidate( (struct intel_texture_object *) ptr ); -} - - -/* - */ -GLuint intel_finalize_mipmap_tree( struct intel_context *intel, - struct gl_texture_object *tObj ) -{ - struct intel_texture_object *intelObj = intel_texture_object(tObj); - GLuint face, i; - GLuint nr_faces = 0; - struct gl_texture_image *firstImage; - - if( tObj == intel->frame_buffer_texobj ) - return GL_FALSE; - - /* We know/require this is true by now: - */ - assert(intelObj->base._Complete); - - /* What levels must the tree include at a minimum? - */ - if (intelObj->dirty) { - intel_calculate_first_last_level( intelObj ); -/* intel_miptree_destroy(intel, intelObj->mt); */ -/* intelObj->mt = NULL; */ - } - - firstImage = intelObj->base.Image[0][intelObj->firstLevel]; - - /* Fallback case: - */ - if (firstImage->Border) { - if (intelObj->mt) { - intel_miptree_destroy(intel, intelObj->mt); - intelObj->mt = NULL; - /* Set all images dirty: - */ - intel_texture_invalidate(intelObj); - } - return GL_FALSE; - } - - - - /* Check tree can hold all active levels. Check tree matches - * target, imageFormat, etc. - */ - if (intelObj->mt && - (intelObj->mt->target != intelObj->base.Target || - intelObj->mt->internal_format != firstImage->InternalFormat || - intelObj->mt->first_level != intelObj->firstLevel || - intelObj->mt->last_level != intelObj->lastLevel || - intelObj->mt->width0 != firstImage->Width || - intelObj->mt->height0 != firstImage->Height || - intelObj->mt->depth0 != firstImage->Depth || - intelObj->mt->cpp != firstImage->TexFormat->TexelBytes || - intelObj->mt->compressed != firstImage->IsCompressed)) - { - intel_miptree_destroy(intel, intelObj->mt); - intelObj->mt = NULL; - - /* Set all images dirty: - */ - intel_texture_invalidate(intelObj); - } - - - /* May need to create a new tree: - */ - if (!intelObj->mt) { - intelObj->mt = intel_miptree_create(intel, - intelObj->base.Target, - firstImage->InternalFormat, - intelObj->firstLevel, - intelObj->lastLevel, - firstImage->Width, - firstImage->Height, - firstImage->Depth, - firstImage->TexFormat->TexelBytes, - firstImage->IsCompressed); - - /* Tell the buffer manager that we will manage the backing - * store, but we still want it to do fencing for us. - */ - bmBufferSetInvalidateCB(intel, - intelObj->mt->region->buffer, - intel_texture_invalidate_cb, - intelObj, - GL_FALSE); - } - - /* Pull in any images not in the object's tree: - */ - if (intelObj->dirty) { - nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; - for (face = 0; face < nr_faces; face++) { - if (intelObj->dirty_images[face]) { - for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) { - struct gl_texture_image *texImage = intelObj->base.Image[face][i]; - - /* Need to import images in main memory or held in other trees. - */ - if (intelObj->dirty_images[face] & (1<<i) && - texImage) { - - if (INTEL_DEBUG & DEBUG_TEXTURE) - _mesa_printf("copy data from image %d (%p) into object miptree\n", - i, - texImage->Data); - - if (!copy_image_data_to_tree(intel, - intelObj, - texImage, - face, - i)) - return GL_FALSE; - - } - } - } - } - - /* Only clear the dirty flags if everything went ok: - */ - for (face = 0; face < nr_faces; face++) { - intelObj->dirty_images[face] = 0; - } - - intelObj->dirty = 0; - } - - return GL_TRUE; -} +../intel/intel_tex_validate.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/server/i830_common.h b/src/mesa/drivers/dri/i965/server/i830_common.h deleted file mode 100644 index 49eb145f8bf..00000000000 --- a/src/mesa/drivers/dri/i965/server/i830_common.h +++ /dev/null @@ -1,221 +0,0 @@ -/************************************************************************** - -Copyright 2001 VA Linux Systems Inc., Fremont, California. -Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas. - -All Rights Reserved. - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation -on the rights to use, copy, modify, merge, publish, distribute, sub -license, and/or sell copies of the Software, and to permit persons to whom -the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice (including the next -paragraph) shall be included in all copies or substantial portions of the -Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, -DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE -USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - - -#ifndef _I830_COMMON_H_ -#define _I830_COMMON_H_ - - -#define I830_NR_TEX_REGIONS 255 /* maximum due to use of chars for next/prev */ -#define I830_LOG_MIN_TEX_REGION_SIZE 14 - - -/* Driver specific DRM command indices - * NOTE: these are not OS specific, but they are driver specific - */ -#define DRM_I830_INIT 0x00 -#define DRM_I830_FLUSH 0x01 -#define DRM_I830_FLIP 0x02 -#define DRM_I830_BATCHBUFFER 0x03 -#define DRM_I830_IRQ_EMIT 0x04 -#define DRM_I830_IRQ_WAIT 0x05 -#define DRM_I830_GETPARAM 0x06 -#define DRM_I830_SETPARAM 0x07 -#define DRM_I830_ALLOC 0x08 -#define DRM_I830_FREE 0x09 -#define DRM_I830_INIT_HEAP 0x0a -#define DRM_I830_CMDBUFFER 0x0b -#define DRM_I830_DESTROY_HEAP 0x0c -#define DRM_I830_MMIO 0x10 - -typedef struct { - enum { - I830_INIT_DMA = 0x01, - I830_CLEANUP_DMA = 0x02, - I830_RESUME_DMA = 0x03 - } func; - unsigned int mmio_offset; - int sarea_priv_offset; - unsigned int ring_start; - unsigned int ring_end; - unsigned int ring_size; - unsigned int front_offset; - unsigned int back_offset; - unsigned int depth_offset; - unsigned int w; - unsigned int h; - unsigned int pitch; - unsigned int pitch_bits; - unsigned int back_pitch; - unsigned int depth_pitch; - unsigned int cpp; - unsigned int chipset; -} drmI830Init; - -typedef struct { - drmTextureRegion texList[I830_NR_TEX_REGIONS+1]; - int last_upload; /* last time texture was uploaded */ - int last_enqueue; /* last time a buffer was enqueued */ - volatile int last_dispatch; /* age of the most recently dispatched buffer */ - int ctxOwner; /* last context to upload state */ - int texAge; - int pf_enabled; /* is pageflipping allowed? */ - int pf_active; - int pf_current_page; /* which buffer is being displayed? */ - int perf_boxes; /* performance boxes to be displayed */ - int width, height; /* screen size in pixels */ - - drm_handle_t front_handle; - int front_offset; - int front_size; - - drm_handle_t back_handle; - int back_offset; - int back_size; - - drm_handle_t depth_handle; - int depth_offset; - int depth_size; - - drm_handle_t tex_handle; - int tex_offset; - int tex_size; - int log_tex_granularity; - int pitch; - int rotation; /* 0, 90, 180 or 270 */ - int rotated_offset; - int rotated_size; - int rotated_pitch; - int virtualX, virtualY; - - unsigned int front_tiled; - unsigned int back_tiled; - unsigned int depth_tiled; - unsigned int rotated_tiled; - unsigned int rotated2_tiled; -} drmI830Sarea; - -/* Flags for perf_boxes - */ -#define I830_BOX_RING_EMPTY 0x1 /* populated by kernel */ -#define I830_BOX_FLIP 0x2 /* populated by kernel */ -#define I830_BOX_WAIT 0x4 /* populated by kernel & client */ -#define I830_BOX_TEXTURE_LOAD 0x8 /* populated by kernel */ -#define I830_BOX_LOST_CONTEXT 0x10 /* populated by client */ - - -typedef struct { - int start; /* agp offset */ - int used; /* nr bytes in use */ - int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */ - int DR4; /* window origin for GFX_OP_DRAWRECT_INFO*/ - int num_cliprects; /* mulitpass with multiple cliprects? */ - drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */ -} drmI830BatchBuffer; - -typedef struct { - char *buf; /* agp offset */ - int sz; /* nr bytes in use */ - int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */ - int DR4; /* window origin for GFX_OP_DRAWRECT_INFO*/ - int num_cliprects; /* mulitpass with multiple cliprects? */ - drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */ -} drmI830CmdBuffer; - -typedef struct { - int *irq_seq; -} drmI830IrqEmit; - -typedef struct { - int irq_seq; -} drmI830IrqWait; - -typedef struct { - int param; - int *value; -} drmI830GetParam; - -#define I830_PARAM_IRQ_ACTIVE 1 -#define I830_PARAM_ALLOW_BATCHBUFFER 2 - -typedef struct { - int param; - int value; -} drmI830SetParam; - -#define I830_SETPARAM_USE_MI_BATCHBUFFER_START 1 -#define I830_SETPARAM_TEX_LRU_LOG_GRANULARITY 2 -#define I830_SETPARAM_ALLOW_BATCHBUFFER 3 - - -/* A memory manager for regions of shared memory: - */ -#define I830_MEM_REGION_AGP 1 - -typedef struct { - int region; - int alignment; - int size; - int *region_offset; /* offset from start of fb or agp */ -} drmI830MemAlloc; - -typedef struct { - int region; - int region_offset; -} drmI830MemFree; - -typedef struct { - int region; - int size; - int start; -} drmI830MemInitHeap; - -typedef struct { - int region; -} drmI830MemDestroyHeap; - -#define MMIO_READ 0 -#define MMIO_WRITE 1 - -#define MMIO_REGS_IA_PRIMATIVES_COUNT 0 -#define MMIO_REGS_IA_VERTICES_COUNT 1 -#define MMIO_REGS_VS_INVOCATION_COUNT 2 -#define MMIO_REGS_GS_PRIMITIVES_COUNT 3 -#define MMIO_REGS_GS_INVOCATION_COUNT 4 -#define MMIO_REGS_CL_PRIMITIVES_COUNT 5 -#define MMIO_REGS_CL_INVOCATION_COUNT 6 -#define MMIO_REGS_PS_INVOCATION_COUNT 7 -#define MMIO_REGS_PS_DEPTH_COUNT 8 - -typedef struct { - unsigned int read_write:1; - unsigned int reg:31; - void __user *data; -} drmI830MMIO; - -#endif /* _I830_DRM_H_ */ diff --git a/src/mesa/drivers/dri/i965/server/i830_dri.h b/src/mesa/drivers/dri/i965/server/i830_dri.h deleted file mode 100644 index 68213f69f5d..00000000000 --- a/src/mesa/drivers/dri/i965/server/i830_dri.h +++ /dev/null @@ -1,62 +0,0 @@ - -#ifndef _I830_DRI_H -#define _I830_DRI_H - -#include "xf86drm.h" -#include "i830_common.h" - -#define I830_MAX_DRAWABLES 256 - -#define I830_MAJOR_VERSION 1 -#define I830_MINOR_VERSION 3 -#define I830_PATCHLEVEL 0 - -#define I830_REG_SIZE 0x80000 - -typedef struct _I830DRIRec { - drm_handle_t regs; - drmSize regsSize; - - drmSize unused1; /* backbufferSize */ - drm_handle_t unused2; /* backbuffer */ - - drmSize unused3; /* depthbufferSize */ - drm_handle_t unused4; /* depthbuffer */ - - drmSize unused5; /* rotatedSize /*/ - drm_handle_t unused6; /* rotatedbuffer */ - - drm_handle_t unused7; /* textures */ - int unused8; /* textureSize */ - - drm_handle_t unused9; /* agp_buffers */ - drmSize unused10; /* agp_buf_size */ - - int deviceID; - int width; - int height; - int mem; - int cpp; - int bitsPerPixel; - - int unused11[8]; /* was front/back/depth/rotated offset/pitch */ - - int unused12; /* logTextureGranularity */ - int unused13; /* textureOffset */ - - int irq; - int sarea_priv_offset; -} I830DRIRec, *I830DRIPtr; - -typedef struct { - /* Nothing here yet */ - int dummy; -} I830ConfigPrivRec, *I830ConfigPrivPtr; - -typedef struct { - /* Nothing here yet */ - int dummy; -} I830DRIContextRec, *I830DRIContextPtr; - - -#endif diff --git a/src/mesa/drivers/dri/i965/server/intel.h b/src/mesa/drivers/dri/i965/server/intel.h deleted file mode 100644 index d7858a20c8d..00000000000 --- a/src/mesa/drivers/dri/i965/server/intel.h +++ /dev/null @@ -1,328 +0,0 @@ -#ifndef _INTEL_H_ -#define _INTEL_H_ - -#include "xf86drm.h" /* drm_handle_t, etc */ - -/* Intel */ -#ifndef PCI_CHIP_I810 -#define PCI_CHIP_I810 0x7121 -#define PCI_CHIP_I810_DC100 0x7123 -#define PCI_CHIP_I810_E 0x7125 -#define PCI_CHIP_I815 0x1132 -#define PCI_CHIP_I810_BRIDGE 0x7120 -#define PCI_CHIP_I810_DC100_BRIDGE 0x7122 -#define PCI_CHIP_I810_E_BRIDGE 0x7124 -#define PCI_CHIP_I815_BRIDGE 0x1130 -#endif - -#define PCI_CHIP_845_G 0x2562 -#define PCI_CHIP_I830_M 0x3577 - -#ifndef PCI_CHIP_I855_GM -#define PCI_CHIP_I855_GM 0x3582 -#define PCI_CHIP_I855_GM_BRIDGE 0x3580 -#endif - -#ifndef PCI_CHIP_I865_G -#define PCI_CHIP_I865_G 0x2572 -#define PCI_CHIP_I865_G_BRIDGE 0x2570 -#endif - -#ifndef PCI_CHIP_I915_G -#define PCI_CHIP_I915_G 0x2582 -#define PCI_CHIP_I915_G_BRIDGE 0x2580 -#endif - -#ifndef PCI_CHIP_I915_GM -#define PCI_CHIP_I915_GM 0x2592 -#define PCI_CHIP_I915_GM_BRIDGE 0x2590 -#endif - -#ifndef PCI_CHIP_E7221_G -#define PCI_CHIP_E7221_G 0x258A -/* Same as I915_G_BRIDGE */ -#define PCI_CHIP_E7221_G_BRIDGE 0x2580 -#endif - -#ifndef PCI_CHIP_I945_G -#define PCI_CHIP_I945_G 0x2772 -#define PCI_CHIP_I945_G_BRIDGE 0x2770 -#endif - -#ifndef PCI_CHIP_I945_GM -#define PCI_CHIP_I945_GM 0x27A2 -#define PCI_CHIP_I945_GM_BRIDGE 0x27A0 -#endif - -#define IS_I810(pI810) (pI810->Chipset == PCI_CHIP_I810 || \ - pI810->Chipset == PCI_CHIP_I810_DC100 || \ - pI810->Chipset == PCI_CHIP_I810_E) -#define IS_I815(pI810) (pI810->Chipset == PCI_CHIP_I815) -#define IS_I830(pI810) (pI810->Chipset == PCI_CHIP_I830_M) -#define IS_845G(pI810) (pI810->Chipset == PCI_CHIP_845_G) -#define IS_I85X(pI810) (pI810->Chipset == PCI_CHIP_I855_GM) -#define IS_I852(pI810) (pI810->Chipset == PCI_CHIP_I855_GM && (pI810->variant == I852_GM || pI810->variant == I852_GME)) -#define IS_I855(pI810) (pI810->Chipset == PCI_CHIP_I855_GM && (pI810->variant == I855_GM || pI810->variant == I855_GME)) -#define IS_I865G(pI810) (pI810->Chipset == PCI_CHIP_I865_G) - -#define IS_I915G(pI810) (pI810->Chipset == PCI_CHIP_I915_G || pI810->Chipset == PCI_CHIP_E7221_G) -#define IS_I915GM(pI810) (pI810->Chipset == PCI_CHIP_I915_GM) -#define IS_I945G(pI810) (pI810->Chipset == PCI_CHIP_I945_G) -#define IS_I945GM(pI810) (pI810->Chipset == PCI_CHIP_I945_GM) -#define IS_I9XX(pI810) (IS_I915G(pI810) || IS_I915GM(pI810) || IS_I945G(pI810) || IS_I945GM(pI810)) - -#define IS_MOBILE(pI810) (IS_I830(pI810) || IS_I85X(pI810) || IS_I915GM(pI810) || IS_I945GM(pI810)) - -#define I830_GMCH_CTRL 0x52 - - -#define I830_GMCH_GMS_MASK 0x70 -#define I830_GMCH_GMS_DISABLED 0x00 -#define I830_GMCH_GMS_LOCAL 0x10 -#define I830_GMCH_GMS_STOLEN_512 0x20 -#define I830_GMCH_GMS_STOLEN_1024 0x30 -#define I830_GMCH_GMS_STOLEN_8192 0x40 - -#define I855_GMCH_GMS_MASK (0x7 << 4) -#define I855_GMCH_GMS_DISABLED 0x00 -#define I855_GMCH_GMS_STOLEN_1M (0x1 << 4) -#define I855_GMCH_GMS_STOLEN_4M (0x2 << 4) -#define I855_GMCH_GMS_STOLEN_8M (0x3 << 4) -#define I855_GMCH_GMS_STOLEN_16M (0x4 << 4) -#define I855_GMCH_GMS_STOLEN_32M (0x5 << 4) -#define I915G_GMCH_GMS_STOLEN_48M (0x6 << 4) -#define I915G_GMCH_GMS_STOLEN_64M (0x7 << 4) - -typedef unsigned char Bool; -#define TRUE 1 -#define FALSE 0 - -#define PIPE_NONE 0<<0 -#define PIPE_CRT 1<<0 -#define PIPE_TV 1<<1 -#define PIPE_DFP 1<<2 -#define PIPE_LFP 1<<3 -#define PIPE_CRT2 1<<4 -#define PIPE_TV2 1<<5 -#define PIPE_DFP2 1<<6 -#define PIPE_LFP2 1<<7 - -typedef struct _I830MemPool *I830MemPoolPtr; -typedef struct _I830MemRange *I830MemRangePtr; -typedef struct _I830MemRange { - long Start; - long End; - long Size; - unsigned long Physical; - unsigned long Offset; /* Offset of AGP-allocated portion */ - unsigned long Alignment; - drm_handle_t Key; - unsigned long Pitch; // add pitch - I830MemPoolPtr Pool; -} I830MemRange; - -typedef struct _I830MemPool { - I830MemRange Total; - I830MemRange Free; - I830MemRange Fixed; - I830MemRange Allocated; -} I830MemPool; - -typedef struct { - int tail_mask; - I830MemRange mem; - unsigned char *virtual_start; - int head; - int tail; - int space; -} I830RingBuffer; - -typedef struct _I830Rec { - unsigned char *MMIOBase; - unsigned char *FbBase; - int cpp; - - unsigned int bios_version; - - /* These are set in PreInit and never changed. */ - long FbMapSize; - long TotalVideoRam; - I830MemRange StolenMemory; /* pre-allocated memory */ - long BIOSMemorySize; /* min stolen pool size */ - int BIOSMemSizeLoc; - - /* These change according to what has been allocated. */ - long FreeMemory; - I830MemRange MemoryAperture; - I830MemPool StolenPool; - long allocatedMemory; - - /* Regions allocated either from the above pools, or from agpgart. */ - /* for single and dual head configurations */ - I830MemRange FrontBuffer; - I830MemRange FrontBuffer2; - I830MemRange Scratch; - I830MemRange Scratch2; - - I830RingBuffer *LpRing; - - I830MemRange BackBuffer; - I830MemRange DepthBuffer; - I830MemRange TexMem; - int TexGranularity; - I830MemRange ContextMem; - int drmMinor; - Bool have3DWindows; - - Bool NeedRingBufferLow; - Bool allowPageFlip; - Bool disableTiling; - - int Chipset; - unsigned long LinearAddr; - unsigned long MMIOAddr; - - drmSize registerSize; /**< \brief MMIO register map size */ - drm_handle_t registerHandle; /**< \brief MMIO register map handle */ - // IOADDRESS ioBase; - int irq; /**< \brief IRQ number */ - int GttBound; - - drm_handle_t ring_map; - unsigned int Fence[8]; - -} I830Rec; - -/* - * 12288 is set as the maximum, chosen because it is enough for - * 1920x1440@32bpp with a 2048 pixel line pitch with some to spare. - */ -#define I830_MAXIMUM_VBIOS_MEM 12288 -#define I830_DEFAULT_VIDEOMEM_2D (MB(32) / 1024) -#define I830_DEFAULT_VIDEOMEM_3D (MB(64) / 1024) - -/* Flags for memory allocation function */ -#define FROM_ANYWHERE 0x00000000 -#define FROM_POOL_ONLY 0x00000001 -#define FROM_NEW_ONLY 0x00000002 -#define FROM_MASK 0x0000000f - -#define ALLOCATE_AT_TOP 0x00000010 -#define ALLOCATE_AT_BOTTOM 0x00000020 -#define FORCE_GAPS 0x00000040 - -#define NEED_PHYSICAL_ADDR 0x00000100 -#define ALIGN_BOTH_ENDS 0x00000200 -#define FORCE_LOW 0x00000400 - -#define ALLOC_NO_TILING 0x00001000 -#define ALLOC_INITIAL 0x00002000 - -#define ALLOCATE_DRY_RUN 0x80000000 - -/* Chipset registers for VIDEO BIOS memory RW access */ -#define _855_DRAM_RW_CONTROL 0x58 -#define _845_DRAM_RW_CONTROL 0x90 -#define DRAM_WRITE 0x33330000 - -#define KB(x) ((x) * 1024) -#define MB(x) ((x) * KB(1024)) - -#define GTT_PAGE_SIZE KB(4) -#define ROUND_TO(x, y) (((x) + (y) - 1) / (y) * (y)) -#define ROUND_DOWN_TO(x, y) ((x) / (y) * (y)) -#define ROUND_TO_PAGE(x) ROUND_TO((x), GTT_PAGE_SIZE) -#define ROUND_TO_MB(x) ROUND_TO((x), MB(1)) -#define PRIMARY_RINGBUFFER_SIZE KB(128) - - -/* Ring buffer registers, p277, overview p19 - */ -#define LP_RING 0x2030 -#define HP_RING 0x2040 - -#define RING_TAIL 0x00 -#define TAIL_ADDR 0x000FFFF8 -#define I830_TAIL_MASK 0x001FFFF8 - -#define RING_HEAD 0x04 -#define HEAD_WRAP_COUNT 0xFFE00000 -#define HEAD_WRAP_ONE 0x00200000 -#define HEAD_ADDR 0x001FFFFC -#define I830_HEAD_MASK 0x001FFFFC - -#define RING_START 0x08 -#define START_ADDR 0x03FFFFF8 -#define I830_RING_START_MASK 0xFFFFF000 - -#define RING_LEN 0x0C -#define RING_NR_PAGES 0x001FF000 -#define I830_RING_NR_PAGES 0x001FF000 -#define RING_REPORT_MASK 0x00000006 -#define RING_REPORT_64K 0x00000002 -#define RING_REPORT_128K 0x00000004 -#define RING_NO_REPORT 0x00000000 -#define RING_VALID_MASK 0x00000001 -#define RING_VALID 0x00000001 -#define RING_INVALID 0x00000000 - - -/* Fence/Tiling ranges [0..7] - */ -#define FENCE 0x2000 -#define FENCE_NR 8 - -#define I915G_FENCE_START_MASK 0x0ff00000 - -#define I830_FENCE_START_MASK 0x07f80000 - -#define FENCE_START_MASK 0x03F80000 -#define FENCE_X_MAJOR 0x00000000 -#define FENCE_Y_MAJOR 0x00001000 -#define FENCE_SIZE_MASK 0x00000700 -#define FENCE_SIZE_512K 0x00000000 -#define FENCE_SIZE_1M 0x00000100 -#define FENCE_SIZE_2M 0x00000200 -#define FENCE_SIZE_4M 0x00000300 -#define FENCE_SIZE_8M 0x00000400 -#define FENCE_SIZE_16M 0x00000500 -#define FENCE_SIZE_32M 0x00000600 -#define FENCE_SIZE_64M 0x00000700 -#define I915G_FENCE_SIZE_1M 0x00000000 -#define I915G_FENCE_SIZE_2M 0x00000100 -#define I915G_FENCE_SIZE_4M 0x00000200 -#define I915G_FENCE_SIZE_8M 0x00000300 -#define I915G_FENCE_SIZE_16M 0x00000400 -#define I915G_FENCE_SIZE_32M 0x00000500 -#define I915G_FENCE_SIZE_64M 0x00000600 -#define I915G_FENCE_SIZE_128M 0x00000700 -#define FENCE_PITCH_1 0x00000000 -#define FENCE_PITCH_2 0x00000010 -#define FENCE_PITCH_4 0x00000020 -#define FENCE_PITCH_8 0x00000030 -#define FENCE_PITCH_16 0x00000040 -#define FENCE_PITCH_32 0x00000050 -#define FENCE_PITCH_64 0x00000060 -#define FENCE_VALID 0x00000001 - -#include <mmio.h> - -# define MMIO_IN8(base, offset) \ - *(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) -# define MMIO_IN32(base, offset) \ - read_MMIO_LE32(base, offset) -# define MMIO_OUT8(base, offset, val) \ - *(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) = (val) -# define MMIO_OUT32(base, offset, val) \ - *(volatile unsigned int *)(void *)(((unsigned char*)(base)) + (offset)) = CPU_TO_LE32(val) - - - /* Memory mapped register access macros */ -#define INREG8(addr) MMIO_IN8(MMIO, addr) -#define INREG(addr) MMIO_IN32(MMIO, addr) -#define OUTREG8(addr, val) MMIO_OUT8(MMIO, addr, val) -#define OUTREG(addr, val) MMIO_OUT32(MMIO, addr, val) - -#define DSPABASE 0x70184 - -#endif diff --git a/src/mesa/drivers/dri/i965/server/intel_dri.c b/src/mesa/drivers/dri/i965/server/intel_dri.c index 169fdbece30..effdd26448a 100644..120000 --- a/src/mesa/drivers/dri/i965/server/intel_dri.c +++ b/src/mesa/drivers/dri/i965/server/intel_dri.c @@ -1,1282 +1 @@ -/** - * \file server/intel_dri.c - * \brief File to perform the device-specific initialization tasks typically - * done in the X server. - * - * Here they are converted to run in the client (or perhaps a standalone - * process), and to work with the frame buffer device rather than the X - * server infrastructure. - * - * Copyright (C) 2006 Dave Airlie ([email protected]) - - Permission is hereby granted, free of charge, to any person obtaining a - copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sub license, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial portions - of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR - ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <unistd.h> - -#include "driver.h" -#include "drm.h" - -#include "intel.h" -#include "i830_dri.h" - -#include "memops.h" -#include "pciaccess.h" - -static size_t drm_page_size; -static int nextTile = 0; -#define xf86DrvMsg(...) do {} while(0) - -static const int pitches[] = { - 128 * 8, - 128 * 16, - 128 * 32, - 128 * 64, - 0 -}; - -static Bool I830DRIDoMappings(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea); - -static unsigned long -GetBestTileAlignment(unsigned long size) -{ - unsigned long i; - - for (i = KB(512); i < size; i <<= 1) - ; - - if (i > MB(64)) - i = MB(64); - - return i; -} - -static void SetFenceRegs(const DRIDriverContext *ctx, I830Rec *pI830) -{ - int i; - unsigned char *MMIO = ctx->MMIOAddress; - - for (i = 0; i < 8; i++) { - OUTREG(FENCE + i * 4, pI830->Fence[i]); - // if (I810_DEBUG & DEBUG_VERBOSE_VGA) - fprintf(stderr,"Fence Register : %x\n", pI830->Fence[i]); - } -} - -/* Tiled memory is good... really, really good... - * - * Need to make it less likely that we miss out on this - probably - * need to move the frontbuffer away from the 'guarenteed' alignment - * of the first memory segment, or perhaps allocate a discontigous - * framebuffer to get more alignment 'sweet spots'. - */ -static void -SetFence(const DRIDriverContext *ctx, I830Rec *pI830, - int nr, unsigned int start, unsigned int pitch, - unsigned int size) -{ - unsigned int val; - unsigned int fence_mask = 0; - unsigned int fence_pitch; - - if (nr < 0 || nr > 7) { - fprintf(stderr, - "SetFence: fence %d out of range\n",nr); - return; - } - - pI830->Fence[nr] = 0; - - if (IS_I9XX(pI830)) - fence_mask = ~I915G_FENCE_START_MASK; - else - fence_mask = ~I830_FENCE_START_MASK; - - if (start & fence_mask) { - fprintf(stderr, - "SetFence: %d: start (0x%08x) is not %s aligned\n", - nr, start, (IS_I9XX(pI830)) ? "1MB" : "512k"); - return; - } - - if (start % size) { - fprintf(stderr, - "SetFence: %d: start (0x%08x) is not size (%dk) aligned\n", - nr, start, size / 1024); - return; - } - - if (pitch & 127) { - fprintf(stderr, - "SetFence: %d: pitch (%d) not a multiple of 128 bytes\n", - nr, pitch); - return; - } - - val = (start | FENCE_X_MAJOR | FENCE_VALID); - - if (IS_I9XX(pI830)) { - switch (size) { - case MB(1): - val |= I915G_FENCE_SIZE_1M; - break; - case MB(2): - val |= I915G_FENCE_SIZE_2M; - break; - case MB(4): - val |= I915G_FENCE_SIZE_4M; - break; - case MB(8): - val |= I915G_FENCE_SIZE_8M; - break; - case MB(16): - val |= I915G_FENCE_SIZE_16M; - break; - case MB(32): - val |= I915G_FENCE_SIZE_32M; - break; - case MB(64): - val |= I915G_FENCE_SIZE_64M; - break; - default: - fprintf(stderr, - "SetFence: %d: illegal size (%d kByte)\n", nr, size / 1024); - return; - } - } else { - switch (size) { - case KB(512): - val |= FENCE_SIZE_512K; - break; - case MB(1): - val |= FENCE_SIZE_1M; - break; - case MB(2): - val |= FENCE_SIZE_2M; - break; - case MB(4): - val |= FENCE_SIZE_4M; - break; - case MB(8): - val |= FENCE_SIZE_8M; - break; - case MB(16): - val |= FENCE_SIZE_16M; - break; - case MB(32): - val |= FENCE_SIZE_32M; - break; - case MB(64): - val |= FENCE_SIZE_64M; - break; - default: - fprintf(stderr, - "SetFence: %d: illegal size (%d kByte)\n", nr, size / 1024); - return; - } - } - - if (IS_I9XX(pI830)) - fence_pitch = pitch / 512; - else - fence_pitch = pitch / 128; - - switch (fence_pitch) { - case 1: - val |= FENCE_PITCH_1; - break; - case 2: - val |= FENCE_PITCH_2; - break; - case 4: - val |= FENCE_PITCH_4; - break; - case 8: - val |= FENCE_PITCH_8; - break; - case 16: - val |= FENCE_PITCH_16; - break; - case 32: - val |= FENCE_PITCH_32; - break; - case 64: - val |= FENCE_PITCH_64; - break; - default: - fprintf(stderr, - "SetFence: %d: illegal pitch (%d)\n", nr, pitch); - return; - } - - pI830->Fence[nr] = val; -} - -static Bool -MakeTiles(const DRIDriverContext *ctx, I830Rec *pI830, I830MemRange *pMem) -{ - int pitch, ntiles, i; - - pitch = pMem->Pitch * ctx->cpp; - /* - * Simply try to break the region up into at most four pieces of size - * equal to the alignment. - */ - ntiles = ROUND_TO(pMem->Size, pMem->Alignment) / pMem->Alignment; - if (ntiles >= 4) { - return FALSE; - } - - for (i = 0; i < ntiles; i++, nextTile++) { - SetFence(ctx, pI830, nextTile, pMem->Start + i * pMem->Alignment, - pitch, pMem->Alignment); - } - return TRUE; -} - -static void I830SetupMemoryTiling(const DRIDriverContext *ctx, I830Rec *pI830) -{ - int i; - - /* Clear out */ - for (i = 0; i < 8; i++) - pI830->Fence[i] = 0; - - nextTile = 0; - - if (pI830->BackBuffer.Alignment >= KB(512)) { - if (MakeTiles(ctx, pI830, &(pI830->BackBuffer))) { - fprintf(stderr, - "Activating tiled memory for the back buffer.\n"); - } else { - fprintf(stderr, - "MakeTiles failed for the back buffer.\n"); - pI830->allowPageFlip = FALSE; - } - } - - if (pI830->DepthBuffer.Alignment >= KB(512)) { - if (MakeTiles(ctx, pI830, &(pI830->DepthBuffer))) { - fprintf(stderr, - "Activating tiled memory for the depth buffer.\n"); - } else { - fprintf(stderr, - "MakeTiles failed for the depth buffer.\n"); - } - } - - return; -} - -static int I830DetectMemory(const DRIDriverContext *ctx, I830Rec *pI830) -{ - struct pci_device host_bridge; - uint32_t gmch_ctrl; - int memsize = 0; - int range; - - memset(&host_bridge, 0, sizeof(host_bridge)); - - pci_device_cfg_read_u32(&host_bridge, &gmch_ctrl, I830_GMCH_CTRL); - - /* We need to reduce the stolen size, by the GTT and the popup. - * The GTT varying according the the FbMapSize and the popup is 4KB */ - range = (ctx->shared.fbSize / (1024*1024)) + 4; - - if (IS_I85X(pI830) || IS_I865G(pI830) || IS_I9XX(pI830)) { - switch (gmch_ctrl & I830_GMCH_GMS_MASK) { - case I855_GMCH_GMS_STOLEN_1M: - memsize = MB(1) - KB(range); - break; - case I855_GMCH_GMS_STOLEN_4M: - memsize = MB(4) - KB(range); - break; - case I855_GMCH_GMS_STOLEN_8M: - memsize = MB(8) - KB(range); - break; - case I855_GMCH_GMS_STOLEN_16M: - memsize = MB(16) - KB(range); - break; - case I855_GMCH_GMS_STOLEN_32M: - memsize = MB(32) - KB(range); - break; - case I915G_GMCH_GMS_STOLEN_48M: - if (IS_I9XX(pI830)) - memsize = MB(48) - KB(range); - break; - case I915G_GMCH_GMS_STOLEN_64M: - if (IS_I9XX(pI830)) - memsize = MB(64) - KB(range); - break; - } - } else { - switch (gmch_ctrl & I830_GMCH_GMS_MASK) { - case I830_GMCH_GMS_STOLEN_512: - memsize = KB(512) - KB(range); - break; - case I830_GMCH_GMS_STOLEN_1024: - memsize = MB(1) - KB(range); - break; - case I830_GMCH_GMS_STOLEN_8192: - memsize = MB(8) - KB(range); - break; - case I830_GMCH_GMS_LOCAL: - memsize = 0; - xf86DrvMsg(pScrn->scrnIndex, X_WARNING, - "Local memory found, but won't be used.\n"); - break; - } - } - if (memsize > 0) { - fprintf(stderr, - "detected %d kB stolen memory.\n", memsize / 1024); - } else { - fprintf(stderr, - "no video memory detected.\n"); - } - return memsize; -} - -static int AgpInit(const DRIDriverContext *ctx, I830Rec *info) -{ - unsigned long mode = 0x4; - - if (drmAgpAcquire(ctx->drmFD) < 0) { - fprintf(stderr, "[gart] AGP not available\n"); - return 0; - } - - if (drmAgpEnable(ctx->drmFD, mode) < 0) { - fprintf(stderr, "[gart] AGP not enabled\n"); - drmAgpRelease(ctx->drmFD); - return 0; - } - else - fprintf(stderr, "[gart] AGP enabled at %dx\n", ctx->agpmode); - - return 1; -} - -/* - * Allocate memory from the given pool. Grow the pool if needed and if - * possible. - */ -static unsigned long -AllocFromPool(const DRIDriverContext *ctx, I830Rec *pI830, - I830MemRange *result, I830MemPool *pool, - long size, unsigned long alignment, int flags) -{ - long needed, start, end; - - if (!result || !pool || !size) - return 0; - - /* Calculate how much space is needed. */ - if (alignment <= GTT_PAGE_SIZE) - needed = size; - else { - start = ROUND_TO(pool->Free.Start, alignment); - end = ROUND_TO(start + size, alignment); - needed = end - pool->Free.Start; - } - if (needed > pool->Free.Size) { - return 0; - } - - result->Start = ROUND_TO(pool->Free.Start, alignment); - pool->Free.Start += needed; - result->End = pool->Free.Start; - - pool->Free.Size = pool->Free.End - pool->Free.Start; - result->Size = result->End - result->Start; - result->Pool = pool; - result->Alignment = alignment; - return needed; -} - -static unsigned long AllocFromAGP(const DRIDriverContext *ctx, I830Rec *pI830, long size, unsigned long alignment, I830MemRange *result) -{ - unsigned long start, end; - unsigned long newApStart, newApEnd; - int ret; - if (!result || !size) - return 0; - - if (!alignment) - alignment = 4; - - start = ROUND_TO(pI830->MemoryAperture.Start, alignment); - end = ROUND_TO(start + size, alignment); - newApStart = end; - newApEnd = pI830->MemoryAperture.End; - - ret=drmAgpAlloc(ctx->drmFD, size, 0, &(result->Physical), (drm_handle_t *)&(result->Key)); - - if (ret) - { - fprintf(stderr,"drmAgpAlloc failed %d\n", ret); - return 0; - } - pI830->allocatedMemory += size; - pI830->MemoryAperture.Start = newApStart; - pI830->MemoryAperture.End = newApEnd; - pI830->MemoryAperture.Size = newApEnd - newApStart; - // pI830->FreeMemory -= size; - result->Start = start; - result->End = start + size; - result->Size = size; - result->Offset = start; - result->Alignment = alignment; - result->Pool = NULL; - - return size; -} - -unsigned long -I830AllocVidMem(const DRIDriverContext *ctx, I830Rec *pI830, I830MemRange *result, I830MemPool *pool, long size, unsigned long alignment, int flags) -{ - int ret; - - if (!result) - return 0; - - /* Make sure these are initialised. */ - result->Size = 0; - result->Key = -1; - - if (!size) { - return 0; - } - - if (pool->Free.Size < size) - return AllocFromAGP(ctx, pI830, size, alignment, result); - else - { - ret = AllocFromPool(ctx, pI830, result, pool, size, alignment, flags); - - if (ret==0) - return AllocFromAGP(ctx, pI830, size, alignment, result); - return ret; - } -} - -static Bool BindAgpRange(const DRIDriverContext *ctx, I830MemRange *mem) -{ - if (!mem) - return FALSE; - - if (mem->Key == -1) - return TRUE; - - return !drmAgpBind(ctx->drmFD, mem->Key, mem->Offset); -} - -/* simple memory allocation routines needed */ -/* put ring buffer in low memory */ -/* need to allocate front, back, depth buffers aligned correctly, - allocate ring buffer, -*/ - -/* */ -static Bool -I830AllocateMemory(const DRIDriverContext *ctx, I830Rec *pI830) -{ - unsigned long size, ret; - unsigned long lines, lineSize, align; - - /* allocate ring buffer */ - memset(pI830->LpRing, 0, sizeof(I830RingBuffer)); - pI830->LpRing->mem.Key = -1; - - size = PRIMARY_RINGBUFFER_SIZE; - - ret = I830AllocVidMem(ctx, pI830, &pI830->LpRing->mem, &pI830->StolenPool, size, 0x1000, 0); - - if (ret != size) - { - fprintf(stderr,"unable to allocate ring buffer %ld\n", ret); - return FALSE; - } - - pI830->LpRing->tail_mask = pI830->LpRing->mem.Size - 1; - - - /* allocate front buffer */ - memset(&(pI830->FrontBuffer), 0, sizeof(pI830->FrontBuffer)); - pI830->FrontBuffer.Key = -1; - pI830->FrontBuffer.Pitch = ctx->shared.virtualWidth; - - align = KB(512); - - lineSize = ctx->shared.virtualWidth * ctx->cpp; - lines = (ctx->shared.virtualHeight + 15) / 16 * 16; - size = lineSize * lines; - size = ROUND_TO_PAGE(size); - - align = GetBestTileAlignment(size); - - ret = I830AllocVidMem(ctx, pI830, &pI830->FrontBuffer, &pI830->StolenPool, size, align, 0); - if (ret < size) - { - fprintf(stderr,"unable to allocate front buffer %ld\n", ret); - return FALSE; - } - - memset(&(pI830->BackBuffer), 0, sizeof(pI830->BackBuffer)); - pI830->BackBuffer.Key = -1; - pI830->BackBuffer.Pitch = ctx->shared.virtualWidth; - - ret = I830AllocVidMem(ctx, pI830, &pI830->BackBuffer, &pI830->StolenPool, size, align, 0); - if (ret < size) - { - fprintf(stderr,"unable to allocate back buffer %ld\n", ret); - return FALSE; - } - - memset(&(pI830->DepthBuffer), 0, sizeof(pI830->DepthBuffer)); - pI830->DepthBuffer.Key = -1; - pI830->DepthBuffer.Pitch = ctx->shared.virtualWidth; - - ret = I830AllocVidMem(ctx, pI830, &pI830->DepthBuffer, &pI830->StolenPool, size, align, 0); - if (ret < size) - { - fprintf(stderr,"unable to allocate depth buffer %ld\n", ret); - return FALSE; - } - - memset(&(pI830->ContextMem), 0, sizeof(pI830->ContextMem)); - pI830->ContextMem.Key = -1; - size = KB(32); - - ret = I830AllocVidMem(ctx, pI830, &pI830->ContextMem, &pI830->StolenPool, size, align, 0); - if (ret < size) - { - fprintf(stderr,"unable to allocate context buffer %ld\n", ret); - return FALSE; - } - - memset(&(pI830->TexMem), 0, sizeof(pI830->TexMem)); - pI830->TexMem.Key = -1; - - size = 32768 * 1024; - ret = AllocFromAGP(ctx, pI830, size, align, &pI830->TexMem); - if (ret < size) - { - fprintf(stderr,"unable to allocate texture memory %ld\n", ret); - return FALSE; - } - - return TRUE; -} - -static Bool -I830BindMemory(const DRIDriverContext *ctx, I830Rec *pI830) -{ - if (!BindAgpRange(ctx, &pI830->LpRing->mem)) - return FALSE; - if (!BindAgpRange(ctx, &pI830->FrontBuffer)) - return FALSE; - if (!BindAgpRange(ctx, &pI830->BackBuffer)) - return FALSE; - if (!BindAgpRange(ctx, &pI830->DepthBuffer)) - return FALSE; - if (!BindAgpRange(ctx, &pI830->ContextMem)) - return FALSE; - if (!BindAgpRange(ctx, &pI830->TexMem)) - return FALSE; - - return TRUE; -} - -static Bool -I830CleanupDma(const DRIDriverContext *ctx) -{ - drmI830Init info; - - memset(&info, 0, sizeof(drmI830Init)); - info.func = I830_CLEANUP_DMA; - - if (drmCommandWrite(ctx->drmFD, DRM_I830_INIT, - &info, sizeof(drmI830Init))) { - fprintf(stderr, "I830 Dma Cleanup Failed\n"); - return FALSE; - } - - return TRUE; -} - -static Bool -I830InitDma(const DRIDriverContext *ctx, I830Rec *pI830) -{ - I830RingBuffer *ring = pI830->LpRing; - drmI830Init info; - - memset(&info, 0, sizeof(drmI830Init)); - info.func = I830_INIT_DMA; - - info.ring_start = ring->mem.Start + pI830->LinearAddr; - info.ring_end = ring->mem.End + pI830->LinearAddr; - info.ring_size = ring->mem.Size; - - info.mmio_offset = (unsigned int)ctx->MMIOStart; - - info.sarea_priv_offset = sizeof(drm_sarea_t); - - info.front_offset = pI830->FrontBuffer.Start; - info.back_offset = pI830->BackBuffer.Start; - info.depth_offset = pI830->DepthBuffer.Start; - info.w = ctx->shared.virtualWidth; - info.h = ctx->shared.virtualHeight; - info.pitch = ctx->shared.virtualWidth; - info.back_pitch = pI830->BackBuffer.Pitch; - info.depth_pitch = pI830->DepthBuffer.Pitch; - info.cpp = ctx->cpp; - - if (drmCommandWrite(ctx->drmFD, DRM_I830_INIT, - &info, sizeof(drmI830Init))) { - fprintf(stderr, - "I830 Dma Initialization Failed\n"); - return FALSE; - } - - return TRUE; -} - -static int I830CheckDRMVersion( const DRIDriverContext *ctx, - I830Rec *pI830 ) -{ - drmVersionPtr version; - - version = drmGetVersion(ctx->drmFD); - - if (version) { - int req_minor, req_patch; - - req_minor = 4; - req_patch = 0; - - if (version->version_major != 1 || - version->version_minor < req_minor || - (version->version_minor == req_minor && - version->version_patchlevel < req_patch)) { - /* Incompatible drm version */ - fprintf(stderr, - "[dri] I830DRIScreenInit failed because of a version " - "mismatch.\n" - "[dri] i915.o kernel module version is %d.%d.%d " - "but version 1.%d.%d or newer is needed.\n" - "[dri] Disabling DRI.\n", - version->version_major, - version->version_minor, - version->version_patchlevel, - req_minor, - req_patch); - drmFreeVersion(version); - return 0; - } - - pI830->drmMinor = version->version_minor; - drmFreeVersion(version); - } - return 1; -} - -static void -I830SetRingRegs(const DRIDriverContext *ctx, I830Rec *pI830) -{ - unsigned int itemp; - unsigned char *MMIO = ctx->MMIOAddress; - - OUTREG(LP_RING + RING_LEN, 0); - OUTREG(LP_RING + RING_TAIL, 0); - OUTREG(LP_RING + RING_HEAD, 0); - - if ((long)(pI830->LpRing->mem.Start & I830_RING_START_MASK) != - pI830->LpRing->mem.Start) { - fprintf(stderr, - "I830SetRingRegs: Ring buffer start (%lx) violates its " - "mask (%x)\n", pI830->LpRing->mem.Start, I830_RING_START_MASK); - } - /* Don't care about the old value. Reserved bits must be zero anyway. */ - itemp = pI830->LpRing->mem.Start & I830_RING_START_MASK; - OUTREG(LP_RING + RING_START, itemp); - - if (((pI830->LpRing->mem.Size - 4096) & I830_RING_NR_PAGES) != - pI830->LpRing->mem.Size - 4096) { - fprintf(stderr, - "I830SetRingRegs: Ring buffer size - 4096 (%lx) violates its " - "mask (%x)\n", pI830->LpRing->mem.Size - 4096, - I830_RING_NR_PAGES); - } - /* Don't care about the old value. Reserved bits must be zero anyway. */ - itemp = (pI830->LpRing->mem.Size - 4096) & I830_RING_NR_PAGES; - itemp |= (RING_NO_REPORT | RING_VALID); - OUTREG(LP_RING + RING_LEN, itemp); - - pI830->LpRing->head = INREG(LP_RING + RING_HEAD) & I830_HEAD_MASK; - pI830->LpRing->tail = INREG(LP_RING + RING_TAIL); - pI830->LpRing->space = pI830->LpRing->head - (pI830->LpRing->tail + 8); - if (pI830->LpRing->space < 0) - pI830->LpRing->space += pI830->LpRing->mem.Size; - - SetFenceRegs(ctx, pI830); - - /* RESET THE DISPLAY PIPE TO POINT TO THE FRONTBUFFER - hacky - hacky hacky */ - OUTREG(DSPABASE, pI830->FrontBuffer.Start + pI830->LinearAddr); - -} - -static Bool -I830SetParam(const DRIDriverContext *ctx, int param, int value) -{ - drmI830SetParam sp; - - memset(&sp, 0, sizeof(sp)); - sp.param = param; - sp.value = value; - - if (drmCommandWrite(ctx->drmFD, DRM_I830_SETPARAM, &sp, sizeof(sp))) { - fprintf(stderr, "I830 SetParam Failed\n"); - return FALSE; - } - - return TRUE; -} - -static Bool -I830DRIMapScreenRegions(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea) -{ - fprintf(stderr, - "[drm] Mapping front buffer\n"); - - if (drmAddMap(ctx->drmFD, - (drm_handle_t)(sarea->front_offset + pI830->LinearAddr), - sarea->front_size, - DRM_FRAME_BUFFER, /*DRM_AGP,*/ - 0, - &sarea->front_handle) < 0) { - fprintf(stderr, - "[drm] drmAddMap(front_handle) failed. Disabling DRI\n"); - return FALSE; - } - ctx->shared.hFrameBuffer = sarea->front_handle; - ctx->shared.fbSize = sarea->front_size; - fprintf(stderr, "[drm] Front Buffer = 0x%08x\n", - sarea->front_handle); - - if (drmAddMap(ctx->drmFD, - (drm_handle_t)(sarea->back_offset), - sarea->back_size, DRM_AGP, 0, - &sarea->back_handle) < 0) { - fprintf(stderr, - "[drm] drmAddMap(back_handle) failed. Disabling DRI\n"); - return FALSE; - } - fprintf(stderr, "[drm] Back Buffer = 0x%08x\n", - sarea->back_handle); - - if (drmAddMap(ctx->drmFD, - (drm_handle_t)sarea->depth_offset, - sarea->depth_size, DRM_AGP, 0, - &sarea->depth_handle) < 0) { - fprintf(stderr, - "[drm] drmAddMap(depth_handle) failed. Disabling DRI\n"); - return FALSE; - } - fprintf(stderr, "[drm] Depth Buffer = 0x%08x\n", - sarea->depth_handle); - - if (drmAddMap(ctx->drmFD, - (drm_handle_t)sarea->tex_offset, - sarea->tex_size, DRM_AGP, 0, - &sarea->tex_handle) < 0) { - fprintf(stderr, - "[drm] drmAddMap(tex_handle) failed. Disabling DRI\n"); - return FALSE; - } - fprintf(stderr, "[drm] textures = 0x%08x\n", - sarea->tex_handle); - - return TRUE; -} - - -static void -I830DRIUnmapScreenRegions(const DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea) -{ -#if 1 - if (sarea->front_handle) { - drmRmMap(ctx->drmFD, sarea->front_handle); - sarea->front_handle = 0; - } -#endif - if (sarea->back_handle) { - drmRmMap(ctx->drmFD, sarea->back_handle); - sarea->back_handle = 0; - } - if (sarea->depth_handle) { - drmRmMap(ctx->drmFD, sarea->depth_handle); - sarea->depth_handle = 0; - } - if (sarea->tex_handle) { - drmRmMap(ctx->drmFD, sarea->tex_handle); - sarea->tex_handle = 0; - } -} - -static void -I830InitTextureHeap(const DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea) -{ - /* Start up the simple memory manager for agp space */ - drmI830MemInitHeap drmHeap; - drmHeap.region = I830_MEM_REGION_AGP; - drmHeap.start = 0; - drmHeap.size = sarea->tex_size; - - if (drmCommandWrite(ctx->drmFD, DRM_I830_INIT_HEAP, - &drmHeap, sizeof(drmHeap))) { - fprintf(stderr, - "[drm] Failed to initialized agp heap manager\n"); - } else { - fprintf(stderr, - "[drm] Initialized kernel agp heap manager, %d\n", - sarea->tex_size); - - I830SetParam(ctx, I830_SETPARAM_TEX_LRU_LOG_GRANULARITY, - sarea->log_tex_granularity); - } -} - -static Bool -I830DRIDoMappings(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea) -{ - if (drmAddMap(ctx->drmFD, - (drm_handle_t)pI830->LpRing->mem.Start, - pI830->LpRing->mem.Size, DRM_AGP, 0, - &pI830->ring_map) < 0) { - fprintf(stderr, - "[drm] drmAddMap(ring_map) failed. Disabling DRI\n"); - return FALSE; - } - fprintf(stderr, "[drm] ring buffer = 0x%08x\n", - pI830->ring_map); - - if (I830InitDma(ctx, pI830) == FALSE) { - return FALSE; - } - - /* init to zero to be safe */ - - I830DRIMapScreenRegions(ctx, pI830, sarea); - I830InitTextureHeap(ctx, pI830, sarea); - - if (ctx->pciDevice != PCI_CHIP_845_G && - ctx->pciDevice != PCI_CHIP_I830_M) { - I830SetParam(ctx, I830_SETPARAM_USE_MI_BATCHBUFFER_START, 1 ); - } - - /* Okay now initialize the dma engine */ - { - pI830->irq = drmGetInterruptFromBusID(ctx->drmFD, - ctx->pciBus, - ctx->pciDevice, - ctx->pciFunc); - - if (drmCtlInstHandler(ctx->drmFD, pI830->irq)) { - fprintf(stderr, - "[drm] failure adding irq handler\n"); - pI830->irq = 0; - return FALSE; - } - else - fprintf(stderr, - "[drm] dma control initialized, using IRQ %d\n", - pI830->irq); - } - - fprintf(stderr, "[dri] visual configs initialized\n"); - - return TRUE; -} - -static Bool -I830ClearScreen(DRIDriverContext *ctx, I830Rec *pI830, drmI830Sarea *sarea) -{ - /* need to drmMap front and back buffers and zero them */ - drmAddress map_addr; - int ret; - - ret = drmMap(ctx->drmFD, - sarea->front_handle, - sarea->front_size, - &map_addr); - - if (ret) - { - fprintf(stderr, "Unable to map front buffer\n"); - return FALSE; - } - - drimemsetio((char *)map_addr, - 0, - sarea->front_size); - drmUnmap(map_addr, sarea->front_size); - - - ret = drmMap(ctx->drmFD, - sarea->back_handle, - sarea->back_size, - &map_addr); - - if (ret) - { - fprintf(stderr, "Unable to map back buffer\n"); - return FALSE; - } - - drimemsetio((char *)map_addr, - 0, - sarea->back_size); - drmUnmap(map_addr, sarea->back_size); - - return TRUE; -} - -static Bool -I830ScreenInit(DRIDriverContext *ctx, I830Rec *pI830) - -{ - I830DRIPtr pI830DRI; - drmI830Sarea *pSAREAPriv; - int err; - - drm_page_size = getpagesize(); - - pI830->registerSize = ctx->MMIOSize; - /* This is a hack for now. We have to have more than a 4k page here - * because of the size of the state. However, the state should be - * in a per-context mapping. This will be added in the Mesa 3.5 port - * of the I830 driver. - */ - ctx->shared.SAREASize = SAREA_MAX; - - /* Note that drmOpen will try to load the kernel module, if needed. */ - ctx->drmFD = drmOpen("i915", NULL ); - if (ctx->drmFD < 0) { - fprintf(stderr, "[drm] drmOpen failed\n"); - return 0; - } - - if ((err = drmSetBusid(ctx->drmFD, ctx->pciBusID)) < 0) { - fprintf(stderr, "[drm] drmSetBusid failed (%d, %s), %s\n", - ctx->drmFD, ctx->pciBusID, strerror(-err)); - return 0; - } - - if (drmAddMap( ctx->drmFD, - 0, - ctx->shared.SAREASize, - DRM_SHM, - DRM_CONTAINS_LOCK, - &ctx->shared.hSAREA) < 0) - { - fprintf(stderr, "[drm] drmAddMap failed\n"); - return 0; - } - - fprintf(stderr, "[drm] added %d byte SAREA at 0x%08x\n", - ctx->shared.SAREASize, ctx->shared.hSAREA); - - if (drmMap( ctx->drmFD, - ctx->shared.hSAREA, - ctx->shared.SAREASize, - (drmAddressPtr)(&ctx->pSAREA)) < 0) - { - fprintf(stderr, "[drm] drmMap failed\n"); - return 0; - - } - - memset(ctx->pSAREA, 0, ctx->shared.SAREASize); - fprintf(stderr, "[drm] mapped SAREA 0x%08x to %p, size %d\n", - ctx->shared.hSAREA, ctx->pSAREA, ctx->shared.SAREASize); - - - if (drmAddMap(ctx->drmFD, - ctx->MMIOStart, - ctx->MMIOSize, - DRM_REGISTERS, - DRM_READ_ONLY, - &pI830->registerHandle) < 0) { - fprintf(stderr, "[drm] drmAddMap mmio failed\n"); - return 0; - } - fprintf(stderr, - "[drm] register handle = 0x%08x\n", pI830->registerHandle); - - - if (!I830CheckDRMVersion(ctx, pI830)) { - return FALSE; - } - - /* Create a 'server' context so we can grab the lock for - * initialization ioctls. - */ - if ((err = drmCreateContext(ctx->drmFD, &ctx->serverContext)) != 0) { - fprintf(stderr, "%s: drmCreateContext failed %d\n", __FUNCTION__, err); - return 0; - } - - DRM_LOCK(ctx->drmFD, ctx->pSAREA, ctx->serverContext, 0); - - /* Initialize the SAREA private data structure */ - pSAREAPriv = (drmI830Sarea *)(((char*)ctx->pSAREA) + - sizeof(drm_sarea_t)); - memset(pSAREAPriv, 0, sizeof(*pSAREAPriv)); - - pI830->StolenMemory.Size = I830DetectMemory(ctx, pI830); - pI830->StolenMemory.Start = 0; - pI830->StolenMemory.End = pI830->StolenMemory.Size; - - pI830->MemoryAperture.Start = pI830->StolenMemory.End; - pI830->MemoryAperture.End = KB(40000); - pI830->MemoryAperture.Size = pI830->MemoryAperture.End - pI830->MemoryAperture.Start; - - pI830->StolenPool.Fixed = pI830->StolenMemory; - pI830->StolenPool.Total = pI830->StolenMemory; - pI830->StolenPool.Free = pI830->StolenPool.Total; - pI830->FreeMemory = pI830->StolenPool.Total.Size; - - if (!AgpInit(ctx, pI830)) - return FALSE; - - if (I830AllocateMemory(ctx, pI830) == FALSE) - { - return FALSE; - } - - if (I830BindMemory(ctx, pI830) == FALSE) - { - return FALSE; - } - - pSAREAPriv->front_offset = pI830->FrontBuffer.Start; - pSAREAPriv->front_size = pI830->FrontBuffer.Size; - pSAREAPriv->width = ctx->shared.virtualWidth; - pSAREAPriv->height = ctx->shared.virtualHeight; - pSAREAPriv->pitch = ctx->shared.virtualWidth; - pSAREAPriv->virtualX = ctx->shared.virtualWidth; - pSAREAPriv->virtualY = ctx->shared.virtualHeight; - pSAREAPriv->back_offset = pI830->BackBuffer.Start; - pSAREAPriv->back_size = pI830->BackBuffer.Size; - pSAREAPriv->depth_offset = pI830->DepthBuffer.Start; - pSAREAPriv->depth_size = pI830->DepthBuffer.Size; - pSAREAPriv->tex_offset = pI830->TexMem.Start; - pSAREAPriv->tex_size = pI830->TexMem.Size; - pSAREAPriv->log_tex_granularity = pI830->TexGranularity; - - ctx->driverClientMsg = malloc(sizeof(I830DRIRec)); - ctx->driverClientMsgSize = sizeof(I830DRIRec); - pI830DRI = (I830DRIPtr)ctx->driverClientMsg; - pI830DRI->deviceID = pI830->Chipset; - pI830DRI->regsSize = I830_REG_SIZE; - pI830DRI->width = ctx->shared.virtualWidth; - pI830DRI->height = ctx->shared.virtualHeight; - pI830DRI->mem = ctx->shared.fbSize; - pI830DRI->cpp = ctx->cpp; - pI830DRI->backOffset = pI830->BackBuffer.Start; - pI830DRI->backPitch = pI830->BackBuffer.Pitch; - - pI830DRI->depthOffset = pI830->DepthBuffer.Start; - pI830DRI->depthPitch = pI830->DepthBuffer.Pitch; - - pI830DRI->fbOffset = pI830->FrontBuffer.Start; - pI830DRI->fbStride = pI830->FrontBuffer.Pitch; - - pI830DRI->bitsPerPixel = ctx->bpp; - pI830DRI->sarea_priv_offset = sizeof(drm_sarea_t); - - err = I830DRIDoMappings(ctx, pI830, pSAREAPriv); - if (err == FALSE) - return FALSE; - - I830SetupMemoryTiling(ctx, pI830); - - /* Quick hack to clear the front & back buffers. Could also use - * the clear ioctl to do this, but would need to setup hw state - * first. - */ - I830ClearScreen(ctx, pI830, pSAREAPriv); - - I830SetRingRegs(ctx, pI830); - - return TRUE; -} - - -/** - * \brief Validate the fbdev mode. - * - * \param ctx display handle. - * - * \return one on success, or zero on failure. - * - * Saves some registers and returns 1. - * - * \sa radeonValidateMode(). - */ -static int i830ValidateMode( const DRIDriverContext *ctx ) -{ - return 1; -} - -/** - * \brief Examine mode returned by fbdev. - * - * \param ctx display handle. - * - * \return one on success, or zero on failure. - * - * Restores registers that fbdev has clobbered and returns 1. - * - * \sa i810ValidateMode(). - */ -static int i830PostValidateMode( const DRIDriverContext *ctx ) -{ - I830Rec *pI830 = ctx->driverPrivate; - - I830SetRingRegs(ctx, pI830); - return 1; -} - - -/** - * \brief Initialize the framebuffer device mode - * - * \param ctx display handle. - * - * \return one on success, or zero on failure. - * - * Fills in \p info with some default values and some information from \p ctx - * and then calls I810ScreenInit() for the screen initialization. - * - * Before exiting clears the framebuffer memory accessing it directly. - */ -static int i830InitFBDev( DRIDriverContext *ctx ) -{ - I830Rec *pI830 = calloc(1, sizeof(I830Rec)); - int i; - - { - int dummy = ctx->shared.virtualWidth; - - switch (ctx->bpp / 8) { - case 1: dummy = (ctx->shared.virtualWidth + 127) & ~127; break; - case 2: dummy = (ctx->shared.virtualWidth + 31) & ~31; break; - case 3: - case 4: dummy = (ctx->shared.virtualWidth + 15) & ~15; break; - } - - ctx->shared.virtualWidth = dummy; - ctx->shared.Width = ctx->shared.virtualWidth; - } - - - for (i = 0; pitches[i] != 0; i++) { - if (pitches[i] >= ctx->shared.virtualWidth) { - ctx->shared.virtualWidth = pitches[i]; - break; - } - } - - ctx->driverPrivate = (void *)pI830; - - pI830->LpRing = calloc(1, sizeof(I830RingBuffer)); - pI830->Chipset = ctx->chipset; - pI830->LinearAddr = ctx->FBStart; - - if (!I830ScreenInit( ctx, pI830 )) - return 0; - - - return 1; -} - - -/** - * \brief The screen is being closed, so clean up any state and free any - * resources used by the DRI. - * - * \param ctx display handle. - * - * Unmaps the SAREA, closes the DRM device file descriptor and frees the driver - * private data. - */ -static void i830HaltFBDev( DRIDriverContext *ctx ) -{ - drmI830Sarea *pSAREAPriv; - I830Rec *pI830 = ctx->driverPrivate; - - if (pI830->irq) { - drmCtlUninstHandler(ctx->drmFD); - pI830->irq = 0; } - - I830CleanupDma(ctx); - - pSAREAPriv = (drmI830Sarea *)(((char*)ctx->pSAREA) + - sizeof(drm_sarea_t)); - - I830DRIUnmapScreenRegions(ctx, pI830, pSAREAPriv); - drmUnmap( ctx->pSAREA, ctx->shared.SAREASize ); - drmClose(ctx->drmFD); - - if (ctx->driverPrivate) { - free(ctx->driverPrivate); - ctx->driverPrivate = 0; - } -} - - -extern void i810NotifyFocus( int ); - -/** - * \brief Exported driver interface for Mini GLX. - * - * \sa DRIDriverRec. - */ -const struct DRIDriverRec __driDriver = { - i830ValidateMode, - i830PostValidateMode, - i830InitFBDev, - i830HaltFBDev, - NULL,//I830EngineShutdown, - NULL, //I830EngineRestore, -#ifndef _EMBEDDED - 0, -#else - i810NotifyFocus, -#endif -}; +../../intel/server/intel_dri.c
\ No newline at end of file |