diff options
author | Eric Anholt <[email protected]> | 2013-06-20 16:07:07 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2013-06-26 12:28:26 -0700 |
commit | 3dbba95b72262344b82fba018b7c2c1208754cd2 (patch) | |
tree | e8d08b711fae37888c280efa3d7c0a806cefa318 /src/mesa/drivers/dri/i965 | |
parent | 733d32f3765be84a7e908df7e99a278cadcee853 (diff) |
i965: Move the remaining intel code to the i965 directory.
Now that i915's forked off, they don't need to live in a shared directory.
Acked-by: Kenneth Graunke <[email protected]>
Acked-by: Chad Versace <[email protected]>
Acked-by: Adam Jackson <[email protected]>
(and I hear second hand that idr is OK with it, too)
Diffstat (limited to 'src/mesa/drivers/dri/i965')
43 files changed, 14725 insertions, 25 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index 32b041a8caa..27c67d10ec0 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -31,7 +31,6 @@ AM_CFLAGS = \ -I$(top_srcdir)/src/mapi \ -I$(top_srcdir)/src/mesa/ \ -I$(top_srcdir)/src/mesa/drivers/dri/common \ - -I$(top_srcdir)/src/mesa/drivers/dri/intel \ -I$(top_srcdir)/src/mesa/drivers/dri/intel/server \ -I$(top_srcdir)/src/gtest/include \ -I$(top_builddir)/src/mesa/drivers/dri/common \ diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index d38cdf31cc6..8c6524e71af 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -1 +1,560 @@ -../intel/intel_batchbuffer.c
\ No newline at end of file +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "intel_context.h" +#include "intel_batchbuffer.h" +#include "intel_buffer_objects.h" +#include "intel_reg.h" +#include "intel_bufmgr.h" +#include "intel_buffers.h" + +static void +intel_batchbuffer_reset(struct intel_context *intel); + +struct cached_batch_item { + struct cached_batch_item *next; + uint16_t header; + uint16_t size; +}; + +static void clear_cache( struct intel_context *intel ) +{ + struct cached_batch_item *item = intel->batch.cached_items; + + while (item) { + struct cached_batch_item *next = item->next; + free(item); + item = next; + } + + intel->batch.cached_items = NULL; +} + +void +intel_batchbuffer_init(struct intel_context *intel) +{ + intel_batchbuffer_reset(intel); + + if (intel->gen >= 6) { + /* We can't just use brw_state_batch to get a chunk of space for + * the gen6 workaround because it involves actually writing to + * the buffer, and the kernel doesn't let us write to the batch. 
+ */ + intel->batch.workaround_bo = drm_intel_bo_alloc(intel->bufmgr, + "pipe_control workaround", + 4096, 4096); + } + + if (!intel->has_llc) { + intel->batch.cpu_map = malloc(intel->maxBatchSize); + intel->batch.map = intel->batch.cpu_map; + } +} + +static void +intel_batchbuffer_reset(struct intel_context *intel) +{ + if (intel->batch.last_bo != NULL) { + drm_intel_bo_unreference(intel->batch.last_bo); + intel->batch.last_bo = NULL; + } + intel->batch.last_bo = intel->batch.bo; + + clear_cache(intel); + + intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer", + intel->maxBatchSize, 4096); + if (intel->has_llc) { + drm_intel_bo_map(intel->batch.bo, true); + intel->batch.map = intel->batch.bo->virtual; + } + + intel->batch.reserved_space = BATCH_RESERVED; + intel->batch.state_batch_offset = intel->batch.bo->size; + intel->batch.used = 0; + intel->batch.needs_sol_reset = false; +} + +void +intel_batchbuffer_save_state(struct intel_context *intel) +{ + intel->batch.saved.used = intel->batch.used; + intel->batch.saved.reloc_count = + drm_intel_gem_bo_get_reloc_count(intel->batch.bo); +} + +void +intel_batchbuffer_reset_to_saved(struct intel_context *intel) +{ + drm_intel_gem_bo_clear_relocs(intel->batch.bo, intel->batch.saved.reloc_count); + + intel->batch.used = intel->batch.saved.used; + + /* Cached batch state is dead, since we just cleared some unknown part of the + * batchbuffer. Assume that the caller resets any other state necessary. 
+ */ + clear_cache(intel); +} + +void +intel_batchbuffer_free(struct intel_context *intel) +{ + free(intel->batch.cpu_map); + drm_intel_bo_unreference(intel->batch.last_bo); + drm_intel_bo_unreference(intel->batch.bo); + drm_intel_bo_unreference(intel->batch.workaround_bo); + clear_cache(intel); +} + +static void +do_batch_dump(struct intel_context *intel) +{ + struct drm_intel_decode *decode; + struct intel_batchbuffer *batch = &intel->batch; + int ret; + + decode = drm_intel_decode_context_alloc(intel->intelScreen->deviceID); + if (!decode) + return; + + ret = drm_intel_bo_map(batch->bo, false); + if (ret == 0) { + drm_intel_decode_set_batch_pointer(decode, + batch->bo->virtual, + batch->bo->offset, + batch->used); + } else { + fprintf(stderr, + "WARNING: failed to map batchbuffer (%s), " + "dumping uploaded data instead.\n", strerror(ret)); + + drm_intel_decode_set_batch_pointer(decode, + batch->map, + batch->bo->offset, + batch->used); + } + + drm_intel_decode(decode); + + drm_intel_decode_context_free(decode); + + if (ret == 0) { + drm_intel_bo_unmap(batch->bo); + + if (intel->vtbl.debug_batch != NULL) + intel->vtbl.debug_batch(intel); + } +} + +/* TODO: Push this whole function into bufmgr. 
+ */ +static int +do_flush_locked(struct intel_context *intel) +{ + struct intel_batchbuffer *batch = &intel->batch; + int ret = 0; + + if (intel->has_llc) { + drm_intel_bo_unmap(batch->bo); + } else { + ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map); + if (ret == 0 && batch->state_batch_offset != batch->bo->size) { + ret = drm_intel_bo_subdata(batch->bo, + batch->state_batch_offset, + batch->bo->size - batch->state_batch_offset, + (char *)batch->map + batch->state_batch_offset); + } + } + + if (!intel->intelScreen->no_hw) { + int flags; + + if (intel->gen < 6 || !batch->is_blit) { + flags = I915_EXEC_RENDER; + } else { + flags = I915_EXEC_BLT; + } + + if (batch->needs_sol_reset) + flags |= I915_EXEC_GEN7_SOL_RESET; + + if (ret == 0) { + if (unlikely(INTEL_DEBUG & DEBUG_AUB) && intel->vtbl.annotate_aub) + intel->vtbl.annotate_aub(intel); + if (intel->hw_ctx == NULL || batch->is_blit) { + ret = drm_intel_bo_mrb_exec(batch->bo, 4 * batch->used, NULL, 0, 0, + flags); + } else { + ret = drm_intel_gem_bo_context_exec(batch->bo, intel->hw_ctx, + 4 * batch->used, flags); + } + } + } + + if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) + do_batch_dump(intel); + + if (ret != 0) { + fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(-ret)); + exit(1); + } + intel->vtbl.new_batch(intel); + + return ret; +} + +int +_intel_batchbuffer_flush(struct intel_context *intel, + const char *file, int line) +{ + int ret; + + if (intel->batch.used == 0) + return 0; + + if (intel->first_post_swapbuffers_batch == NULL) { + intel->first_post_swapbuffers_batch = intel->batch.bo; + drm_intel_bo_reference(intel->first_post_swapbuffers_batch); + } + + if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) + fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line, + 4*intel->batch.used); + + intel->batch.reserved_space = 0; + + if (intel->vtbl.finish_batch) + intel->vtbl.finish_batch(intel); + + /* Mark the end of the buffer. 
*/ + intel_batchbuffer_emit_dword(intel, MI_BATCH_BUFFER_END); + if (intel->batch.used & 1) { + /* Round batchbuffer usage to 2 DWORDs. */ + intel_batchbuffer_emit_dword(intel, MI_NOOP); + } + + intel_upload_finish(intel); + + /* Check that we didn't just wrap our batchbuffer at a bad time. */ + assert(!intel->no_batch_wrap); + + ret = do_flush_locked(intel); + + if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) { + fprintf(stderr, "waiting for idle\n"); + drm_intel_bo_wait_rendering(intel->batch.bo); + } + + /* Reset the buffer: + */ + intel_batchbuffer_reset(intel); + + return ret; +} + + +/* This is the only way buffers get added to the validate list. + */ +bool +intel_batchbuffer_emit_reloc(struct intel_context *intel, + drm_intel_bo *buffer, + uint32_t read_domains, uint32_t write_domain, + uint32_t delta) +{ + int ret; + + ret = drm_intel_bo_emit_reloc(intel->batch.bo, 4*intel->batch.used, + buffer, delta, + read_domains, write_domain); + assert(ret == 0); + (void)ret; + + /* + * Using the old buffer offset, write in what the right data would be, in case + * the buffer doesn't move and we can short-circuit the relocation processing + * in the kernel + */ + intel_batchbuffer_emit_dword(intel, buffer->offset + delta); + + return true; +} + +bool +intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel, + drm_intel_bo *buffer, + uint32_t read_domains, + uint32_t write_domain, + uint32_t delta) +{ + int ret; + + ret = drm_intel_bo_emit_reloc_fence(intel->batch.bo, 4*intel->batch.used, + buffer, delta, + read_domains, write_domain); + assert(ret == 0); + (void)ret; + + /* + * Using the old buffer offset, write in what the right data would + * be, in case the buffer doesn't move and we can short-circuit the + * relocation processing in the kernel + */ + intel_batchbuffer_emit_dword(intel, buffer->offset + delta); + + return true; +} + +void +intel_batchbuffer_data(struct intel_context *intel, + const void *data, GLuint bytes, bool is_blit) +{ + assert((bytes & 3) == 
0); + intel_batchbuffer_require_space(intel, bytes, is_blit); + __memcpy(intel->batch.map + intel->batch.used, data, bytes); + intel->batch.used += bytes >> 2; +} + +void +intel_batchbuffer_cached_advance(struct intel_context *intel) +{ + struct cached_batch_item **prev = &intel->batch.cached_items, *item; + uint32_t sz = (intel->batch.used - intel->batch.emit) * sizeof(uint32_t); + uint32_t *start = intel->batch.map + intel->batch.emit; + uint16_t op = *start >> 16; + + while (*prev) { + uint32_t *old; + + item = *prev; + old = intel->batch.map + item->header; + if (op == *old >> 16) { + if (item->size == sz && memcmp(old, start, sz) == 0) { + if (prev != &intel->batch.cached_items) { + *prev = item->next; + item->next = intel->batch.cached_items; + intel->batch.cached_items = item; + } + intel->batch.used = intel->batch.emit; + return; + } + + goto emit; + } + prev = &item->next; + } + + item = malloc(sizeof(struct cached_batch_item)); + if (item == NULL) + return; + + item->next = intel->batch.cached_items; + intel->batch.cached_items = item; + +emit: + item->size = sz; + item->header = intel->batch.emit; +} + +/** + * Restriction [DevSNB, DevIVB]: + * + * Prior to changing Depth/Stencil Buffer state (i.e. any combination of + * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER, + * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall + * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth + * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by + * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set), + * unless SW can otherwise guarantee that the pipeline from WM onwards is + * already flushed (e.g., via a preceding MI_FLUSH). 
+ */ +void +intel_emit_depth_stall_flushes(struct intel_context *intel) +{ + assert(intel->gen >= 6 && intel->gen <= 7); + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(PIPE_CONTROL_DEPTH_STALL); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH() + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(PIPE_CONTROL_DEPTH_CACHE_FLUSH); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(PIPE_CONTROL_DEPTH_STALL); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); +} + +/** + * From the BSpec, volume 2a.03: VS Stage Input / State: + * "[DevIVB] A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth + * stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS, + * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS, + * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL needs + * to be sent before any combination of VS associated 3DSTATE." + */ +void +gen7_emit_vs_workaround_flush(struct intel_context *intel) +{ + assert(intel->gen == 7); + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_IMMEDIATE); + OUT_RELOC(intel->batch.workaround_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); +} + +/** + * Emits a PIPE_CONTROL with a non-zero post-sync operation, for + * implementing two workarounds on gen6. From section 1.4.7.1 + * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: + * + * [DevSNB-C+{W/A}] Before any depth stall flush (including those + * produced by non-pipelined state commands), software needs to first + * send a PIPE_CONTROL with no bits set except Post-Sync Operation != + * 0. 
+ * + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable + * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. + * + * And the workaround for these two requires this workaround first: + * + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent + * BEFORE the pipe-control with a post-sync op and no write-cache + * flushes. + * + * And this last workaround is tricky because of the requirements on + * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM + * volume 2 part 1: + * + * "1 of the following must also be set: + * - Render Target Cache Flush Enable ([12] of DW1) + * - Depth Cache Flush Enable ([0] of DW1) + * - Stall at Pixel Scoreboard ([1] of DW1) + * - Depth Stall ([13] of DW1) + * - Post-Sync Operation ([13] of DW1) + * - Notify Enable ([8] of DW1)" + * + * The cache flushes require the workaround flush that triggered this + * one, so we can't use it. Depth stall would trigger the same. + * Post-sync nonzero is what triggered this second workaround, so we + * can't use that one either. Notify enable is IRQs, which aren't + * really our business. That leaves only stall at scoreboard. 
+ */ +void +intel_emit_post_sync_nonzero_flush(struct intel_context *intel) +{ + if (!intel->batch.need_workaround_flush) + return; + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); + OUT_RELOC(intel->batch.workaround_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + + intel->batch.need_workaround_flush = false; +} + +/* Emit a pipelined flush to either flush render and texture cache for + * reading from a FBO-drawn texture, or flush so that frontbuffer + * render appears on the screen in DRI1. + * + * This is also used for the always_flush_cache driconf debug option. + */ +void +intel_batchbuffer_emit_mi_flush(struct intel_context *intel) +{ + if (intel->gen >= 6) { + if (intel->batch.is_blit) { + BEGIN_BATCH_BLT(4); + OUT_BATCH(MI_FLUSH_DW); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } else { + if (intel->gen == 6) { + /* Hardware workaround: SNB B-Spec says: + * + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache + * Flush Enable =1, a PIPE_CONTROL with any non-zero + * post-sync-op is required. 
+ */ + intel_emit_post_sync_nonzero_flush(intel); + } + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH | + PIPE_CONTROL_WRITE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_VF_CACHE_INVALIDATE | + PIPE_CONTROL_TC_FLUSH | + PIPE_CONTROL_NO_WRITE | + PIPE_CONTROL_CS_STALL); + OUT_BATCH(0); /* write address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + } + } else if (intel->gen >= 4) { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) | + PIPE_CONTROL_WRITE_FLUSH | + PIPE_CONTROL_NO_WRITE); + OUT_BATCH(0); /* write address */ + OUT_BATCH(0); /* write data */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(1); + OUT_BATCH(MI_FLUSH); + ADVANCE_BATCH(); + } +} diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h new file mode 100644 index 00000000000..1a6d1aa4946 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -0,0 +1,173 @@ +#ifndef INTEL_BATCHBUFFER_H +#define INTEL_BATCHBUFFER_H + +#include "main/mtypes.h" + +#include "intel_context.h" +#include "intel_bufmgr.h" +#include "intel_reg.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Number of bytes to reserve for commands necessary to complete a batch. 
+ * + * This includes: + * - MI_BATCH_BUFFER_END (4 bytes) + * - Optional MI_NOOP for ensuring the batch length is qword aligned (4 bytes) + * - Any state emitted by vtbl->finish_batch(): + * - Gen4-5 record ending occlusion query values (4 * 4 = 16 bytes) + */ +#define BATCH_RESERVED 24 + +struct intel_batchbuffer; + +void intel_batchbuffer_init(struct intel_context *intel); +void intel_batchbuffer_free(struct intel_context *intel); +void intel_batchbuffer_save_state(struct intel_context *intel); +void intel_batchbuffer_reset_to_saved(struct intel_context *intel); + +int _intel_batchbuffer_flush(struct intel_context *intel, + const char *file, int line); + +#define intel_batchbuffer_flush(intel) \ + _intel_batchbuffer_flush(intel, __FILE__, __LINE__) + + + +/* Unlike bmBufferData, this currently requires the buffer be mapped. + * Consider it a convenience function wrapping multiple + * intel_batchbuffer_emit_dword() calls. + */ +void intel_batchbuffer_data(struct intel_context *intel, + const void *data, GLuint bytes, bool is_blit); + +bool intel_batchbuffer_emit_reloc(struct intel_context *intel, + drm_intel_bo *buffer, + uint32_t read_domains, + uint32_t write_domain, + uint32_t offset); +bool intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel, + drm_intel_bo *buffer, + uint32_t read_domains, + uint32_t write_domain, + uint32_t offset); +void intel_batchbuffer_emit_mi_flush(struct intel_context *intel); +void intel_emit_post_sync_nonzero_flush(struct intel_context *intel); +void intel_emit_depth_stall_flushes(struct intel_context *intel); +void gen7_emit_vs_workaround_flush(struct intel_context *intel); + +static INLINE uint32_t float_as_int(float f) +{ + union { + float f; + uint32_t d; + } fi; + + fi.f = f; + return fi.d; +} + +/* Inline functions - might actually be better off with these + * non-inlined. Certainly better off switching all command packets to + * be passed as structs rather than dwords, but that's a little bit of + * work... 
+ */ +static INLINE unsigned +intel_batchbuffer_space(struct intel_context *intel) +{ + return (intel->batch.state_batch_offset - intel->batch.reserved_space) + - intel->batch.used*4; +} + + +static INLINE void +intel_batchbuffer_emit_dword(struct intel_context *intel, GLuint dword) +{ +#ifdef DEBUG + assert(intel_batchbuffer_space(intel) >= 4); +#endif + intel->batch.map[intel->batch.used++] = dword; +} + +static INLINE void +intel_batchbuffer_emit_float(struct intel_context *intel, float f) +{ + intel_batchbuffer_emit_dword(intel, float_as_int(f)); +} + +static INLINE void +intel_batchbuffer_require_space(struct intel_context *intel, + GLuint sz, int is_blit) +{ + + if (intel->gen >= 6 && + intel->batch.is_blit != is_blit && intel->batch.used) { + intel_batchbuffer_flush(intel); + } + + intel->batch.is_blit = is_blit; + +#ifdef DEBUG + assert(sz < intel->maxBatchSize - BATCH_RESERVED); +#endif + if (intel_batchbuffer_space(intel) < sz) + intel_batchbuffer_flush(intel); +} + +static INLINE void +intel_batchbuffer_begin(struct intel_context *intel, int n, bool is_blit) +{ + intel_batchbuffer_require_space(intel, n * 4, is_blit); + + intel->batch.emit = intel->batch.used; +#ifdef DEBUG + intel->batch.total = n; +#endif +} + +static INLINE void +intel_batchbuffer_advance(struct intel_context *intel) +{ +#ifdef DEBUG + struct intel_batchbuffer *batch = &intel->batch; + unsigned int _n = batch->used - batch->emit; + assert(batch->total != 0); + if (_n != batch->total) { + fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", + _n, batch->total); + abort(); + } + batch->total = 0; +#endif +} + +void intel_batchbuffer_cached_advance(struct intel_context *intel); + +/* Here are the crusty old macros, to be removed: + */ +#define BATCH_LOCALS + +#define BEGIN_BATCH(n) intel_batchbuffer_begin(intel, n, false) +#define BEGIN_BATCH_BLT(n) intel_batchbuffer_begin(intel, n, true) +#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel, d) +#define OUT_BATCH_F(f) 
intel_batchbuffer_emit_float(intel,f) +#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ + intel_batchbuffer_emit_reloc(intel, buf, \ + read_domains, write_domain, delta); \ +} while (0) +#define OUT_RELOC_FENCED(buf, read_domains, write_domain, delta) do { \ + intel_batchbuffer_emit_reloc_fenced(intel, buf, \ + read_domains, write_domain, delta); \ +} while (0) + +#define ADVANCE_BATCH() intel_batchbuffer_advance(intel); +#define CACHED_BATCH() intel_batchbuffer_cached_advance(intel); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index dd6c8d17c28..da56f55c4df 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -1 +1,770 @@ -../intel/intel_blit.c
\ No newline at end of file +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "main/mtypes.h" +#include "main/context.h" +#include "main/enums.h" +#include "main/colormac.h" +#include "main/fbobject.h" + +#include "intel_blit.h" +#include "intel_buffers.h" +#include "intel_context.h" +#include "intel_fbo.h" +#include "intel_reg.h" +#include "intel_regions.h" +#include "intel_batchbuffer.h" +#include "intel_mipmap_tree.h" + +#define FILE_DEBUG_FLAG DEBUG_BLIT + +static void +intel_miptree_set_alpha_to_one(struct intel_context *intel, + struct intel_mipmap_tree *mt, + int x, int y, int width, int height); + +static GLuint translate_raster_op(GLenum logicop) +{ + switch(logicop) { + case GL_CLEAR: return 0x00; + case GL_AND: return 0x88; + case GL_AND_REVERSE: return 0x44; + case GL_COPY: return 0xCC; + case GL_AND_INVERTED: return 0x22; + case GL_NOOP: return 0xAA; + case GL_XOR: return 0x66; + case GL_OR: return 0xEE; + case GL_NOR: return 0x11; + case GL_EQUIV: return 0x99; + case GL_INVERT: return 0x55; + case GL_OR_REVERSE: return 0xDD; + case GL_COPY_INVERTED: return 0x33; + case GL_OR_INVERTED: return 0xBB; + case GL_NAND: return 0x77; + case GL_SET: return 0xFF; + default: return 0; + } +} + +static uint32_t +br13_for_cpp(int cpp) +{ + switch (cpp) { + case 4: + return BR13_8888; + break; + case 2: + return BR13_565; + break; + case 1: + return BR13_8; + break; + default: + assert(0); + return 0; + } +} + +/** + * Emits the packet for switching the blitter from X to Y tiled or back. + * + * This has to be called in a single BEGIN_BATCH_BLT_TILED() / + * ADVANCE_BATCH_TILED(). This is because BCS_SWCTRL is saved and restored as + * part of the power context, not a render context, and if the batchbuffer was + * to get flushed between setting and blitting, or blitting and restoring, our + * tiling state would leak into other unsuspecting applications (like the X + * server). 
+ */ +static void +set_blitter_tiling(struct intel_context *intel, + bool dst_y_tiled, bool src_y_tiled) +{ + assert(intel->gen >= 6); + + /* Idle the blitter before we update how tiling is interpreted. */ + OUT_BATCH(MI_FLUSH_DW); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2)); + OUT_BATCH(BCS_SWCTRL); + OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16 | + (dst_y_tiled ? BCS_SWCTRL_DST_Y : 0) | + (src_y_tiled ? BCS_SWCTRL_SRC_Y : 0)); +} + +#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled) do { \ + BEGIN_BATCH_BLT(n + ((dst_y_tiled || src_y_tiled) ? 14 : 0)); \ + if (dst_y_tiled || src_y_tiled) \ + set_blitter_tiling(intel, dst_y_tiled, src_y_tiled); \ + } while (0) + +#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) do { \ + if (dst_y_tiled || src_y_tiled) \ + set_blitter_tiling(intel, false, false); \ + ADVANCE_BATCH(); \ + } while (0) + +/** + * Implements a rectangular block transfer (blit) of pixels between two + * miptrees. + * + * Our blitter can operate on 1, 2, or 4-byte-per-pixel data, with generous, + * but limited, pitches and sizes allowed. + * + * The src/dst coordinates are relative to the given level/slice of the + * miptree. + * + * If @src_flip or @dst_flip is set, then the rectangle within that miptree + * will be inverted (including scanline order) when copying. This is common + * in GL when copying between window system and user-created + * renderbuffers/textures. 
+ */ +bool +intel_miptree_blit(struct intel_context *intel, + struct intel_mipmap_tree *src_mt, + int src_level, int src_slice, + uint32_t src_x, uint32_t src_y, bool src_flip, + struct intel_mipmap_tree *dst_mt, + int dst_level, int dst_slice, + uint32_t dst_x, uint32_t dst_y, bool dst_flip, + uint32_t width, uint32_t height, + GLenum logicop) +{ + /* No sRGB decode or encode is done by the hardware blitter, which is + * consistent with what we want in the callers (glCopyTexSubImage(), + * glBlitFramebuffer(), texture validation, etc.). + */ + gl_format src_format = _mesa_get_srgb_format_linear(src_mt->format); + gl_format dst_format = _mesa_get_srgb_format_linear(dst_mt->format); + + /* The blitter doesn't support doing any format conversions. We do also + * support blitting ARGB8888 to XRGB8888 (trivial, the values dropped into + * the X channel don't matter), and XRGB8888 to ARGB8888 by setting the A + * channel to 1.0 at the end. + */ + if (src_format != dst_format && + ((src_format != MESA_FORMAT_ARGB8888 && + src_format != MESA_FORMAT_XRGB8888) || + (dst_format != MESA_FORMAT_ARGB8888 && + dst_format != MESA_FORMAT_XRGB8888))) { + perf_debug("%s: Can't use hardware blitter from %s to %s, " + "falling back.\n", __FUNCTION__, + _mesa_get_format_name(src_format), + _mesa_get_format_name(dst_format)); + return false; + } + + /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics + * Data Size Limitations): + * + * The BLT engine is capable of transferring very large quantities of + * graphics data. Any graphics data read from and written to the + * destination is permitted to represent a number of pixels that + * occupies up to 65,536 scan lines and up to 32,768 bytes per scan line + * at the destination. The maximum number of pixels that may be + * represented per scan line’s worth of graphics data depends on the + * color depth. 
+ * + * Furthermore, intelEmitCopyBlit (which is called below) uses a signed + * 16-bit integer to represent buffer pitch, so it can only handle buffer + * pitches < 32k. + * + * As a result of these two limitations, we can only use the blitter to do + * this copy when the region's pitch is less than 32k. + */ + if (src_mt->region->pitch > 32768 || + dst_mt->region->pitch > 32768) { + perf_debug("Falling back due to >32k pitch\n"); + return false; + } + + /* The blitter has no idea about HiZ or fast color clears, so we need to + * resolve the miptrees before we do anything. + */ + intel_miptree_slice_resolve_depth(intel, src_mt, src_level, src_slice); + intel_miptree_slice_resolve_depth(intel, dst_mt, dst_level, dst_slice); + intel_miptree_resolve_color(intel, src_mt); + intel_miptree_resolve_color(intel, dst_mt); + + if (src_flip) + src_y = src_mt->level[src_level].height - src_y - height; + + if (dst_flip) + dst_y = dst_mt->level[dst_level].height - dst_y - height; + + int src_pitch = src_mt->region->pitch; + if (src_flip != dst_flip) + src_pitch = -src_pitch; + + uint32_t src_image_x, src_image_y; + intel_miptree_get_image_offset(src_mt, src_level, src_slice, + &src_image_x, &src_image_y); + src_x += src_image_x; + src_y += src_image_y; + + uint32_t dst_image_x, dst_image_y; + intel_miptree_get_image_offset(dst_mt, dst_level, dst_slice, + &dst_image_x, &dst_image_y); + dst_x += dst_image_x; + dst_y += dst_image_y; + + if (!intelEmitCopyBlit(intel, + src_mt->cpp, + src_pitch, + src_mt->region->bo, src_mt->offset, + src_mt->region->tiling, + dst_mt->region->pitch, + dst_mt->region->bo, dst_mt->offset, + dst_mt->region->tiling, + src_x, src_y, + dst_x, dst_y, + width, height, + logicop)) { + return false; + } + + if (src_mt->format == MESA_FORMAT_XRGB8888 && + dst_mt->format == MESA_FORMAT_ARGB8888) { + intel_miptree_set_alpha_to_one(intel, dst_mt, + dst_x, dst_y, + width, height); + } + + return true; +} + +/* Copy BitBlt + */ +bool +intelEmitCopyBlit(struct 
intel_context *intel, + GLuint cpp, + GLshort src_pitch, + drm_intel_bo *src_buffer, + GLuint src_offset, + uint32_t src_tiling, + GLshort dst_pitch, + drm_intel_bo *dst_buffer, + GLuint dst_offset, + uint32_t dst_tiling, + GLshort src_x, GLshort src_y, + GLshort dst_x, GLshort dst_y, + GLshort w, GLshort h, + GLenum logic_op) +{ + GLuint CMD, BR13, pass = 0; + int dst_y2 = dst_y + h; + int dst_x2 = dst_x + w; + drm_intel_bo *aper_array[3]; + bool dst_y_tiled = dst_tiling == I915_TILING_Y; + bool src_y_tiled = src_tiling == I915_TILING_Y; + BATCH_LOCALS; + + if (dst_tiling != I915_TILING_NONE) { + if (dst_offset & 4095) + return false; + } + if (src_tiling != I915_TILING_NONE) { + if (src_offset & 4095) + return false; + } + if ((dst_y_tiled || src_y_tiled) && intel->gen < 6) + return false; + + /* do space check before going any further */ + do { + aper_array[0] = intel->batch.bo; + aper_array[1] = dst_buffer; + aper_array[2] = src_buffer; + + if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { + intel_batchbuffer_flush(intel); + pass++; + } else + break; + } while (pass < 2); + + if (pass >= 2) + return false; + + intel_batchbuffer_require_space(intel, 8 * 4, true); + DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, + src_buffer, src_pitch, src_offset, src_x, src_y, + dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); + + /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop + * the low bits. + */ + if (src_pitch % 4 != 0 || dst_pitch % 4 != 0) + return false; + + /* For big formats (such as floating point), do the copy using 16 or 32bpp + * and multiply the coordinates. 
+ */ + if (cpp > 4) { + if (cpp % 4 == 2) { + dst_x *= cpp / 2; + dst_x2 *= cpp / 2; + src_x *= cpp / 2; + cpp = 2; + } else { + assert(cpp % 4 == 0); + dst_x *= cpp / 4; + dst_x2 *= cpp / 4; + src_x *= cpp / 4; + cpp = 4; + } + } + + BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16; + + switch (cpp) { + case 1: + case 2: + CMD = XY_SRC_COPY_BLT_CMD; + break; + case 4: + CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + break; + default: + return false; + } + +#ifndef I915 + if (dst_tiling != I915_TILING_NONE) { + CMD |= XY_DST_TILED; + dst_pitch /= 4; + } + if (src_tiling != I915_TILING_NONE) { + CMD |= XY_SRC_TILED; + src_pitch /= 4; + } +#endif + + if (dst_y2 <= dst_y || dst_x2 <= dst_x) { + return true; + } + + assert(dst_x < dst_x2); + assert(dst_y < dst_y2); + + BEGIN_BATCH_BLT_TILED(8, dst_y_tiled, src_y_tiled); + + OUT_BATCH(CMD | (8 - 2)); + OUT_BATCH(BR13 | (uint16_t)dst_pitch); + OUT_BATCH((dst_y << 16) | dst_x); + OUT_BATCH((dst_y2 << 16) | dst_x2); + OUT_RELOC_FENCED(dst_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + dst_offset); + OUT_BATCH((src_y << 16) | src_x); + OUT_BATCH((uint16_t)src_pitch); + OUT_RELOC_FENCED(src_buffer, + I915_GEM_DOMAIN_RENDER, 0, + src_offset); + + ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled); + + intel_batchbuffer_emit_mi_flush(intel); + + return true; +} + + +/** + * Use blitting to clear the renderbuffers named by 'flags'. + * Note: we can't use the ctx->DrawBuffer->_ColorDrawBufferIndexes field + * since that might include software renderbuffers or renderbuffers + * which we're clearing with triangles. 
+ * \param mask bitmask of BUFFER_BIT_* values indicating buffers to clear + */ +GLbitfield +intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) +{ + struct intel_context *intel = intel_context(ctx); + struct gl_framebuffer *fb = ctx->DrawBuffer; + GLuint clear_depth_value, clear_depth_mask; + GLint cx, cy, cw, ch; + GLbitfield fail_mask = 0; + BATCH_LOCALS; + + /* Note: we don't use this function on Gen7+ hardware, so we can safely + * ignore fast color clear issues. + */ + assert(intel->gen < 7); + + /* + * Compute values for clearing the buffers. + */ + clear_depth_value = 0; + clear_depth_mask = 0; + if (mask & BUFFER_BIT_DEPTH) { + clear_depth_value = (GLuint) (fb->_DepthMax * ctx->Depth.Clear); + clear_depth_mask = XY_BLT_WRITE_RGB; + } + if (mask & BUFFER_BIT_STENCIL) { + clear_depth_value |= (ctx->Stencil.Clear & 0xff) << 24; + clear_depth_mask |= XY_BLT_WRITE_ALPHA; + } + + cx = fb->_Xmin; + if (_mesa_is_winsys_fbo(fb)) + cy = ctx->DrawBuffer->Height - fb->_Ymax; + else + cy = fb->_Ymin; + cw = fb->_Xmax - fb->_Xmin; + ch = fb->_Ymax - fb->_Ymin; + + if (cw == 0 || ch == 0) + return 0; + + /* Loop over all renderbuffers */ + mask &= (1 << BUFFER_COUNT) - 1; + while (mask) { + GLuint buf = ffs(mask) - 1; + bool is_depth_stencil = buf == BUFFER_DEPTH || buf == BUFFER_STENCIL; + struct intel_renderbuffer *irb; + int x1, y1, x2, y2; + uint32_t clear_val; + uint32_t BR13, CMD; + struct intel_region *region; + int pitch, cpp; + drm_intel_bo *aper_array[2]; + + mask &= ~(1 << buf); + + irb = intel_get_renderbuffer(fb, buf); + if (irb && irb->mt) { + region = irb->mt->region; + assert(region); + assert(region->bo); + } else { + fail_mask |= 1 << buf; + continue; + } + + /* OK, clear this renderbuffer */ + x1 = cx + irb->draw_x; + y1 = cy + irb->draw_y; + x2 = cx + cw + irb->draw_x; + y2 = cy + ch + irb->draw_y; + + pitch = region->pitch; + cpp = region->cpp; + + DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", + __FUNCTION__, + region->bo, pitch, + x1, y1, x2 - 
x1, y2 - y1); + + BR13 = 0xf0 << 16; + CMD = XY_COLOR_BLT_CMD; + + /* Setup the blit command */ + if (cpp == 4) { + if (is_depth_stencil) { + CMD |= clear_depth_mask; + } else { + /* clearing RGBA */ + CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + } + } + + assert(region->tiling != I915_TILING_Y); + +#ifndef I915 + if (region->tiling != I915_TILING_NONE) { + CMD |= XY_DST_TILED; + pitch /= 4; + } +#endif + BR13 |= pitch; + + if (is_depth_stencil) { + clear_val = clear_depth_value; + } else { + uint8_t clear[4]; + GLfloat *color = ctx->Color.ClearColor.f; + + _mesa_unclamped_float_rgba_to_ubyte(clear, color); + + switch (intel_rb_format(irb)) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + clear_val = PACK_COLOR_8888(clear[3], clear[0], + clear[1], clear[2]); + break; + case MESA_FORMAT_RGB565: + clear_val = PACK_COLOR_565(clear[0], clear[1], clear[2]); + break; + case MESA_FORMAT_ARGB4444: + clear_val = PACK_COLOR_4444(clear[3], clear[0], + clear[1], clear[2]); + break; + case MESA_FORMAT_ARGB1555: + clear_val = PACK_COLOR_1555(clear[3], clear[0], + clear[1], clear[2]); + break; + case MESA_FORMAT_A8: + clear_val = PACK_COLOR_8888(clear[3], clear[3], + clear[3], clear[3]); + break; + default: + fail_mask |= 1 << buf; + continue; + } + } + + BR13 |= br13_for_cpp(cpp); + + assert(x1 < x2); + assert(y1 < y2); + + /* do space check before going any further */ + aper_array[0] = intel->batch.bo; + aper_array[1] = region->bo; + + if (drm_intel_bufmgr_check_aperture_space(aper_array, + ARRAY_SIZE(aper_array)) != 0) { + intel_batchbuffer_flush(intel); + } + + BEGIN_BATCH_BLT(6); + OUT_BATCH(CMD | (6 - 2)); + OUT_BATCH(BR13); + OUT_BATCH((y1 << 16) | x1); + OUT_BATCH((y2 << 16) | x2); + OUT_RELOC_FENCED(region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + OUT_BATCH(clear_val); + ADVANCE_BATCH(); + + if (intel->always_flush_cache) + intel_batchbuffer_emit_mi_flush(intel); + + if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) + mask &= 
~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); + } + + return fail_mask; +} + +bool +intelEmitImmediateColorExpandBlit(struct intel_context *intel, + GLuint cpp, + GLubyte *src_bits, GLuint src_size, + GLuint fg_color, + GLshort dst_pitch, + drm_intel_bo *dst_buffer, + GLuint dst_offset, + uint32_t dst_tiling, + GLshort x, GLshort y, + GLshort w, GLshort h, + GLenum logic_op) +{ + int dwords = ALIGN(src_size, 8) / 4; + uint32_t opcode, br13, blit_cmd; + + if (dst_tiling != I915_TILING_NONE) { + if (dst_offset & 4095) + return false; + if (dst_tiling == I915_TILING_Y) + return false; + } + + assert( logic_op - GL_CLEAR >= 0 ); + assert( logic_op - GL_CLEAR < 0x10 ); + assert(dst_pitch > 0); + + if (w < 0 || h < 0) + return true; + + DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n", + __FUNCTION__, + dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords); + + intel_batchbuffer_require_space(intel, + (8 * 4) + + (3 * 4) + + dwords * 4, true); + + opcode = XY_SETUP_BLT_CMD; + if (cpp == 4) + opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; +#ifndef I915 + if (dst_tiling != I915_TILING_NONE) { + opcode |= XY_DST_TILED; + dst_pitch /= 4; + } +#endif + + br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29); + br13 |= br13_for_cpp(cpp); + + blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? 
*/ + if (dst_tiling != I915_TILING_NONE) + blit_cmd |= XY_DST_TILED; + + BEGIN_BATCH_BLT(8 + 3); + OUT_BATCH(opcode | (8 - 2)); + OUT_BATCH(br13); + OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ + OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */ + OUT_RELOC_FENCED(dst_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + dst_offset); + OUT_BATCH(0); /* bg */ + OUT_BATCH(fg_color); /* fg */ + OUT_BATCH(0); /* pattern base addr */ + + OUT_BATCH(blit_cmd | ((3 - 2) + dwords)); + OUT_BATCH((y << 16) | x); + OUT_BATCH(((y + h) << 16) | (x + w)); + ADVANCE_BATCH(); + + intel_batchbuffer_data(intel, src_bits, dwords * 4, true); + + intel_batchbuffer_emit_mi_flush(intel); + + return true; +} + +/* We don't have a memmove-type blit like some other hardware, so we'll do a + * rectangular blit covering a large space, then emit 1-scanline blit at the + * end to cover the last if we need. + */ +void +intel_emit_linear_blit(struct intel_context *intel, + drm_intel_bo *dst_bo, + unsigned int dst_offset, + drm_intel_bo *src_bo, + unsigned int src_offset, + unsigned int size) +{ + struct gl_context *ctx = &intel->ctx; + GLuint pitch, height; + bool ok; + + /* The pitch given to the GPU must be DWORD aligned, and + * we want width to match pitch. Max width is (1 << 15 - 1), + * rounding that down to the nearest DWORD is 1 << 15 - 4 + */ + pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4); + height = (pitch == 0) ? 
1 : size / pitch; + ok = intelEmitCopyBlit(intel, 1, + pitch, src_bo, src_offset, I915_TILING_NONE, + pitch, dst_bo, dst_offset, I915_TILING_NONE, + 0, 0, /* src x/y */ + 0, 0, /* dst x/y */ + pitch, height, /* w, h */ + GL_COPY); + if (!ok) + _mesa_problem(ctx, "Failed to linear blit %dx%d\n", pitch, height); + + src_offset += pitch * height; + dst_offset += pitch * height; + size -= pitch * height; + assert (size < (1 << 15)); + pitch = ALIGN(size, 4); + if (size != 0) { + ok = intelEmitCopyBlit(intel, 1, + pitch, src_bo, src_offset, I915_TILING_NONE, + pitch, dst_bo, dst_offset, I915_TILING_NONE, + 0, 0, /* src x/y */ + 0, 0, /* dst x/y */ + size, 1, /* w, h */ + GL_COPY); + if (!ok) + _mesa_problem(ctx, "Failed to linear blit %dx%d\n", size, 1); + } +} + +/** + * Used to initialize the alpha value of an ARGB8888 miptree after copying + * into it from an XRGB8888 source. + * + * This is very common with glCopyTexImage2D(). Note that the coordinates are + * relative to the start of the miptree, not relative to a slice within the + * miptree. 
+ */ +static void +intel_miptree_set_alpha_to_one(struct intel_context *intel, + struct intel_mipmap_tree *mt, + int x, int y, int width, int height) +{ + struct intel_region *region = mt->region; + uint32_t BR13, CMD; + int pitch, cpp; + drm_intel_bo *aper_array[2]; + BATCH_LOCALS; + + pitch = region->pitch; + cpp = region->cpp; + + DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", + __FUNCTION__, region->bo, pitch, x, y, width, height); + + BR13 = br13_for_cpp(cpp) | 0xf0 << 16; + CMD = XY_COLOR_BLT_CMD; + CMD |= XY_BLT_WRITE_ALPHA; + +#ifndef I915 + if (region->tiling != I915_TILING_NONE) { + CMD |= XY_DST_TILED; + pitch /= 4; + } +#endif + BR13 |= pitch; + + /* do space check before going any further */ + aper_array[0] = intel->batch.bo; + aper_array[1] = region->bo; + + if (drm_intel_bufmgr_check_aperture_space(aper_array, + ARRAY_SIZE(aper_array)) != 0) { + intel_batchbuffer_flush(intel); + } + + bool dst_y_tiled = region->tiling == I915_TILING_Y; + + BEGIN_BATCH_BLT_TILED(6, dst_y_tiled, false); + OUT_BATCH(CMD | (6 - 2)); + OUT_BATCH(BR13); + OUT_BATCH((y << 16) | x); + OUT_BATCH(((y + height) << 16) | (x + width)); + OUT_RELOC_FENCED(region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + OUT_BATCH(0xffffffff); /* white, but only alpha gets written */ + ADVANCE_BATCH_TILED(dst_y_tiled, false); + + intel_batchbuffer_emit_mi_flush(intel); +} diff --git a/src/mesa/drivers/dri/i965/intel_blit.h b/src/mesa/drivers/dri/i965/intel_blit.h new file mode 100644 index 00000000000..0decc80a6e0 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_blit.h @@ -0,0 +1,83 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef INTEL_BLIT_H +#define INTEL_BLIT_H + +#include "intel_context.h" + +extern void intelCopyBuffer(const __DRIdrawable * dpriv, + const drm_clip_rect_t * rect); + +extern GLbitfield intelClearWithBlit(struct gl_context * ctx, GLbitfield mask); + +bool +intelEmitCopyBlit(struct intel_context *intel, + GLuint cpp, + GLshort src_pitch, + drm_intel_bo *src_buffer, + GLuint src_offset, + uint32_t src_tiling, + GLshort dst_pitch, + drm_intel_bo *dst_buffer, + GLuint dst_offset, + uint32_t dst_tiling, + GLshort srcx, GLshort srcy, + GLshort dstx, GLshort dsty, + GLshort w, GLshort h, + GLenum logicop ); + +bool intel_miptree_blit(struct intel_context *intel, + struct intel_mipmap_tree *src_mt, + int src_level, int src_slice, + uint32_t src_x, uint32_t src_y, bool src_flip, + struct intel_mipmap_tree *dst_mt, + int dst_level, int dst_slice, + uint32_t dst_x, uint32_t dst_y, bool dst_flip, + uint32_t width, uint32_t height, + GLenum logicop); + +bool +intelEmitImmediateColorExpandBlit(struct intel_context *intel, + GLuint cpp, + GLubyte *src_bits, GLuint src_size, + GLuint fg_color, + GLshort dst_pitch, + drm_intel_bo *dst_buffer, + GLuint dst_offset, + uint32_t dst_tiling, + GLshort x, GLshort y, + GLshort w, GLshort h, + GLenum logic_op); +void intel_emit_linear_blit(struct intel_context *intel, + drm_intel_bo *dst_bo, + unsigned int dst_offset, + drm_intel_bo *src_bo, + unsigned int src_offset, + unsigned int size); + +#endif diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c index e06dd3c8d3c..f568864f4b4 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c @@ -1 +1,853 @@ -../intel/intel_buffer_objects.c
\ No newline at end of file +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "main/imports.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "main/bufferobj.h" + +#include "intel_blit.h" +#include "intel_buffer_objects.h" +#include "intel_batchbuffer.h" +#include "intel_context.h" +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" +#include "intel_regions.h" + +#ifndef I915 +#include "brw_context.h" +#endif + +static GLboolean +intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj); + +/** Allocates a new drm_intel_bo to store the data for the buffer object. 
*/ +static void +intel_bufferobj_alloc_buffer(struct intel_context *intel, + struct intel_buffer_object *intel_obj) +{ + intel_obj->buffer = drm_intel_bo_alloc(intel->bufmgr, "bufferobj", + intel_obj->Base.Size, 64); + +#ifndef I915 + /* the buffer might be bound as a uniform buffer, need to update it + */ + { + struct brw_context *brw = brw_context(&intel->ctx); + brw->state.dirty.brw |= BRW_NEW_UNIFORM_BUFFER; + } +#endif +} + +static void +release_buffer(struct intel_buffer_object *intel_obj) +{ + drm_intel_bo_unreference(intel_obj->buffer); + intel_obj->buffer = NULL; + intel_obj->offset = 0; + intel_obj->source = 0; +} + +/** + * There is some duplication between mesa's bufferobjects and our + * bufmgr buffers. Both have an integer handle and a hashtable to + * lookup an opaque structure. It would be nice if the handles and + * internal structure where somehow shared. + */ +static struct gl_buffer_object * +intel_bufferobj_alloc(struct gl_context * ctx, GLuint name, GLenum target) +{ + struct intel_buffer_object *obj = CALLOC_STRUCT(intel_buffer_object); + + _mesa_initialize_buffer_object(ctx, &obj->Base, name, target); + + obj->buffer = NULL; + + return &obj->Base; +} + +/** + * Deallocate/free a vertex/pixel buffer object. + * Called via glDeleteBuffersARB(). + */ +static void +intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj) +{ + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + assert(intel_obj); + + /* Buffer objects are automatically unmapped when deleting according + * to the spec, but Mesa doesn't do UnmapBuffer for us at context destroy + * (though it does if you call glDeleteBuffers) + */ + if (obj->Pointer) + intel_bufferobj_unmap(ctx, obj); + + free(intel_obj->sys_buffer); + + drm_intel_bo_unreference(intel_obj->buffer); + free(intel_obj); +} + + + +/** + * Allocate space for and store data in a buffer object. Any data that was + * previously stored in the buffer object is lost. 
If data is NULL, + * memory will be allocated, but no copy will occur. + * Called via ctx->Driver.BufferData(). + * \return true for success, false if out of memory + */ +static GLboolean +intel_bufferobj_data(struct gl_context * ctx, + GLenum target, + GLsizeiptrARB size, + const GLvoid * data, + GLenum usage, struct gl_buffer_object *obj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + /* Part of the ABI, but this function doesn't use it. + */ +#ifndef I915 + (void) target; +#endif + + intel_obj->Base.Size = size; + intel_obj->Base.Usage = usage; + + assert(!obj->Pointer); /* Mesa should have unmapped it */ + + if (intel_obj->buffer != NULL) + release_buffer(intel_obj); + + free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; + + if (size != 0) { +#ifdef I915 + /* On pre-965, stick VBOs in system memory, as we're always doing + * swtnl with their contents anyway. + */ + if (target == GL_ARRAY_BUFFER || target == GL_ELEMENT_ARRAY_BUFFER) { + intel_obj->sys_buffer = malloc(size); + if (intel_obj->sys_buffer != NULL) { + if (data != NULL) + memcpy(intel_obj->sys_buffer, data, size); + return true; + } + } +#endif + intel_bufferobj_alloc_buffer(intel, intel_obj); + if (!intel_obj->buffer) + return false; + + if (data != NULL) + drm_intel_bo_subdata(intel_obj->buffer, 0, size, data); + } + + return true; +} + + +/** + * Replace data in a subrange of buffer object. If the data range + * specified by size + offset extends beyond the end of the buffer or + * if data is NULL, no copy is performed. + * Called via glBufferSubDataARB(). 
+ */ +static void +intel_bufferobj_subdata(struct gl_context * ctx, + GLintptrARB offset, + GLsizeiptrARB size, + const GLvoid * data, struct gl_buffer_object *obj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + bool busy; + + if (size == 0) + return; + + assert(intel_obj); + + /* If we have a single copy in system memory, update that */ + if (intel_obj->sys_buffer) { + if (intel_obj->source) + release_buffer(intel_obj); + + if (intel_obj->buffer == NULL) { + memcpy((char *)intel_obj->sys_buffer + offset, data, size); + return; + } + + free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; + } + + /* Otherwise we need to update the copy in video memory. */ + busy = + drm_intel_bo_busy(intel_obj->buffer) || + drm_intel_bo_references(intel->batch.bo, intel_obj->buffer); + + if (busy) { + if (size == intel_obj->Base.Size) { + /* Replace the current busy bo with fresh data. */ + drm_intel_bo_unreference(intel_obj->buffer); + intel_bufferobj_alloc_buffer(intel, intel_obj); + drm_intel_bo_subdata(intel_obj->buffer, 0, size, data); + } else { + perf_debug("Using a blit copy to avoid stalling on %ldb " + "glBufferSubData() to a busy buffer object.\n", + (long)size); + drm_intel_bo *temp_bo = + drm_intel_bo_alloc(intel->bufmgr, "subdata temp", size, 64); + + drm_intel_bo_subdata(temp_bo, 0, size, data); + + intel_emit_linear_blit(intel, + intel_obj->buffer, offset, + temp_bo, 0, + size); + + drm_intel_bo_unreference(temp_bo); + } + } else { + drm_intel_bo_subdata(intel_obj->buffer, offset, size, data); + } +} + + +/** + * Called via glGetBufferSubDataARB(). 
+ */ +static void +intel_bufferobj_get_subdata(struct gl_context * ctx, + GLintptrARB offset, + GLsizeiptrARB size, + GLvoid * data, struct gl_buffer_object *obj) +{ + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + struct intel_context *intel = intel_context(ctx); + + assert(intel_obj); + if (intel_obj->sys_buffer) + memcpy(data, (char *)intel_obj->sys_buffer + offset, size); + else { + if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) { + intel_batchbuffer_flush(intel); + } + drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data); + } +} + + + +/** + * Called via glMapBufferRange and glMapBuffer + * + * The goal of this extension is to allow apps to accumulate their rendering + * at the same time as they accumulate their buffer object. Without it, + * you'd end up blocking on execution of rendering every time you mapped + * the buffer to put new data in. + * + * We support it in 3 ways: If unsynchronized, then don't bother + * flushing the batchbuffer before mapping the buffer, which can save blocking + * in many cases. If we would still block, and they allow the whole buffer + * to be invalidated, then just allocate a new buffer to replace the old one. + * If not, and we'd block, and they allow the subrange of the buffer to be + * invalidated, then we can make a new little BO, let them write into that, + * and blit it into the real BO at unmap time. + */ +static void * +intel_bufferobj_map_range(struct gl_context * ctx, + GLintptr offset, GLsizeiptr length, + GLbitfield access, struct gl_buffer_object *obj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + assert(intel_obj); + + /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also + * internally uses our functions directly. 
+ */ + obj->Offset = offset; + obj->Length = length; + obj->AccessFlags = access; + + if (intel_obj->sys_buffer) { + const bool read_only = + (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_READ_BIT; + + if (!read_only && intel_obj->source) + release_buffer(intel_obj); + + if (!intel_obj->buffer || intel_obj->source) { + obj->Pointer = intel_obj->sys_buffer + offset; + return obj->Pointer; + } + + free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; + } + + if (intel_obj->buffer == NULL) { + obj->Pointer = NULL; + return NULL; + } + + /* If the access is synchronized (like a normal buffer mapping), then get + * things flushed out so the later mapping syncs appropriately through GEM. + * If the user doesn't care about existing buffer contents and mapping would + * cause us to block, then throw out the old buffer. + * + * If they set INVALIDATE_BUFFER, we can pitch the current contents to + * achieve the required synchronization. + */ + if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) { + if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) { + if (access & GL_MAP_INVALIDATE_BUFFER_BIT) { + drm_intel_bo_unreference(intel_obj->buffer); + intel_bufferobj_alloc_buffer(intel, intel_obj); + } else { + perf_debug("Stalling on the GPU for mapping a busy buffer " + "object\n"); + intel_flush(ctx); + } + } else if (drm_intel_bo_busy(intel_obj->buffer) && + (access & GL_MAP_INVALIDATE_BUFFER_BIT)) { + drm_intel_bo_unreference(intel_obj->buffer); + intel_bufferobj_alloc_buffer(intel, intel_obj); + } + } + + /* If the user is mapping a range of an active buffer object but + * doesn't require the current contents of that range, make a new + * BO, and we'll copy what they put in there out at unmap or + * FlushRange time. 
+ */ + if ((access & GL_MAP_INVALIDATE_RANGE_BIT) && + drm_intel_bo_busy(intel_obj->buffer)) { + if (access & GL_MAP_FLUSH_EXPLICIT_BIT) { + intel_obj->range_map_buffer = malloc(length); + obj->Pointer = intel_obj->range_map_buffer; + } else { + intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr, + "range map", + length, 64); + if (!(access & GL_MAP_READ_BIT)) { + drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo); + } else { + drm_intel_bo_map(intel_obj->range_map_bo, + (access & GL_MAP_WRITE_BIT) != 0); + } + obj->Pointer = intel_obj->range_map_bo->virtual; + } + return obj->Pointer; + } + + if (access & GL_MAP_UNSYNCHRONIZED_BIT) + drm_intel_gem_bo_map_unsynchronized(intel_obj->buffer); + else if (!(access & GL_MAP_READ_BIT)) { + drm_intel_gem_bo_map_gtt(intel_obj->buffer); + } else { + drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0); + } + + obj->Pointer = intel_obj->buffer->virtual + offset; + return obj->Pointer; +} + +/* Ideally we'd use a BO to avoid taking up cache space for the temporary + * data, but FlushMappedBufferRange may be followed by further writes to + * the pointer, so we would have to re-map after emitting our blit, which + * would defeat the point. + */ +static void +intel_bufferobj_flush_mapped_range(struct gl_context *ctx, + GLintptr offset, GLsizeiptr length, + struct gl_buffer_object *obj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + drm_intel_bo *temp_bo; + + /* Unless we're in the range map using a temporary system buffer, + * there's no work to do. 
+ */ + if (intel_obj->range_map_buffer == NULL) + return; + + if (length == 0) + return; + + temp_bo = drm_intel_bo_alloc(intel->bufmgr, "range map flush", length, 64); + + drm_intel_bo_subdata(temp_bo, 0, length, intel_obj->range_map_buffer); + + intel_emit_linear_blit(intel, + intel_obj->buffer, obj->Offset + offset, + temp_bo, 0, + length); + + drm_intel_bo_unreference(temp_bo); +} + + +/** + * Called via glUnmapBuffer(). + */ +static GLboolean +intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + assert(intel_obj); + assert(obj->Pointer); + if (intel_obj->sys_buffer != NULL) { + /* always keep the mapping around. */ + } else if (intel_obj->range_map_buffer != NULL) { + /* Since we've emitted some blits to buffers that will (likely) be used + * in rendering operations in other cache domains in this batch, emit a + * flush. Once again, we wish for a domain tracker in libdrm to cover + * usage inside of a batchbuffer. + */ + intel_batchbuffer_emit_mi_flush(intel); + free(intel_obj->range_map_buffer); + intel_obj->range_map_buffer = NULL; + } else if (intel_obj->range_map_bo != NULL) { + drm_intel_bo_unmap(intel_obj->range_map_bo); + + intel_emit_linear_blit(intel, + intel_obj->buffer, obj->Offset, + intel_obj->range_map_bo, 0, + obj->Length); + + /* Since we've emitted some blits to buffers that will (likely) be used + * in rendering operations in other cache domains in this batch, emit a + * flush. Once again, we wish for a domain tracker in libdrm to cover + * usage inside of a batchbuffer. 
+ */ + intel_batchbuffer_emit_mi_flush(intel); + + drm_intel_bo_unreference(intel_obj->range_map_bo); + intel_obj->range_map_bo = NULL; + } else if (intel_obj->buffer != NULL) { + drm_intel_bo_unmap(intel_obj->buffer); + } + obj->Pointer = NULL; + obj->Offset = 0; + obj->Length = 0; + + return true; +} + +drm_intel_bo * +intel_bufferobj_buffer(struct intel_context *intel, + struct intel_buffer_object *intel_obj, + GLuint flag) +{ + if (intel_obj->source) + release_buffer(intel_obj); + + if (intel_obj->buffer == NULL) { + intel_bufferobj_alloc_buffer(intel, intel_obj); + drm_intel_bo_subdata(intel_obj->buffer, + 0, intel_obj->Base.Size, + intel_obj->sys_buffer); + + free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; + intel_obj->offset = 0; + } + + return intel_obj->buffer; +} + +#define INTEL_UPLOAD_SIZE (64*1024) + +void +intel_upload_finish(struct intel_context *intel) +{ + if (!intel->upload.bo) + return; + + if (intel->upload.buffer_len) { + drm_intel_bo_subdata(intel->upload.bo, + intel->upload.buffer_offset, + intel->upload.buffer_len, + intel->upload.buffer); + intel->upload.buffer_len = 0; + } + + drm_intel_bo_unreference(intel->upload.bo); + intel->upload.bo = NULL; +} + +static void wrap_buffers(struct intel_context *intel, GLuint size) +{ + intel_upload_finish(intel); + + if (size < INTEL_UPLOAD_SIZE) + size = INTEL_UPLOAD_SIZE; + + intel->upload.bo = drm_intel_bo_alloc(intel->bufmgr, "upload", size, 0); + intel->upload.offset = 0; +} + +void intel_upload_data(struct intel_context *intel, + const void *ptr, GLuint size, GLuint align, + drm_intel_bo **return_bo, + GLuint *return_offset) +{ + GLuint base, delta; + + base = (intel->upload.offset + align - 1) / align * align; + if (intel->upload.bo == NULL || base + size > intel->upload.bo->size) { + wrap_buffers(intel, size); + base = 0; + } + + drm_intel_bo_reference(intel->upload.bo); + *return_bo = intel->upload.bo; + *return_offset = base; + + delta = base - intel->upload.offset; + if 
(intel->upload.buffer_len && + intel->upload.buffer_len + delta + size > sizeof(intel->upload.buffer)) + { + drm_intel_bo_subdata(intel->upload.bo, + intel->upload.buffer_offset, + intel->upload.buffer_len, + intel->upload.buffer); + intel->upload.buffer_len = 0; + } + + if (size < sizeof(intel->upload.buffer)) + { + if (intel->upload.buffer_len == 0) + intel->upload.buffer_offset = base; + else + intel->upload.buffer_len += delta; + + memcpy(intel->upload.buffer + intel->upload.buffer_len, ptr, size); + intel->upload.buffer_len += size; + } + else + { + drm_intel_bo_subdata(intel->upload.bo, base, size, ptr); + } + + intel->upload.offset = base + size; +} + +void *intel_upload_map(struct intel_context *intel, GLuint size, GLuint align) +{ + GLuint base, delta; + char *ptr; + + base = (intel->upload.offset + align - 1) / align * align; + if (intel->upload.bo == NULL || base + size > intel->upload.bo->size) { + wrap_buffers(intel, size); + base = 0; + } + + delta = base - intel->upload.offset; + if (intel->upload.buffer_len && + intel->upload.buffer_len + delta + size > sizeof(intel->upload.buffer)) + { + drm_intel_bo_subdata(intel->upload.bo, + intel->upload.buffer_offset, + intel->upload.buffer_len, + intel->upload.buffer); + intel->upload.buffer_len = 0; + } + + if (size <= sizeof(intel->upload.buffer)) { + if (intel->upload.buffer_len == 0) + intel->upload.buffer_offset = base; + else + intel->upload.buffer_len += delta; + + ptr = intel->upload.buffer + intel->upload.buffer_len; + intel->upload.buffer_len += size; + } else + ptr = malloc(size); + + return ptr; +} + +void intel_upload_unmap(struct intel_context *intel, + const void *ptr, GLuint size, GLuint align, + drm_intel_bo **return_bo, + GLuint *return_offset) +{ + GLuint base; + + base = (intel->upload.offset + align - 1) / align * align; + if (size > sizeof(intel->upload.buffer)) { + drm_intel_bo_subdata(intel->upload.bo, base, size, ptr); + free((void*)ptr); + } + + 
drm_intel_bo_reference(intel->upload.bo); + *return_bo = intel->upload.bo; + *return_offset = base; + + intel->upload.offset = base + size; +} + +drm_intel_bo * +intel_bufferobj_source(struct intel_context *intel, + struct intel_buffer_object *intel_obj, + GLuint align, GLuint *offset) +{ + if (intel_obj->buffer == NULL) { + intel_upload_data(intel, + intel_obj->sys_buffer, intel_obj->Base.Size, align, + &intel_obj->buffer, &intel_obj->offset); + intel_obj->source = 1; + } + + *offset = intel_obj->offset; + return intel_obj->buffer; +} + +static void +intel_bufferobj_copy_subdata(struct gl_context *ctx, + struct gl_buffer_object *src, + struct gl_buffer_object *dst, + GLintptr read_offset, GLintptr write_offset, + GLsizeiptr size) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_src = intel_buffer_object(src); + struct intel_buffer_object *intel_dst = intel_buffer_object(dst); + drm_intel_bo *src_bo, *dst_bo; + GLuint src_offset; + + if (size == 0) + return; + + /* If we're in system memory, just map and memcpy. */ + if (intel_src->sys_buffer || intel_dst->sys_buffer) { + /* The same buffer may be used, but note that regions copied may + * not overlap. + */ + if (src == dst) { + char *ptr = intel_bufferobj_map_range(ctx, 0, dst->Size, + GL_MAP_READ_BIT | + GL_MAP_WRITE_BIT, + dst); + memmove(ptr + write_offset, ptr + read_offset, size); + intel_bufferobj_unmap(ctx, dst); + } else { + const char *src_ptr; + char *dst_ptr; + + src_ptr = intel_bufferobj_map_range(ctx, 0, src->Size, + GL_MAP_READ_BIT, src); + dst_ptr = intel_bufferobj_map_range(ctx, 0, dst->Size, + GL_MAP_WRITE_BIT, dst); + + memcpy(dst_ptr + write_offset, src_ptr + read_offset, size); + + intel_bufferobj_unmap(ctx, src); + intel_bufferobj_unmap(ctx, dst); + } + return; + } + + /* Otherwise, we have real BOs, so blit them. 
*/ + + dst_bo = intel_bufferobj_buffer(intel, intel_dst, INTEL_WRITE_PART); + src_bo = intel_bufferobj_source(intel, intel_src, 64, &src_offset); + + intel_emit_linear_blit(intel, + dst_bo, write_offset, + src_bo, read_offset + src_offset, size); + + /* Since we've emitted some blits to buffers that will (likely) be used + * in rendering operations in other cache domains in this batch, emit a + * flush. Once again, we wish for a domain tracker in libdrm to cover + * usage inside of a batchbuffer. + */ + intel_batchbuffer_emit_mi_flush(intel); +} + +static GLenum +intel_buffer_purgeable(drm_intel_bo *buffer) +{ + int retained = 0; + + if (buffer != NULL) + retained = drm_intel_bo_madvise (buffer, I915_MADV_DONTNEED); + + return retained ? GL_VOLATILE_APPLE : GL_RELEASED_APPLE; +} + +static GLenum +intel_buffer_object_purgeable(struct gl_context * ctx, + struct gl_buffer_object *obj, + GLenum option) +{ + struct intel_buffer_object *intel_obj = intel_buffer_object (obj); + + if (intel_obj->buffer != NULL) + return intel_buffer_purgeable(intel_obj->buffer); + + if (option == GL_RELEASED_APPLE) { + free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; + + return GL_RELEASED_APPLE; + } else { + /* XXX Create the buffer and madvise(MADV_DONTNEED)? 
*/ + struct intel_context *intel = intel_context(ctx); + drm_intel_bo *bo = intel_bufferobj_buffer(intel, intel_obj, INTEL_READ); + + return intel_buffer_purgeable(bo); + } +} + +static GLenum +intel_texture_object_purgeable(struct gl_context * ctx, + struct gl_texture_object *obj, + GLenum option) +{ + struct intel_texture_object *intel; + + (void) ctx; + (void) option; + + intel = intel_texture_object(obj); + if (intel->mt == NULL || intel->mt->region == NULL) + return GL_RELEASED_APPLE; + + return intel_buffer_purgeable(intel->mt->region->bo); +} + +static GLenum +intel_render_object_purgeable(struct gl_context * ctx, + struct gl_renderbuffer *obj, + GLenum option) +{ + struct intel_renderbuffer *intel; + + (void) ctx; + (void) option; + + intel = intel_renderbuffer(obj); + if (intel->mt == NULL) + return GL_RELEASED_APPLE; + + return intel_buffer_purgeable(intel->mt->region->bo); +} + +static GLenum +intel_buffer_unpurgeable(drm_intel_bo *buffer) +{ + int retained; + + retained = 0; + if (buffer != NULL) + retained = drm_intel_bo_madvise (buffer, I915_MADV_WILLNEED); + + return retained ? 
GL_RETAINED_APPLE : GL_UNDEFINED_APPLE; +} + +static GLenum +intel_buffer_object_unpurgeable(struct gl_context * ctx, + struct gl_buffer_object *obj, + GLenum option) +{ + (void) ctx; + (void) option; + + return intel_buffer_unpurgeable(intel_buffer_object (obj)->buffer); +} + +static GLenum +intel_texture_object_unpurgeable(struct gl_context * ctx, + struct gl_texture_object *obj, + GLenum option) +{ + struct intel_texture_object *intel; + + (void) ctx; + (void) option; + + intel = intel_texture_object(obj); + if (intel->mt == NULL || intel->mt->region == NULL) + return GL_UNDEFINED_APPLE; + + return intel_buffer_unpurgeable(intel->mt->region->bo); +} + +static GLenum +intel_render_object_unpurgeable(struct gl_context * ctx, + struct gl_renderbuffer *obj, + GLenum option) +{ + struct intel_renderbuffer *intel; + + (void) ctx; + (void) option; + + intel = intel_renderbuffer(obj); + if (intel->mt == NULL) + return GL_UNDEFINED_APPLE; + + return intel_buffer_unpurgeable(intel->mt->region->bo); +} + +void +intelInitBufferObjectFuncs(struct dd_function_table *functions) +{ + functions->NewBufferObject = intel_bufferobj_alloc; + functions->DeleteBuffer = intel_bufferobj_free; + functions->BufferData = intel_bufferobj_data; + functions->BufferSubData = intel_bufferobj_subdata; + functions->GetBufferSubData = intel_bufferobj_get_subdata; + functions->MapBufferRange = intel_bufferobj_map_range; + functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range; + functions->UnmapBuffer = intel_bufferobj_unmap; + functions->CopyBufferSubData = intel_bufferobj_copy_subdata; + + functions->BufferObjectPurgeable = intel_buffer_object_purgeable; + functions->TextureObjectPurgeable = intel_texture_object_purgeable; + functions->RenderObjectPurgeable = intel_render_object_purgeable; + + functions->BufferObjectUnpurgeable = intel_buffer_object_unpurgeable; + functions->TextureObjectUnpurgeable = intel_texture_object_unpurgeable; + functions->RenderObjectUnpurgeable = 
intel_render_object_unpurgeable; +} diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.h b/src/mesa/drivers/dri/i965/intel_buffer_objects.h new file mode 100644 index 00000000000..92a4121ce50 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.h @@ -0,0 +1,92 @@ +/************************************************************************** + * + * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_BUFFEROBJ_H +#define INTEL_BUFFEROBJ_H + +#include "main/mtypes.h" + +struct intel_context; +struct gl_buffer_object; + + +/** + * Intel vertex/pixel buffer object, derived from Mesa's gl_buffer_object. 
struct intel_buffer_object
{
   /* Must remain the first member: intel_buffer_object() converts a
    * gl_buffer_object pointer to this type with a plain cast.
    */
   struct gl_buffer_object Base;
   drm_intel_bo *buffer;     /* the low-level buffer manager's buffer handle */
   GLuint offset;            /* any offset into that buffer */

   /** System memory buffer data, if not using a BO to store the data. */
   void *sys_buffer;

   /* State for an outstanding range mapping.  range_map_bo is unreferenced
    * and reset to NULL on unmap.
    * NOTE(review): the exact roles of range_map_buffer, range_map_offset
    * and range_map_size are set up in the map path, which is not shown
    * here; confirm against intel_bufferobj_map_range().
    */
   drm_intel_bo *range_map_bo;
   void *range_map_buffer;
   unsigned int range_map_offset;
   GLsizei range_map_size;

   /* Set by intel_bufferobj_source() after buffer/offset were filled in by
    * intel_upload_data(); intel_bufferobj_buffer() calls release_buffer()
    * when it is set.
    */
   bool source;
};
\ No newline at end of file +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "intel_context.h" +#include "intel_buffers.h" +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" + +#include "main/fbobject.h" +#include "main/framebuffer.h" +#include "main/renderbuffer.h" + +/** + * Return pointer to current color reading region, or NULL. 
+ */ +struct intel_region * +intel_readbuf_region(struct intel_context *intel) +{ + struct intel_renderbuffer *irb + = intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); + if (irb && irb->mt) + return irb->mt->region; + else + return NULL; +} + +/** + * Check if we're about to draw into the front color buffer. + * If so, set the intel->front_buffer_dirty field to true. + */ +void +intel_check_front_buffer_rendering(struct intel_context *intel) +{ + const struct gl_framebuffer *fb = intel->ctx.DrawBuffer; + if (_mesa_is_winsys_fbo(fb)) { + /* drawing to window system buffer */ + if (fb->_NumColorDrawBuffers > 0) { + if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { + intel->front_buffer_dirty = true; + } + } + } +} + +static void +intelDrawBuffer(struct gl_context * ctx, GLenum mode) +{ + if (ctx->DrawBuffer && _mesa_is_winsys_fbo(ctx->DrawBuffer)) { + struct intel_context *const intel = intel_context(ctx); + const bool was_front_buffer_rendering = + intel->is_front_buffer_rendering; + + intel->is_front_buffer_rendering = (mode == GL_FRONT_LEFT) + || (mode == GL_FRONT) || (mode == GL_FRONT_AND_BACK); + + /* If we weren't front-buffer rendering before but we are now, + * invalidate our DRI drawable so we'll ask for new buffers + * (including the fake front) before we start rendering again. 
+ */ + if (!was_front_buffer_rendering && intel->is_front_buffer_rendering) + dri2InvalidateDrawable(intel->driContext->driDrawablePriv); + } + + intel_draw_buffer(ctx); +} + + +static void +intelReadBuffer(struct gl_context * ctx, GLenum mode) +{ + if (ctx->DrawBuffer && _mesa_is_winsys_fbo(ctx->DrawBuffer)) { + struct intel_context *const intel = intel_context(ctx); + const bool was_front_buffer_reading = + intel->is_front_buffer_reading; + + intel->is_front_buffer_reading = (mode == GL_FRONT_LEFT) + || (mode == GL_FRONT); + + /* If we weren't front-buffer reading before but we are now, + * invalidate our DRI drawable so we'll ask for new buffers + * (including the fake front) before we start reading again. + */ + if (!was_front_buffer_reading && intel->is_front_buffer_reading) + dri2InvalidateDrawable(intel->driContext->driReadablePriv); + } +} + + +void +intelInitBufferFuncs(struct dd_function_table *functions) +{ + functions->DrawBuffer = intelDrawBuffer; + functions->ReadBuffer = intelReadBuffer; +} diff --git a/src/mesa/drivers/dri/i965/intel_buffers.h b/src/mesa/drivers/dri/i965/intel_buffers.h new file mode 100644 index 00000000000..4e3d13087fe --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_buffers.h @@ -0,0 +1,56 @@ + +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef INTEL_BUFFERS_H +#define INTEL_BUFFERS_H + +#include "dri_util.h" +#include "drm.h" +#include "intel_context.h" + +struct intel_context; +struct intel_framebuffer; + +extern struct intel_region *intel_readbuf_region(struct intel_context *intel); + +extern void intel_check_front_buffer_rendering(struct intel_context *intel); + +static inline void +intel_draw_buffer(struct gl_context * ctx) +{ + struct intel_context *intel = intel_context(ctx); + + intel->vtbl.update_draw_buffer(intel); +} + +extern void intelInitBufferFuncs(struct dd_function_table *functions); +#ifdef I915 +void intelCalcViewport(struct gl_context * ctx); +#endif + +#endif /* INTEL_BUFFERS_H */ diff --git a/src/mesa/drivers/dri/i965/intel_chipset.h b/src/mesa/drivers/dri/i965/intel_chipset.h new file mode 100644 index 00000000000..1e98cf4215d --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_chipset.h @@ -0,0 +1,314 @@ + /* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
#define PCI_CHIP_IGD_GM                 0xA011
#define PCI_CHIP_IGD_G                  0xA001

/* IGD device-ID predicates.
 *
 * The parameter is parenthesized so that a compound argument such as
 * (id | mask) is compared as a whole; without the parentheses, == would
 * bind tighter than the operators in the argument.  IS_IGD() still
 * evaluates its argument more than once, so pass an expression without
 * side effects.
 */
#define IS_IGDGM(devid)	((devid) == PCI_CHIP_IGD_GM)
#define IS_IGDG(devid)	((devid) == PCI_CHIP_IGD_G)
#define IS_IGD(devid)	(IS_IGDG(devid) || IS_IGDGM(devid))
Desktop */ +#define PCI_CHIP_IVYBRIDGE_GT2 0x0162 +#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* Mobile */ +#define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166 +#define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a /* Server */ +#define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a + +#define PCI_CHIP_BAYTRAIL_M_1 0x0F31 +#define PCI_CHIP_BAYTRAIL_M_2 0x0F32 +#define PCI_CHIP_BAYTRAIL_M_3 0x0F33 +#define PCI_CHIP_BAYTRAIL_M_4 0x0157 +#define PCI_CHIP_BAYTRAIL_D 0x0155 + +#define PCI_CHIP_HASWELL_GT1 0x0402 /* Desktop */ +#define PCI_CHIP_HASWELL_GT2 0x0412 +#define PCI_CHIP_HASWELL_GT3 0x0422 +#define PCI_CHIP_HASWELL_M_GT1 0x0406 /* Mobile */ +#define PCI_CHIP_HASWELL_M_GT2 0x0416 +#define PCI_CHIP_HASWELL_M_GT3 0x0426 +#define PCI_CHIP_HASWELL_S_GT1 0x040A /* Server */ +#define PCI_CHIP_HASWELL_S_GT2 0x041A +#define PCI_CHIP_HASWELL_S_GT3 0x042A +#define PCI_CHIP_HASWELL_B_GT1 0x040B /* Reserved */ +#define PCI_CHIP_HASWELL_B_GT2 0x041B +#define PCI_CHIP_HASWELL_B_GT3 0x042B +#define PCI_CHIP_HASWELL_E_GT1 0x040E /* Reserved */ +#define PCI_CHIP_HASWELL_E_GT2 0x041E +#define PCI_CHIP_HASWELL_E_GT3 0x042E +#define PCI_CHIP_HASWELL_SDV_GT1 0x0C02 /* Desktop */ +#define PCI_CHIP_HASWELL_SDV_GT2 0x0C12 +#define PCI_CHIP_HASWELL_SDV_GT3 0x0C22 +#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0C06 /* Mobile */ +#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0C16 +#define PCI_CHIP_HASWELL_SDV_M_GT3 0x0C26 +#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0C0A /* Server */ +#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0C1A +#define PCI_CHIP_HASWELL_SDV_S_GT3 0x0C2A +#define PCI_CHIP_HASWELL_SDV_B_GT1 0x0C0B /* Reserved */ +#define PCI_CHIP_HASWELL_SDV_B_GT2 0x0C1B +#define PCI_CHIP_HASWELL_SDV_B_GT3 0x0C2B +#define PCI_CHIP_HASWELL_SDV_E_GT1 0x0C0E /* Reserved */ +#define PCI_CHIP_HASWELL_SDV_E_GT2 0x0C1E +#define PCI_CHIP_HASWELL_SDV_E_GT3 0x0C2E +#define PCI_CHIP_HASWELL_ULT_GT1 0x0A02 /* Desktop */ +#define PCI_CHIP_HASWELL_ULT_GT2 0x0A12 +#define PCI_CHIP_HASWELL_ULT_GT3 0x0A22 +#define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06 /* Mobile */ +#define 
/* G4x device-ID predicates: IS_G45() matches the desktop parts, IS_GM45()
 * the GM45 part, IS_G4X() either.
 *
 * The parameter is parenthesized so a compound argument is compared as a
 * whole.  It is still evaluated once per comparison, so pass an expression
 * without side effects.
 */
#define IS_G45(devid)           ((devid) == PCI_CHIP_IGD_E_G || \
                                 (devid) == PCI_CHIP_Q45_G || \
                                 (devid) == PCI_CHIP_G45_G || \
                                 (devid) == PCI_CHIP_G41_G || \
                                 (devid) == PCI_CHIP_B43_G || \
                                 (devid) == PCI_CHIP_B43_G1)
#define IS_GM45(devid)          ((devid) == PCI_CHIP_GM45_GM)
#define IS_G4X(devid)		(IS_G45(devid) || IS_GM45(devid))
/* Sandybridge (gen6) device-ID predicates, split by GT level.
 *
 * The parameter is parenthesized so a compound argument is compared as a
 * whole.  It is still evaluated once per comparison, so pass an expression
 * without side effects.
 */
#define IS_SNB_GT1(devid)	((devid) == PCI_CHIP_SANDYBRIDGE_GT1 || \
				 (devid) == PCI_CHIP_SANDYBRIDGE_M_GT1 || \
				 (devid) == PCI_CHIP_SANDYBRIDGE_S)

#define IS_SNB_GT2(devid)	((devid) == PCI_CHIP_SANDYBRIDGE_GT2 || \
				 (devid) == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \
				 (devid) == PCI_CHIP_SANDYBRIDGE_M_GT2 || \
				 (devid) == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS)

#define IS_GEN6(devid)		(IS_SNB_GT1(devid) || IS_SNB_GT2(devid))
/* True for every Haswell GT2 device ID (base, SDV, ULT and CRW families).
 *
 * The parameter is parenthesized so a compound argument is compared as a
 * whole.  It is still evaluated once per comparison, so pass an expression
 * without side effects.
 */
#define IS_HSW_GT2(devid)	((devid) == PCI_CHIP_HASWELL_GT2 || \
				 (devid) == PCI_CHIP_HASWELL_M_GT2 || \
				 (devid) == PCI_CHIP_HASWELL_S_GT2 || \
				 (devid) == PCI_CHIP_HASWELL_B_GT2 || \
				 (devid) == PCI_CHIP_HASWELL_E_GT2 || \
				 (devid) == PCI_CHIP_HASWELL_SDV_GT2 || \
				 (devid) == PCI_CHIP_HASWELL_SDV_M_GT2 || \
				 (devid) == PCI_CHIP_HASWELL_SDV_S_GT2 || \
				 (devid) == PCI_CHIP_HASWELL_SDV_B_GT2 || \
				 (devid) == PCI_CHIP_HASWELL_SDV_E_GT2 || \
				 (devid) == PCI_CHIP_HASWELL_ULT_GT2 || \
				 (devid) == PCI_CHIP_HASWELL_ULT_M_GT2 || \
				 (devid) == PCI_CHIP_HASWELL_ULT_S_GT2 || \
				 (devid) == PCI_CHIP_HASWELL_ULT_B_GT2 || \
				 (devid) == PCI_CHIP_HASWELL_ULT_E_GT2 || \
				 (devid) == PCI_CHIP_HASWELL_CRW_GT2 || \
				 (devid) == PCI_CHIP_HASWELL_CRW_M_GT2 || \
				 (devid) == PCI_CHIP_HASWELL_CRW_S_GT2 || \
				 (devid) == PCI_CHIP_HASWELL_CRW_B_GT2 || \
				 (devid) == PCI_CHIP_HASWELL_CRW_E_GT2)
/* True for the gen2 parts (i830M, 845G, i855GM, i865G).
 *
 * The parameter is parenthesized so a compound argument is compared as a
 * whole.  It is still evaluated once per comparison, so pass an expression
 * without side effects.
 */
#define IS_GEN2(devid)		((devid) == PCI_CHIP_I830_M || \
				 (devid) == PCI_CHIP_845_G || \
				 (devid) == PCI_CHIP_I855_GM || \
				 (devid) == PCI_CHIP_I865_G)
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_CLEAR_H +#define INTEL_CLEAR_H + +struct dd_function_table; + +extern void +intelInitClearFuncs(struct dd_function_table *functions); + + +#endif /* INTEL_CLEAR_H */ diff --git a/src/mesa/drivers/dri/i965/intel_context.c b/src/mesa/drivers/dri/i965/intel_context.c index 27a1cbb255e..23d8281b4ca 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_context.c +++ b/src/mesa/drivers/dri/i965/intel_context.c @@ -1 +1,1010 @@ -../intel/intel_context.c
\ No newline at end of file +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/**
 * GetString driver hook: answer GL_VENDOR and GL_RENDERER queries.
 *
 * \param ctx   current GL context
 * \param name  string being queried
 * \return the vendor string, the renderer string, or NULL for any other
 *         \p name.  The renderer string lives in a function-static buffer,
 *         so the pointer stays valid after return but its contents are
 *         rewritten by the next GL_RENDERER query.
 */
static const GLubyte *
intelGetString(struct gl_context * ctx, GLenum name)
{
   const struct intel_context *const intel = intel_context(ctx);
   const char *chipset;
   static char buffer[128];

   switch (name) {
   case GL_VENDOR:
      return (GLubyte *) "Intel Open Source Technology Center";
      break;

   case GL_RENDERER:
      /* The case labels of this inner switch are generated by the
       * preprocessor: each CHIPSET() use in the included ID tables expands
       * to "case id: chipset = str; break;".
       * NOTE(review): assumes the pci_ids headers consist solely of
       * CHIPSET(id, symbol, str) entries; confirm against those files.
       */
      switch (intel->intelScreen->deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i915_pci_ids.h"
#include "pci_ids/i965_pci_ids.h"
      default:
         chipset = "Unknown Intel Chipset";
         break;
      }

      /* Return value deliberately ignored; the result is read from buffer. */
      (void) driGetRendererString(buffer, chipset, 0);
      return (GLubyte *) buffer;

   default:
      return NULL;
   }
}
+ */ + return; + } + + struct gl_framebuffer *fb = drawable->driverPrivate; + struct intel_renderbuffer *rb; + + /* Usually, only the back buffer will need to be downsampled. However, + * the front buffer will also need it if the user has rendered into it. + */ + static const gl_buffer_index buffers[2] = { + BUFFER_BACK_LEFT, + BUFFER_FRONT_LEFT, + }; + + for (int i = 0; i < 2; ++i) { + rb = intel_get_renderbuffer(fb, buffers[i]); + if (rb == NULL || rb->mt == NULL) + continue; + if (rb->mt->num_samples <= 1) + intel_miptree_resolve_color(intel, rb->mt); + else + intel_miptree_downsample(intel, rb->mt); + } +} + +static void +intel_flush_front(struct gl_context *ctx) +{ + struct intel_context *intel = intel_context(ctx); + __DRIcontext *driContext = intel->driContext; + __DRIdrawable *driDrawable = driContext->driDrawablePriv; + __DRIscreen *const screen = intel->intelScreen->driScrnPriv; + + if (intel->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) { + if (screen->dri2.loader->flushFrontBuffer != NULL && + driDrawable && + driDrawable->loaderPrivate) { + + /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT. + * + * This potentially resolves both front and back buffer. It + * is unnecessary to resolve the back, but harms nothing except + * performance. And no one cares about front-buffer render + * performance. + */ + intel_resolve_for_dri2_flush(intel, driDrawable); + + screen->dri2.loader->flushFrontBuffer(driDrawable, + driDrawable->loaderPrivate); + + /* We set the dirty bit in intel_prepare_render() if we're + * front buffer rendering once we get there. 
+ */ + intel->front_buffer_dirty = false; + } + } +} + +static unsigned +intel_bits_per_pixel(const struct intel_renderbuffer *rb) +{ + return _mesa_get_format_bytes(intel_rb_format(rb)) * 8; +} + +static void +intel_query_dri2_buffers(struct intel_context *intel, + __DRIdrawable *drawable, + __DRIbuffer **buffers, + int *count); + +static void +intel_process_dri2_buffer(struct intel_context *intel, + __DRIdrawable *drawable, + __DRIbuffer *buffer, + struct intel_renderbuffer *rb, + const char *buffer_name); + +void +intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) +{ + struct gl_framebuffer *fb = drawable->driverPrivate; + struct intel_renderbuffer *rb; + struct intel_context *intel = context->driverPrivate; + __DRIbuffer *buffers = NULL; + int i, count; + const char *region_name; + + /* Set this up front, so that in case our buffers get invalidated + * while we're getting new buffers, we don't clobber the stamp and + * thus ignore the invalidate. */ + drawable->lastStamp = drawable->dri2.stamp; + + if (unlikely(INTEL_DEBUG & DEBUG_DRI)) + fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable); + + intel_query_dri2_buffers(intel, drawable, &buffers, &count); + + if (buffers == NULL) + return; + + for (i = 0; i < count; i++) { + switch (buffers[i].attachment) { + case __DRI_BUFFER_FRONT_LEFT: + rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); + region_name = "dri2 front buffer"; + break; + + case __DRI_BUFFER_FAKE_FRONT_LEFT: + rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); + region_name = "dri2 fake front buffer"; + break; + + case __DRI_BUFFER_BACK_LEFT: + rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT); + region_name = "dri2 back buffer"; + break; + + case __DRI_BUFFER_DEPTH: + case __DRI_BUFFER_HIZ: + case __DRI_BUFFER_DEPTH_STENCIL: + case __DRI_BUFFER_STENCIL: + case __DRI_BUFFER_ACCUM: + default: + fprintf(stderr, + "unhandled buffer attach event, attachment type %d\n", + buffers[i].attachment); + return; + } + + 
intel_process_dri2_buffer(intel, drawable, &buffers[i], rb, region_name); + } + + driUpdateFramebufferSize(&intel->ctx, drawable); +} + +/** + * intel_prepare_render should be called anywhere that current read/drawbuffer + * state is required. + */ +void +intel_prepare_render(struct intel_context *intel) +{ + __DRIcontext *driContext = intel->driContext; + __DRIdrawable *drawable; + + drawable = driContext->driDrawablePriv; + if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) { + if (drawable->lastStamp != drawable->dri2.stamp) + intel_update_renderbuffers(driContext, drawable); + intel_draw_buffer(&intel->ctx); + driContext->dri2.draw_stamp = drawable->dri2.stamp; + } + + drawable = driContext->driReadablePriv; + if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) { + if (drawable->lastStamp != drawable->dri2.stamp) + intel_update_renderbuffers(driContext, drawable); + driContext->dri2.read_stamp = drawable->dri2.stamp; + } + + /* If we're currently rendering to the front buffer, the rendering + * that will happen next will probably dirty the front buffer. So + * mark it as dirty here. + */ + if (intel->is_front_buffer_rendering) + intel->front_buffer_dirty = true; + + /* Wait for the swapbuffers before the one we just emitted, so we + * don't get too many swaps outstanding for apps that are GPU-heavy + * but not CPU-heavy. + * + * We're using intelDRI2Flush (called from the loader before + * swapbuffer) and glFlush (for front buffer rendering) as the + * indicator that a frame is done and then throttle when we get + * here as we prepare to render the next frame. At this point the + * round trips for swap/copy and getting new buffers are done and + * we'll spend less time waiting on the GPU. + * + * Unfortunately, we don't have a handle to the batch containing + * the swap, and getting our hands on that doesn't seem worth it, + * so we just use the first batch we emitted after the last swap.
+ */ + if (intel->need_throttle && intel->first_post_swapbuffers_batch) { + if (!intel->disable_throttling) + drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch); + drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); + intel->first_post_swapbuffers_batch = NULL; + intel->need_throttle = false; + } +} + +static void +intel_viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei w, GLsizei h) +{ + struct intel_context *intel = intel_context(ctx); + __DRIcontext *driContext = intel->driContext; + + if (intel->saved_viewport) + intel->saved_viewport(ctx, x, y, w, h); + + if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) { + dri2InvalidateDrawable(driContext->driDrawablePriv); + dri2InvalidateDrawable(driContext->driReadablePriv); + } +} + +static const struct dri_debug_control debug_control[] = { + { "tex", DEBUG_TEXTURE}, + { "state", DEBUG_STATE}, + { "ioctl", DEBUG_IOCTL}, + { "blit", DEBUG_BLIT}, + { "mip", DEBUG_MIPTREE}, + { "fall", DEBUG_PERF}, + { "perf", DEBUG_PERF}, + { "bat", DEBUG_BATCH}, + { "pix", DEBUG_PIXEL}, + { "buf", DEBUG_BUFMGR}, + { "reg", DEBUG_REGION}, + { "fbo", DEBUG_FBO}, + { "fs", DEBUG_WM }, + { "gs", DEBUG_GS}, + { "sync", DEBUG_SYNC}, + { "prim", DEBUG_PRIMS }, + { "vert", DEBUG_VERTS }, + { "dri", DEBUG_DRI }, + { "sf", DEBUG_SF }, + { "stats", DEBUG_STATS }, + { "wm", DEBUG_WM }, + { "urb", DEBUG_URB }, + { "vs", DEBUG_VS }, + { "clip", DEBUG_CLIP }, + { "aub", DEBUG_AUB }, + { "shader_time", DEBUG_SHADER_TIME }, + { "no16", DEBUG_NO16 }, + { "blorp", DEBUG_BLORP }, + { NULL, 0 } +}; + + +static void +intelInvalidateState(struct gl_context * ctx, GLuint new_state) +{ + struct intel_context *intel = intel_context(ctx); + + if (ctx->swrast_context) + _swrast_InvalidateState(ctx, new_state); + _vbo_InvalidateState(ctx, new_state); + + intel->NewGLState |= new_state; + + if (intel->vtbl.invalidate_state) + intel->vtbl.invalidate_state( intel, new_state ); +} + +void +intel_flush_rendering_to_batch(struct gl_context *ctx) 
+{ + struct intel_context *intel = intel_context(ctx); + + if (intel->Fallback) + _swrast_flush(ctx); + + if (intel->gen < 4) + INTEL_FIREVERTICES(intel); +} + +void +_intel_flush(struct gl_context *ctx, const char *file, int line) +{ + struct intel_context *intel = intel_context(ctx); + + intel_flush_rendering_to_batch(ctx); + + if (intel->batch.used) + _intel_batchbuffer_flush(intel, file, line); +} + +static void +intel_glFlush(struct gl_context *ctx) +{ + struct intel_context *intel = intel_context(ctx); + + intel_flush(ctx); + intel_flush_front(ctx); + if (intel->is_front_buffer_rendering) + intel->need_throttle = true; +} + +void +intelFinish(struct gl_context * ctx) +{ + struct intel_context *intel = intel_context(ctx); + + intel_flush(ctx); + intel_flush_front(ctx); + + if (intel->batch.last_bo) + drm_intel_bo_wait_rendering(intel->batch.last_bo); +} + +void +intelInitDriverFunctions(struct dd_function_table *functions) +{ + _mesa_init_driver_functions(functions); + + functions->Flush = intel_glFlush; + functions->Finish = intelFinish; + functions->GetString = intelGetString; + functions->UpdateState = intelInvalidateState; + + intelInitTextureFuncs(functions); + intelInitTextureImageFuncs(functions); + intelInitTextureSubImageFuncs(functions); + intelInitTextureCopyImageFuncs(functions); + intelInitClearFuncs(functions); + intelInitBufferFuncs(functions); + intelInitPixelFuncs(functions); + intelInitBufferObjectFuncs(functions); + intel_init_syncobj_functions(functions); +} + +static bool +validate_context_version(struct intel_screen *screen, + int mesa_api, + unsigned major_version, + unsigned minor_version, + unsigned *dri_ctx_error) +{ + unsigned req_version = 10 * major_version + minor_version; + unsigned max_version = 0; + + switch (mesa_api) { + case API_OPENGL_COMPAT: + max_version = screen->max_gl_compat_version; + break; + case API_OPENGL_CORE: + max_version = screen->max_gl_core_version; + break; + case API_OPENGLES: + max_version = 
screen->max_gl_es1_version; + break; + case API_OPENGLES2: + max_version = screen->max_gl_es2_version; + break; + default: + max_version = 0; + break; + } + + if (max_version == 0) { + *dri_ctx_error = __DRI_CTX_ERROR_BAD_API; + return false; + } else if (req_version > max_version) { + *dri_ctx_error = __DRI_CTX_ERROR_BAD_VERSION; + return false; + } + + return true; +} + +bool +intelInitContext(struct intel_context *intel, + int api, + unsigned major_version, + unsigned minor_version, + const struct gl_config * mesaVis, + __DRIcontext * driContextPriv, + void *sharedContextPrivate, + struct dd_function_table *functions, + unsigned *dri_ctx_error) +{ + struct gl_context *ctx = &intel->ctx; + struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate; + __DRIscreen *sPriv = driContextPriv->driScreenPriv; + struct intel_screen *intelScreen = sPriv->driverPrivate; + int bo_reuse_mode; + struct gl_config visual; + + /* we can't do anything without a connection to the device */ + if (intelScreen->bufmgr == NULL) { + *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; + return false; + } + + if (!validate_context_version(intelScreen, + api, major_version, minor_version, + dri_ctx_error)) + return false; + + /* Can't rely on invalidate events, fall back to glViewport hack */ + if (!driContextPriv->driScreenPriv->dri2.useInvalidate) { + intel->saved_viewport = functions->Viewport; + functions->Viewport = intel_viewport; + } + + if (mesaVis == NULL) { + memset(&visual, 0, sizeof visual); + mesaVis = &visual; + } + + intel->intelScreen = intelScreen; + + if (!_mesa_initialize_context(&intel->ctx, api, mesaVis, shareCtx, + functions)) { + *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; + printf("%s: failed to init mesa context\n", __FUNCTION__); + return false; + } + + driContextPriv->driverPrivate = intel; + intel->driContext = driContextPriv; + intel->driFd = sPriv->fd; + + intel->gen = intelScreen->gen; + + const int devID = intelScreen->deviceID; + if (IS_SNB_GT1(devID) 
|| IS_IVB_GT1(devID) || IS_HSW_GT1(devID)) + intel->gt = 1; + else if (IS_SNB_GT2(devID) || IS_IVB_GT2(devID) || IS_HSW_GT2(devID)) + intel->gt = 2; + else if (IS_HSW_GT3(devID)) + intel->gt = 3; + else + intel->gt = 0; + + if (IS_HASWELL(devID)) { + intel->is_haswell = true; + } else if (IS_BAYTRAIL(devID)) { + intel->is_baytrail = true; + intel->gt = 1; + } else if (IS_G4X(devID)) { + intel->is_g4x = true; + } else if (IS_945(devID)) { + intel->is_945 = true; + } + + if (intel->gen >= 5) { + intel->needs_ff_sync = true; + } + + intel->has_separate_stencil = intel->intelScreen->hw_has_separate_stencil; + intel->must_use_separate_stencil = intel->intelScreen->hw_must_use_separate_stencil; + intel->has_hiz = intel->gen >= 6; + intel->has_llc = intel->intelScreen->hw_has_llc; + intel->has_swizzling = intel->intelScreen->hw_has_swizzling; + + memset(&ctx->TextureFormatSupported, + 0, sizeof(ctx->TextureFormatSupported)); + + driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache, + sPriv->myNum, (intel->gen >= 4) ? "i965" : "i915"); + if (intel->gen < 4) + intel->maxBatchSize = 4096; + else + intel->maxBatchSize = BATCH_SZ; + + /* Estimate the size of the mappable aperture into the GTT. There's an + * ioctl to get the whole GTT size, but not one to get the mappable subset. + * It turns out it's basically always 256MB, though some ancient hardware + * was smaller. + */ + uint32_t gtt_size = 256 * 1024 * 1024; + if (intel->gen == 2) + gtt_size = 128 * 1024 * 1024; + + /* We don't want to map two objects such that a memcpy between them would + * just fault one mapping in and then the other over and over forever. So + * we would need to divide the GTT size by 2. Additionally, some GTT is + * taken up by things like the framebuffer and the ringbuffer and such, so + * be more conservative. 
+ */ + intel->max_gtt_map_object_size = gtt_size / 4; + + intel->bufmgr = intelScreen->bufmgr; + + bo_reuse_mode = driQueryOptioni(&intel->optionCache, "bo_reuse"); + switch (bo_reuse_mode) { + case DRI_CONF_BO_REUSE_DISABLED: + break; + case DRI_CONF_BO_REUSE_ALL: + intel_bufmgr_gem_enable_reuse(intel->bufmgr); + break; + } + + ctx->Const.MinLineWidth = 1.0; + ctx->Const.MinLineWidthAA = 1.0; + ctx->Const.MaxLineWidth = 5.0; + ctx->Const.MaxLineWidthAA = 5.0; + ctx->Const.LineWidthGranularity = 0.5; + + ctx->Const.MinPointSize = 1.0; + ctx->Const.MinPointSizeAA = 1.0; + ctx->Const.MaxPointSize = 255.0; + ctx->Const.MaxPointSizeAA = 3.0; + ctx->Const.PointSizeGranularity = 1.0; + + if (intel->gen >= 6) + ctx->Const.MaxClipPlanes = 8; + + ctx->Const.StripTextureBorder = GL_TRUE; + + /* reinitialize the context point state. + * It depend on constants in __struct gl_contextRec::Const + */ + _mesa_init_point(ctx); + + if (intel->gen >= 4) { + ctx->Const.MaxRenderbufferSize = 8192; + } else { + ctx->Const.MaxRenderbufferSize = 2048; + } + + /* Initialize the software rasterizer and helper modules. + * + * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for + * software fallbacks (which we have to support on legacy GL to do weird + * glDrawPixels(), glBitmap(), and other functions). 
+ */ + if (intel->gen <= 3 || api != API_OPENGL_CORE) { + _swrast_CreateContext(ctx); + } + + _vbo_CreateContext(ctx); + if (ctx->swrast_context) { + _tnl_CreateContext(ctx); + _swsetup_CreateContext(ctx); + + /* Configure swrast to match hardware characteristics: */ + _swrast_allow_pixel_fog(ctx, false); + _swrast_allow_vertex_fog(ctx, true); + } + + _mesa_meta_init(ctx); + + intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24; + intel->hw_stipple = 1; + + intel->RenderIndex = ~0; + + intelInitExtensions(ctx); + + INTEL_DEBUG = driParseDebugString(getenv("INTEL_DEBUG"), debug_control); + if (INTEL_DEBUG & DEBUG_BUFMGR) + dri_bufmgr_set_debug(intel->bufmgr, true); + if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && intel->gen < 7) { + fprintf(stderr, + "shader_time debugging requires gen7 (Ivybridge) or better.\n"); + INTEL_DEBUG &= ~DEBUG_SHADER_TIME; + } + if (INTEL_DEBUG & DEBUG_PERF) + intel->perf_debug = true; + + if (INTEL_DEBUG & DEBUG_AUB) + drm_intel_bufmgr_gem_set_aub_dump(intel->bufmgr, true); + + intel_batchbuffer_init(intel); + + intel_fbo_init(intel); + + intel->use_early_z = driQueryOptionb(&intel->optionCache, "early_z"); + + if (!driQueryOptionb(&intel->optionCache, "hiz")) { + intel->has_hiz = false; + /* On gen6, you can only do separate stencil with HIZ. 
*/ + if (intel->gen == 6) + intel->has_separate_stencil = false; + } + + intel->prim.primitive = ~0; + + /* Force all software fallbacks */ +#ifdef I915 + if (driQueryOptionb(&intel->optionCache, "no_rast")) { + fprintf(stderr, "disabling 3D rasterization\n"); + intel->no_rast = 1; + } +#endif + + if (driQueryOptionb(&intel->optionCache, "always_flush_batch")) { + fprintf(stderr, "flushing batchbuffer before/after each draw call\n"); + intel->always_flush_batch = 1; + } + + if (driQueryOptionb(&intel->optionCache, "always_flush_cache")) { + fprintf(stderr, "flushing GPU caches before/after each draw call\n"); + intel->always_flush_cache = 1; + } + + if (driQueryOptionb(&intel->optionCache, "disable_throttling")) { + fprintf(stderr, "disabling flush throttling\n"); + intel->disable_throttling = 1; + } + + return true; +} + +void +intelDestroyContext(__DRIcontext * driContextPriv) +{ + struct intel_context *intel = + (struct intel_context *) driContextPriv->driverPrivate; + struct gl_context *ctx = &intel->ctx; + + assert(intel); /* should never be null */ + if (intel) { + INTEL_FIREVERTICES(intel); + + /* Dump a final BMP in case the application doesn't call SwapBuffers */ + if (INTEL_DEBUG & DEBUG_AUB) { + intel_batchbuffer_flush(intel); + aub_dump_bmp(&intel->ctx); + } + + _mesa_meta_free(&intel->ctx); + + intel->vtbl.destroy(intel); + + if (ctx->swrast_context) { + _swsetup_DestroyContext(&intel->ctx); + _tnl_DestroyContext(&intel->ctx); + } + _vbo_DestroyContext(&intel->ctx); + + if (ctx->swrast_context) + _swrast_DestroyContext(&intel->ctx); + intel->Fallback = 0x0; /* don't call _swrast_Flush later */ + + intel_batchbuffer_free(intel); + + free(intel->prim.vb); + intel->prim.vb = NULL; + drm_intel_bo_unreference(intel->prim.vb_bo); + intel->prim.vb_bo = NULL; + drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); + intel->first_post_swapbuffers_batch = NULL; + + driDestroyOptionCache(&intel->optionCache); + + /* free the Mesa context */ + 
_mesa_free_context_data(&intel->ctx); + + _math_matrix_dtr(&intel->ViewportMatrix); + + ralloc_free(intel); + driContextPriv->driverPrivate = NULL; + } +} + +GLboolean +intelUnbindContext(__DRIcontext * driContextPriv) +{ + /* Unset current context and dispatch table */ + _mesa_make_current(NULL, NULL, NULL); + + return true; +} + +/** + * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior + * on window system framebuffers. + * + * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if + * your renderbuffer can do sRGB encode, and you can flip a switch that does + * sRGB encode if the renderbuffer can handle it. You can ask specifically + * for a visual where you're guaranteed to be capable, but it turns out that + * everyone just makes all their ARGB8888 visuals capable and doesn't offer + * incapable ones, because there's no difference between the two in resources + * used. Applications thus get built that accidentally rely on the default + * visual choice being sRGB, so we make ours sRGB capable. Everything sounds + * great... + * + * But for GLES2/3, they decided that it was silly to not turn on sRGB encode + * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent. + * So they removed the enable knob and made it "if the renderbuffer is sRGB + * capable, do sRGB encode". Then, for your window system renderbuffers, you + * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals + * and get no sRGB encode (assuming that both kinds of visual are available). + * Thus our choice to support sRGB by default on our visuals for desktop would + * result in broken rendering of GLES apps that aren't expecting sRGB encode. + * + * Unfortunately, renderbuffer setup happens before a context is created.
So + * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3 + * context (without an sRGB visual, though we don't have sRGB visuals exposed + * yet), we go turn that back off before anyone finds out. + */ +static void +intel_gles3_srgb_workaround(struct intel_context *intel, + struct gl_framebuffer *fb) +{ + struct gl_context *ctx = &intel->ctx; + + if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable) + return; + + /* Some day when we support the sRGB capable bit on visuals available for + * GLES, we'll need to respect that and not disable things here. + */ + fb->Visual.sRGBCapable = false; + for (int i = 0; i < BUFFER_COUNT; i++) { + if (fb->Attachment[i].Renderbuffer && + fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_SARGB8) { + fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_ARGB8888; + } + } +} + +GLboolean +intelMakeCurrent(__DRIcontext * driContextPriv, + __DRIdrawable * driDrawPriv, + __DRIdrawable * driReadPriv) +{ + struct intel_context *intel; + GET_CURRENT_CONTEXT(curCtx); + + if (driContextPriv) + intel = (struct intel_context *) driContextPriv->driverPrivate; + else + intel = NULL; + + /* According to the glXMakeCurrent() man page: "Pending commands to + * the previous context, if any, are flushed before it is released." + * But only flush if we're actually changing contexts. 
+ */ + if (intel_context(curCtx) && intel_context(curCtx) != intel) { + _mesa_flush(curCtx); + } + + if (driContextPriv) { + struct gl_context *ctx = &intel->ctx; + struct gl_framebuffer *fb, *readFb; + + if (driDrawPriv == NULL && driReadPriv == NULL) { + fb = _mesa_get_incomplete_framebuffer(); + readFb = _mesa_get_incomplete_framebuffer(); + } else { + fb = driDrawPriv->driverPrivate; + readFb = driReadPriv->driverPrivate; + driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1; + driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1; + } + + intel_prepare_render(intel); + _mesa_make_current(ctx, fb, readFb); + + intel_gles3_srgb_workaround(intel, ctx->WinSysDrawBuffer); + intel_gles3_srgb_workaround(intel, ctx->WinSysReadBuffer); + + /* We do this in intel_prepare_render() too, but intel->ctx.DrawBuffer + * is NULL at that point. We can't call _mesa_makecurrent() + * first, since we need the buffer size for the initial + * viewport. So just call intel_draw_buffer() again here. */ + intel_draw_buffer(ctx); + } + else { + _mesa_make_current(NULL, NULL, NULL); + } + + return true; +} + +/** + * \brief Query DRI2 to obtain a DRIdrawable's buffers. + * + * To determine which DRI buffers to request, examine the renderbuffers + * attached to the drawable's framebuffer. Then request the buffers with + * DRI2GetBuffers() or DRI2GetBuffersWithFormat(). + * + * This is called from intel_update_renderbuffers(). + * + * \param drawable Drawable whose buffers are queried. + * \param buffers [out] List of buffers returned by DRI2 query. + * \param buffer_count [out] Number of buffers returned. 
+ * + * \see intel_update_renderbuffers() + * \see DRI2GetBuffers() + * \see DRI2GetBuffersWithFormat() + */ +static void +intel_query_dri2_buffers(struct intel_context *intel, + __DRIdrawable *drawable, + __DRIbuffer **buffers, + int *buffer_count) +{ + __DRIscreen *screen = intel->intelScreen->driScrnPriv; + struct gl_framebuffer *fb = drawable->driverPrivate; + int i = 0; + unsigned attachments[8]; + + struct intel_renderbuffer *front_rb; + struct intel_renderbuffer *back_rb; + + front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); + back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT); + + memset(attachments, 0, sizeof(attachments)); + if ((intel->is_front_buffer_rendering || + intel->is_front_buffer_reading || + !back_rb) && front_rb) { + /* If a fake front buffer is in use, then querying for + * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from + * the real front buffer to the fake front buffer. So before doing the + * query, we need to make sure all the pending drawing has landed in the + * real front buffer. + */ + intel_flush(&intel->ctx); + intel_flush_front(&intel->ctx); + + attachments[i++] = __DRI_BUFFER_FRONT_LEFT; + attachments[i++] = intel_bits_per_pixel(front_rb); + } else if (front_rb && intel->front_buffer_dirty) { + /* We have pending front buffer rendering, but we aren't querying for a + * front buffer. If the front buffer we have is a fake front buffer, + * the X server is going to throw it away when it processes the query. + * So before doing the query, make sure all the pending drawing has + * landed in the real front buffer. 
+ */ + intel_flush(&intel->ctx); + intel_flush_front(&intel->ctx); + } + + if (back_rb) { + attachments[i++] = __DRI_BUFFER_BACK_LEFT; + attachments[i++] = intel_bits_per_pixel(back_rb); + } + + assert(i <= ARRAY_SIZE(attachments)); + + *buffers = screen->dri2.loader->getBuffersWithFormat(drawable, + &drawable->w, + &drawable->h, + attachments, i / 2, + buffer_count, + drawable->loaderPrivate); +} + +/** + * \brief Assign a DRI buffer's DRM region to a renderbuffer. + * + * This is called from intel_update_renderbuffers(). + * + * \par Note: + * DRI buffers whose attachment point is DRI2BufferStencil or + * DRI2BufferDepthStencil are handled as special cases. + * + * \param buffer_name is a human readable name, such as "dri2 front buffer", + * that is passed to intel_region_alloc_for_handle(). + * + * \see intel_update_renderbuffers() + * \see intel_region_alloc_for_handle() + */ +static void +intel_process_dri2_buffer(struct intel_context *intel, + __DRIdrawable *drawable, + __DRIbuffer *buffer, + struct intel_renderbuffer *rb, + const char *buffer_name) +{ + struct intel_region *region = NULL; + + if (!rb) + return; + + unsigned num_samples = rb->Base.Base.NumSamples; + + /* We try to avoid closing and reopening the same BO name, because the first + * use of a mapping of the buffer involves a bunch of page faulting which is + * moderately expensive. 
+ */ + if (num_samples == 0) { + if (rb->mt && + rb->mt->region && + rb->mt->region->name == buffer->name) + return; + } else { + if (rb->mt && + rb->mt->singlesample_mt && + rb->mt->singlesample_mt->region && + rb->mt->singlesample_mt->region->name == buffer->name) + return; + } + + if (unlikely(INTEL_DEBUG & DEBUG_DRI)) { + fprintf(stderr, + "attaching buffer %d, at %d, cpp %d, pitch %d\n", + buffer->name, buffer->attachment, + buffer->cpp, buffer->pitch); + } + + intel_miptree_release(&rb->mt); + region = intel_region_alloc_for_handle(intel->intelScreen, + buffer->cpp, + drawable->w, + drawable->h, + buffer->pitch, + buffer->name, + buffer_name); + if (!region) + return; + + rb->mt = intel_miptree_create_for_dri2_buffer(intel, + buffer->attachment, + intel_rb_format(rb), + num_samples, + region); + intel_region_release(&region); +} diff --git a/src/mesa/drivers/dri/i965/intel_context.h b/src/mesa/drivers/dri/i965/intel_context.h new file mode 100644 index 00000000000..84452b941c8 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_context.h @@ -0,0 +1,638 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTELCONTEXT_INC +#define INTELCONTEXT_INC + + +#include <stdbool.h> +#include <string.h> +#include "main/mtypes.h" +#include "main/mm.h" + +#ifdef __cplusplus +extern "C" { + /* Evil hack for using libdrm in a c++ compiler. */ + #define virtual virt +#endif + +#include "drm.h" +#include "intel_bufmgr.h" + +#include "intel_screen.h" +#include "intel_tex_obj.h" +#include "i915_drm.h" + +#ifdef __cplusplus + #undef virtual +#endif + +#include "tnl/t_vertex.h" + +#define TAG(x) intel##x +#include "tnl_dd/t_dd_vertex.h" +#undef TAG + +#define DV_PF_555 (1<<8) +#define DV_PF_565 (2<<8) +#define DV_PF_8888 (3<<8) +#define DV_PF_4444 (8<<8) +#define DV_PF_1555 (9<<8) + +struct intel_region; +struct intel_context; + +typedef void (*intel_tri_func) (struct intel_context *, intelVertex *, + intelVertex *, intelVertex *); +typedef void (*intel_line_func) (struct intel_context *, intelVertex *, + intelVertex *); +typedef void (*intel_point_func) (struct intel_context *, intelVertex *); + +/** + * Bits for intel->Fallback field + */ +/*@{*/ +#define INTEL_FALLBACK_DRAW_BUFFER 0x1 +#define INTEL_FALLBACK_READ_BUFFER 0x2 +#define INTEL_FALLBACK_DEPTH_BUFFER 0x4 +#define INTEL_FALLBACK_STENCIL_BUFFER 0x8 +#define INTEL_FALLBACK_USER 0x10 +#define INTEL_FALLBACK_RENDERMODE 0x20 +#define INTEL_FALLBACK_TEXTURE 0x40 +#define INTEL_FALLBACK_DRIVER 0x1000 /**< first for drivers */ +/*@}*/ + +extern void 
intelFallback(struct intel_context *intel, GLbitfield bit, + bool mode); +#define FALLBACK( intel, bit, mode ) intelFallback( intel, bit, mode ) + + +#define INTEL_WRITE_PART 0x1 +#define INTEL_WRITE_FULL 0x2 +#define INTEL_READ 0x4 + +#define INTEL_MAX_FIXUP 64 + +#ifndef likely +#ifdef __GNUC__ +#define likely(expr) (__builtin_expect(expr, 1)) +#define unlikely(expr) (__builtin_expect(expr, 0)) +#else +#define likely(expr) (expr) +#define unlikely(expr) (expr) +#endif +#endif + +struct intel_sync_object { + struct gl_sync_object Base; + + /** Batch associated with this sync object */ + drm_intel_bo *bo; +}; + +struct brw_context; + +struct intel_batchbuffer { + /** Current batchbuffer being queued up. */ + drm_intel_bo *bo; + /** Last BO submitted to the hardware. Used for glFinish(). */ + drm_intel_bo *last_bo; + /** BO for post-sync nonzero writes for gen6 workaround. */ + drm_intel_bo *workaround_bo; + bool need_workaround_flush; + + struct cached_batch_item *cached_items; + + uint16_t emit, total; + uint16_t used, reserved_space; + uint32_t *map; + uint32_t *cpu_map; +#define BATCH_SZ (8192*sizeof(uint32_t)) + + uint32_t state_batch_offset; + bool is_blit; + bool needs_sol_reset; + + struct { + uint16_t used; + int reloc_count; + } saved; +}; + +/** + * intel_context is derived from Mesa's context class: struct gl_context. 
+ */ +struct intel_context +{ + struct gl_context ctx; /**< base class, must be first field */ + + struct + { + void (*destroy) (struct intel_context * intel); + void (*emit_state) (struct intel_context * intel); + void (*finish_batch) (struct intel_context * intel); + void (*new_batch) (struct intel_context * intel); + void (*emit_invarient_state) (struct intel_context * intel); + void (*update_texture_state) (struct intel_context * intel); + + void (*render_start) (struct intel_context * intel); + void (*render_prevalidate) (struct intel_context * intel); + void (*set_draw_region) (struct intel_context * intel, + struct intel_region * draw_regions[], + struct intel_region * depth_region, + GLuint num_regions); + void (*update_draw_buffer)(struct intel_context *intel); + + void (*reduced_primitive_state) (struct intel_context * intel, + GLenum rprim); + + bool (*check_vertex_size) (struct intel_context * intel, + GLuint expected); + void (*invalidate_state) (struct intel_context *intel, + GLuint new_state); + + void (*assert_not_dirty) (struct intel_context *intel); + + void (*debug_batch)(struct intel_context *intel); + void (*annotate_aub)(struct intel_context *intel); + bool (*render_target_supported)(struct intel_context *intel, + struct gl_renderbuffer *rb); + + /** Can HiZ be enabled on a depthbuffer of the given format? 
*/ + bool (*is_hiz_depth_format)(struct intel_context *intel, + gl_format format); + + /** + * Surface state operations (i965+ only) + * \{ + */ + void (*update_texture_surface)(struct gl_context *ctx, + unsigned unit, + uint32_t *binding_table, + unsigned surf_index); + void (*update_renderbuffer_surface)(struct brw_context *brw, + struct gl_renderbuffer *rb, + bool layered, + unsigned unit); + void (*update_null_renderbuffer_surface)(struct brw_context *brw, + unsigned unit); + void (*create_constant_surface)(struct brw_context *brw, + drm_intel_bo *bo, + uint32_t offset, + uint32_t size, + uint32_t *out_offset, + bool dword_pitch); + /** \} */ + + /** + * Send the appropriate state packets to configure depth, stencil, and + * HiZ buffers (i965+ only) + */ + void (*emit_depth_stencil_hiz)(struct brw_context *brw, + struct intel_mipmap_tree *depth_mt, + uint32_t depth_offset, + uint32_t depthbuffer_format, + uint32_t depth_surface_type, + struct intel_mipmap_tree *stencil_mt, + bool hiz, bool separate_stencil, + uint32_t width, uint32_t height, + uint32_t tile_x, uint32_t tile_y); + + } vtbl; + + GLbitfield Fallback; /**< mask of INTEL_FALLBACK_x bits */ + GLuint NewGLState; + + dri_bufmgr *bufmgr; + unsigned int maxBatchSize; + + /** + * Generation number of the hardware: 2 is 8xx, 3 is 9xx pre-965, 4 is 965. + */ + int gen; + int gt; + bool needs_ff_sync; + bool is_haswell; + bool is_baytrail; + bool is_g4x; + bool is_945; + bool has_separate_stencil; + bool must_use_separate_stencil; + bool has_hiz; + bool has_llc; + bool has_swizzling; + + int urb_size; + + drm_intel_context *hw_ctx; + + struct intel_batchbuffer batch; + + drm_intel_bo *first_post_swapbuffers_batch; + bool need_throttle; + bool no_batch_wrap; + bool tnl_pipeline_running; /**< Set while i915's _tnl_run_pipeline. 
*/ + + /** + * Set if we're either a debug context or the INTEL_DEBUG=perf environment + * variable is set, this is the flag indicating to do expensive work that + * might lead to a perf_debug() call. + */ + bool perf_debug; + + struct + { + GLuint id; + uint32_t start_ptr; /**< for i8xx */ + uint32_t primitive; /**< Current hardware primitive type */ + void (*flush) (struct intel_context *); + drm_intel_bo *vb_bo; + uint8_t *vb; + unsigned int start_offset; /**< Byte offset of primitive sequence */ + unsigned int current_offset; /**< Byte offset of next vertex */ + unsigned int count; /**< Number of vertices in current primitive */ + } prim; + + struct { + drm_intel_bo *bo; + GLuint offset; + uint32_t buffer_len; + uint32_t buffer_offset; + char buffer[4096]; + } upload; + + uint32_t max_gtt_map_object_size; + + GLuint stats_wm; + + /* Offsets of fields within the current vertex: + */ + GLuint coloroffset; + GLuint specoffset; + GLuint wpos_offset; + + struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX]; + GLuint vertex_attr_count; + + bool hw_stencil; + bool hw_stipple; + bool no_rast; + bool always_flush_batch; + bool always_flush_cache; + bool disable_throttling; + + /* State for intelvb.c and inteltris.c. + */ + GLuint RenderIndex; + GLmatrix ViewportMatrix; + GLenum render_primitive; + GLenum reduced_primitive; /*< Only gen < 6 */ + GLuint vertex_size; + GLubyte *verts; /* points to tnl->clipspace.vertex_buf */ + + /* Fallback rasterization functions + */ + intel_point_func draw_point; + intel_line_func draw_line; + intel_tri_func draw_tri; + + /** + * Set if rendering has occured to the drawable's front buffer. + * + * This is used in the DRI2 case to detect that glFlush should also copy + * the contents of the fake front buffer to the real front buffer. + */ + bool front_buffer_dirty; + + /** + * Track whether front-buffer rendering is currently enabled + * + * A separate flag is used to track this in order to support MRT more + * easily. 
+ */ + bool is_front_buffer_rendering; + /** + * Track whether front-buffer is the current read target. + * + * This is closely associated with is_front_buffer_rendering, but may + * be set separately. The DRI2 fake front buffer must be referenced + * either way. + */ + bool is_front_buffer_reading; + + bool use_early_z; + + int driFd; + + __DRIcontext *driContext; + struct intel_screen *intelScreen; + void (*saved_viewport)(struct gl_context * ctx, + GLint x, GLint y, GLsizei width, GLsizei height); + + /** + * Configuration cache + */ + driOptionCache optionCache; +}; + +#define SUBPIXEL_X 0.125 +#define SUBPIXEL_Y 0.125 + +/** + * Align a value down to an alignment value + * + * If \c value is not already aligned to the requested alignment value, it + * will be rounded down. + * + * \param value Value to be rounded + * \param alignment Alignment value to be used. This must be a power of two. + * + * \sa ALIGN() + */ +#define ROUND_DOWN_TO(value, alignment) ((value) & ~(alignment - 1)) + +static INLINE uint32_t +U_FIXED(float value, uint32_t frac_bits) +{ + value *= (1 << frac_bits); + return value < 0 ? 0 : value; +} + +static INLINE uint32_t +S_FIXED(float value, uint32_t frac_bits) +{ + return value * (1 << frac_bits); +} + +#define INTEL_FIREVERTICES(intel) \ +do { \ + if ((intel)->prim.flush) \ + (intel)->prim.flush(intel); \ +} while (0) + +/* ================================================================ + * From linux kernel i386 header files, copes with odd sizes better + * than COPY_DWORDS would: + * XXX Put this in src/mesa/main/imports.h ??? 
+ */ +#if defined(i386) || defined(__i386__) +static INLINE void * __memcpy(void * to, const void * from, size_t n) +{ + int d0, d1, d2; + __asm__ __volatile__( + "rep ; movsl\n\t" + "testb $2,%b4\n\t" + "je 1f\n\t" + "movsw\n" + "1:\ttestb $1,%b4\n\t" + "je 2f\n\t" + "movsb\n" + "2:" + : "=&c" (d0), "=&D" (d1), "=&S" (d2) + :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from) + : "memory"); + return (to); +} +#else +#define __memcpy(a,b,c) memcpy(a,b,c) +#endif + + +/* ================================================================ + * Debugging: + */ +extern int INTEL_DEBUG; + +#define DEBUG_TEXTURE 0x1 +#define DEBUG_STATE 0x2 +#define DEBUG_IOCTL 0x4 +#define DEBUG_BLIT 0x8 +#define DEBUG_MIPTREE 0x10 +#define DEBUG_PERF 0x20 +#define DEBUG_BATCH 0x80 +#define DEBUG_PIXEL 0x100 +#define DEBUG_BUFMGR 0x200 +#define DEBUG_REGION 0x400 +#define DEBUG_FBO 0x800 +#define DEBUG_GS 0x1000 +#define DEBUG_SYNC 0x2000 +#define DEBUG_PRIMS 0x4000 +#define DEBUG_VERTS 0x8000 +#define DEBUG_DRI 0x10000 +#define DEBUG_SF 0x20000 +#define DEBUG_STATS 0x100000 +#define DEBUG_WM 0x400000 +#define DEBUG_URB 0x800000 +#define DEBUG_VS 0x1000000 +#define DEBUG_CLIP 0x2000000 +#define DEBUG_AUB 0x4000000 +#define DEBUG_SHADER_TIME 0x8000000 +#define DEBUG_BLORP 0x10000000 +#define DEBUG_NO16 0x20000000 + +#ifdef HAVE_ANDROID_PLATFORM +#define LOG_TAG "INTEL-MESA" +#include <cutils/log.h> +#ifndef ALOGW +#define ALOGW LOGW +#endif +#define dbg_printf(...) ALOGW(__VA_ARGS__) +#else +#define dbg_printf(...) printf(__VA_ARGS__) +#endif /* HAVE_ANDROID_PLATFORM */ + +#define DBG(...) do { \ + if (unlikely(INTEL_DEBUG & FILE_DEBUG_FLAG)) \ + dbg_printf(__VA_ARGS__); \ +} while(0) + +#define perf_debug(...) 
do { \ + static GLuint msg_id = 0; \ + if (unlikely(INTEL_DEBUG & DEBUG_PERF)) \ + dbg_printf(__VA_ARGS__); \ + if (intel->perf_debug) \ + _mesa_gl_debug(&intel->ctx, &msg_id, \ + MESA_DEBUG_TYPE_PERFORMANCE, \ + MESA_DEBUG_SEVERITY_MEDIUM, \ + __VA_ARGS__); \ +} while(0) + +#define WARN_ONCE(cond, fmt...) do { \ + if (unlikely(cond)) { \ + static bool _warned = false; \ + static GLuint msg_id = 0; \ + if (!_warned) { \ + fprintf(stderr, "WARNING: "); \ + fprintf(stderr, fmt); \ + _warned = true; \ + \ + _mesa_gl_debug(ctx, &msg_id, \ + MESA_DEBUG_TYPE_OTHER, \ + MESA_DEBUG_SEVERITY_HIGH, fmt); \ + } \ + } \ +} while (0) + +#define PCI_CHIP_845_G 0x2562 +#define PCI_CHIP_I830_M 0x3577 +#define PCI_CHIP_I855_GM 0x3582 +#define PCI_CHIP_I865_G 0x2572 +#define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_I915_GM 0x2592 +#define PCI_CHIP_I945_G 0x2772 +#define PCI_CHIP_I945_GM 0x27A2 +#define PCI_CHIP_I945_GME 0x27AE +#define PCI_CHIP_G33_G 0x29C2 +#define PCI_CHIP_Q35_G 0x29B2 +#define PCI_CHIP_Q33_G 0x29D2 + + +/* ================================================================ + * intel_context.c: + */ + +extern bool intelInitContext(struct intel_context *intel, + int api, + unsigned major_version, + unsigned minor_version, + const struct gl_config * mesaVis, + __DRIcontext * driContextPriv, + void *sharedContextPrivate, + struct dd_function_table *functions, + unsigned *dri_ctx_error); + +extern void intelFinish(struct gl_context * ctx); +extern void intel_flush_rendering_to_batch(struct gl_context *ctx); +extern void _intel_flush(struct gl_context * ctx, const char *file, int line); + +#define intel_flush(ctx) _intel_flush(ctx, __FILE__, __LINE__) + +extern void intelInitDriverFunctions(struct dd_function_table *functions); + +void intel_init_syncobj_functions(struct dd_function_table *functions); + + +/* ================================================================ + * intel_state.c: + */ + +#define COMPAREFUNC_ALWAYS 0 +#define COMPAREFUNC_NEVER 0x1 +#define 
COMPAREFUNC_LESS 0x2 +#define COMPAREFUNC_EQUAL 0x3 +#define COMPAREFUNC_LEQUAL 0x4 +#define COMPAREFUNC_GREATER 0x5 +#define COMPAREFUNC_NOTEQUAL 0x6 +#define COMPAREFUNC_GEQUAL 0x7 + +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 0x1 +#define STENCILOP_REPLACE 0x2 +#define STENCILOP_INCRSAT 0x3 +#define STENCILOP_DECRSAT 0x4 +#define STENCILOP_INCR 0x5 +#define STENCILOP_DECR 0x6 +#define STENCILOP_INVERT 0x7 + +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 0x1 +#define LOGICOP_AND_INV 0x2 +#define LOGICOP_COPY_INV 0x3 +#define LOGICOP_AND_RVRSE 0x4 +#define LOGICOP_INV 0x5 +#define LOGICOP_XOR 0x6 +#define LOGICOP_NAND 0x7 +#define LOGICOP_AND 0x8 +#define LOGICOP_EQUIV 0x9 +#define LOGICOP_NOOP 0xa +#define LOGICOP_OR_INV 0xb +#define LOGICOP_COPY 0xc +#define LOGICOP_OR_RVRSE 0xd +#define LOGICOP_OR 0xe +#define LOGICOP_SET 0xf + +#define BLENDFACT_ZERO 0x01 +#define BLENDFACT_ONE 0x02 +#define BLENDFACT_SRC_COLR 0x03 +#define BLENDFACT_INV_SRC_COLR 0x04 +#define BLENDFACT_SRC_ALPHA 0x05 +#define BLENDFACT_INV_SRC_ALPHA 0x06 +#define BLENDFACT_DST_ALPHA 0x07 +#define BLENDFACT_INV_DST_ALPHA 0x08 +#define BLENDFACT_DST_COLR 0x09 +#define BLENDFACT_INV_DST_COLR 0x0a +#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b +#define BLENDFACT_CONST_COLOR 0x0c +#define BLENDFACT_INV_CONST_COLOR 0x0d +#define BLENDFACT_CONST_ALPHA 0x0e +#define BLENDFACT_INV_CONST_ALPHA 0x0f +#define BLENDFACT_MASK 0x0f + +enum { + DRI_CONF_BO_REUSE_DISABLED, + DRI_CONF_BO_REUSE_ALL +}; + +extern int intel_translate_shadow_compare_func(GLenum func); +extern int intel_translate_compare_func(GLenum func); +extern int intel_translate_stencil_op(GLenum op); +extern int intel_translate_blend_factor(GLenum factor); +extern int intel_translate_logic_op(GLenum opcode); + +void intel_update_renderbuffers(__DRIcontext *context, + __DRIdrawable *drawable); +void intel_prepare_render(struct intel_context *intel); + +void +intel_resolve_for_dri2_flush(struct intel_context *intel, + __DRIdrawable *drawable); 
+ +void i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region, + uint32_t buffer_id); +void intel_init_texture_formats(struct gl_context *ctx); + +/*====================================================================== + * Inline conversion functions. + * These are better-typed than the macros used previously: + */ +static INLINE struct intel_context * +intel_context(struct gl_context * ctx) +{ + return (struct intel_context *) ctx; +} + +static INLINE bool +is_power_of_two(uint32_t value) +{ + return (value & (value - 1)) == 0; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index a2f3e8cd208..5cb2fa38f33 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -1 +1,188 @@ -../intel/intel_extensions.c
\ No newline at end of file +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/version.h" + +#include "intel_chipset.h" +#include "intel_context.h" +#include "intel_extensions.h" +#include "intel_reg.h" +#include "utils.h" + +/** + * Initializes potential list of extensions if ctx == NULL, or actually enables + * extensions for a context. 
+ */ +void +intelInitExtensions(struct gl_context *ctx) +{ + struct intel_context *intel = intel_context(ctx); + + ctx->Extensions.ARB_draw_elements_base_vertex = true; + ctx->Extensions.ARB_explicit_attrib_location = true; + ctx->Extensions.ARB_framebuffer_object = true; + ctx->Extensions.ARB_half_float_pixel = true; + ctx->Extensions.ARB_internalformat_query = true; + ctx->Extensions.ARB_map_buffer_range = true; + ctx->Extensions.ARB_point_sprite = true; + ctx->Extensions.ARB_shader_objects = true; + ctx->Extensions.ARB_shading_language_100 = true; + ctx->Extensions.ARB_sync = true; + ctx->Extensions.ARB_texture_border_clamp = true; + ctx->Extensions.ARB_texture_cube_map = true; + ctx->Extensions.ARB_texture_env_combine = true; + ctx->Extensions.ARB_texture_env_crossbar = true; + ctx->Extensions.ARB_texture_env_dot3 = true; + ctx->Extensions.ARB_texture_storage = true; + ctx->Extensions.ARB_vertex_program = true; + ctx->Extensions.ARB_vertex_shader = true; + ctx->Extensions.EXT_blend_color = true; + ctx->Extensions.EXT_blend_equation_separate = true; + ctx->Extensions.EXT_blend_func_separate = true; + ctx->Extensions.EXT_blend_minmax = true; + ctx->Extensions.EXT_framebuffer_blit = true; + ctx->Extensions.EXT_framebuffer_object = true; + ctx->Extensions.EXT_fog_coord = true; + ctx->Extensions.EXT_gpu_program_parameters = true; + ctx->Extensions.EXT_packed_depth_stencil = true; + ctx->Extensions.EXT_pixel_buffer_object = true; + ctx->Extensions.EXT_point_parameters = true; + ctx->Extensions.EXT_provoking_vertex = true; + ctx->Extensions.EXT_secondary_color = true; + ctx->Extensions.EXT_separate_shader_objects = true; + ctx->Extensions.EXT_texture_env_dot3 = true; + ctx->Extensions.EXT_texture_filter_anisotropic = true; + ctx->Extensions.APPLE_object_purgeable = true; + ctx->Extensions.MESA_pack_invert = true; + ctx->Extensions.MESA_ycbcr_texture = true; + ctx->Extensions.NV_blend_square = true; + ctx->Extensions.NV_texture_rectangle = true; + 
ctx->Extensions.TDFX_texture_compression_FXT1 = true; + ctx->Extensions.OES_EGL_image = true; + ctx->Extensions.OES_draw_texture = true; + + if (intel->gen >= 6) + ctx->Const.GLSLVersion = 140; + else + ctx->Const.GLSLVersion = 120; + _mesa_override_glsl_version(ctx); + + if (intel->gen >= 6) { + ctx->Extensions.EXT_framebuffer_multisample = true; + ctx->Extensions.EXT_transform_feedback = true; + ctx->Extensions.ARB_blend_func_extended = !driQueryOptionb(&intel->optionCache, "disable_blend_func_extended"); + ctx->Extensions.ARB_draw_buffers_blend = true; + ctx->Extensions.ARB_ES3_compatibility = true; + ctx->Extensions.ARB_uniform_buffer_object = true; + ctx->Extensions.ARB_texture_buffer_object = true; + ctx->Extensions.ARB_texture_buffer_object_rgb32 = true; + ctx->Extensions.ARB_texture_cube_map_array = true; + ctx->Extensions.OES_depth_texture_cube_map = true; + ctx->Extensions.ARB_shading_language_packing = true; + ctx->Extensions.ARB_texture_multisample = true; + ctx->Extensions.ARB_texture_storage_multisample = true; + } + + if (intel->gen >= 5) { + ctx->Extensions.ARB_texture_query_lod = true; + ctx->Extensions.EXT_timer_query = true; + } + + if (intel->gen >= 6) { + uint64_t dummy; + /* Test if the kernel has the ioctl. 
*/ + if (drm_intel_reg_read(intel->bufmgr, TIMESTAMP, &dummy) == 0) + ctx->Extensions.ARB_timer_query = true; + } + + if (intel->gen >= 4) { + if (ctx->API == API_OPENGL_CORE) + ctx->Extensions.ARB_base_instance = true; + if (ctx->API != API_OPENGL_CORE) + ctx->Extensions.ARB_color_buffer_float = true; + ctx->Extensions.ARB_depth_buffer_float = true; + ctx->Extensions.ARB_depth_clamp = true; + ctx->Extensions.ARB_draw_instanced = true; + ctx->Extensions.ARB_instanced_arrays = true; + ctx->Extensions.ARB_fragment_coord_conventions = true; + ctx->Extensions.ARB_fragment_program_shadow = true; + ctx->Extensions.ARB_fragment_shader = true; + ctx->Extensions.ARB_half_float_vertex = true; + ctx->Extensions.ARB_occlusion_query = true; + ctx->Extensions.ARB_occlusion_query2 = true; + ctx->Extensions.ARB_point_sprite = true; + ctx->Extensions.ARB_seamless_cube_map = true; + ctx->Extensions.ARB_shader_bit_encoding = true; + ctx->Extensions.ARB_shader_texture_lod = true; + ctx->Extensions.ARB_texture_float = true; + ctx->Extensions.EXT_texture_shared_exponent = true; + ctx->Extensions.EXT_packed_float = true; + ctx->Extensions.ARB_texture_compression_rgtc = true; + ctx->Extensions.ARB_texture_rg = true; + ctx->Extensions.ARB_texture_rgb10_a2ui = true; + ctx->Extensions.ARB_vertex_type_2_10_10_10_rev = true; + ctx->Extensions.EXT_draw_buffers2 = true; + ctx->Extensions.EXT_framebuffer_sRGB = true; + ctx->Extensions.EXT_texture_array = true; + ctx->Extensions.EXT_texture_integer = true; + ctx->Extensions.EXT_texture_snorm = true; + ctx->Extensions.EXT_texture_swizzle = true; + ctx->Extensions.EXT_vertex_array_bgra = true; + ctx->Extensions.ATI_envmap_bumpmap = true; + ctx->Extensions.MESA_texture_array = true; + ctx->Extensions.NV_conditional_render = true; + ctx->Extensions.OES_compressed_ETC1_RGB8_texture = true; + ctx->Extensions.OES_standard_derivatives = true; + } + + if (intel->gen >= 3) { + ctx->Extensions.ARB_ES2_compatibility = true; + ctx->Extensions.ARB_depth_texture 
= true; + ctx->Extensions.ARB_fragment_program = true; + ctx->Extensions.ARB_shadow = true; + ctx->Extensions.ARB_texture_non_power_of_two = true; + ctx->Extensions.EXT_texture_sRGB = true; + ctx->Extensions.EXT_texture_sRGB_decode = true; + ctx->Extensions.EXT_shadow_funcs = true; + ctx->Extensions.EXT_stencil_two_side = true; + ctx->Extensions.ATI_separate_stencil = true; + ctx->Extensions.ATI_texture_env_combine3 = true; + ctx->Extensions.NV_texture_env_combine4 = true; + ctx->Extensions.ARB_fragment_shader = true; + ctx->Extensions.ARB_occlusion_query = true; + } + + if (intel->ctx.Mesa_DXTn + || driQueryOptionb(&intel->optionCache, "force_s3tc_enable")) + ctx->Extensions.EXT_texture_compression_s3tc = true; + + ctx->Extensions.ANGLE_texture_compression_dxt = true; + + if (intel->gen >= 4) { + ctx->Extensions.NV_primitive_restart = true; + } +} diff --git a/src/mesa/drivers/dri/i965/intel_extensions.h b/src/mesa/drivers/dri/i965/intel_extensions.h new file mode 100644 index 00000000000..9991c000108 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_extensions.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_EXTENSIONS_H +#define INTEL_EXTENSIONS_H + + +extern void +intelInitExtensions(struct gl_context *ctx); + +extern void +intelInitExtensionsES1(struct gl_context *ctx); + +extern void +intelInitExtensionsES2(struct gl_context *ctx); + + +#endif diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index a19f86dcc57..d16523bbbf6 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -1 +1,949 @@ -../intel/intel_fbo.c
\ No newline at end of file +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "main/enums.h" +#include "main/imports.h" +#include "main/macros.h" +#include "main/mtypes.h" +#include "main/fbobject.h" +#include "main/framebuffer.h" +#include "main/renderbuffer.h" +#include "main/context.h" +#include "main/teximage.h" +#include "main/image.h" + +#include "swrast/swrast.h" +#include "drivers/common/meta.h" + +#include "intel_context.h" +#include "intel_batchbuffer.h" +#include "intel_buffers.h" +#include "intel_blit.h" +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" +#include "intel_regions.h" +#include "intel_tex.h" +#ifndef I915 +#include "brw_context.h" +#endif + +#define FILE_DEBUG_FLAG DEBUG_FBO + +static struct gl_renderbuffer * +intel_new_renderbuffer(struct gl_context * ctx, GLuint name); + +struct intel_region* +intel_get_rb_region(struct gl_framebuffer *fb, GLuint attIndex) +{ + struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, attIndex); + if (irb && irb->mt) { + if (attIndex == BUFFER_STENCIL && irb->mt->stencil_mt) + return irb->mt->stencil_mt->region; + else + return irb->mt->region; + } else + return NULL; +} + +/** + * Create a new framebuffer object. 
+ */ +static struct gl_framebuffer * +intel_new_framebuffer(struct gl_context * ctx, GLuint name) +{ + /* Only drawable state in intel_framebuffer at this time, just use Mesa's + * class + */ + return _mesa_new_framebuffer(ctx, name); +} + + +/** Called by gl_renderbuffer::Delete() */ +static void +intel_delete_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb) +{ + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + ASSERT(irb); + + intel_miptree_release(&irb->mt); + + _mesa_delete_renderbuffer(ctx, rb); +} + +/** + * \see dd_function_table::MapRenderbuffer + */ +static void +intel_map_renderbuffer(struct gl_context *ctx, + struct gl_renderbuffer *rb, + GLuint x, GLuint y, GLuint w, GLuint h, + GLbitfield mode, + GLubyte **out_map, + GLint *out_stride) +{ + struct intel_context *intel = intel_context(ctx); + struct swrast_renderbuffer *srb = (struct swrast_renderbuffer *)rb; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + void *map; + int stride; + + if (srb->Buffer) { + /* this is a malloc'd renderbuffer (accum buffer), not an irb */ + GLint bpp = _mesa_get_format_bytes(rb->Format); + GLint rowStride = srb->RowStride; + *out_map = (GLubyte *) srb->Buffer + y * rowStride + x * bpp; + *out_stride = rowStride; + return; + } + + intel_prepare_render(intel); + + /* For a window-system renderbuffer, we need to flip the mapping we receive + * upside-down. So we need to ask for a rectangle on flipped vertically, and + * we then return a pointer to the bottom of it with a negative stride. 
+ */ + if (rb->Name == 0) { + y = rb->Height - y - h; + } + + intel_miptree_map(intel, irb->mt, irb->mt_level, irb->mt_layer, + x, y, w, h, mode, &map, &stride); + + if (rb->Name == 0) { + map += (h - 1) * stride; + stride = -stride; + } + + DBG("%s: rb %d (%s) mt mapped: (%d, %d) (%dx%d) -> %p/%d\n", + __FUNCTION__, rb->Name, _mesa_get_format_name(rb->Format), + x, y, w, h, map, stride); + + *out_map = map; + *out_stride = stride; +} + +/** + * \see dd_function_table::UnmapRenderbuffer + */ +static void +intel_unmap_renderbuffer(struct gl_context *ctx, + struct gl_renderbuffer *rb) +{ + struct intel_context *intel = intel_context(ctx); + struct swrast_renderbuffer *srb = (struct swrast_renderbuffer *)rb; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + DBG("%s: rb %d (%s)\n", __FUNCTION__, + rb->Name, _mesa_get_format_name(rb->Format)); + + if (srb->Buffer) { + /* this is a malloc'd renderbuffer (accum buffer) */ + /* nothing to do */ + return; + } + + intel_miptree_unmap(intel, irb->mt, irb->mt_level, irb->mt_layer); +} + + +/** + * Round up the requested multisample count to the next supported sample size. + */ +unsigned +intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples) +{ + switch (intel->gen) { + case 6: + /* Gen6 supports only 4x multisampling. */ + if (num_samples > 0) + return 4; + else + return 0; + case 7: + /* Gen7 supports 4x and 8x multisampling. */ + if (num_samples > 4) + return 8; + else if (num_samples > 0) + return 4; + else + return 0; + return 0; + default: + /* MSAA unsupported. */ + return 0; + } +} + + +/** + * Called via glRenderbufferStorageEXT() to set the format and allocate + * storage for a user-created renderbuffer. 
+ */ +static GLboolean +intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer *rb, + GLenum internalFormat, + GLuint width, GLuint height) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_screen *screen = intel->intelScreen; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + rb->NumSamples = intel_quantize_num_samples(screen, rb->NumSamples); + + switch (internalFormat) { + default: + /* Use the same format-choice logic as for textures. + * Renderbuffers aren't any different from textures for us, + * except they're less useful because you can't texture with + * them. + */ + rb->Format = intel->ctx.Driver.ChooseTextureFormat(ctx, GL_TEXTURE_2D, + internalFormat, + GL_NONE, GL_NONE); + break; + case GL_STENCIL_INDEX: + case GL_STENCIL_INDEX1_EXT: + case GL_STENCIL_INDEX4_EXT: + case GL_STENCIL_INDEX8_EXT: + case GL_STENCIL_INDEX16_EXT: + /* These aren't actual texture formats, so force them here. */ + if (intel->has_separate_stencil) { + rb->Format = MESA_FORMAT_S8; + } else { + assert(!intel->must_use_separate_stencil); + rb->Format = MESA_FORMAT_S8_Z24; + } + break; + } + + rb->Width = width; + rb->Height = height; + rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat); + + intel_miptree_release(&irb->mt); + + DBG("%s: %s: %s (%dx%d)\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(internalFormat), + _mesa_get_format_name(rb->Format), width, height); + + if (width == 0 || height == 0) + return true; + + irb->mt = intel_miptree_create_for_renderbuffer(intel, rb->Format, + width, height, + rb->NumSamples); + if (!irb->mt) + return false; + + return true; +} + + +static void +intel_image_target_renderbuffer_storage(struct gl_context *ctx, + struct gl_renderbuffer *rb, + void *image_handle) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_renderbuffer *irb; + __DRIscreen *screen; + __DRIimage *image; + + screen = intel->intelScreen->driScrnPriv; + image = 
screen->dri2.image->lookupEGLImage(screen, image_handle, + screen->loaderPrivate); + if (image == NULL) + return; + + /* __DRIimage is opaque to the core so it has to be checked here */ + switch (image->format) { + case MESA_FORMAT_RGBA8888_REV: + _mesa_error(&intel->ctx, GL_INVALID_OPERATION, + "glEGLImageTargetRenderbufferStorage(unsupported image format"); + return; + break; + default: + break; + } + + irb = intel_renderbuffer(rb); + intel_miptree_release(&irb->mt); + irb->mt = intel_miptree_create_for_bo(intel, + image->region->bo, + image->format, + image->offset, + image->region->width, + image->region->height, + image->region->pitch, + image->region->tiling); + if (!irb->mt) + return; + + rb->InternalFormat = image->internal_format; + rb->Width = image->region->width; + rb->Height = image->region->height; + rb->Format = image->format; + rb->_BaseFormat = _mesa_base_fbo_format(&intel->ctx, + image->internal_format); + rb->NeedsFinishRenderTexture = true; +} + +/** + * Called by _mesa_resize_framebuffer() for each hardware renderbuffer when a + * window system framebuffer is resized. + * + * Any actual buffer reallocations for hardware renderbuffers (which would + * have triggered _mesa_resize_framebuffer()) were done by + * intel_process_dri2_buffer(). + */ +static GLboolean +intel_alloc_window_storage(struct gl_context * ctx, struct gl_renderbuffer *rb, + GLenum internalFormat, GLuint width, GLuint height) +{ + ASSERT(rb->Name == 0); + rb->Width = width; + rb->Height = height; + rb->InternalFormat = internalFormat; + + return true; +} + +/** Dummy function for gl_renderbuffer::AllocStorage() */ +static GLboolean +intel_nop_alloc_storage(struct gl_context * ctx, struct gl_renderbuffer *rb, + GLenum internalFormat, GLuint width, GLuint height) +{ + _mesa_problem(ctx, "intel_op_alloc_storage should never be called."); + return false; +} + +/** + * Create a new intel_renderbuffer which corresponds to an on-screen window, + * not a user-created renderbuffer. 
+ * + * \param num_samples must be quantized. + */ +struct intel_renderbuffer * +intel_create_renderbuffer(gl_format format, unsigned num_samples) +{ + struct intel_renderbuffer *irb; + struct gl_renderbuffer *rb; + + GET_CURRENT_CONTEXT(ctx); + + irb = CALLOC_STRUCT(intel_renderbuffer); + if (!irb) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer"); + return NULL; + } + + rb = &irb->Base.Base; + + _mesa_init_renderbuffer(rb, 0); + rb->ClassID = INTEL_RB_CLASS; + rb->_BaseFormat = _mesa_get_format_base_format(format); + rb->Format = format; + rb->InternalFormat = rb->_BaseFormat; + rb->NumSamples = num_samples; + + /* intel-specific methods */ + rb->Delete = intel_delete_renderbuffer; + rb->AllocStorage = intel_alloc_window_storage; + + return irb; +} + +/** + * Private window-system buffers (as opposed to ones shared with the display + * server created with intel_create_renderbuffer()) are most similar in their + * handling to user-created renderbuffers, but they have a resize handler that + * may be called at intel_update_renderbuffers() time. + * + * \param num_samples must be quantized. + */ +struct intel_renderbuffer * +intel_create_private_renderbuffer(gl_format format, unsigned num_samples) +{ + struct intel_renderbuffer *irb; + + irb = intel_create_renderbuffer(format, num_samples); + irb->Base.Base.AllocStorage = intel_alloc_renderbuffer_storage; + + return irb; +} + +/** + * Create a new renderbuffer object. + * Typically called via glBindRenderbufferEXT(). 
+ */ +static struct gl_renderbuffer * +intel_new_renderbuffer(struct gl_context * ctx, GLuint name) +{ + /*struct intel_context *intel = intel_context(ctx); */ + struct intel_renderbuffer *irb; + struct gl_renderbuffer *rb; + + irb = CALLOC_STRUCT(intel_renderbuffer); + if (!irb) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer"); + return NULL; + } + + rb = &irb->Base.Base; + + _mesa_init_renderbuffer(rb, name); + rb->ClassID = INTEL_RB_CLASS; + + /* intel-specific methods */ + rb->Delete = intel_delete_renderbuffer; + rb->AllocStorage = intel_alloc_renderbuffer_storage; + /* span routines set in alloc_storage function */ + + return rb; +} + + +/** + * Called via glBindFramebufferEXT(). + */ +static void +intel_bind_framebuffer(struct gl_context * ctx, GLenum target, + struct gl_framebuffer *fb, struct gl_framebuffer *fbread) +{ + if (target == GL_FRAMEBUFFER_EXT || target == GL_DRAW_FRAMEBUFFER_EXT) { + intel_draw_buffer(ctx); + } + else { + /* don't need to do anything if target == GL_READ_FRAMEBUFFER_EXT */ + } +} + + +/** + * Called via glFramebufferRenderbufferEXT(). + */ +static void +intel_framebuffer_renderbuffer(struct gl_context * ctx, + struct gl_framebuffer *fb, + GLenum attachment, struct gl_renderbuffer *rb) +{ + DBG("Intel FramebufferRenderbuffer %u %u\n", fb->Name, rb ? 
rb->Name : 0); + + _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb); + intel_draw_buffer(ctx); +} + +static bool +intel_renderbuffer_update_wrapper(struct intel_context *intel, + struct intel_renderbuffer *irb, + struct gl_texture_image *image, + uint32_t layer) +{ + struct gl_renderbuffer *rb = &irb->Base.Base; + struct intel_texture_image *intel_image = intel_texture_image(image); + struct intel_mipmap_tree *mt = intel_image->mt; + int level = image->Level; + + rb->Depth = image->Depth; + + rb->AllocStorage = intel_nop_alloc_storage; + + intel_miptree_check_level_layer(mt, level, layer); + irb->mt_level = level; + + switch (mt->msaa_layout) { + case INTEL_MSAA_LAYOUT_UMS: + case INTEL_MSAA_LAYOUT_CMS: + irb->mt_layer = layer * mt->num_samples; + break; + + default: + irb->mt_layer = layer; + } + + intel_miptree_reference(&irb->mt, mt); + + intel_renderbuffer_set_draw_offset(irb); + + if (mt->hiz_mt == NULL && + intel->vtbl.is_hiz_depth_format(intel, rb->Format)) { + intel_miptree_alloc_hiz(intel, mt); + if (!mt->hiz_mt) + return false; + } + + return true; +} + +void +intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb) +{ + unsigned int dst_x, dst_y; + + /* compute offset of the particular 2D image within the texture region */ + intel_miptree_get_image_offset(irb->mt, + irb->mt_level, + irb->mt_layer, + &dst_x, &dst_y); + + irb->draw_x = dst_x; + irb->draw_y = dst_y; +} + +/** + * Called by glFramebufferTexture[123]DEXT() (and other places) to + * prepare for rendering into texture memory. This might be called + * many times to choose different texture levels, cube faces, etc + * before intel_finish_render_texture() is ever called. 
+ */ +static void +intel_render_texture(struct gl_context * ctx, + struct gl_framebuffer *fb, + struct gl_renderbuffer_attachment *att) +{ + struct intel_context *intel = intel_context(ctx); + struct gl_renderbuffer *rb = att->Renderbuffer; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + struct gl_texture_image *image = rb->TexImage; + struct intel_texture_image *intel_image = intel_texture_image(image); + struct intel_mipmap_tree *mt = intel_image->mt; + int layer; + + (void) fb; + + if (att->CubeMapFace > 0) { + assert(att->Zoffset == 0); + layer = att->CubeMapFace; + } else { + layer = att->Zoffset; + } + + if (!intel_image->mt) { + /* Fallback on drawing to a texture that doesn't have a miptree + * (has a border, width/height 0, etc.) + */ + _swrast_render_texture(ctx, fb, att); + return; + } + + intel_miptree_check_level_layer(mt, att->TextureLevel, layer); + + if (!intel_renderbuffer_update_wrapper(intel, irb, image, layer)) { + _swrast_render_texture(ctx, fb, att); + return; + } + + DBG("Begin render %s texture tex=%u w=%d h=%d d=%d refcount=%d\n", + _mesa_get_format_name(image->TexFormat), + att->Texture->Name, image->Width, image->Height, image->Depth, + rb->RefCount); + + /* update drawing region, etc */ + intel_draw_buffer(ctx); +} + + +/** + * Called by Mesa when rendering to a texture is done. + */ +static void +intel_finish_render_texture(struct gl_context * ctx, struct gl_renderbuffer *rb) +{ + struct intel_context *intel = intel_context(ctx); + + DBG("Finish render %s texture\n", _mesa_get_format_name(rb->Format)); + + /* Since we've (probably) rendered to the texture and will (likely) use + * it in the texture domain later on in this batchbuffer, flush the + * batch. Once again, we wish for a domain tracker in libdrm to cover + * usage inside of a batchbuffer like GEM does in the kernel. + */ + intel_batchbuffer_emit_mi_flush(intel); +} + +#define fbo_incomplete(fb, ...) 
do { \ + static GLuint msg_id = 0; \ + if (unlikely(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) { \ + _mesa_gl_debug(ctx, &msg_id, \ + MESA_DEBUG_TYPE_OTHER, \ + MESA_DEBUG_SEVERITY_MEDIUM, \ + __VA_ARGS__); \ + } \ + DBG(__VA_ARGS__); \ + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED; \ + } while (0) + +/** + * Do additional "completeness" testing of a framebuffer object. + */ +static void +intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_renderbuffer *depthRb = + intel_get_renderbuffer(fb, BUFFER_DEPTH); + struct intel_renderbuffer *stencilRb = + intel_get_renderbuffer(fb, BUFFER_STENCIL); + struct intel_mipmap_tree *depth_mt = NULL, *stencil_mt = NULL; + int i; + + DBG("%s() on fb %p (%s)\n", __FUNCTION__, + fb, (fb == ctx->DrawBuffer ? "drawbuffer" : + (fb == ctx->ReadBuffer ? "readbuffer" : "other buffer"))); + + if (depthRb) + depth_mt = depthRb->mt; + if (stencilRb) { + stencil_mt = stencilRb->mt; + if (stencil_mt->stencil_mt) + stencil_mt = stencil_mt->stencil_mt; + } + + if (depth_mt && stencil_mt) { + if (depth_mt == stencil_mt) { + /* For true packed depth/stencil (not faked on prefers-separate-stencil + * hardware) we need to be sure they're the same level/layer, since + * we'll be emitting a single packet describing the packed setup. 
+ */ + if (depthRb->mt_level != stencilRb->mt_level || + depthRb->mt_layer != stencilRb->mt_layer) { + fbo_incomplete(fb, + "FBO incomplete: depth image level/layer %d/%d != " + "stencil image %d/%d\n", + depthRb->mt_level, + depthRb->mt_layer, + stencilRb->mt_level, + stencilRb->mt_layer); + } + } else { + if (!intel->has_separate_stencil) { + fbo_incomplete(fb, "FBO incomplete: separate stencil " + "unsupported\n"); + } + if (stencil_mt->format != MESA_FORMAT_S8) { + fbo_incomplete(fb, "FBO incomplete: separate stencil is %s " + "instead of S8\n", + _mesa_get_format_name(stencil_mt->format)); + } + if (intel->gen < 7 && !intel_renderbuffer_has_hiz(depthRb)) { + /* Before Gen7, separate depth and stencil buffers can be used + * only if HiZ is enabled. From the Sandybridge PRM, Volume 2, + * Part 1, Bit 3DSTATE_DEPTH_BUFFER.SeparateStencilBufferEnable: + * [DevSNB]: This field must be set to the same value (enabled + * or disabled) as Hierarchical Depth Buffer Enable. + */ + fbo_incomplete(fb, "FBO incomplete: separate stencil " + "without HiZ\n"); + } + } + } + + for (i = 0; i < Elements(fb->Attachment); i++) { + struct gl_renderbuffer *rb; + struct intel_renderbuffer *irb; + + if (fb->Attachment[i].Type == GL_NONE) + continue; + + /* A supported attachment will have a Renderbuffer set either + * from being a Renderbuffer or being a texture that got the + * intel_wrap_texture() treatment. 
+ */ + rb = fb->Attachment[i].Renderbuffer; + if (rb == NULL) { + fbo_incomplete(fb, "FBO incomplete: attachment without " + "renderbuffer\n"); + continue; + } + + if (fb->Attachment[i].Type == GL_TEXTURE) { + if (rb->TexImage->Border) { + fbo_incomplete(fb, "FBO incomplete: texture with border\n"); + continue; + } + } + + irb = intel_renderbuffer(rb); + if (irb == NULL) { + fbo_incomplete(fb, "FBO incomplete: software rendering " + "renderbuffer\n"); + continue; + } + + if (!intel->vtbl.render_target_supported(intel, rb)) { + fbo_incomplete(fb, "FBO incomplete: Unsupported HW " + "texture/renderbuffer format attached: %s\n", + _mesa_get_format_name(intel_rb_format(irb))); + } + } +} + +/** + * Try to do a glBlitFramebuffer using glCopyTexSubImage2D + * We can do this when the dst renderbuffer is actually a texture and + * there is no scaling, mirroring or scissoring. + * + * \return new buffer mask indicating the buffers left to blit using the + * normal path. + */ +static GLbitfield +intel_blit_framebuffer_with_blitter(struct gl_context *ctx, + GLint srcX0, GLint srcY0, + GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, + GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter) +{ + struct intel_context *intel = intel_context(ctx); + + if (mask & GL_COLOR_BUFFER_BIT) { + GLint i; + const struct gl_framebuffer *drawFb = ctx->DrawBuffer; + const struct gl_framebuffer *readFb = ctx->ReadBuffer; + struct gl_renderbuffer *src_rb = readFb->_ColorReadBuffer; + struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb); + + if (!src_irb) { + perf_debug("glBlitFramebuffer(): missing src renderbuffer. " + "Falling back to software rendering.\n"); + return mask; + } + + /* If the source and destination are the same size with no mirroring, + * the rectangles are within the size of the texture and there is no + * scissor, then we can probably use the blit engine. 
+ */ + if (!(srcX0 - srcX1 == dstX0 - dstX1 && + srcY0 - srcY1 == dstY0 - dstY1 && + srcX1 >= srcX0 && + srcY1 >= srcY0 && + srcX0 >= 0 && srcX1 <= readFb->Width && + srcY0 >= 0 && srcY1 <= readFb->Height && + dstX0 >= 0 && dstX1 <= drawFb->Width && + dstY0 >= 0 && dstY1 <= drawFb->Height && + !ctx->Scissor.Enabled)) { + perf_debug("glBlitFramebuffer(): non-1:1 blit. " + "Falling back to software rendering.\n"); + return mask; + } + + /* Blit to all active draw buffers. We don't do any pre-checking, + * because we assume that copying to MRTs is rare, and failure midway + * through copying is even more rare. Even if it was to occur, it's + * safe to let meta start the copy over from scratch, because + * glBlitFramebuffer completely overwrites the destination pixels, and + * results are undefined if any destination pixels have a dependency on + * source pixels. + */ + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + struct gl_renderbuffer *dst_rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *dst_irb = intel_renderbuffer(dst_rb); + + if (!dst_irb) { + perf_debug("glBlitFramebuffer(): missing dst renderbuffer. " + "Falling back to software rendering.\n"); + return mask; + } + + gl_format src_format = _mesa_get_srgb_format_linear(src_rb->Format); + gl_format dst_format = _mesa_get_srgb_format_linear(dst_rb->Format); + if (src_format != dst_format) { + perf_debug("glBlitFramebuffer(): unsupported blit from %s to %s. " + "Falling back to software rendering.\n", + _mesa_get_format_name(src_format), + _mesa_get_format_name(dst_format)); + return mask; + } + + if (!intel_miptree_blit(intel, + src_irb->mt, + src_irb->mt_level, src_irb->mt_layer, + srcX0, srcY0, src_rb->Name == 0, + dst_irb->mt, + dst_irb->mt_level, dst_irb->mt_layer, + dstX0, dstY0, dst_rb->Name == 0, + dstX1 - dstX0, dstY1 - dstY0, GL_COPY)) { + perf_debug("glBlitFramebuffer(): unknown blit failure. 
" + "Falling back to software rendering.\n"); + return mask; + } + } + + mask &= ~GL_COLOR_BUFFER_BIT; + } + + return mask; +} + +static void +intel_blit_framebuffer(struct gl_context *ctx, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter) +{ +#ifndef I915 + mask = brw_blorp_framebuffer(intel_context(ctx), + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, filter); + if (mask == 0x0) + return; +#endif + + /* Try using the BLT engine. */ + mask = intel_blit_framebuffer_with_blitter(ctx, + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, filter); + if (mask == 0x0) + return; + + + _mesa_meta_BlitFramebuffer(ctx, + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, filter); +} + +/** + * This is a no-op except on multisample buffers shared with DRI2. + */ +void +intel_renderbuffer_set_needs_downsample(struct intel_renderbuffer *irb) +{ + if (irb->mt && irb->mt->singlesample_mt) + irb->mt->need_downsample = true; +} + +/** + * Does the renderbuffer have hiz enabled? 
+ */ +bool +intel_renderbuffer_has_hiz(struct intel_renderbuffer *irb) +{ + return intel_miptree_slice_has_hiz(irb->mt, irb->mt_level, irb->mt_layer); +} + +void +intel_renderbuffer_set_needs_hiz_resolve(struct intel_renderbuffer *irb) +{ + if (irb->mt) { + intel_miptree_slice_set_needs_hiz_resolve(irb->mt, + irb->mt_level, + irb->mt_layer); + } +} + +void +intel_renderbuffer_set_needs_depth_resolve(struct intel_renderbuffer *irb) +{ + if (irb->mt) { + intel_miptree_slice_set_needs_depth_resolve(irb->mt, + irb->mt_level, + irb->mt_layer); + } +} + +bool +intel_renderbuffer_resolve_hiz(struct intel_context *intel, + struct intel_renderbuffer *irb) +{ + if (irb->mt) + return intel_miptree_slice_resolve_hiz(intel, + irb->mt, + irb->mt_level, + irb->mt_layer); + + return false; +} + +bool +intel_renderbuffer_resolve_depth(struct intel_context *intel, + struct intel_renderbuffer *irb) +{ + if (irb->mt) + return intel_miptree_slice_resolve_depth(intel, + irb->mt, + irb->mt_level, + irb->mt_layer); + + return false; +} + +void +intel_renderbuffer_move_to_temp(struct intel_context *intel, + struct intel_renderbuffer *irb, + bool invalidate) +{ + struct gl_renderbuffer *rb =&irb->Base.Base; + struct intel_texture_image *intel_image = intel_texture_image(rb->TexImage); + struct intel_mipmap_tree *new_mt; + int width, height, depth; + + intel_miptree_get_dimensions_for_image(rb->TexImage, &width, &height, &depth); + + new_mt = intel_miptree_create(intel, rb->TexImage->TexObject->Target, + intel_image->base.Base.TexFormat, + intel_image->base.Base.Level, + intel_image->base.Base.Level, + width, height, depth, + true, + irb->mt->num_samples, + INTEL_MIPTREE_TILING_ANY); + + if (intel->vtbl.is_hiz_depth_format(intel, new_mt->format)) { + intel_miptree_alloc_hiz(intel, new_mt); + } + + intel_miptree_copy_teximage(intel, intel_image, new_mt, invalidate); + + intel_miptree_reference(&irb->mt, intel_image->mt); + intel_renderbuffer_set_draw_offset(irb); + 
intel_miptree_release(&new_mt); +} + +/** + * Do one-time context initializations related to GL_EXT_framebuffer_object. + * Hook in device driver functions. + */ +void +intel_fbo_init(struct intel_context *intel) +{ + intel->ctx.Driver.NewFramebuffer = intel_new_framebuffer; + intel->ctx.Driver.NewRenderbuffer = intel_new_renderbuffer; + intel->ctx.Driver.MapRenderbuffer = intel_map_renderbuffer; + intel->ctx.Driver.UnmapRenderbuffer = intel_unmap_renderbuffer; + intel->ctx.Driver.BindFramebuffer = intel_bind_framebuffer; + intel->ctx.Driver.FramebufferRenderbuffer = intel_framebuffer_renderbuffer; + intel->ctx.Driver.RenderTexture = intel_render_texture; + intel->ctx.Driver.FinishRenderTexture = intel_finish_render_texture; + intel->ctx.Driver.ValidateFramebuffer = intel_validate_framebuffer; + intel->ctx.Driver.BlitFramebuffer = intel_blit_framebuffer; + intel->ctx.Driver.EGLImageTargetRenderbufferStorage = + intel_image_target_renderbuffer_storage; +} diff --git a/src/mesa/drivers/dri/i965/intel_fbo.h b/src/mesa/drivers/dri/i965/intel_fbo.h new file mode 100644 index 00000000000..5f40d352836 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_fbo.h @@ -0,0 +1,211 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_FBO_H +#define INTEL_FBO_H + +#include <stdbool.h> +#include <assert.h> +#include "main/formats.h" +#include "main/macros.h" +#include "intel_context.h" +#include "intel_mipmap_tree.h" +#include "intel_screen.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct intel_context; +struct intel_mipmap_tree; +struct intel_texture_image; + +/** + * Intel renderbuffer, derived from gl_renderbuffer. + */ +struct intel_renderbuffer +{ + struct swrast_renderbuffer Base; + struct intel_mipmap_tree *mt; /**< The renderbuffer storage. */ + + /** + * \name Miptree view + * \{ + * + * Multiple renderbuffers may simultaneously wrap a single texture and each + * provide a different view into that texture. The fields below indicate + * which miptree slice is wrapped by this renderbuffer. The fields' values + * are consistent with the 'level' and 'layer' parameters of + * glFramebufferTextureLayer(). + * + * For renderbuffers not created with glFramebufferTexture*(), mt_level and + * mt_layer are 0. + */ + unsigned int mt_level; + unsigned int mt_layer; + /** \} */ + + GLuint draw_x, draw_y; /**< Offset of drawing within the region */ +}; + + +/** + * gl_renderbuffer is a base class which we subclass. The Class field + * is used for simple run-time type checking. + */ +#define INTEL_RB_CLASS 0x12345678 + + +/** + * Return a gl_renderbuffer ptr casted to intel_renderbuffer. 
+ * NULL will be returned if the rb isn't really an intel_renderbuffer. + * This is determined by checking the ClassID. + */ +static INLINE struct intel_renderbuffer * +intel_renderbuffer(struct gl_renderbuffer *rb) +{ + struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb; + if (irb && irb->Base.Base.ClassID == INTEL_RB_CLASS) { + /*_mesa_warning(NULL, "Returning non-intel Rb\n");*/ + return irb; + } + else + return NULL; +} + + +/** + * \brief Return the framebuffer attachment specified by attIndex. + * + * If the framebuffer lacks the specified attachment, then return null. + * + * If the attached renderbuffer is a wrapper, then return wrapped + * renderbuffer. + */ +static INLINE struct intel_renderbuffer * +intel_get_renderbuffer(struct gl_framebuffer *fb, gl_buffer_index attIndex) +{ + struct gl_renderbuffer *rb; + + assert((unsigned)attIndex < ARRAY_SIZE(fb->Attachment)); + + rb = fb->Attachment[attIndex].Renderbuffer; + if (!rb) + return NULL; + + return intel_renderbuffer(rb); +} + + +static INLINE gl_format +intel_rb_format(const struct intel_renderbuffer *rb) +{ + return rb->Base.Base.Format; +} + +extern struct intel_renderbuffer * +intel_create_renderbuffer(gl_format format, unsigned num_samples); + +struct intel_renderbuffer * +intel_create_private_renderbuffer(gl_format format, unsigned num_samples); + +struct gl_renderbuffer* +intel_create_wrapped_renderbuffer(struct gl_context * ctx, + int width, int height, + gl_format format); + +extern void +intel_fbo_init(struct intel_context *intel); + + +extern void +intel_flip_renderbuffers(struct gl_framebuffer *fb); + +void +intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb); + +static inline uint32_t +intel_renderbuffer_get_tile_offsets(struct intel_renderbuffer *irb, + uint32_t *tile_x, + uint32_t *tile_y) +{ + return intel_miptree_get_tile_offsets(irb->mt, irb->mt_level, irb->mt_layer, + tile_x, tile_y); +} + +struct intel_region* +intel_get_rb_region(struct gl_framebuffer 
*fb, GLuint attIndex); + +void +intel_renderbuffer_set_needs_downsample(struct intel_renderbuffer *irb); + +bool +intel_renderbuffer_has_hiz(struct intel_renderbuffer *irb); + +void +intel_renderbuffer_set_needs_hiz_resolve(struct intel_renderbuffer *irb); + +void +intel_renderbuffer_set_needs_depth_resolve(struct intel_renderbuffer *irb); + + +/** + * \brief Perform a HiZ resolve on the renderbuffer. + * + * It is safe to call this function on a renderbuffer without HiZ. In that + * case, the function is a no-op. + * + * \return false if no resolve was needed + */ +bool +intel_renderbuffer_resolve_hiz(struct intel_context *intel, + struct intel_renderbuffer *irb); + +/** + * \brief Perform a depth resolve on the renderbuffer. + * + * It is safe to call this function on a renderbuffer without HiZ. In that + * case, the function is a no-op. + * + * \return false if no resolve was needed + */ +bool +intel_renderbuffer_resolve_depth(struct intel_context *intel, + struct intel_renderbuffer *irb); + +void intel_renderbuffer_move_to_temp(struct intel_context *intel, + struct intel_renderbuffer *irb, + bool invalidate); + +unsigned +intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples); + +#ifdef __cplusplus +} +#endif + +#endif /* INTEL_FBO_H */ diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 242fed0b6ae..1776a4b34c5 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -1 +1,2349 @@ -../intel/intel_mipmap_tree.c
\ No newline at end of file +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include <GL/gl.h> +#include <GL/internal/dri_interface.h> + +#include "intel_batchbuffer.h" +#include "intel_chipset.h" +#include "intel_context.h" +#include "intel_mipmap_tree.h" +#include "intel_regions.h" +#include "intel_resolve_map.h" +#include "intel_tex_layout.h" +#include "intel_tex.h" +#include "intel_blit.h" + +#ifndef I915 +#include "brw_blorp.h" +#endif + +#include "main/enums.h" +#include "main/formats.h" +#include "main/glformats.h" +#include "main/texcompress_etc.h" +#include "main/teximage.h" + +#define FILE_DEBUG_FLAG DEBUG_MIPTREE + +static GLenum +target_to_target(GLenum target) +{ + switch (target) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: + return GL_TEXTURE_CUBE_MAP_ARB; + default: + return target; + } +} + + +/** + * Determine which MSAA layout should be used by the MSAA surface being + * created, based on the chip generation and the surface type. + */ +static enum intel_msaa_layout +compute_msaa_layout(struct intel_context *intel, gl_format format, GLenum target) +{ + /* Prior to Gen7, all MSAA surfaces used IMS layout. */ + if (intel->gen < 7) + return INTEL_MSAA_LAYOUT_IMS; + + /* In Gen7, IMS layout is only used for depth and stencil buffers. */ + switch (_mesa_get_format_base_format(format)) { + case GL_DEPTH_COMPONENT: + case GL_STENCIL_INDEX: + case GL_DEPTH_STENCIL: + return INTEL_MSAA_LAYOUT_IMS; + default: + /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"): + * + * This field must be set to 0 for all SINT MSRTs when all RT channels + * are not written + * + * In practice this means that we have to disable MCS for all signed + * integer MSAA buffers. 
The alternative, to disable MCS only when one + * of the render target channels is disabled, is impractical because it + * would require converting between CMS and UMS MSAA layouts on the fly, + * which is expensive. + */ + if (_mesa_get_format_datatype(format) == GL_INT) { + /* TODO: is this workaround needed for future chipsets? */ + assert(intel->gen == 7); + return INTEL_MSAA_LAYOUT_UMS; + } else { + /* For now, if we're going to be texturing from this surface, + * force UMS, so that the shader doesn't have to do different things + * based on whether there's a multisample control surface needing sampled first. + * We can't just blindly read the MCS surface in all cases because: + * + * From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"): + * + * If this field is disabled and the sampling engine <ld_mcs> message + * is issued on this surface, the MCS surface may be accessed. Software + * must ensure that the surface is defined to avoid GTT errors. + */ + if (target == GL_TEXTURE_2D_MULTISAMPLE || + target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) { + return INTEL_MSAA_LAYOUT_UMS; + } else { + return INTEL_MSAA_LAYOUT_CMS; + } + } + } +} + + +/** + * For single-sampled render targets ("non-MSRT"), the MCS buffer is a + * scaled-down bitfield representation of the color buffer which is capable of + * recording when blocks of the color buffer are equal to the clear value. + * This function returns the block size that will be used by the MCS buffer + * corresponding to a certain color miptree. + * + * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)", + * beneath the "Fast Color Clear" bullet (p327): + * + * The following table describes the RT alignment + * + * Pixels Lines + * TiledY RT CL + * bpp + * 32 8 4 + * 64 4 4 + * 128 2 4 + * TiledX RT CL + * bpp + * 32 16 2 + * 64 8 2 + * 128 4 2 + * + * This alignment has the following uses: + * + * - For figuring out the size of the MCS buffer. 
Each 4k tile in the MCS
 *   buffer contains 128 blocks horizontally and 256 blocks vertically.
 *
 * - For figuring out alignment restrictions for a fast clear operation.  Fast
 *   clear operations must always clear aligned multiples of 16 blocks
 *   horizontally and 32 blocks vertically.
 *
 * - For scaling down the coordinates sent through the render pipeline during
 *   a fast clear.  X coordinates must be scaled down by 8 times the block
 *   width, and Y coordinates by 16 times the block height.
 *
 * - For scaling down the coordinates sent through the render pipeline during
 *   a "Render Target Resolve" operation.  X coordinates must be scaled down
 *   by half the block width, and Y coordinates by half the block height.
 */
void
intel_get_non_msrt_mcs_alignment(struct intel_context *intel,
                                 struct intel_mipmap_tree *mt,
                                 unsigned *width_px, unsigned *height)
{
   unsigned block_bytes;   /* width of one block in bytes */
   unsigned block_lines;   /* height of one block in lines */

   switch (mt->region->tiling) {
   case I915_TILING_X:
      block_bytes = 64;
      block_lines = 2;
      break;
   default:
      assert(!"Non-MSRT MCS requires X or Y tiling");
      /* In release builds, fall through */
   case I915_TILING_Y:
      block_bytes = 32;
      block_lines = 4;
      break;
   }

   /* Convert the block width from bytes to pixels. */
   *width_px = block_bytes / mt->cpp;
   *height = block_lines;
}


/**
 * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
 * can be used.
 *
 * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
 * beneath the "Fast Color Clear" bullet (p326):
 *
 *     - Support is limited to tiled render targets.
 *     - Support is for non-mip-mapped and non-array surface types only.
 *
 * And then later, on p327:
 *
 *     - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
 *       64bpp, and 128bpp.
+ */ +bool +intel_is_non_msrt_mcs_buffer_supported(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ +#ifdef I915 + /* MCS is not supported on the i915 (pre-Gen4) driver */ + return false; +#else + struct brw_context *brw = brw_context(&intel->ctx); + + /* MCS support does not exist prior to Gen7 */ + if (intel->gen < 7) + return false; + + /* MCS is only supported for color buffers */ + switch (_mesa_get_format_base_format(mt->format)) { + case GL_DEPTH_COMPONENT: + case GL_DEPTH_STENCIL: + case GL_STENCIL_INDEX: + return false; + } + + if (mt->region->tiling != I915_TILING_X && + mt->region->tiling != I915_TILING_Y) + return false; + if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16) + return false; + if (mt->first_level != 0 || mt->last_level != 0) + return false; + if (mt->physical_depth0 != 1) + return false; + + /* There's no point in using an MCS buffer if the surface isn't in a + * renderable format. + */ + if (!brw->format_supported_as_render_target[mt->format]) + return false; + + return true; +#endif +} + + +/** + * @param for_bo Indicates that the caller is + * intel_miptree_create_for_bo(). If true, then do not create + * \c stencil_mt. 
+ */ +struct intel_mipmap_tree * +intel_miptree_create_layout(struct intel_context *intel, + GLenum target, + gl_format format, + GLuint first_level, + GLuint last_level, + GLuint width0, + GLuint height0, + GLuint depth0, + bool for_bo, + GLuint num_samples) +{ + struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); + + DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(target), + _mesa_get_format_name(format), + first_level, last_level, mt); + + mt->target = target_to_target(target); + mt->format = format; + mt->first_level = first_level; + mt->last_level = last_level; + mt->logical_width0 = width0; + mt->logical_height0 = height0; + mt->logical_depth0 = depth0; +#ifndef I915 + mt->mcs_state = INTEL_MCS_STATE_NONE; +#endif + + /* The cpp is bytes per (1, blockheight)-sized block for compressed + * textures. This is why you'll see divides by blockheight all over + */ + unsigned bw, bh; + _mesa_get_format_block_size(format, &bw, &bh); + assert(_mesa_get_format_bytes(mt->format) % bw == 0); + mt->cpp = _mesa_get_format_bytes(mt->format) / bw; + + mt->num_samples = num_samples; + mt->compressed = _mesa_is_format_compressed(format); + mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE; + mt->refcount = 1; + + if (num_samples > 1) { + /* Adjust width/height/depth for MSAA */ + mt->msaa_layout = compute_msaa_layout(intel, format, mt->target); + if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) { + /* In the Sandy Bridge PRM, volume 4, part 1, page 31, it says: + * + * "Any of the other messages (sample*, LOD, load4) used with a + * (4x) multisampled surface will in-effect sample a surface with + * double the height and width as that indicated in the surface + * state. 
Each pixel position on the original-sized surface is + * replaced with a 2x2 of samples with the following arrangement: + * + * sample 0 sample 2 + * sample 1 sample 3" + * + * Thus, when sampling from a multisampled texture, it behaves as + * though the layout in memory for (x,y,sample) is: + * + * (0,0,0) (0,0,2) (1,0,0) (1,0,2) + * (0,0,1) (0,0,3) (1,0,1) (1,0,3) + * + * (0,1,0) (0,1,2) (1,1,0) (1,1,2) + * (0,1,1) (0,1,3) (1,1,1) (1,1,3) + * + * However, the actual layout of multisampled data in memory is: + * + * (0,0,0) (1,0,0) (0,0,1) (1,0,1) + * (0,1,0) (1,1,0) (0,1,1) (1,1,1) + * + * (0,0,2) (1,0,2) (0,0,3) (1,0,3) + * (0,1,2) (1,1,2) (0,1,3) (1,1,3) + * + * This pattern repeats for each 2x2 pixel block. + * + * As a result, when calculating the size of our 4-sample buffer for + * an odd width or height, we have to align before scaling up because + * sample 3 is in that bottom right 2x2 block. + */ + switch (num_samples) { + case 4: + width0 = ALIGN(width0, 2) * 2; + height0 = ALIGN(height0, 2) * 2; + break; + case 8: + width0 = ALIGN(width0, 2) * 4; + height0 = ALIGN(height0, 2) * 2; + break; + default: + /* num_samples should already have been quantized to 0, 1, 4, or + * 8. + */ + assert(false); + } + } else { + /* Non-interleaved */ + depth0 *= num_samples; + } + } + + /* array_spacing_lod0 is only used for non-IMS MSAA surfaces. TODO: can we + * use it elsewhere? 
+ */ + switch (mt->msaa_layout) { + case INTEL_MSAA_LAYOUT_NONE: + case INTEL_MSAA_LAYOUT_IMS: + mt->array_spacing_lod0 = false; + break; + case INTEL_MSAA_LAYOUT_UMS: + case INTEL_MSAA_LAYOUT_CMS: + mt->array_spacing_lod0 = true; + break; + } + + if (target == GL_TEXTURE_CUBE_MAP) { + assert(depth0 == 1); + depth0 = 6; + } + + mt->physical_width0 = width0; + mt->physical_height0 = height0; + mt->physical_depth0 = depth0; + + if (!for_bo && + _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL && + (intel->must_use_separate_stencil || + (intel->has_separate_stencil && + intel->vtbl.is_hiz_depth_format(intel, format)))) { + mt->stencil_mt = intel_miptree_create(intel, + mt->target, + MESA_FORMAT_S8, + mt->first_level, + mt->last_level, + mt->logical_width0, + mt->logical_height0, + mt->logical_depth0, + true, + num_samples, + INTEL_MIPTREE_TILING_ANY); + if (!mt->stencil_mt) { + intel_miptree_release(&mt); + return NULL; + } + + /* Fix up the Z miptree format for how we're splitting out separate + * stencil. Gen7 expects there to be no stencil bits in its depth buffer. + */ + if (mt->format == MESA_FORMAT_S8_Z24) { + mt->format = MESA_FORMAT_X8_Z24; + } else if (mt->format == MESA_FORMAT_Z32_FLOAT_X24S8) { + mt->format = MESA_FORMAT_Z32_FLOAT; + mt->cpp = 4; + } else { + _mesa_problem(NULL, "Unknown format %s in separate stencil mt\n", + _mesa_get_format_name(mt->format)); + } + } + + intel_get_texture_alignment_unit(intel, mt->format, + &mt->align_w, &mt->align_h); + +#ifdef I915 + (void) intel; + if (intel->is_945) + i945_miptree_layout(mt); + else + i915_miptree_layout(mt); +#else + brw_miptree_layout(intel, mt); +#endif + + return mt; +} + +/** + * \brief Helper function for intel_miptree_create(). 
+ */ +static uint32_t +intel_miptree_choose_tiling(struct intel_context *intel, + gl_format format, + uint32_t width0, + uint32_t num_samples, + enum intel_miptree_tiling_mode requested, + struct intel_mipmap_tree *mt) +{ + + if (format == MESA_FORMAT_S8) { + /* The stencil buffer is W tiled. However, we request from the kernel a + * non-tiled buffer because the GTT is incapable of W fencing. + */ + return I915_TILING_NONE; + } + + /* Some usages may want only one type of tiling, like depth miptrees (Y + * tiled), or temporary BOs for uploading data once (linear). + */ + switch (requested) { + case INTEL_MIPTREE_TILING_ANY: + break; + case INTEL_MIPTREE_TILING_Y: + return I915_TILING_Y; + case INTEL_MIPTREE_TILING_NONE: + return I915_TILING_NONE; + } + + if (num_samples > 1) { + /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled + * Surface"): + * + * [DevSNB+]: For multi-sample render targets, this field must be + * 1. MSRTs can only be tiled. + * + * Our usual reason for preferring X tiling (fast blits using the + * blitting engine) doesn't apply to MSAA, since we'll generally be + * downsampling or upsampling when blitting between the MSAA buffer + * and another buffer, and the blitting engine doesn't support that. + * So use Y tiling, since it makes better use of the cache. + */ + return I915_TILING_Y; + } + + GLenum base_format = _mesa_get_format_base_format(format); + if (intel->gen >= 4 && + (base_format == GL_DEPTH_COMPONENT || + base_format == GL_DEPTH_STENCIL_EXT)) + return I915_TILING_Y; + + int minimum_pitch = mt->total_width * mt->cpp; + + /* If the width is much smaller than a tile, don't bother tiling. */ + if (minimum_pitch < 64) + return I915_TILING_NONE; + + if (ALIGN(minimum_pitch, 512) >= 32768) { + perf_debug("%dx%d miptree too large to blit, falling back to untiled", + mt->total_width, mt->total_height); + return I915_TILING_NONE; + } + + /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. 
*/ + if (intel->gen < 6) + return I915_TILING_X; + + return I915_TILING_Y | I915_TILING_X; +} + +struct intel_mipmap_tree * +intel_miptree_create(struct intel_context *intel, + GLenum target, + gl_format format, + GLuint first_level, + GLuint last_level, + GLuint width0, + GLuint height0, + GLuint depth0, + bool expect_accelerated_upload, + GLuint num_samples, + enum intel_miptree_tiling_mode requested_tiling) +{ + struct intel_mipmap_tree *mt; + gl_format tex_format = format; + gl_format etc_format = MESA_FORMAT_NONE; + GLuint total_width, total_height; + + if (!intel->is_baytrail) { + switch (format) { + case MESA_FORMAT_ETC1_RGB8: + format = MESA_FORMAT_RGBX8888_REV; + break; + case MESA_FORMAT_ETC2_RGB8: + format = MESA_FORMAT_RGBX8888_REV; + break; + case MESA_FORMAT_ETC2_SRGB8: + case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC: + case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1: + format = MESA_FORMAT_SARGB8; + break; + case MESA_FORMAT_ETC2_RGBA8_EAC: + case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1: + format = MESA_FORMAT_RGBA8888_REV; + break; + case MESA_FORMAT_ETC2_R11_EAC: + format = MESA_FORMAT_R16; + break; + case MESA_FORMAT_ETC2_SIGNED_R11_EAC: + format = MESA_FORMAT_SIGNED_R16; + break; + case MESA_FORMAT_ETC2_RG11_EAC: + format = MESA_FORMAT_GR1616; + break; + case MESA_FORMAT_ETC2_SIGNED_RG11_EAC: + format = MESA_FORMAT_SIGNED_GR1616; + break; + default: + /* Non ETC1 / ETC2 format */ + break; + } + } + + etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE; + + mt = intel_miptree_create_layout(intel, target, format, + first_level, last_level, width0, + height0, depth0, + false, num_samples); + /* + * pitch == 0 || height == 0 indicates the null texture + */ + if (!mt || !mt->total_width || !mt->total_height) { + intel_miptree_release(&mt); + return NULL; + } + + total_width = mt->total_width; + total_height = mt->total_height; + + if (format == MESA_FORMAT_S8) { + /* Align to size of W tile, 64x64. 
*/ + total_width = ALIGN(total_width, 64); + total_height = ALIGN(total_height, 64); + } + + uint32_t tiling = intel_miptree_choose_tiling(intel, format, width0, + num_samples, requested_tiling, + mt); + bool y_or_x = tiling == (I915_TILING_Y | I915_TILING_X); + + mt->etc_format = etc_format; + mt->region = intel_region_alloc(intel->intelScreen, + y_or_x ? I915_TILING_Y : tiling, + mt->cpp, + total_width, + total_height, + expect_accelerated_upload); + + /* If the region is too large to fit in the aperture, we need to use the + * BLT engine to support it. The BLT paths can't currently handle Y-tiling, + * so we need to fall back to X. + */ + if (y_or_x && mt->region->bo->size >= intel->max_gtt_map_object_size) { + perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n", + mt->total_width, mt->total_height); + intel_region_release(&mt->region); + + mt->region = intel_region_alloc(intel->intelScreen, + I915_TILING_X, + mt->cpp, + total_width, + total_height, + expect_accelerated_upload); + } + + mt->offset = 0; + + if (!mt->region) { + intel_miptree_release(&mt); + return NULL; + } + +#ifndef I915 + /* If this miptree is capable of supporting fast color clears, set + * mcs_state appropriately to ensure that fast clears will occur. + * Allocation of the MCS miptree will be deferred until the first fast + * clear actually occurs. + */ + if (intel_is_non_msrt_mcs_buffer_supported(intel, mt)) + mt->mcs_state = INTEL_MCS_STATE_RESOLVED; +#endif + + return mt; +} + +struct intel_mipmap_tree * +intel_miptree_create_for_bo(struct intel_context *intel, + drm_intel_bo *bo, + gl_format format, + uint32_t offset, + uint32_t width, + uint32_t height, + int pitch, + uint32_t tiling) +{ + struct intel_mipmap_tree *mt; + + struct intel_region *region = calloc(1, sizeof(*region)); + if (!region) + return NULL; + + /* Nothing will be able to use this miptree with the BO if the offset isn't + * aligned. 
+ */ + if (tiling != I915_TILING_NONE) + assert(offset % 4096 == 0); + + /* miptrees can't handle negative pitch. If you need flipping of images, + * that's outside of the scope of the mt. + */ + assert(pitch >= 0); + + mt = intel_miptree_create_layout(intel, GL_TEXTURE_2D, format, + 0, 0, + width, height, 1, + true, 0 /* num_samples */); + if (!mt) + return mt; + + region->cpp = mt->cpp; + region->width = width; + region->height = height; + region->pitch = pitch; + region->refcount = 1; + drm_intel_bo_reference(bo); + region->bo = bo; + region->tiling = tiling; + + mt->region = region; + mt->offset = offset; + + return mt; +} + + +/** + * For a singlesample DRI2 buffer, this simply wraps the given region with a miptree. + * + * For a multisample DRI2 buffer, this wraps the given region with + * a singlesample miptree, then creates a multisample miptree into which the + * singlesample miptree is embedded as a child. + */ +struct intel_mipmap_tree* +intel_miptree_create_for_dri2_buffer(struct intel_context *intel, + unsigned dri_attachment, + gl_format format, + uint32_t num_samples, + struct intel_region *region) +{ + struct intel_mipmap_tree *singlesample_mt = NULL; + struct intel_mipmap_tree *multisample_mt = NULL; + + /* Only the front and back buffers, which are color buffers, are shared + * through DRI2. + */ + assert(dri_attachment == __DRI_BUFFER_BACK_LEFT || + dri_attachment == __DRI_BUFFER_FRONT_LEFT || + dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT); + assert(_mesa_get_format_base_format(format) == GL_RGB || + _mesa_get_format_base_format(format) == GL_RGBA); + + singlesample_mt = intel_miptree_create_for_bo(intel, + region->bo, + format, + 0, + region->width, + region->height, + region->pitch, + region->tiling); + if (!singlesample_mt) + return NULL; + singlesample_mt->region->name = region->name; + +#ifndef I915 + /* If this miptree is capable of supporting fast color clears, set + * mcs_state appropriately to ensure that fast clears will occur. 
+ * Allocation of the MCS miptree will be deferred until the first fast + * clear actually occurs. + */ + if (intel_is_non_msrt_mcs_buffer_supported(intel, singlesample_mt)) + singlesample_mt->mcs_state = INTEL_MCS_STATE_RESOLVED; +#endif + + if (num_samples == 0) + return singlesample_mt; + + multisample_mt = intel_miptree_create_for_renderbuffer(intel, + format, + region->width, + region->height, + num_samples); + if (!multisample_mt) { + intel_miptree_release(&singlesample_mt); + return NULL; + } + + multisample_mt->singlesample_mt = singlesample_mt; + multisample_mt->need_downsample = false; + + if (intel->is_front_buffer_rendering && + (dri_attachment == __DRI_BUFFER_FRONT_LEFT || + dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT)) { + intel_miptree_upsample(intel, multisample_mt); + } + + return multisample_mt; +} + +struct intel_mipmap_tree* +intel_miptree_create_for_renderbuffer(struct intel_context *intel, + gl_format format, + uint32_t width, + uint32_t height, + uint32_t num_samples) +{ + struct intel_mipmap_tree *mt; + uint32_t depth = 1; + bool ok; + + mt = intel_miptree_create(intel, GL_TEXTURE_2D, format, 0, 0, + width, height, depth, true, num_samples, + INTEL_MIPTREE_TILING_ANY); + if (!mt) + goto fail; + + if (intel->vtbl.is_hiz_depth_format(intel, format)) { + ok = intel_miptree_alloc_hiz(intel, mt); + if (!ok) + goto fail; + } + + if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) { + ok = intel_miptree_alloc_mcs(intel, mt, num_samples); + if (!ok) + goto fail; + } + + return mt; + +fail: + intel_miptree_release(&mt); + return NULL; +} + +void +intel_miptree_reference(struct intel_mipmap_tree **dst, + struct intel_mipmap_tree *src) +{ + if (*dst == src) + return; + + intel_miptree_release(dst); + + if (src) { + src->refcount++; + DBG("%s %p refcount now %d\n", __FUNCTION__, src, src->refcount); + } + + *dst = src; +} + + +void +intel_miptree_release(struct intel_mipmap_tree **mt) +{ + if (!*mt) + return; + + DBG("%s %p refcount will be %d\n", 
__FUNCTION__, *mt, (*mt)->refcount - 1); + if (--(*mt)->refcount <= 0) { + GLuint i; + + DBG("%s deleting %p\n", __FUNCTION__, *mt); + + intel_region_release(&((*mt)->region)); + intel_miptree_release(&(*mt)->stencil_mt); + intel_miptree_release(&(*mt)->hiz_mt); +#ifndef I915 + intel_miptree_release(&(*mt)->mcs_mt); +#endif + intel_miptree_release(&(*mt)->singlesample_mt); + intel_resolve_map_clear(&(*mt)->hiz_map); + + for (i = 0; i < MAX_TEXTURE_LEVELS; i++) { + free((*mt)->level[i].slice); + } + + free(*mt); + } + *mt = NULL; +} + +void +intel_miptree_get_dimensions_for_image(struct gl_texture_image *image, + int *width, int *height, int *depth) +{ + switch (image->TexObject->Target) { + case GL_TEXTURE_1D_ARRAY: + *width = image->Width; + *height = 1; + *depth = image->Height; + break; + default: + *width = image->Width; + *height = image->Height; + *depth = image->Depth; + break; + } +} + +/** + * Can the image be pulled into a unified mipmap tree? This mirrors + * the completeness test in a lot of ways. + * + * Not sure whether I want to pass gl_texture_image here. + */ +bool +intel_miptree_match_image(struct intel_mipmap_tree *mt, + struct gl_texture_image *image) +{ + struct intel_texture_image *intelImage = intel_texture_image(image); + GLuint level = intelImage->base.Base.Level; + int width, height, depth; + + /* glTexImage* choose the texture object based on the target passed in, and + * objects can't change targets over their lifetimes, so this should be + * true. 
+ */ + assert(target_to_target(image->TexObject->Target) == mt->target); + + gl_format mt_format = mt->format; + if (mt->format == MESA_FORMAT_X8_Z24 && mt->stencil_mt) + mt_format = MESA_FORMAT_S8_Z24; + if (mt->format == MESA_FORMAT_Z32_FLOAT && mt->stencil_mt) + mt_format = MESA_FORMAT_Z32_FLOAT_X24S8; + if (mt->etc_format != MESA_FORMAT_NONE) + mt_format = mt->etc_format; + + if (image->TexFormat != mt_format) + return false; + + intel_miptree_get_dimensions_for_image(image, &width, &height, &depth); + + if (mt->target == GL_TEXTURE_CUBE_MAP) + depth = 6; + + /* Test image dimensions against the base level image adjusted for + * minification. This will also catch images not present in the + * tree, changed targets, etc. + */ + if (mt->target == GL_TEXTURE_2D_MULTISAMPLE || + mt->target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) { + /* nonzero level here is always bogus */ + assert(level == 0); + + if (width != mt->logical_width0 || + height != mt->logical_height0 || + depth != mt->logical_depth0) { + return false; + } + } + else { + /* all normal textures, renderbuffers, etc */ + if (width != mt->level[level].width || + height != mt->level[level].height || + depth != mt->level[level].depth) { + return false; + } + } + + if (image->NumSamples != mt->num_samples) + return false; + + return true; +} + + +void +intel_miptree_set_level_info(struct intel_mipmap_tree *mt, + GLuint level, + GLuint x, GLuint y, + GLuint w, GLuint h, GLuint d) +{ + mt->level[level].width = w; + mt->level[level].height = h; + mt->level[level].depth = d; + mt->level[level].level_x = x; + mt->level[level].level_y = y; + + DBG("%s level %d size: %d,%d,%d offset %d,%d\n", __FUNCTION__, + level, w, h, d, x, y); + + assert(mt->level[level].slice == NULL); + + mt->level[level].slice = calloc(d, sizeof(*mt->level[0].slice)); + mt->level[level].slice[0].x_offset = mt->level[level].level_x; + mt->level[level].slice[0].y_offset = mt->level[level].level_y; +} + + +void +intel_miptree_set_image_offset(struct 
intel_mipmap_tree *mt, + GLuint level, GLuint img, + GLuint x, GLuint y) +{ + if (img == 0 && level == 0) + assert(x == 0 && y == 0); + + assert(img < mt->level[level].depth); + + mt->level[level].slice[img].x_offset = mt->level[level].level_x + x; + mt->level[level].slice[img].y_offset = mt->level[level].level_y + y; + + DBG("%s level %d img %d pos %d,%d\n", + __FUNCTION__, level, img, + mt->level[level].slice[img].x_offset, + mt->level[level].slice[img].y_offset); +} + +void +intel_miptree_get_image_offset(struct intel_mipmap_tree *mt, + GLuint level, GLuint slice, + GLuint *x, GLuint *y) +{ + assert(slice < mt->level[level].depth); + + *x = mt->level[level].slice[slice].x_offset; + *y = mt->level[level].slice[slice].y_offset; +} + +/** + * Rendering with tiled buffers requires that the base address of the buffer + * be aligned to a page boundary. For renderbuffers, and sometimes with + * textures, we may want the surface to point at a texture image level that + * isn't at a page boundary. + * + * This function returns an appropriately-aligned base offset + * according to the tiling restrictions, plus any required x/y offset + * from there. 
+ */ +uint32_t +intel_miptree_get_tile_offsets(struct intel_mipmap_tree *mt, + GLuint level, GLuint slice, + uint32_t *tile_x, + uint32_t *tile_y) +{ + struct intel_region *region = mt->region; + uint32_t x, y; + uint32_t mask_x, mask_y; + + intel_region_get_tile_masks(region, &mask_x, &mask_y, false); + intel_miptree_get_image_offset(mt, level, slice, &x, &y); + + *tile_x = x & mask_x; + *tile_y = y & mask_y; + + return intel_region_get_aligned_offset(region, x & ~mask_x, y & ~mask_y, + false); +} + +static void +intel_miptree_copy_slice_sw(struct intel_context *intel, + struct intel_mipmap_tree *dst_mt, + struct intel_mipmap_tree *src_mt, + int level, + int slice, + int width, + int height) +{ + void *src, *dst; + int src_stride, dst_stride; + int cpp = dst_mt->cpp; + + intel_miptree_map(intel, src_mt, + level, slice, + 0, 0, + width, height, + GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT, + &src, &src_stride); + + intel_miptree_map(intel, dst_mt, + level, slice, + 0, 0, + width, height, + GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | + BRW_MAP_DIRECT_BIT, + &dst, &dst_stride); + + DBG("sw blit %s mt %p %p/%d -> %s mt %p %p/%d (%dx%d)\n", + _mesa_get_format_name(src_mt->format), + src_mt, src, src_stride, + _mesa_get_format_name(dst_mt->format), + dst_mt, dst, dst_stride, + width, height); + + int row_size = cpp * width; + if (src_stride == row_size && + dst_stride == row_size) { + memcpy(dst, src, row_size * height); + } else { + for (int i = 0; i < height; i++) { + memcpy(dst, src, row_size); + dst += dst_stride; + src += src_stride; + } + } + + intel_miptree_unmap(intel, dst_mt, level, slice); + intel_miptree_unmap(intel, src_mt, level, slice); + + /* Don't forget to copy the stencil data over, too. We could have skipped + * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map + * shuffling the two data sources in/out of temporary storage instead of + * the direct mapping we get this way. 
+ */ + if (dst_mt->stencil_mt) { + assert(src_mt->stencil_mt); + intel_miptree_copy_slice_sw(intel, dst_mt->stencil_mt, src_mt->stencil_mt, + level, slice, width, height); + } +} + +static void +intel_miptree_copy_slice(struct intel_context *intel, + struct intel_mipmap_tree *dst_mt, + struct intel_mipmap_tree *src_mt, + int level, + int face, + int depth) + +{ + gl_format format = src_mt->format; + uint32_t width = src_mt->level[level].width; + uint32_t height = src_mt->level[level].height; + int slice; + + if (face > 0) + slice = face; + else + slice = depth; + + assert(depth < src_mt->level[level].depth); + assert(src_mt->format == dst_mt->format); + + if (dst_mt->compressed) { + height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h; + width = ALIGN(width, dst_mt->align_w); + } + + /* If it's a packed depth/stencil buffer with separate stencil, the blit + * below won't apply since we can't do the depth's Y tiling or the + * stencil's W tiling in the blitter. + */ + if (src_mt->stencil_mt) { + intel_miptree_copy_slice_sw(intel, + dst_mt, src_mt, + level, slice, + width, height); + return; + } + + uint32_t dst_x, dst_y, src_x, src_y; + intel_miptree_get_image_offset(dst_mt, level, slice, &dst_x, &dst_y); + intel_miptree_get_image_offset(src_mt, level, slice, &src_x, &src_y); + + DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n", + _mesa_get_format_name(src_mt->format), + src_mt, src_x, src_y, src_mt->region->pitch, + _mesa_get_format_name(dst_mt->format), + dst_mt, dst_x, dst_y, dst_mt->region->pitch, + width, height); + + if (!intel_miptree_blit(intel, + src_mt, level, slice, 0, 0, false, + dst_mt, level, slice, 0, 0, false, + width, height, GL_COPY)) { + perf_debug("miptree validate blit for %s failed\n", + _mesa_get_format_name(format)); + + intel_miptree_copy_slice_sw(intel, dst_mt, src_mt, level, slice, + width, height); + } +} + +/** + * Copies the image's current data to the given miptree, and associates that + * miptree with the 
image. + * + * If \c invalidate is true, then the actual image data does not need to be + * copied, but the image still needs to be associated to the new miptree (this + * is set to true if we're about to clear the image). + */ +void +intel_miptree_copy_teximage(struct intel_context *intel, + struct intel_texture_image *intelImage, + struct intel_mipmap_tree *dst_mt, + bool invalidate) +{ + struct intel_mipmap_tree *src_mt = intelImage->mt; + struct intel_texture_object *intel_obj = + intel_texture_object(intelImage->base.Base.TexObject); + int level = intelImage->base.Base.Level; + int face = intelImage->base.Base.Face; + GLuint depth = intelImage->base.Base.Depth; + + if (!invalidate) { + for (int slice = 0; slice < depth; slice++) { + intel_miptree_copy_slice(intel, dst_mt, src_mt, level, face, slice); + } + } + + intel_miptree_reference(&intelImage->mt, dst_mt); + intel_obj->needs_validate = true; +} + +bool +intel_miptree_alloc_mcs(struct intel_context *intel, + struct intel_mipmap_tree *mt, + GLuint num_samples) +{ + assert(intel->gen >= 7); /* MCS only used on Gen7+ */ +#ifdef I915 + return false; +#else + assert(mt->mcs_mt == NULL); + + /* Choose the correct format for the MCS buffer. All that really matters + * is that we allocate the right buffer size, since we'll always be + * accessing this miptree using MCS-specific hardware mechanisms, which + * infer the correct format based on num_samples. + */ + gl_format format; + switch (num_samples) { + case 4: + /* 8 bits/pixel are required for MCS data when using 4x MSAA (2 bits for + * each sample). + */ + format = MESA_FORMAT_R8; + break; + case 8: + /* 32 bits/pixel are required for MCS data when using 8x MSAA (3 bits + * for each sample, plus 8 padding bits). 
+ */ + format = MESA_FORMAT_R_UINT32; + break; + default: + assert(!"Unrecognized sample count in intel_miptree_alloc_mcs"); + return false; + }; + + /* From the Ivy Bridge PRM, Vol4 Part1 p76, "MCS Base Address": + * + * "The MCS surface must be stored as Tile Y." + */ + mt->mcs_state = INTEL_MCS_STATE_MSAA; + mt->mcs_mt = intel_miptree_create(intel, + mt->target, + format, + mt->first_level, + mt->last_level, + mt->logical_width0, + mt->logical_height0, + mt->logical_depth0, + true, + 0 /* num_samples */, + INTEL_MIPTREE_TILING_Y); + + /* From the Ivy Bridge PRM, Vol 2 Part 1 p326: + * + * When MCS buffer is enabled and bound to MSRT, it is required that it + * is cleared prior to any rendering. + * + * Since we don't use the MCS buffer for any purpose other than rendering, + * it makes sense to just clear it immediately upon allocation. + * + * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff. + */ + void *data = intel_miptree_map_raw(intel, mt->mcs_mt); + memset(data, 0xff, mt->mcs_mt->region->bo->size); + intel_miptree_unmap_raw(intel, mt->mcs_mt); + + return mt->mcs_mt; +#endif +} + + +bool +intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ +#ifdef I915 + assert(!"MCS not supported on i915"); + return false; +#else + assert(mt->mcs_mt == NULL); + + /* The format of the MCS buffer is opaque to the driver; all that matters + * is that we get its size and pitch right. We'll pretend that the format + * is R32. Since an MCS tile covers 128 blocks horizontally, and a Y-tiled + * R32 buffer is 32 pixels across, we'll need to scale the width down by + * the block width and then a further factor of 4. Since an MCS tile + * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high, + * we'll need to scale the height down by the block height and then a + * further factor of 8. 
+ */ + const gl_format format = MESA_FORMAT_R_UINT32; + unsigned block_width_px; + unsigned block_height; + intel_get_non_msrt_mcs_alignment(intel, mt, &block_width_px, &block_height); + unsigned width_divisor = block_width_px * 4; + unsigned height_divisor = block_height * 8; + unsigned mcs_width = + ALIGN(mt->logical_width0, width_divisor) / width_divisor; + unsigned mcs_height = + ALIGN(mt->logical_height0, height_divisor) / height_divisor; + assert(mt->logical_depth0 == 1); + mt->mcs_mt = intel_miptree_create(intel, + mt->target, + format, + mt->first_level, + mt->last_level, + mcs_width, + mcs_height, + mt->logical_depth0, + true, + 0 /* num_samples */, + INTEL_MIPTREE_TILING_Y); + + return mt->mcs_mt; +#endif +} + + +/** + * Helper for intel_miptree_alloc_hiz() that sets + * \c mt->level[level].slice[layer].has_hiz. Return true if and only if + * \c has_hiz was set. + */ +static bool +intel_miptree_slice_enable_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + assert(mt->hiz_mt); + + if (intel->is_haswell) { + /* Disable HiZ for some slices to work around a hardware bug. + * + * Haswell hardware fails to respect + * 3DSTATE_DEPTH_BUFFER.Depth_Coordinate_Offset_X/Y when during HiZ + * ambiguate operations. The failure is inconsistent and affected by + * other GPU contexts. Running a heavy GPU workload in a separate + * process causes the failure rate to drop to nearly 0. + * + * To workaround the bug, we enable HiZ only when we can guarantee that + * the Depth Coordinate Offset fields will be set to 0. The function + * brw_get_depthstencil_tile_masks() is used to calculate the fields, + * and the function is sometimes called in such a way that the presence + * of an attached stencil buffer changes the fuction's return value. + * + * The largest tile size considered by brw_get_depthstencil_tile_masks() + * is that of the stencil buffer. 
Therefore, if this hiz slice's + * corresponding depth slice has an offset that is aligned to the + * stencil buffer tile size, 64x64 pixels, then + * 3DSTATE_DEPTH_BUFFER.Depth_Coordinate_Offset_X/Y is set to 0. + */ + uint32_t depth_x_offset = mt->level[level].slice[layer].x_offset; + uint32_t depth_y_offset = mt->level[level].slice[layer].y_offset; + if ((depth_x_offset & 63) || (depth_y_offset & 63)) { + return false; + } + } + + mt->level[level].slice[layer].has_hiz = true; + return true; +} + + + +bool +intel_miptree_alloc_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ + assert(mt->hiz_mt == NULL); + mt->hiz_mt = intel_miptree_create(intel, + mt->target, + mt->format, + mt->first_level, + mt->last_level, + mt->logical_width0, + mt->logical_height0, + mt->logical_depth0, + true, + mt->num_samples, + INTEL_MIPTREE_TILING_ANY); + + if (!mt->hiz_mt) + return false; + + /* Mark that all slices need a HiZ resolve. */ + struct intel_resolve_map *head = &mt->hiz_map; + for (int level = mt->first_level; level <= mt->last_level; ++level) { + for (int layer = 0; layer < mt->level[level].depth; ++layer) { + if (!intel_miptree_slice_enable_hiz(intel, mt, level, layer)) + continue; + + head->next = malloc(sizeof(*head->next)); + head->next->prev = head; + head->next->next = NULL; + head = head->next; + + head->level = level; + head->layer = layer; + head->need = GEN6_HIZ_OP_HIZ_RESOLVE; + } + } + + return true; +} + +/** + * Does the miptree slice have hiz enabled? 
+ */ +bool +intel_miptree_slice_has_hiz(struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + intel_miptree_check_level_layer(mt, level, layer); + return mt->level[level].slice[layer].has_hiz; +} + +void +intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + if (!intel_miptree_slice_has_hiz(mt, level, layer)) + return; + + intel_resolve_map_set(&mt->hiz_map, + level, layer, GEN6_HIZ_OP_HIZ_RESOLVE); +} + + +void +intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + if (!intel_miptree_slice_has_hiz(mt, level, layer)) + return; + + intel_resolve_map_set(&mt->hiz_map, + level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE); +} + +static bool +intel_miptree_slice_resolve(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer, + enum gen6_hiz_op need) +{ + intel_miptree_check_level_layer(mt, level, layer); + + struct intel_resolve_map *item = + intel_resolve_map_get(&mt->hiz_map, level, layer); + + if (!item || item->need != need) + return false; + + intel_hiz_exec(intel, mt, level, layer, need); + intel_resolve_map_remove(item); + return true; +} + +bool +intel_miptree_slice_resolve_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + return intel_miptree_slice_resolve(intel, mt, level, layer, + GEN6_HIZ_OP_HIZ_RESOLVE); +} + +bool +intel_miptree_slice_resolve_depth(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + return intel_miptree_slice_resolve(intel, mt, level, layer, + GEN6_HIZ_OP_DEPTH_RESOLVE); +} + +static bool +intel_miptree_all_slices_resolve(struct intel_context *intel, + struct intel_mipmap_tree *mt, + enum gen6_hiz_op need) +{ + bool did_resolve = false; + struct intel_resolve_map *i, *next; + + for (i = mt->hiz_map.next; i; i = next) { + next = i->next; + if (i->need != need) + continue; 
+ + intel_hiz_exec(intel, mt, i->level, i->layer, need); + intel_resolve_map_remove(i); + did_resolve = true; + } + + return did_resolve; +} + +bool +intel_miptree_all_slices_resolve_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ + return intel_miptree_all_slices_resolve(intel, mt, + GEN6_HIZ_OP_HIZ_RESOLVE); +} + +bool +intel_miptree_all_slices_resolve_depth(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ + return intel_miptree_all_slices_resolve(intel, mt, + GEN6_HIZ_OP_DEPTH_RESOLVE); +} + + +void +intel_miptree_resolve_color(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ +#ifdef I915 + /* Fast color clear is not supported on the i915 (pre-Gen4) driver */ +#else + switch (mt->mcs_state) { + case INTEL_MCS_STATE_NONE: + case INTEL_MCS_STATE_MSAA: + case INTEL_MCS_STATE_RESOLVED: + /* No resolve needed */ + break; + case INTEL_MCS_STATE_UNRESOLVED: + case INTEL_MCS_STATE_CLEAR: + brw_blorp_resolve_color(intel, mt); + break; + } +#endif +} + + +/** + * Make it possible to share the region backing the given miptree with another + * process or another miptree. + * + * Fast color clears are unsafe with shared buffers, so we need to resolve and + * then discard the MCS buffer, if present. We also set the mcs_state to + * INTEL_MCS_STATE_NONE to ensure that no MCS buffer gets allocated in the + * future. + */ +void +intel_miptree_make_shareable(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ +#ifdef I915 + /* Nothing needs to be done for I915 */ + (void) intel; + (void) mt; +#else + /* MCS buffers are also used for multisample buffers, but we can't resolve + * away a multisample MCS buffer because it's an integral part of how the + * pixel data is stored. Fortunately this code path should never be + * reached for multisample buffers. 
+ */ + assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE); + + if (mt->mcs_mt) { + intel_miptree_resolve_color(intel, mt); + intel_miptree_release(&mt->mcs_mt); + mt->mcs_state = INTEL_MCS_STATE_NONE; + } +#endif +} + + +/** + * \brief Get pointer offset into stencil buffer. + * + * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we + * must decode the tile's layout in software. + * + * See + * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile + * Format. + * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm + * + * Even though the returned offset is always positive, the return type is + * signed due to + * commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137 + * mesa: Fix return type of _mesa_get_format_bytes() (#37351) + */ +static intptr_t +intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled) +{ + uint32_t tile_size = 4096; + uint32_t tile_width = 64; + uint32_t tile_height = 64; + uint32_t row_size = 64 * stride; + + uint32_t tile_x = x / tile_width; + uint32_t tile_y = y / tile_height; + + /* The byte's address relative to the tile's base addres. 
*/ + uint32_t byte_x = x % tile_width; + uint32_t byte_y = y % tile_height; + + uintptr_t u = tile_y * row_size + + tile_x * tile_size + + 512 * (byte_x / 8) + + 64 * (byte_y / 8) + + 32 * ((byte_y / 4) % 2) + + 16 * ((byte_x / 4) % 2) + + 8 * ((byte_y / 2) % 2) + + 4 * ((byte_x / 2) % 2) + + 2 * (byte_y % 2) + + 1 * (byte_x % 2); + + if (swizzled) { + /* adjust for bit6 swizzling */ + if (((byte_x / 8) % 2) == 1) { + if (((byte_y / 8) % 2) == 0) { + u += 64; + } else { + u -= 64; + } + } + } + + return u; +} + +static void +intel_miptree_updownsample(struct intel_context *intel, + struct intel_mipmap_tree *src, + struct intel_mipmap_tree *dst, + unsigned width, + unsigned height) +{ +#ifndef I915 + int src_x0 = 0; + int src_y0 = 0; + int dst_x0 = 0; + int dst_y0 = 0; + + brw_blorp_blit_miptrees(intel, + src, 0 /* level */, 0 /* layer */, + dst, 0 /* level */, 0 /* layer */, + src_x0, src_y0, + width, height, + dst_x0, dst_y0, + width, height, + false, false /*mirror x, y*/); + + if (src->stencil_mt) { + brw_blorp_blit_miptrees(intel, + src->stencil_mt, 0 /* level */, 0 /* layer */, + dst->stencil_mt, 0 /* level */, 0 /* layer */, + src_x0, src_y0, + width, height, + dst_x0, dst_y0, + width, height, + false, false /*mirror x, y*/); + } +#endif /* I915 */ +} + +static void +assert_is_flat(struct intel_mipmap_tree *mt) +{ + assert(mt->target == GL_TEXTURE_2D); + assert(mt->first_level == 0); + assert(mt->last_level == 0); +} + +/** + * \brief Downsample from mt to mt->singlesample_mt. + * + * If the miptree needs no downsample, then skip. + */ +void +intel_miptree_downsample(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ + /* Only flat, renderbuffer-like miptrees are supported. */ + assert_is_flat(mt); + + if (!mt->need_downsample) + return; + intel_miptree_updownsample(intel, + mt, mt->singlesample_mt, + mt->logical_width0, + mt->logical_height0); + mt->need_downsample = false; +} + +/** + * \brief Upsample from mt->singlesample_mt to mt. 
+ * + * The upsample is done unconditionally. + */ +void +intel_miptree_upsample(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ + /* Only flat, renderbuffer-like miptrees are supported. */ + assert_is_flat(mt); + assert(!mt->need_downsample); + + intel_miptree_updownsample(intel, + mt->singlesample_mt, mt, + mt->logical_width0, + mt->logical_height0); +} + +void * +intel_miptree_map_raw(struct intel_context *intel, struct intel_mipmap_tree *mt) +{ + /* CPU accesses to color buffers don't understand fast color clears, so + * resolve any pending fast color clears before we map. + */ + intel_miptree_resolve_color(intel, mt); + + drm_intel_bo *bo = mt->region->bo; + + if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { + if (drm_intel_bo_busy(bo)) { + perf_debug("Mapping a busy BO, causing a stall on the GPU.\n"); + } + } + + intel_flush(&intel->ctx); + + if (mt->region->tiling != I915_TILING_NONE) + drm_intel_gem_bo_map_gtt(bo); + else + drm_intel_bo_map(bo, true); + + return bo->virtual; +} + +void +intel_miptree_unmap_raw(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ + drm_intel_bo_unmap(mt->region->bo); +} + +static void +intel_miptree_map_gtt(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, unsigned int slice) +{ + unsigned int bw, bh; + void *base; + unsigned int image_x, image_y; + int x = map->x; + int y = map->y; + + /* For compressed formats, the stride is the number of bytes per + * row of blocks. intel_miptree_get_image_offset() already does + * the divide. + */ + _mesa_get_format_block_size(mt->format, &bw, &bh); + assert(y % bh == 0); + y /= bh; + + base = intel_miptree_map_raw(intel, mt) + mt->offset; + + if (base == NULL) + map->ptr = NULL; + else { + /* Note that in the case of cube maps, the caller must have passed the + * slice number referencing the face. 
+ */ + intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); + x += image_x; + y += image_y; + + map->stride = mt->region->pitch; + map->ptr = base + y * map->stride + x * mt->cpp; + } + + DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__, + map->x, map->y, map->w, map->h, + mt, _mesa_get_format_name(mt->format), + x, y, map->ptr, map->stride); +} + +static void +intel_miptree_unmap_gtt(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, + unsigned int slice) +{ + intel_miptree_unmap_raw(intel, mt); +} + +static void +intel_miptree_map_blit(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, unsigned int slice) +{ + map->mt = intel_miptree_create(intel, GL_TEXTURE_2D, mt->format, + 0, 0, + map->w, map->h, 1, + false, 0, + INTEL_MIPTREE_TILING_NONE); + if (!map->mt) { + fprintf(stderr, "Failed to allocate blit temporary\n"); + goto fail; + } + map->stride = map->mt->region->pitch; + + if (!intel_miptree_blit(intel, + mt, level, slice, + map->x, map->y, false, + map->mt, 0, 0, + 0, 0, false, + map->w, map->h, GL_COPY)) { + fprintf(stderr, "Failed to blit\n"); + goto fail; + } + + intel_batchbuffer_flush(intel); + map->ptr = intel_miptree_map_raw(intel, map->mt); + + DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__, + map->x, map->y, map->w, map->h, + mt, _mesa_get_format_name(mt->format), + level, slice, map->ptr, map->stride); + + return; + +fail: + intel_miptree_release(&map->mt); + map->ptr = NULL; + map->stride = 0; +} + +static void +intel_miptree_unmap_blit(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, + unsigned int slice) +{ + struct gl_context *ctx = &intel->ctx; + + intel_miptree_unmap_raw(intel, map->mt); + + if (map->mode & GL_MAP_WRITE_BIT) { + bool ok = intel_miptree_blit(intel, + map->mt, 0, 0, + 0, 0, 
false, + mt, level, slice, + map->x, map->y, false, + map->w, map->h, GL_COPY); + WARN_ONCE(!ok, "Failed to blit from linear temporary mapping"); + } + + intel_miptree_release(&map->mt); +} + +static void +intel_miptree_map_s8(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, unsigned int slice) +{ + map->stride = map->w; + map->buffer = map->ptr = malloc(map->stride * map->h); + if (!map->buffer) + return; + + /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no + * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless + * invalidate is set, since we'll be writing the whole rectangle from our + * temporary buffer back out. + */ + if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { + uint8_t *untiled_s8_map = map->ptr; + uint8_t *tiled_s8_map = intel_miptree_map_raw(intel, mt); + unsigned int image_x, image_y; + + intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); + + for (uint32_t y = 0; y < map->h; y++) { + for (uint32_t x = 0; x < map->w; x++) { + ptrdiff_t offset = intel_offset_S8(mt->region->pitch, + x + image_x + map->x, + y + image_y + map->y, + intel->has_swizzling); + untiled_s8_map[y * map->w + x] = tiled_s8_map[offset]; + } + } + + intel_miptree_unmap_raw(intel, mt); + + DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __FUNCTION__, + map->x, map->y, map->w, map->h, + mt, map->x + image_x, map->y + image_y, map->ptr, map->stride); + } else { + DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__, + map->x, map->y, map->w, map->h, + mt, map->ptr, map->stride); + } +} + +static void +intel_miptree_unmap_s8(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, + unsigned int slice) +{ + if (map->mode & GL_MAP_WRITE_BIT) { + unsigned int image_x, image_y; + uint8_t *untiled_s8_map = map->ptr; + uint8_t *tiled_s8_map = intel_miptree_map_raw(intel, mt); + + 
intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); + + for (uint32_t y = 0; y < map->h; y++) { + for (uint32_t x = 0; x < map->w; x++) { + ptrdiff_t offset = intel_offset_S8(mt->region->pitch, + x + map->x, + y + map->y, + intel->has_swizzling); + tiled_s8_map[offset] = untiled_s8_map[y * map->w + x]; + } + } + + intel_miptree_unmap_raw(intel, mt); + } + + free(map->buffer); +} + +static void +intel_miptree_map_etc(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, + unsigned int slice) +{ + assert(mt->etc_format != MESA_FORMAT_NONE); + if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) { + assert(mt->format == MESA_FORMAT_RGBX8888_REV); + } + + assert(map->mode & GL_MAP_WRITE_BIT); + assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT); + + map->stride = _mesa_format_row_stride(mt->etc_format, map->w); + map->buffer = malloc(_mesa_format_image_size(mt->etc_format, + map->w, map->h, 1)); + map->ptr = map->buffer; +} + +static void +intel_miptree_unmap_etc(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, + unsigned int slice) +{ + uint32_t image_x; + uint32_t image_y; + intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); + + image_x += map->x; + image_y += map->y; + + uint8_t *dst = intel_miptree_map_raw(intel, mt) + + image_y * mt->region->pitch + + image_x * mt->region->cpp; + + if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) + _mesa_etc1_unpack_rgba8888(dst, mt->region->pitch, + map->ptr, map->stride, + map->w, map->h); + else + _mesa_unpack_etc2_format(dst, mt->region->pitch, + map->ptr, map->stride, + map->w, map->h, mt->etc_format); + + intel_miptree_unmap_raw(intel, mt); + free(map->buffer); +} + +/** + * Mapping function for packed depth/stencil miptrees backed by real separate + * miptrees for depth and stencil. 
+ * + * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer + * separate from the depth buffer. Yet at the GL API level, we have to expose + * packed depth/stencil textures and FBO attachments, and Mesa core expects to + * be able to map that memory for texture storage and glReadPixels-type + * operations. We give Mesa core that access by mallocing a temporary and + * copying the data between the actual backing store and the temporary. + */ +static void +intel_miptree_map_depthstencil(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, unsigned int slice) +{ + struct intel_mipmap_tree *z_mt = mt; + struct intel_mipmap_tree *s_mt = mt->stencil_mt; + bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z32_FLOAT; + int packed_bpp = map_z32f_x24s8 ? 8 : 4; + + map->stride = map->w * packed_bpp; + map->buffer = map->ptr = malloc(map->stride * map->h); + if (!map->buffer) + return; + + /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no + * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless + * invalidate is set, since we'll be writing the whole rectangle from our + * temporary buffer back out. 
+ */ + if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { + uint32_t *packed_map = map->ptr; + uint8_t *s_map = intel_miptree_map_raw(intel, s_mt); + uint32_t *z_map = intel_miptree_map_raw(intel, z_mt); + unsigned int s_image_x, s_image_y; + unsigned int z_image_x, z_image_y; + + intel_miptree_get_image_offset(s_mt, level, slice, + &s_image_x, &s_image_y); + intel_miptree_get_image_offset(z_mt, level, slice, + &z_image_x, &z_image_y); + + for (uint32_t y = 0; y < map->h; y++) { + for (uint32_t x = 0; x < map->w; x++) { + int map_x = map->x + x, map_y = map->y + y; + ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch, + map_x + s_image_x, + map_y + s_image_y, + intel->has_swizzling); + ptrdiff_t z_offset = ((map_y + z_image_y) * + (z_mt->region->pitch / 4) + + (map_x + z_image_x)); + uint8_t s = s_map[s_offset]; + uint32_t z = z_map[z_offset]; + + if (map_z32f_x24s8) { + packed_map[(y * map->w + x) * 2 + 0] = z; + packed_map[(y * map->w + x) * 2 + 1] = s; + } else { + packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff); + } + } + } + + intel_miptree_unmap_raw(intel, s_mt); + intel_miptree_unmap_raw(intel, z_mt); + + DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n", + __FUNCTION__, + map->x, map->y, map->w, map->h, + z_mt, map->x + z_image_x, map->y + z_image_y, + s_mt, map->x + s_image_x, map->y + s_image_y, + map->ptr, map->stride); + } else { + DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__, + map->x, map->y, map->w, map->h, + mt, map->ptr, map->stride); + } +} + +static void +intel_miptree_unmap_depthstencil(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, + unsigned int slice) +{ + struct intel_mipmap_tree *z_mt = mt; + struct intel_mipmap_tree *s_mt = mt->stencil_mt; + bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z32_FLOAT; + + if (map->mode & GL_MAP_WRITE_BIT) { + uint32_t *packed_map = map->ptr; + uint8_t *s_map = intel_miptree_map_raw(intel, s_mt); + 
uint32_t *z_map = intel_miptree_map_raw(intel, z_mt); + unsigned int s_image_x, s_image_y; + unsigned int z_image_x, z_image_y; + + intel_miptree_get_image_offset(s_mt, level, slice, + &s_image_x, &s_image_y); + intel_miptree_get_image_offset(z_mt, level, slice, + &z_image_x, &z_image_y); + + for (uint32_t y = 0; y < map->h; y++) { + for (uint32_t x = 0; x < map->w; x++) { + ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch, + x + s_image_x + map->x, + y + s_image_y + map->y, + intel->has_swizzling); + ptrdiff_t z_offset = ((y + z_image_y) * + (z_mt->region->pitch / 4) + + (x + z_image_x)); + + if (map_z32f_x24s8) { + z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0]; + s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1]; + } else { + uint32_t packed = packed_map[y * map->w + x]; + s_map[s_offset] = packed >> 24; + z_map[z_offset] = packed; + } + } + } + + intel_miptree_unmap_raw(intel, s_mt); + intel_miptree_unmap_raw(intel, z_mt); + + DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n", + __FUNCTION__, + map->x, map->y, map->w, map->h, + z_mt, _mesa_get_format_name(z_mt->format), + map->x + z_image_x, map->y + z_image_y, + s_mt, map->x + s_image_x, map->y + s_image_y, + map->ptr, map->stride); + } + + free(map->buffer); +} + +/** + * Create and attach a map to the miptree at (level, slice). Return the + * attached map. + */ +static struct intel_miptree_map* +intel_miptree_attach_map(struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice, + unsigned int x, + unsigned int y, + unsigned int w, + unsigned int h, + GLbitfield mode) +{ + struct intel_miptree_map *map = calloc(1, sizeof(*map)); + + if (!map) + return NULL; + + assert(mt->level[level].slice[slice].map == NULL); + mt->level[level].slice[slice].map = map; + + map->mode = mode; + map->x = x; + map->y = y; + map->w = w; + map->h = h; + + return map; +} + +/** + * Release the map at (level, slice). 
+ */ +static void +intel_miptree_release_map(struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice) +{ + struct intel_miptree_map **map; + + map = &mt->level[level].slice[slice].map; + free(*map); + *map = NULL; +} + +static void +intel_miptree_map_singlesample(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice, + unsigned int x, + unsigned int y, + unsigned int w, + unsigned int h, + GLbitfield mode, + void **out_ptr, + int *out_stride) +{ + struct intel_miptree_map *map; + + assert(mt->num_samples <= 1); + + map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode); + if (!map){ + *out_ptr = NULL; + *out_stride = 0; + return; + } + + intel_miptree_slice_resolve_depth(intel, mt, level, slice); + if (map->mode & GL_MAP_WRITE_BIT) { + intel_miptree_slice_set_needs_hiz_resolve(mt, level, slice); + } + + if (mt->format == MESA_FORMAT_S8) { + intel_miptree_map_s8(intel, mt, map, level, slice); + } else if (mt->etc_format != MESA_FORMAT_NONE && + !(mode & BRW_MAP_DIRECT_BIT)) { + intel_miptree_map_etc(intel, mt, map, level, slice); + } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) { + intel_miptree_map_depthstencil(intel, mt, map, level, slice); + } + /* See intel_miptree_blit() for details on the 32k pitch limit. 
*/ + else if (intel->has_llc && + !(mode & GL_MAP_WRITE_BIT) && + !mt->compressed && + (mt->region->tiling == I915_TILING_X || + (intel->gen >= 6 && mt->region->tiling == I915_TILING_Y)) && + mt->region->pitch < 32768) { + intel_miptree_map_blit(intel, mt, map, level, slice); + } else if (mt->region->tiling != I915_TILING_NONE && + mt->region->bo->size >= intel->max_gtt_map_object_size) { + assert(mt->region->pitch < 32768); + intel_miptree_map_blit(intel, mt, map, level, slice); + } else { + intel_miptree_map_gtt(intel, mt, map, level, slice); + } + + *out_ptr = map->ptr; + *out_stride = map->stride; + + if (map->ptr == NULL) + intel_miptree_release_map(mt, level, slice); +} + +static void +intel_miptree_unmap_singlesample(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice) +{ + struct intel_miptree_map *map = mt->level[level].slice[slice].map; + + assert(mt->num_samples <= 1); + + if (!map) + return; + + DBG("%s: mt %p (%s) level %d slice %d\n", __FUNCTION__, + mt, _mesa_get_format_name(mt->format), level, slice); + + if (mt->format == MESA_FORMAT_S8) { + intel_miptree_unmap_s8(intel, mt, map, level, slice); + } else if (mt->etc_format != MESA_FORMAT_NONE && + !(map->mode & BRW_MAP_DIRECT_BIT)) { + intel_miptree_unmap_etc(intel, mt, map, level, slice); + } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) { + intel_miptree_unmap_depthstencil(intel, mt, map, level, slice); + } else if (map->mt) { + intel_miptree_unmap_blit(intel, mt, map, level, slice); + } else { + intel_miptree_unmap_gtt(intel, mt, map, level, slice); + } + + intel_miptree_release_map(mt, level, slice); +} + +static void +intel_miptree_map_multisample(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice, + unsigned int x, + unsigned int y, + unsigned int w, + unsigned int h, + GLbitfield mode, + void **out_ptr, + int *out_stride) +{ + struct intel_miptree_map *map; + + 
assert(mt->num_samples > 1); + + /* Only flat, renderbuffer-like miptrees are supported. */ + if (mt->target != GL_TEXTURE_2D || + mt->first_level != 0 || + mt->last_level != 0) { + _mesa_problem(&intel->ctx, "attempt to map a multisample miptree for " + "which (target, first_level, last_level != " + "(GL_TEXTURE_2D, 0, 0)"); + goto fail; + } + + map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode); + if (!map) + goto fail; + + if (!mt->singlesample_mt) { + mt->singlesample_mt = + intel_miptree_create_for_renderbuffer(intel, + mt->format, + mt->logical_width0, + mt->logical_height0, + 0 /*num_samples*/); + if (!mt->singlesample_mt) + goto fail; + + map->singlesample_mt_is_tmp = true; + mt->need_downsample = true; + } + + intel_miptree_downsample(intel, mt); + intel_miptree_map_singlesample(intel, mt->singlesample_mt, + level, slice, + x, y, w, h, + mode, + out_ptr, out_stride); + return; + +fail: + intel_miptree_release_map(mt, level, slice); + *out_ptr = NULL; + *out_stride = 0; +} + +static void +intel_miptree_unmap_multisample(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice) +{ + struct intel_miptree_map *map = mt->level[level].slice[slice].map; + + assert(mt->num_samples > 1); + + if (!map) + return; + + intel_miptree_unmap_singlesample(intel, mt->singlesample_mt, level, slice); + + mt->need_downsample = false; + if (map->mode & GL_MAP_WRITE_BIT) + intel_miptree_upsample(intel, mt); + + if (map->singlesample_mt_is_tmp) + intel_miptree_release(&mt->singlesample_mt); + + intel_miptree_release_map(mt, level, slice); +} + +void +intel_miptree_map(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice, + unsigned int x, + unsigned int y, + unsigned int w, + unsigned int h, + GLbitfield mode, + void **out_ptr, + int *out_stride) +{ + if (mt->num_samples <= 1) + intel_miptree_map_singlesample(intel, mt, + level, slice, + x, y, w, h, + mode, + out_ptr, 
out_stride); + else + intel_miptree_map_multisample(intel, mt, + level, slice, + x, y, w, h, + mode, + out_ptr, out_stride); +} + +void +intel_miptree_unmap(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice) +{ + if (mt->num_samples <= 1) + intel_miptree_unmap_singlesample(intel, mt, level, slice); + else + intel_miptree_unmap_multisample(intel, mt, level, slice); +} diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h new file mode 100644 index 00000000000..6dab092c8f3 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -0,0 +1,788 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef INTEL_MIPMAP_TREE_H +#define INTEL_MIPMAP_TREE_H + +#include <assert.h> + +#include "intel_regions.h" +#include "intel_resolve_map.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* A layer on top of the intel_regions code which adds: + * + * - Code to size and layout a region to hold a set of mipmaps. + * - Query to determine if a new image fits in an existing tree. + * - More refcounting + * - maybe able to remove refcounting from intel_region? + * - ? + * + * The fixed mipmap layout of intel hardware where one offset + * specifies the position of all images in a mipmap hierarchy + * complicates the implementation of GL texture image commands, + * compared to hardware where each image is specified with an + * independent offset. + * + * In an ideal world, each texture object would be associated with a + * single bufmgr buffer or 2d intel_region, and all the images within + * the texture object would slot into the tree as they arrive. The + * reality can be a little messier, as images can arrive from the user + * with sizes that don't fit in the existing tree, or in an order + * where the tree layout cannot be guessed immediately. + * + * This structure encodes an idealized mipmap tree. The GL image + * commands build these where possible, otherwise store the images in + * temporary system buffers. + */ + +struct intel_resolve_map; +struct intel_texture_image; + +/** + * When calling intel_miptree_map() on an ETC-transcoded-to-RGB miptree or a + * depthstencil-split-to-separate-stencil miptree, we'll normally make a + * temporary and recreate the kind of data requested by Mesa core, since we're + * satisfying some glGetTexImage() request or something. + * + * However, occasionally you want to actually map the miptree's current data + * without transcoding back. This flag to intel_miptree_map() gets you that. 
+ */ +#define BRW_MAP_DIRECT_BIT 0x80000000 + +struct intel_miptree_map { + /** Bitfield of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT, GL_MAP_INVALIDATE_BIT */ + GLbitfield mode; + /** Region of interest for the map. */ + int x, y, w, h; + /** Possibly malloced temporary buffer for the mapping. */ + void *buffer; + /** Possible pointer to a temporary linear miptree for the mapping. */ + struct intel_mipmap_tree *mt; + /** Pointer to the start of (map_x, map_y) returned by the mapping. */ + void *ptr; + /** Stride of the mapping. */ + int stride; + + /** + * intel_mipmap_tree::singlesample_mt is temporary storage that persists + * only for the duration of the map. + */ + bool singlesample_mt_is_tmp; +}; + +/** + * Describes the location of each texture image within a texture region. + */ +struct intel_mipmap_level +{ + /** Offset to this miptree level, used in computing x_offset. */ + GLuint level_x; + /** Offset to this miptree level, used in computing y_offset. */ + GLuint level_y; + GLuint width; + GLuint height; + + /** + * \brief Number of 2D slices in this miplevel. + * + * The exact semantics of depth varies according to the texture target: + * - For GL_TEXTURE_CUBE_MAP, depth is 6. + * - For GL_TEXTURE_2D_ARRAY, depth is the number of array slices. It is + * identical for all miplevels in the texture. + * - For GL_TEXTURE_3D, it is the texture's depth at this miplevel. Its + * value, like width and height, varies with miplevel. + * - For other texture types, depth is 1. + */ + GLuint depth; + + /** + * \brief List of 2D images in this mipmap level. + * + * This may be a list of cube faces, array slices in 2D array texture, or + * layers in a 3D texture. The list's length is \c depth. + */ + struct intel_mipmap_slice { + /** + * \name Offset to slice + * \{ + * + * Hardware formats are so diverse that that there is no unified way to + * compute the slice offsets, so we store them in this table. 
+ * + * The (x, y) offset to slice \c s at level \c l relative the miptrees + * base address is + * \code + * x = mt->level[l].slice[s].x_offset + * y = mt->level[l].slice[s].y_offset + */ + GLuint x_offset; + GLuint y_offset; + /** \} */ + + /** + * Mapping information. Persistent for the duration of + * intel_miptree_map/unmap on this slice. + */ + struct intel_miptree_map *map; + + /** + * \brief Is HiZ enabled for this slice? + * + * If \c mt->level[l].slice[s].has_hiz is set, then (1) \c mt->hiz_mt + * has been allocated and (2) the HiZ memory corresponding to this slice + * resides at \c mt->hiz_mt->level[l].slice[s]. + */ + bool has_hiz; + } *slice; +}; + +/** + * Enum for keeping track of the different MSAA layouts supported by Gen7. + */ +enum intel_msaa_layout +{ + /** + * Ordinary surface with no MSAA. + */ + INTEL_MSAA_LAYOUT_NONE, + + /** + * Interleaved Multisample Surface. The additional samples are + * accommodated by scaling up the width and the height of the surface so + * that all the samples corresponding to a pixel are located at nearby + * memory locations. + */ + INTEL_MSAA_LAYOUT_IMS, + + /** + * Uncompressed Multisample Surface. The surface is stored as a 2D array, + * with array slice n containing all pixel data for sample n. + */ + INTEL_MSAA_LAYOUT_UMS, + + /** + * Compressed Multisample Surface. The surface is stored as in + * INTEL_MSAA_LAYOUT_UMS, but there is an additional buffer called the MCS + * (Multisample Control Surface) buffer. Each pixel in the MCS buffer + * indicates the mapping from sample number to array slice. This allows + * the common case (where all samples constituting a pixel have the same + * color value) to be stored efficiently by just using a single array + * slice. + */ + INTEL_MSAA_LAYOUT_CMS, +}; + + +#ifndef I915 +/** + * Enum for keeping track of the state of an MCS buffer associated with a + * miptree. This determines when fast clear related operations are needed. 
+ * + * Fast clear works by deferring the memory writes that would be used to clear + * the buffer, so that instead of performing them at the time of the clear + * operation, the hardware automatically performs them at the time that the + * buffer is later accessed for rendering. The MCS buffer keeps track of + * which regions of the buffer still have pending clear writes. + * + * This enum keeps track of the driver's knowledge of the state of the MCS + * buffer. + * + * MCS buffers only exist on Gen7+. + */ +enum intel_mcs_state +{ + /** + * There is no MCS buffer for this miptree, and one should never be + * allocated. + */ + INTEL_MCS_STATE_NONE, + + /** + * An MCS buffer exists for this miptree, and it is used for MSAA purposes. + */ + INTEL_MCS_STATE_MSAA, + + /** + * No deferred clears are pending for this miptree, and the contents of the + * color buffer are entirely correct. An MCS buffer may or may not exist + * for this miptree. If it does exist, it is entirely in the "no deferred + * clears pending" state. If it does not exist, it will be created the + * first time a fast color clear is executed. + * + * In this state, the color buffer can be used for purposes other than + * rendering without needing a render target resolve. + */ + INTEL_MCS_STATE_RESOLVED, + + /** + * An MCS buffer exists for this miptree, and deferred clears are pending + * for some regions of the color buffer, as indicated by the MCS buffer. + * The contents of the color buffer are only correct for the regions where + * the MCS buffer doesn't indicate a deferred clear. + * + * In this state, a render target resolve must be performed before the + * color buffer can be used for purposes other than rendering. + */ + INTEL_MCS_STATE_UNRESOLVED, + + /** + * An MCS buffer exists for this miptree, and deferred clears are pending + * for the entire color buffer, and the contents of the MCS buffer reflect + * this. The contents of the color buffer are undefined. 
+ * + * In this state, a render target resolve must be performed before the + * color buffer can be used for purposes other than rendering. + * + * If the client attempts to clear a buffer which is already in this state, + * the clear can be safely skipped, since the buffer is already clear. + */ + INTEL_MCS_STATE_CLEAR, +}; +#endif + +/** + * Description of one miptree: its format and dimensions, the per-level image + * offset tables, and the region that holds the data. Instances are + * reference counted (see refcount). + */ +struct intel_mipmap_tree +{ + /* Effectively the key: + */ + GLenum target; + + /** + * Generally, this is just the same as the gl_texture_image->TexFormat or + * gl_renderbuffer->Format. + * + * However, for textures and renderbuffers with packed depth/stencil formats + * on hardware where we want or need to use separate stencil, there will be + * two miptrees for storing the data. If the depthstencil texture or rb is + * MESA_FORMAT_Z32_FLOAT_X24S8, then mt->format will be + * MESA_FORMAT_Z32_FLOAT, otherwise for MESA_FORMAT_S8_Z24 objects it will be + * MESA_FORMAT_X8_Z24. + * + * For ETC1/ETC2 textures, this is one of the uncompressed mesa texture + * formats if the hardware lacks support for ETC1/ETC2. See @ref wraps_etc. + */ + gl_format format; + + /** This variable stores the value of ETC compressed texture format */ + gl_format etc_format; + + /** + * The X offset of each image in the miptree must be aligned to this. See + * the "Alignment Unit Size" section of the BSpec. + */ + unsigned int align_w; + unsigned int align_h; /**< \see align_w */ + + /** + * Lowest and highest mipmap level of this miptree. Both bounds are + * inclusive; see intel_miptree_check_level_layer(). + */ + GLuint first_level; + GLuint last_level; + + /** + * Level zero image dimensions. These dimensions correspond to the + * physical layout of data in memory. Accordingly, they account for the + * extra width, height, and/or depth that must be allocated in order to + * accommodate multisample formats, and they account for the extra factor + * of 6 in depth that must be allocated in order to accommodate cubemap + * textures. 
+ */ + GLuint physical_width0, physical_height0, physical_depth0; + + /* NOTE(review): cpp is presumably bytes ("chars") per pixel -- confirm. */ + GLuint cpp; + GLuint num_samples; + bool compressed; + + /** + * Level zero image dimensions. These dimensions correspond to the + * logical width, height, and depth of the region as seen by client code. + * Accordingly, they do not account for the extra width, height, and/or + * depth that must be allocated in order to accommodate multisample + * formats, nor do they account for the extra factor of 6 in depth that + * must be allocated in order to accommodate cubemap textures. + */ + uint32_t logical_width0, logical_height0, logical_depth0; + + /** + * For 1D array, 2D array, cube, and 2D multisampled surfaces on Gen7: true + * if the surface only contains LOD 0, and hence no space is reserved for + * LODs other than 0 in between array slices. + * + * Corresponds to the surface_array_spacing bit in gen7_surface_state. + */ + bool array_spacing_lod0; + + /** + * MSAA layout used by this buffer. + */ + enum intel_msaa_layout msaa_layout; + + /* Derived from the above: + */ + GLuint total_width; + GLuint total_height; + + /* The 3DSTATE_CLEAR_PARAMS value associated with the last depth clear to + * this depth mipmap tree, if any. + */ + uint32_t depth_clear_value; + + /* Includes image offset tables: + */ + struct intel_mipmap_level level[MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct intel_region *region; + + /* Offset into region bo where miptree starts: + */ + uint32_t offset; + + /** + * \brief Singlesample miptree. + * + * This is used under two cases. + * + * --- Case 1: As persistent singlesample storage for multisample window + * system front and back buffers --- + * + * Suppose that the window system FBO was created with a multisample + * config. Let `back_irb` be the `intel_renderbuffer` for the FBO's back + * buffer. Then `back_irb` contains two miptrees: a parent multisample + * miptree (back_irb->mt) and a child singlesample miptree + * (back_irb->mt->singlesample_mt). 
The DRM buffer shared with DRI2 + * belongs to `back_irb->mt->singlesample_mt` and contains singlesample + * data. The singlesample miptree is created at the same time as and + * persists for the lifetime of its parent multisample miptree. + * + * When access to the singlesample data is needed, such as at + * eglSwapBuffers and glReadPixels, an automatic downsample occurs from + * `back_irb->mt` to `back_irb->mt->singlesample_mt` when necessary. + * + * This description of the back buffer applies analogously to the front + * buffer. + * + * + * --- Case 2: As temporary singlesample storage for mapping multisample + * miptrees --- + * + * Suppose the intel_miptree_map is called on a multisample miptree, `mt`, + * for which case 1 does not apply (that is, `mt` does not belong to + * a front or back buffer). Then `mt->singlesample_mt` is null at the + * start of the call. intel_miptree_map will create a temporary + * singlesample miptree, store it at `mt->singlesample_mt`, downsample from + * `mt` to `mt->singlesample_mt` if necessary, then map + * `mt->singlesample_mt`. The temporary miptree is later deleted during + * intel_miptree_unmap. + */ + struct intel_mipmap_tree *singlesample_mt; + + /** + * \brief A downsample is needed from this miptree to singlesample_mt. + */ + bool need_downsample; + + /** + * \brief HiZ miptree + * + * The hiz miptree contains the miptree's hiz buffer. To allocate the hiz + * miptree, use intel_miptree_alloc_hiz(). + * + * To determine if hiz is enabled, do not check this pointer. Instead, use + * intel_miptree_slice_has_hiz(). + */ + struct intel_mipmap_tree *hiz_mt; + + /** + * \brief Map of miptree slices to needed resolves. + * + * This is used only when the miptree has a child HiZ miptree. + * + * Let \c mt be a depth miptree with HiZ enabled. Then the resolve map is + * \c mt->hiz_map. The resolve map of the child HiZ miptree, \c + * mt->hiz_mt->hiz_map, is unused. 
+ */ + struct intel_resolve_map hiz_map; + + /** + * \brief Stencil miptree for depthstencil textures. + * + * This miptree is used for depthstencil textures and renderbuffers that + * require separate stencil. It always has the true copy of the stencil + * bits, regardless of mt->format. + * + * \see intel_miptree_map_depthstencil() + * \see intel_miptree_unmap_depthstencil() + */ + struct intel_mipmap_tree *stencil_mt; + +#ifndef I915 + /** + * \brief MCS miptree. + * + * This miptree contains the "multisample control surface", which stores + * the necessary information to implement compressed MSAA + * (INTEL_MSAA_FORMAT_CMS) and "fast color clear" behaviour on Gen7+. + * + * NULL if no MCS miptree is in use for this surface. + */ + struct intel_mipmap_tree *mcs_mt; + + /** + * MCS state for this buffer. + */ + enum intel_mcs_state mcs_state; +#endif + + /** + * The SURFACE_STATE bits associated with the last fast color clear to this + * color mipmap tree, if any. + * + * This value will only ever contain ones in bits 28-31, so it is safe to + * OR into dword 7 of SURFACE_STATE. 
+ */ + uint32_t fast_clear_color_value; + + /* These are also refcounted: + */ + GLuint refcount; +}; + +enum intel_miptree_tiling_mode { + INTEL_MIPTREE_TILING_ANY, + INTEL_MIPTREE_TILING_Y, + INTEL_MIPTREE_TILING_NONE, +}; + +bool +intel_is_non_msrt_mcs_buffer_supported(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +void +intel_get_non_msrt_mcs_alignment(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned *width_px, unsigned *height); + +bool +intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel, + GLenum target, + gl_format format, + GLuint first_level, + GLuint last_level, + GLuint width0, + GLuint height0, + GLuint depth0, + bool expect_accelerated_upload, + GLuint num_samples, + enum intel_miptree_tiling_mode); + +struct intel_mipmap_tree * +intel_miptree_create_layout(struct intel_context *intel, + GLenum target, + gl_format format, + GLuint first_level, + GLuint last_level, + GLuint width0, + GLuint height0, + GLuint depth0, + bool for_bo, + GLuint num_samples); + +struct intel_mipmap_tree * +intel_miptree_create_for_bo(struct intel_context *intel, + drm_intel_bo *bo, + gl_format format, + uint32_t offset, + uint32_t width, + uint32_t height, + int pitch, + uint32_t tiling); + +struct intel_mipmap_tree* +intel_miptree_create_for_dri2_buffer(struct intel_context *intel, + unsigned dri_attachment, + gl_format format, + uint32_t num_samples, + struct intel_region *region); + +/** + * Create a miptree appropriate as the storage for a non-texture renderbuffer. + * The miptree has the following properties: + * - The target is GL_TEXTURE_2D. + * - There are no levels other than the base level 0. + * - Depth is 1. 
+ */ +struct intel_mipmap_tree* +intel_miptree_create_for_renderbuffer(struct intel_context *intel, + gl_format format, + uint32_t width, + uint32_t height, + uint32_t num_samples); + +/** + * \brief Assert that the level and layer are valid for the miptree. + * + * Checks that \c level lies within [mt->first_level, mt->last_level] and that + * \c layer is below mt->level[level].depth. The checks are plain assert() + * calls, so they vanish when NDEBUG is defined. + */ +static inline void +intel_miptree_check_level_layer(struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + assert(level >= mt->first_level); + assert(level <= mt->last_level); + assert(layer < mt->level[level].depth); +} + +int intel_miptree_pitch_align (struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t tiling, + int pitch); + +void intel_miptree_reference(struct intel_mipmap_tree **dst, + struct intel_mipmap_tree *src); + +void intel_miptree_release(struct intel_mipmap_tree **mt); + +/* Check if an image fits an existing mipmap tree layout + */ +bool intel_miptree_match_image(struct intel_mipmap_tree *mt, + struct gl_texture_image *image); + +void +intel_miptree_get_image_offset(struct intel_mipmap_tree *mt, + GLuint level, GLuint slice, + GLuint *x, GLuint *y); + +void +intel_miptree_get_dimensions_for_image(struct gl_texture_image *image, + int *width, int *height, int *depth); + +uint32_t +intel_miptree_get_tile_offsets(struct intel_mipmap_tree *mt, + GLuint level, GLuint slice, + uint32_t *tile_x, + uint32_t *tile_y); + +void intel_miptree_set_level_info(struct intel_mipmap_tree *mt, + GLuint level, + GLuint x, GLuint y, + GLuint w, GLuint h, GLuint d); + +void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt, + GLuint level, + GLuint img, GLuint x, GLuint y); + +void +intel_miptree_copy_teximage(struct intel_context *intel, + struct intel_texture_image *intelImage, + struct intel_mipmap_tree *dst_mt, bool invalidate); + +/** + * Copy the stencil data from \c mt->stencil_mt->region to \c mt->region for + * the given miptree slice. 
+ * + * \see intel_mipmap_tree::stencil_mt + */ +void +intel_miptree_s8z24_scatter(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t slice); + +/** + * Copy the stencil data in \c mt->stencil_mt->region to \c mt->region for the + * given miptree slice. + * + * \see intel_mipmap_tree::stencil_mt + */ +void +intel_miptree_s8z24_gather(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer); + +bool +intel_miptree_alloc_mcs(struct intel_context *intel, + struct intel_mipmap_tree *mt, + GLuint num_samples); + +/** + * \name Miptree HiZ functions + * \{ + * + * It is safe to call the "slice_set_need_resolve" and "slice_resolve" + * functions on a miptree without HiZ. In that case, each function is a no-op. + */ + +/** + * \brief Allocate the miptree's embedded HiZ miptree. + * \see intel_mipmap_tree:hiz_mt + * \return false if allocation failed + */ + +bool +intel_miptree_alloc_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +bool +intel_miptree_slice_has_hiz(struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer); + +void +intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t depth); +void +intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t depth); + +/** + * \return false if no resolve was needed + */ +bool +intel_miptree_slice_resolve_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int depth); + +/** + * \return false if no resolve was needed + */ +bool +intel_miptree_slice_resolve_depth(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int depth); + +/** + * \return false if no resolve was needed + */ +bool +intel_miptree_all_slices_resolve_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +/** + * \return false if no resolve was needed + */ +bool 
+intel_miptree_all_slices_resolve_depth(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +/**\}*/ + +/** + * Update the fast clear state for a miptree to indicate that it has been used + * for rendering. + * + * When I915 is not defined, a miptree whose mcs_state is + * INTEL_MCS_STATE_CLEAR is moved to INTEL_MCS_STATE_UNRESOLVED; every other + * state is left untouched. When I915 is defined the function body is empty. + */ +static inline void +intel_miptree_used_for_rendering(struct intel_mipmap_tree *mt) +{ +#ifdef I915 + /* Nothing needs to be done for I915, since it doesn't support fast + * clear. + */ +#else + /* If the buffer was previously in fast clear state, change it to + * unresolved state, since it won't be guaranteed to be clear after + * rendering occurs. + */ + if (mt->mcs_state == INTEL_MCS_STATE_CLEAR) + mt->mcs_state = INTEL_MCS_STATE_UNRESOLVED; +#endif +} + +void +intel_miptree_resolve_color(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +void +intel_miptree_make_shareable(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +void +intel_miptree_downsample(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +void +intel_miptree_upsample(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +/* i915_mipmap_tree.c: + */ +void i915_miptree_layout(struct intel_mipmap_tree *mt); +void i945_miptree_layout(struct intel_mipmap_tree *mt); +void brw_miptree_layout(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +void *intel_miptree_map_raw(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +void intel_miptree_unmap_raw(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +void +intel_miptree_map(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice, + unsigned int x, + unsigned int y, + unsigned int w, + unsigned int h, + GLbitfield mode, + void **out_ptr, + int *out_stride); + +void +intel_miptree_unmap(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice); + +/** + * intel_hiz_exec() is an empty inline stub when I915 is defined; otherwise it + * is only declared here and must be defined in another translation unit. + */ +#ifdef I915 +static inline void +intel_hiz_exec(struct intel_context *intel, struct intel_mipmap_tree *mt, + 
unsigned int level, unsigned int layer, enum gen6_hiz_op op) +{ + /* Stub on i915. It would be nice if we didn't execute resolve code at all + * there. + */ +} +#else +void +intel_hiz_exec(struct intel_context *intel, struct intel_mipmap_tree *mt, + unsigned int level, unsigned int layer, enum gen6_hiz_op op); +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/mesa/drivers/dri/i965/intel_pixel.c b/src/mesa/drivers/dri/i965/intel_pixel.c index d733c5e8745..f58cb855e60 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_pixel.c +++ b/src/mesa/drivers/dri/i965/intel_pixel.c @@ -1 +1,135 @@ -../intel/intel_pixel.c
\ No newline at end of file +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "main/accum.h" +#include "main/enums.h" +#include "main/state.h" +#include "main/bufferobj.h" +#include "main/context.h" +#include "swrast/swrast.h" + +#include "intel_context.h" +#include "intel_pixel.h" +#include "intel_regions.h" + +#define FILE_DEBUG_FLAG DEBUG_PIXEL + +/** + * Simplify a blend factor for a source whose alpha is known to be one. + * + * With \c src_alpha_is_one set, GL_SRC_ALPHA is reported as GL_ONE and + * GL_ONE_MINUS_SRC_ALPHA as GL_ZERO. Any other factor, and every factor + * when the flag is false, is returned unchanged. + */ +static GLenum +effective_func(GLenum func, bool src_alpha_is_one) +{ + if (src_alpha_is_one) { + if (func == GL_SRC_ALPHA) + return GL_ONE; + if (func == GL_ONE_MINUS_SRC_ALPHA) + return GL_ZERO; + } + + return func; +} + +/** + * Check if any fragment operations are in effect which might affect + * glDraw/CopyPixels. + * + * Pending state is validated first (when ctx->NewState is set). The checks + * then run in a fixed order and the first one that fails logs its reason via + * DBG and ends the function. + * + * \param src_alpha_is_one lets blending that reduces to (GL_ONE, GL_ZERO) + * with GL_FUNC_ADD pass, see effective_func(). + * \return true if none of the checked state is active, false otherwise. + */ +bool +intel_check_blit_fragment_ops(struct gl_context * ctx, bool src_alpha_is_one) +{ + if (ctx->NewState) + _mesa_update_state(ctx); + + if (ctx->FragmentProgram._Enabled) { + DBG("fallback due to fragment program\n"); + return false; + } + + if (ctx->Color.BlendEnabled && + (effective_func(ctx->Color.Blend[0].SrcRGB, src_alpha_is_one) != GL_ONE || + effective_func(ctx->Color.Blend[0].DstRGB, src_alpha_is_one) != GL_ZERO || + ctx->Color.Blend[0].EquationRGB != GL_FUNC_ADD || + effective_func(ctx->Color.Blend[0].SrcA, src_alpha_is_one) != GL_ONE || + effective_func(ctx->Color.Blend[0].DstA, src_alpha_is_one) != GL_ZERO || + ctx->Color.Blend[0].EquationA != GL_FUNC_ADD)) { + DBG("fallback due to blend\n"); + return false; + } + + if (ctx->Texture._EnabledUnits) { + DBG("fallback due to texturing\n"); + return false; + } + + if (!(ctx->Color.ColorMask[0][0] && + ctx->Color.ColorMask[0][1] && + ctx->Color.ColorMask[0][2] && + ctx->Color.ColorMask[0][3])) { + DBG("fallback due to color masking\n"); + return false; + } + + if (ctx->Color.AlphaEnabled) { + DBG("fallback due to alpha\n"); + return false; + } + + if (ctx->Depth.Test) { + DBG("fallback due to depth test\n"); + return false; + } + + if (ctx->Fog.Enabled) { + DBG("fallback due to fog\n"); + return false; + } + + if 
(ctx->_ImageTransferState) { + DBG("fallback due to image transfer\n"); + return false; + } + + if (ctx->Stencil._Enabled) { + DBG("fallback due to image stencil\n"); + return false; + } + + if (ctx->RenderMode != GL_RENDER) { + DBG("fallback due to render mode\n"); + return false; + } + + return true; +} + +/** + * Fill in the pixel-path entries (Accum, Bitmap, CopyPixels, DrawPixels and + * ReadPixels) of the driver function table. + */ +void +intelInitPixelFuncs(struct dd_function_table *functions) +{ + functions->Accum = _mesa_accum; + functions->Bitmap = intelBitmap; + functions->CopyPixels = intelCopyPixels; + functions->DrawPixels = intelDrawPixels; + functions->ReadPixels = intelReadPixels; +} + diff --git a/src/mesa/drivers/dri/i965/intel_pixel.h b/src/mesa/drivers/dri/i965/intel_pixel.h new file mode 100644 index 00000000000..7578e6f0add --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_pixel.h @@ -0,0 +1,63 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_PIXEL_H +#define INTEL_PIXEL_H + +#include "main/mtypes.h" + +void intelInitPixelFuncs(struct dd_function_table *functions); +bool intel_check_blit_fragment_ops(struct gl_context * ctx, + bool src_alpha_is_one); + +void intelReadPixels(struct gl_context * ctx, + GLint x, GLint y, + GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *pack, + GLvoid * pixels); + +void intelDrawPixels(struct gl_context * ctx, + GLint x, GLint y, + GLsizei width, GLsizei height, + GLenum format, + GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid * pixels); + +void intelCopyPixels(struct gl_context * ctx, + GLint srcx, GLint srcy, + GLsizei width, GLsizei height, + GLint destx, GLint desty, GLenum type); + +void intelBitmap(struct gl_context * ctx, + GLint x, GLint y, + GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte * pixels); + +#endif diff --git a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c index 9085c7b0397..8c0edf2dd85 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c @@ -1 +1,363 @@ -../intel/intel_pixel_bitmap.c
\ No newline at end of file +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "main/glheader.h" +#include "main/enums.h" +#include "main/image.h" +#include "main/colormac.h" +#include "main/condrender.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "main/pbo.h" +#include "main/bufferobj.h" +#include "main/state.h" +#include "main/texobj.h" +#include "main/context.h" +#include "main/fbobject.h" +#include "swrast/swrast.h" +#include "drivers/common/meta.h" + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_batchbuffer.h" +#include "intel_blit.h" +#include "intel_fbo.h" +#include "intel_regions.h" +#include "intel_buffers.h" +#include "intel_pixel.h" +#include "intel_reg.h" + + +#define FILE_DEBUG_FLAG DEBUG_PIXEL + + +/* Unlike the other intel_pixel_* functions, the expectation here is + * that the incoming data is not in a PBO. With the XY_TEXT blit + * method, there's no benefit having it in a PBO, but we could + * implement a path based on XY_MONO_SRC_COPY_BLIT which might benefit + * PBO bitmaps. I think they are probably pretty rare though - I + * wonder if Xgl uses them? + * + * map_pbo() validates the requested PBO access, maps the whole buffer + * object for reading and returns the address of the bitmap inside that + * mapping. On failure it records GL_INVALID_OPERATION and returns NULL. + */ +static const GLubyte *map_pbo( struct gl_context *ctx, + GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap ) +{ + GLubyte *buf; + + if (!_mesa_validate_pbo_access(2, unpack, width, height, 1, + GL_COLOR_INDEX, GL_BITMAP, + INT_MAX, (const GLvoid *) bitmap)) { + _mesa_error(ctx, GL_INVALID_OPERATION,"glBitmap(invalid PBO access)"); + return NULL; + } + + buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size, + GL_MAP_READ_BIT, + unpack->BufferObj); + if (!buf) { + _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)"); + return NULL; + } + + return ADD_POINTERS(buf, bitmap); +} + +/* Test bit number 'bit' of the byte array 'src'; bit 0 is the least + * significant bit of src[0]. + */ +static bool test_bit( const GLubyte *src, GLuint bit ) +{ + return (src[bit/8] & (1<<(bit % 8))) ? 
1 : 0; +} + +/* Set bit number 'bit' of the byte array 'dest', using the same numbering + * as test_bit(). + */ +static void set_bit( GLubyte *dest, GLuint bit ) +{ + dest[bit/8] |= 1 << (bit % 8); +} + +/* Extract a rectangle's worth of data from the bitmap. Called + * per chunk of HW-sized bitmap. + * + * The w x h rectangle at (x, y) of the source bitmap is copied bit by bit + * into 'dest', which the caller must have zeroed. Source bits are read + * LSB-first when unpack->LsbFirst is set and MSB-first otherwise (the index + * is XORed with 'mask'); destination bits are always written MSB-first + * (index XOR 7). With 'invert' set the rows are visited from h-1 down to 0. + * After each row the destination bit position is rounded up to a multiple + * of 'row_align' when that is non-zero. + * + * Returns the number of set bits that were copied. + */ +static GLuint get_bitmap_rect(GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap, + GLuint x, GLuint y, + GLuint w, GLuint h, + GLubyte *dest, + GLuint row_align, + bool invert) +{ + GLuint src_offset = (x + unpack->SkipPixels) & 0x7; + GLuint mask = unpack->LsbFirst ? 0 : 7; + GLuint bit = 0; + GLint row, col; + GLint first, last; + GLint incr; + GLuint count = 0; + + DBG("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n", + __FUNCTION__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask); + + if (invert) { + first = h-1; + last = 0; + incr = -1; + } + else { + first = 0; + last = h-1; + incr = 1; + } + + /* Require that dest be pre-zero'd. + */ + for (row = first; row != (last+incr); row += incr) { + const GLubyte *rowsrc = _mesa_image_address2d(unpack, bitmap, + width, height, + GL_COLOR_INDEX, GL_BITMAP, + y + row, x); + + for (col = 0; col < w; col++, bit++) { + if (test_bit(rowsrc, (col + src_offset) ^ mask)) { + set_bit(dest, bit ^ 7); + count++; + } + } + + if (row_align) + bit = ALIGN(bit, row_align); + } + + return count; +} + +/** + * Returns the low Y value of the vertical range given, flipped according to + * whether the framebuffer is or not. + * + * For a user FBO, y is returned unchanged; otherwise the result is + * fb->Height - y - height. + */ +static INLINE int +y_flip(struct gl_framebuffer *fb, int y, int height) +{ + if (_mesa_is_user_fbo(fb)) + return y; + else + return fb->Height - y - height; +} + +/* + * Render a bitmap. 
+ */ +static bool +do_blit_bitmap( struct gl_context *ctx, + GLint dstx, GLint dsty, + GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap ) +{ + struct intel_context *intel = intel_context(ctx); + struct gl_framebuffer *fb = ctx->DrawBuffer; + struct intel_renderbuffer *irb; + GLfloat tmpColor[4]; + GLubyte ubcolor[4]; + GLuint color; + GLsizei bitmap_width = width; + GLsizei bitmap_height = height; + GLint px, py; + GLuint stipple[32]; + GLint orig_dstx = dstx; + GLint orig_dsty = dsty; + + /* Update draw buffer bounds */ + _mesa_update_state(ctx); + + if (ctx->Depth.Test) { + /* The blit path produces incorrect results when depth testing is on. + * It seems the blit Z coord is always 1.0 (the far plane) so fragments + * will likely be obscured by other, closer geometry. + */ + return false; + } + + intel_prepare_render(intel); + + if (fb->_NumColorDrawBuffers != 1) { + perf_debug("accelerated glBitmap() only supports rendering to a " + "single color buffer\n"); + return false; + } + + irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]); + + if (_mesa_is_bufferobj(unpack->BufferObj)) { + bitmap = map_pbo(ctx, width, height, unpack, bitmap); + if (bitmap == NULL) + return true; /* even though this is an error, we're done */ + } + + COPY_4V(tmpColor, ctx->Current.RasterColor); + + if (_mesa_need_secondary_color(ctx)) { + ADD_3V(tmpColor, tmpColor, ctx->Current.RasterSecondaryColor); + } + + UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[0], tmpColor[0]); + UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[1], tmpColor[1]); + UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[2], tmpColor[2]); + UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]); + + switch (irb->mt->format) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + color = PACK_COLOR_8888(ubcolor[3], ubcolor[0], ubcolor[1], ubcolor[2]); + break; + case MESA_FORMAT_RGB565: + color = PACK_COLOR_565(ubcolor[0], ubcolor[1], ubcolor[2]); + break; + default: + perf_debug("Unsupported format %s in 
accelerated glBitmap()\n", + _mesa_get_format_name(irb->mt->format)); + return false; + } + + if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F)) + return false; + + /* Clip to buffer bounds and scissor. */ + if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, + fb->_Xmax, fb->_Ymax, + &dstx, &dsty, &width, &height)) + goto out; + + dsty = y_flip(fb, dsty, height); + +#define DY 32 +#define DX 32 + + /* The blitter has no idea about fast color clears, so we need to resolve + * the miptree before we do anything. + */ + intel_miptree_resolve_color(intel, irb->mt); + + /* Chop it all into chunks that can be digested by hardware: */ + for (py = 0; py < height; py += DY) { + for (px = 0; px < width; px += DX) { + int h = MIN2(DY, height - py); + int w = MIN2(DX, width - px); + GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8; + GLenum logic_op = ctx->Color.ColorLogicOpEnabled ? + ctx->Color.LogicOp : GL_COPY; + + assert(sz <= sizeof(stipple)); + memset(stipple, 0, sz); + + /* May need to adjust this when padding has been introduced in + * sz above: + * + * Have to translate destination coordinates back into source + * coordinates. 
+ */ + int count = get_bitmap_rect(bitmap_width, bitmap_height, unpack, + bitmap, + -orig_dstx + (dstx + px), + -orig_dsty + y_flip(fb, dsty + py, h), + w, h, + (GLubyte *)stipple, + 8, + _mesa_is_winsys_fbo(fb)); + if (count == 0) + continue; + + if (!intelEmitImmediateColorExpandBlit(intel, + irb->mt->cpp, + (GLubyte *)stipple, + sz, + color, + irb->mt->region->pitch, + irb->mt->region->bo, + 0, + irb->mt->region->tiling, + dstx + px, + dsty + py, + w, h, + logic_op)) { + return false; + } + + if (ctx->Query.CurrentOcclusionObject) + ctx->Query.CurrentOcclusionObject->Result += count; + } + } +out: + + if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) + intel_batchbuffer_flush(intel); + + if (_mesa_is_bufferobj(unpack->BufferObj)) { + /* done with PBO so unmap it now */ + ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj); + } + + intel_check_front_buffer_rendering(intel); + + return true; +} + + +/* There are a large number of possible ways to implement bitmap on + * this hardware, most of them have some sort of drawback. Here are a + * few that spring to mind: + * + * Blit: + * - XY_MONO_SRC_BLT_CMD + * - use XY_SETUP_CLIP_BLT for cliprect clipping. + * - XY_TEXT_BLT + * - XY_TEXT_IMMEDIATE_BLT + * - blit per cliprect, subject to maximum immediate data size. + * - XY_COLOR_BLT + * - per pixel or run of pixels + * - XY_PIXEL_BLT + * - good for sparse bitmaps + * + * 3D engine: + * - Point per pixel + * - Translate bitmap to an alpha texture and render as a quad + * - Chop bitmap up into 32x32 squares and render w/polygon stipple. 
+ * + * This entry point honours conditional rendering, tries the blitter path + * (do_blit_bitmap) and hands the call to _mesa_meta_Bitmap when the blitter + * path declines it. + */ +void +intelBitmap(struct gl_context * ctx, + GLint x, GLint y, + GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte * pixels) +{ + /* Nothing is drawn while conditional rendering rejects the call. */ + if (_mesa_check_conditional_render(ctx)) { + /* Blitter first; meta takes over only if the blitter says no. */ + if (!do_blit_bitmap(ctx, x, y, width, height, unpack, pixels)) + _mesa_meta_Bitmap(ctx, x, y, width, height, unpack, pixels); + } +} diff --git a/src/mesa/drivers/dri/i965/intel_pixel_copy.c b/src/mesa/drivers/dri/i965/intel_pixel_copy.c index ee433605904..ba8e06f1f95 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_copy.c @@ -1 +1,210 @@ -../intel/intel_pixel_copy.c
\ No newline at end of file +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/glheader.h" +#include "main/image.h" +#include "main/state.h" +#include "main/mtypes.h" +#include "main/condrender.h" +#include "main/fbobject.h" +#include "drivers/common/meta.h" + +#include "intel_context.h" +#include "intel_buffers.h" +#include "intel_mipmap_tree.h" +#include "intel_regions.h" +#include "intel_pixel.h" +#include "intel_fbo.h" +#include "intel_blit.h" + +#define FILE_DEBUG_FLAG DEBUG_PIXEL + +/** + * CopyPixels with the blitter. Don't support zooming, pixel transfer, etc. 
+ * + * Returns true when the copy needs no further handling (it was blitted, or + * clipping left nothing to copy) and false when the caller must fall back to + * another path. + */ +static bool +do_blit_copypixels(struct gl_context * ctx, + GLint srcx, GLint srcy, + GLsizei width, GLsizei height, + GLint dstx, GLint dsty, GLenum type) +{ + struct intel_context *intel = intel_context(ctx); + struct gl_framebuffer *fb = ctx->DrawBuffer; + struct gl_framebuffer *read_fb = ctx->ReadBuffer; + GLint orig_dstx; + GLint orig_dsty; + GLint orig_srcx; + GLint orig_srcy; + struct intel_renderbuffer *draw_irb = NULL; + struct intel_renderbuffer *read_irb = NULL; + + /* Update draw buffer bounds */ + _mesa_update_state(ctx); + + switch (type) { + case GL_COLOR: + if (fb->_NumColorDrawBuffers != 1) { + perf_debug("glCopyPixels() fallback: MRT\n"); + return false; + } + + draw_irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]); + read_irb = intel_renderbuffer(read_fb->_ColorReadBuffer); + break; + case GL_DEPTH_STENCIL_EXT: + draw_irb = intel_renderbuffer(fb->Attachment[BUFFER_DEPTH].Renderbuffer); + read_irb = + intel_renderbuffer(read_fb->Attachment[BUFFER_DEPTH].Renderbuffer); + break; + case GL_DEPTH: + perf_debug("glCopyPixels() fallback: GL_DEPTH\n"); + return false; + case GL_STENCIL: + perf_debug("glCopyPixels() fallback: GL_STENCIL\n"); + return false; + default: + perf_debug("glCopyPixels(): Unknown type\n"); + return false; + } + + if (!draw_irb) { + perf_debug("glCopyPixels() fallback: missing draw buffer\n"); + return false; + } + + if (!read_irb) { + perf_debug("glCopyPixels() fallback: missing read buffer\n"); + return false; + } + + /* The blitter only copies pixels, so any state that would alter them + * forces the fallback. + */ + if (ctx->_ImageTransferState) { + perf_debug("glCopyPixels(): Unsupported image transfer state\n"); + return false; + } + + if (ctx->Depth.Test) { + perf_debug("glCopyPixels(): Unsupported depth test state\n"); + return false; + } + + if (ctx->Stencil._Enabled) { + perf_debug("glCopyPixels(): Unsupported stencil test state\n"); + return false; + } + + if (ctx->Fog.Enabled || + ctx->Texture._EnabledUnits || + ctx->FragmentProgram._Enabled) { + perf_debug("glCopyPixels(): Unsupported fragment shader state\n"); 
+ return false; + } + + if (ctx->Color.AlphaEnabled || + ctx->Color.BlendEnabled) { + perf_debug("glCopyPixels(): Unsupported blend state\n"); + return false; + } + + if (!ctx->Color.ColorMask[0][0] || + !ctx->Color.ColorMask[0][1] || + !ctx->Color.ColorMask[0][2] || + !ctx->Color.ColorMask[0][3]) { + perf_debug("glCopyPixels(): Unsupported color mask state\n"); + return false; + } + + if (ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) { + perf_debug("glCopyPixels(): Unsupported pixel zoom\n"); + return false; + } + + intel_prepare_render(intel); + + intel_flush(&intel->ctx); + + /* Clip to destination buffer. */ + orig_dstx = dstx; + orig_dsty = dsty; + if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, + fb->_Xmax, fb->_Ymax, + &dstx, &dsty, &width, &height)) + goto out; + /* Adjust src coords for our post-clipped destination origin */ + srcx += dstx - orig_dstx; + srcy += dsty - orig_dsty; + + /* Clip to source buffer. */ + orig_srcx = srcx; + orig_srcy = srcy; + if (!_mesa_clip_to_region(0, 0, + read_fb->Width, read_fb->Height, + &srcx, &srcy, &width, &height)) + goto out; + /* Adjust dst coords for our post-clipped source origin */ + dstx += srcx - orig_srcx; + dsty += srcy - orig_srcy; + + if (!intel_miptree_blit(intel, + read_irb->mt, read_irb->mt_level, read_irb->mt_layer, + srcx, srcy, _mesa_is_winsys_fbo(read_fb), + draw_irb->mt, draw_irb->mt_level, draw_irb->mt_layer, + dstx, dsty, _mesa_is_winsys_fbo(fb), + width, height, + (ctx->Color.ColorLogicOpEnabled ? 
+ ctx->Color.LogicOp : GL_COPY))) { + DBG("%s: blit failure\n", __FUNCTION__); + return false; + } + + if (ctx->Query.CurrentOcclusionObject) + ctx->Query.CurrentOcclusionObject->Result += width * height; + +out: + intel_check_front_buffer_rendering(intel); + + DBG("%s: success\n", __FUNCTION__); + return true; +} + + +/** + * glCopyPixels driver entry point. + * + * Honours conditional rendering, tries the blitter path + * (do_blit_copypixels) and hands the call to _mesa_meta_CopyPixels when the + * blitter path declines it. + */ +void +intelCopyPixels(struct gl_context * ctx, + GLint srcx, GLint srcy, + GLsizei width, GLsizei height, + GLint destx, GLint desty, GLenum type) +{ + DBG("%s\n", __FUNCTION__); + + if (!_mesa_check_conditional_render(ctx)) + return; + + if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type)) + return; + + /* this will use swrast if needed */ + _mesa_meta_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type); +} diff --git a/src/mesa/drivers/dri/i965/intel_pixel_draw.c b/src/mesa/drivers/dri/i965/intel_pixel_draw.c index 8431a24edfc..2ec7ed8e269 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_draw.c @@ -1 +1,58 @@ -../intel/intel_pixel_draw.c
\ No newline at end of file +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portionsalloc + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "main/glheader.h" +#include "main/enums.h" +#include "main/image.h" +#include "main/mtypes.h" +#include "main/teximage.h" +#include "main/texobj.h" +#include "main/texstate.h" +#include "swrast/swrast.h" +#include "drivers/common/meta.h" + +#include "intel_context.h" +#include "intel_pixel.h" + +void +intelDrawPixels(struct gl_context * ctx, + GLint x, GLint y, + GLsizei width, GLsizei height, + GLenum format, + GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid * pixels) +{ + if (format == GL_STENCIL_INDEX) { + _swrast_DrawPixels(ctx, x, y, width, height, format, type, + unpack, pixels); + return; + } + + _mesa_meta_DrawPixels(ctx, x, y, width, height, format, type, + unpack, pixels); +} diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c index cc4589f4d42..26eb4965b8d 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -1 +1,202 @@ -../intel/intel_pixel_read.c
\ No newline at end of file +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "main/glheader.h" +#include "main/enums.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "main/fbobject.h" +#include "main/image.h" +#include "main/bufferobj.h" +#include "main/readpix.h" +#include "main/state.h" + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_blit.h" +#include "intel_buffers.h" +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" +#include "intel_regions.h" +#include "intel_pixel.h" +#include "intel_buffer_objects.h" + +#define FILE_DEBUG_FLAG DEBUG_PIXEL + +/* For many applications, the new ability to pull the source buffers + * back out of the GTT and then do the packing/conversion operations + * in software will be as much of an improvement as trying to get the + * blitter and/or texture engine to do the work. + * + * This step is gated on private backbuffers. + * + * Obviously the frontbuffer can't be pulled back, so that is either + * an argument for blit/texture readpixels, or for blitting to a + * temporary and then pulling that back. + * + * When the destination is a pbo, however, it's not clear if it is + * ever going to be pulled to main memory (though the access param + * will be a good hint). So it sounds like we do want to be able to + * choose between blit/texture implementation on the gpu and pullback + * and cpu-based copying. + * + * Unless you can magically turn client memory into a PBO for the + * duration of this call, there will be a cpu-based copying step in + * any case. 
+ */ + +static bool +do_blit_readpixels(struct gl_context * ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *pack, GLvoid * pixels) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj); + GLuint dst_offset; + drm_intel_bo *dst_buffer; + bool all; + GLint dst_x, dst_y; + GLuint dirty; + + DBG("%s\n", __FUNCTION__); + + assert(_mesa_is_bufferobj(pack->BufferObj)); + + struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + if (ctx->_ImageTransferState || + !_mesa_format_matches_format_and_type(irb->mt->format, format, type, + false)) { + DBG("%s - bad format for blit\n", __FUNCTION__); + return false; + } + + if (pack->SwapBytes || pack->LsbFirst) { + DBG("%s: bad packing params\n", __FUNCTION__); + return false; + } + + int dst_stride = _mesa_image_row_stride(pack, width, format, type); + bool dst_flip = false; + /* Mesa flips the dst_stride for pack->Invert, but we want our mt to have a + * normal dst_stride. + */ + if (pack->Invert) { + dst_stride = -dst_stride; + dst_flip = true; + } + + dst_offset = (GLintptr)pixels; + dst_offset += _mesa_image_offset(2, pack, width, height, + format, type, 0, 0, 0); + + if (!_mesa_clip_copytexsubimage(ctx, + &dst_x, &dst_y, + &x, &y, + &width, &height)) { + return true; + } + + dirty = intel->front_buffer_dirty; + intel_prepare_render(intel); + intel->front_buffer_dirty = dirty; + + all = (width * height * irb->mt->cpp == dst->Base.Size && + x == 0 && dst_offset == 0); + + dst_buffer = intel_bufferobj_buffer(intel, dst, + all ? 
INTEL_WRITE_FULL : + INTEL_WRITE_PART); + + struct intel_mipmap_tree *pbo_mt = + intel_miptree_create_for_bo(intel, + dst_buffer, + irb->mt->format, + dst_offset, + width, height, + dst_stride, I915_TILING_NONE); + + if (!intel_miptree_blit(intel, + irb->mt, irb->mt_level, irb->mt_layer, + x, y, _mesa_is_winsys_fbo(ctx->ReadBuffer), + pbo_mt, 0, 0, + 0, 0, dst_flip, + width, height, GL_COPY)) { + return false; + } + + intel_miptree_release(&pbo_mt); + + DBG("%s - DONE\n", __FUNCTION__); + + return true; +} + +void +intelReadPixels(struct gl_context * ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *pack, GLvoid * pixels) +{ + struct intel_context *intel = intel_context(ctx); + bool dirty; + + intel_flush_rendering_to_batch(ctx); + + DBG("%s\n", __FUNCTION__); + + if (_mesa_is_bufferobj(pack->BufferObj)) { + /* Using PBOs, so try the BLT based path. */ + if (do_blit_readpixels(ctx, x, y, width, height, format, type, pack, + pixels)) { + return; + } + + perf_debug("%s: fallback to CPU mapping in PBO case\n", __FUNCTION__); + } + + /* glReadPixels() wont dirty the front buffer, so reset the dirty + * flag after calling intel_prepare_render(). */ + dirty = intel->front_buffer_dirty; + intel_prepare_render(intel); + intel->front_buffer_dirty = dirty; + + /* Update Mesa state before calling _mesa_readpixels(). + * XXX this may not be needed since ReadPixels no longer uses the + * span code. + */ + + if (ctx->NewState) + _mesa_update_state(ctx); + + _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels); + + /* There's an intel_prepare_render() call in intelSpanRenderStart(). 
*/ + intel->front_buffer_dirty = dirty; +} diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h new file mode 100644 index 00000000000..dd91a1586c7 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_reg.h @@ -0,0 +1,300 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/*
 * Command opcode, state-word and register-offset definitions.
 *
 * Macro hygiene notes:
 *  - every function-like macro parenthesises its arguments, so expression
 *    arguments such as "a + 1" expand as intended;
 *  - every object-like macro that expands to an expression is wrapped in
 *    parentheses;
 *  - values that need bit 31 use an unsigned operand, because left-shifting
 *    a signed int into or past the sign bit is undefined behaviour in C.
 */

#define CMD_MI				(0x0 << 29)
#define CMD_2D				(0x2 << 29)
#define CMD_3D				(0x3 << 29)

#define MI_NOOP				(CMD_MI | 0)

#define MI_BATCH_BUFFER_END		(CMD_MI | (0xA << 23))

#define MI_FLUSH			(CMD_MI | (4 << 23))
#define FLUSH_MAP_CACHE				(1 << 0)
#define INHIBIT_FLUSH_RENDER_CACHE		(1 << 2)

#define MI_LOAD_REGISTER_IMM		(CMD_MI | (0x22 << 23))

#define MI_FLUSH_DW			(CMD_MI | (0x26 << 23) | 2)

/* Stalls command execution waiting for the given events to have occurred. */
#define MI_WAIT_FOR_EVENT               (CMD_MI | (0x3 << 23))
#define MI_WAIT_FOR_PLANE_B_FLIP        (1<<6)
#define MI_WAIT_FOR_PLANE_A_FLIP        (1<<2)

#define MI_STORE_REGISTER_MEM		(CMD_MI | (0x24 << 23))
# define MI_STORE_REGISTER_MEM_USE_GGTT		(1 << 22)

/* p189 */
#define _3DSTATE_LOAD_STATE_IMMEDIATE_1   (CMD_3D | (0x1d<<24) | (0x04<<16))
/* Selects state word n; the argument is parenthesised so that expressions
 * are added to 4 as a whole.
 */
#define I1_LOAD_S(n)                      (1<<(4+(n)))

#define _3DSTATE_DRAWRECT_INFO		(CMD_3D | (0x1d<<24) | (0x80<<16) | 0x3)

/** @{
 *
 * PIPE_CONTROL operation, a combination MI_FLUSH and register write with
 * additional flushing control.
 */
#define _3DSTATE_PIPE_CONTROL		(CMD_3D | (3 << 27) | (2 << 24))
#define PIPE_CONTROL_CS_STALL		(1 << 20)
#define PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET	(1 << 19)
#define PIPE_CONTROL_TLB_INVALIDATE	(1 << 18)
#define PIPE_CONTROL_SYNC_GFDT		(1 << 17)
#define PIPE_CONTROL_MEDIA_STATE_CLEAR	(1 << 16)
#define PIPE_CONTROL_NO_WRITE		(0 << 14)
#define PIPE_CONTROL_WRITE_IMMEDIATE	(1 << 14)
#define PIPE_CONTROL_WRITE_DEPTH_COUNT	(2 << 14)
#define PIPE_CONTROL_WRITE_TIMESTAMP	(3 << 14)
#define PIPE_CONTROL_DEPTH_STALL	(1 << 13)
#define PIPE_CONTROL_WRITE_FLUSH	(1 << 12)
#define PIPE_CONTROL_INSTRUCTION_FLUSH	(1 << 11)
#define PIPE_CONTROL_TC_FLUSH		(1 << 10) /* GM45+ only */
#define PIPE_CONTROL_ISP_DIS		(1 << 9)
#define PIPE_CONTROL_INTERRUPT_ENABLE	(1 << 8)
/* GT */
#define PIPE_CONTROL_VF_CACHE_INVALIDATE	(1 << 4)
#define PIPE_CONTROL_CONST_CACHE_INVALIDATE	(1 << 3)
#define PIPE_CONTROL_STATE_CACHE_INVALIDATE	(1 << 2)
#define PIPE_CONTROL_STALL_AT_SCOREBOARD	(1 << 1)
#define PIPE_CONTROL_DEPTH_CACHE_FLUSH	(1 << 0)
#define PIPE_CONTROL_PPGTT_WRITE	(0 << 2)
#define PIPE_CONTROL_GLOBAL_GTT_WRITE	(1 << 2)

/** @} */

/** @{
 * 915 definitions
 *
 * 915 documents say that bits 31:28 and 1 are "undefined, must be zero."
 */
#define S0_VB_OFFSET_MASK		0x0ffffffc
#define S0_AUTO_CACHE_INV_DISABLE	(1<<0)
/** @} */

/** @{
 * 830 definitions
 */
#define S0_VB_OFFSET_MASK_830		0xffffff80
#define S0_VB_PITCH_SHIFT_830		1
#define S0_VB_ENABLE_830		(1<<0)
/** @} */

#define S1_VERTEX_WIDTH_SHIFT		24
#define S1_VERTEX_WIDTH_MASK		(0x3f<<24)
#define S1_VERTEX_PITCH_SHIFT		16
#define S1_VERTEX_PITCH_MASK		(0x3f<<16)

#define TEXCOORDFMT_2D			0x0
#define TEXCOORDFMT_3D			0x1
#define TEXCOORDFMT_4D			0x2
#define TEXCOORDFMT_1D			0x3
#define TEXCOORDFMT_2D_16		0x4
#define TEXCOORDFMT_4D_16		0x5
#define TEXCOORDFMT_NOT_PRESENT		0xf
#define S2_TEXCOORD_FMT0_MASK		0xf
#define S2_TEXCOORD_FMT1_SHIFT		4
/* Places a TEXCOORDFMT_* value in the 4-bit field of the given unit. */
#define S2_TEXCOORD_FMT(unit, type)	((type)<<((unit)*4))
#define S2_TEXCOORD_NONE		(~0)
#define S2_TEX_COUNT_SHIFT_830		12
#define S2_VERTEX_1_WIDTH_SHIFT_830	0
#define S2_VERTEX_0_WIDTH_SHIFT_830	6
/* S3 not interesting */

#define S4_POINT_WIDTH_SHIFT		23
#define S4_POINT_WIDTH_MASK		(0x1ffu<<23)
#define S4_LINE_WIDTH_SHIFT		19
#define S4_LINE_WIDTH_ONE		(0x2<<19)
#define S4_LINE_WIDTH_MASK		(0xf<<19)
#define S4_FLATSHADE_ALPHA		(1<<18)
#define S4_FLATSHADE_FOG		(1<<17)
#define S4_FLATSHADE_SPECULAR		(1<<16)
#define S4_FLATSHADE_COLOR		(1<<15)
#define S4_CULLMODE_BOTH		(0<<13)
#define S4_CULLMODE_NONE		(1<<13)
#define S4_CULLMODE_CW			(2<<13)
#define S4_CULLMODE_CCW			(3<<13)
#define S4_CULLMODE_MASK		(3<<13)
#define S4_VFMT_POINT_WIDTH		(1<<12)
#define S4_VFMT_SPEC_FOG		(1<<11)
#define S4_VFMT_COLOR			(1<<10)
#define S4_VFMT_DEPTH_OFFSET		(1<<9)
#define S4_VFMT_XYZ			(1<<6)
#define S4_VFMT_XYZW			(2<<6)
#define S4_VFMT_XY			(3<<6)
#define S4_VFMT_XYW			(4<<6)
#define S4_VFMT_XYZW_MASK		(7<<6)
#define S4_FORCE_DEFAULT_DIFFUSE	(1<<5)
#define S4_FORCE_DEFAULT_SPECULAR	(1<<4)
#define S4_LOCAL_DEPTH_OFFSET_ENABLE	(1<<3)
#define S4_VFMT_FOG_PARAM		(1<<2)
#define S4_SPRITE_POINT_ENABLE		(1<<1)
#define S4_LINE_ANTIALIAS_ENABLE	(1<<0)

#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH   | \
		      S4_VFMT_SPEC_FOG      | \
		      S4_VFMT_COLOR         | \
		      S4_VFMT_DEPTH_OFFSET  | \
		      S4_VFMT_XYZW_MASK     | \
		      S4_VFMT_FOG_PARAM)


#define S5_WRITEDISABLE_ALPHA		(1u<<31)
#define S5_WRITEDISABLE_RED		(1<<30)
#define S5_WRITEDISABLE_GREEN		(1<<29)
#define S5_WRITEDISABLE_BLUE		(1<<28)
#define S5_WRITEDISABLE_MASK		(0xfu<<28)
#define S5_FORCE_DEFAULT_POINT_SIZE	(1<<27)
#define S5_LAST_PIXEL_ENABLE		(1<<26)
#define S5_GLOBAL_DEPTH_OFFSET_ENABLE	(1<<25)
#define S5_FOG_ENABLE			(1<<24)
#define S5_STENCIL_REF_SHIFT		16
#define S5_STENCIL_REF_MASK		(0xff<<16)
#define S5_STENCIL_TEST_FUNC_SHIFT	13
#define S5_STENCIL_TEST_FUNC_MASK	(0x7<<13)
#define S5_STENCIL_FAIL_SHIFT		10
#define S5_STENCIL_FAIL_MASK		(0x7<<10)
#define S5_STENCIL_PASS_Z_FAIL_SHIFT	7
#define S5_STENCIL_PASS_Z_FAIL_MASK	(0x7<<7)
#define S5_STENCIL_PASS_Z_PASS_SHIFT	4
#define S5_STENCIL_PASS_Z_PASS_MASK	(0x7<<4)
#define S5_STENCIL_WRITE_ENABLE		(1<<3)
#define S5_STENCIL_TEST_ENABLE		(1<<2)
#define S5_COLOR_DITHER_ENABLE		(1<<1)
#define S5_LOGICOP_ENABLE		(1<<0)


#define S6_ALPHA_TEST_ENABLE		(1u<<31)
#define S6_ALPHA_TEST_FUNC_SHIFT	28
#define S6_ALPHA_TEST_FUNC_MASK		(0x7<<28)
#define S6_ALPHA_REF_SHIFT		20
#define S6_ALPHA_REF_MASK		(0xff<<20)
#define S6_DEPTH_TEST_ENABLE		(1<<19)
#define S6_DEPTH_TEST_FUNC_SHIFT	16
#define S6_DEPTH_TEST_FUNC_MASK		(0x7<<16)
#define S6_CBUF_BLEND_ENABLE		(1<<15)
#define S6_CBUF_BLEND_FUNC_SHIFT	12
#define S6_CBUF_BLEND_FUNC_MASK		(0x7<<12)
#define S6_CBUF_SRC_BLEND_FACT_SHIFT	8
#define S6_CBUF_SRC_BLEND_FACT_MASK	(0xf<<8)
#define S6_CBUF_DST_BLEND_FACT_SHIFT	4
#define S6_CBUF_DST_BLEND_FACT_MASK	(0xf<<4)
#define S6_DEPTH_WRITE_ENABLE		(1<<3)
#define S6_COLOR_WRITE_ENABLE		(1<<2)
#define S6_TRISTRIP_PV_SHIFT		0
#define S6_TRISTRIP_PV_MASK		(0x3<<0)

#define S7_DEPTH_OFFSET_CONST_MASK	(~0)

/* p143 */
#define _3DSTATE_BUF_INFO_CMD	(CMD_3D | (0x1d<<24) | (0x8e<<16) | 1)
/* Dword 1 */
#define BUF_3D_ID_COLOR_BACK	(0x3<<24)
#define BUF_3D_ID_DEPTH		(0x7<<24)
#define BUF_3D_USE_FENCE	(1<<23)
#define BUF_3D_TILED_SURFACE	(1<<22)
#define BUF_3D_TILE_WALK_X	0
#define BUF_3D_TILE_WALK_Y	(1<<21)
#define BUF_3D_PITCH(x)         (((x)/4)<<2)
/* Dword 2 */
#define BUF_3D_ADDR(x)		((x) & ~0x3)

/* Primitive dispatch on 830-945 */
#define _3DPRIMITIVE			(CMD_3D | (0x1f << 24))
#define PRIM_INDIRECT			(1<<23)
#define PRIM_INLINE			(0<<23)
#define PRIM_INDIRECT_SEQUENTIAL	(0<<17)
#define PRIM_INDIRECT_ELTS		(1<<17)

#define PRIM3D_TRILIST		(0x0<<18)
#define PRIM3D_TRISTRIP		(0x1<<18)
#define PRIM3D_TRISTRIP_RVRSE	(0x2<<18)
#define PRIM3D_TRIFAN		(0x3<<18)
#define PRIM3D_POLY		(0x4<<18)
#define PRIM3D_LINELIST		(0x5<<18)
#define PRIM3D_LINESTRIP	(0x6<<18)
#define PRIM3D_RECTLIST		(0x7<<18)
#define PRIM3D_POINTLIST	(0x8<<18)
#define PRIM3D_DIB		(0x9<<18)
#define PRIM3D_MASK		(0x1f<<18)

#define XY_SETUP_BLT_CMD		(CMD_2D | (0x01 << 22))

#define XY_COLOR_BLT_CMD		(CMD_2D | (0x50 << 22))

#define XY_SRC_COPY_BLT_CMD             (CMD_2D | (0x53 << 22))

#define XY_TEXT_IMMEDIATE_BLIT_CMD	(CMD_2D | (0x31 << 22))
# define XY_TEXT_BYTE_PACKED		(1 << 16)

/* BR00 */
#define XY_BLT_WRITE_ALPHA	(1 << 21)
#define XY_BLT_WRITE_RGB	(1 << 20)
#define XY_SRC_TILED		(1 << 15)
#define XY_DST_TILED		(1 << 11)

/* BR13 */
#define BR13_8			(0x0 << 24)
#define BR13_565		(0x1 << 24)
#define BR13_8888		(0x3 << 24)

#define FENCE_LINEAR 0
#define FENCE_XMAJOR 1
#define FENCE_YMAJOR 2

/* Pipeline Statistics Counter Registers */
#define IA_VERTICES_COUNT               0x2310
#define IA_PRIMITIVES_COUNT             0x2318
#define VS_INVOCATION_COUNT             0x2320
#define HS_INVOCATION_COUNT             0x2300
#define DS_INVOCATION_COUNT             0x2308
#define GS_INVOCATION_COUNT             0x2328
#define GS_PRIMITIVES_COUNT             0x2330
#define CL_INVOCATION_COUNT             0x2338
#define CL_PRIMITIVES_COUNT             0x2340
#define PS_INVOCATION_COUNT             0x2348
#define PS_DEPTH_COUNT                  0x2350

#define SO_NUM_PRIM_STORAGE_NEEDED      0x2280
#define SO_PRIM_STORAGE_NEEDED0_IVB     0x5240
#define SO_PRIM_STORAGE_NEEDED1_IVB     0x5248
#define SO_PRIM_STORAGE_NEEDED2_IVB     0x5250
#define SO_PRIM_STORAGE_NEEDED3_IVB     0x5258

#define SO_NUM_PRIMS_WRITTEN            0x2288
#define SO_NUM_PRIMS_WRITTEN0_IVB       0x5200
#define SO_NUM_PRIMS_WRITTEN1_IVB       0x5208
#define SO_NUM_PRIMS_WRITTEN2_IVB       0x5210
#define SO_NUM_PRIMS_WRITTEN3_IVB       0x5218

#define GEN7_SO_WRITE_OFFSET(n)         (0x5280 + (n) * 4)

#define TIMESTAMP                       0x2358

#define BCS_SWCTRL                      0x22200
# define BCS_SWCTRL_SRC_Y               (1 << 0)
# define BCS_SWCTRL_DST_Y               (1 << 1)
\ No newline at end of file +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Provide additional functionality on top of bufmgr buffers: + * - 2d semantics and blit operations + * - refcounting of buffers for multiple images in a buffer. + * - refcounting of buffer mappings. + * - some logic for moving the buffers to the best memory pools for + * given operations. 
+ * + * Most of this is to make it easier to implement the fixed-layout + * mipmap tree required by intel hardware in the face of GL's + * programming interface where each image can be specifed in random + * order and it isn't clear what layout the tree should have until the + * last moment. + */ + +#include <sys/ioctl.h> +#include <errno.h> + +#include "main/hash.h" +#include "intel_context.h" +#include "intel_regions.h" +#include "intel_blit.h" +#include "intel_buffer_objects.h" +#include "intel_bufmgr.h" +#include "intel_batchbuffer.h" + +#define FILE_DEBUG_FLAG DEBUG_REGION + +/* This should be set to the maximum backtrace size desired. + * Set it to 0 to disable backtrace debugging. + */ +#define DEBUG_BACKTRACE_SIZE 0 + +#if DEBUG_BACKTRACE_SIZE == 0 +/* Use the standard debug output */ +#define _DBG(...) DBG(__VA_ARGS__) +#else +/* Use backtracing debug output */ +#define _DBG(...) {debug_backtrace(); DBG(__VA_ARGS__);} + +/* Backtracing debug support */ +#include <execinfo.h> + +static void +debug_backtrace(void) +{ + void *trace[DEBUG_BACKTRACE_SIZE]; + char **strings = NULL; + int traceSize; + register int i; + + traceSize = backtrace(trace, DEBUG_BACKTRACE_SIZE); + strings = backtrace_symbols(trace, traceSize); + if (strings == NULL) { + DBG("no backtrace:"); + return; + } + + /* Spit out all the strings with a colon separator. Ignore + * the first, since we don't really care about the call + * to debug_backtrace() itself. Skip until the final "/" in + * the trace to avoid really long lines. 
+ */ + for (i = 1; i < traceSize; i++) { + char *p = strings[i], *slash = strings[i]; + while (*p) { + if (*p++ == '/') { + slash = p; + } + } + + DBG("%s:", slash); + } + + /* Free up the memory, and we're done */ + free(strings); +} + +#endif + +static struct intel_region * +intel_region_alloc_internal(struct intel_screen *screen, + GLuint cpp, + GLuint width, GLuint height, GLuint pitch, + uint32_t tiling, drm_intel_bo *buffer) +{ + struct intel_region *region; + + region = calloc(sizeof(*region), 1); + if (region == NULL) + return region; + + region->cpp = cpp; + region->width = width; + region->height = height; + region->pitch = pitch; + region->refcount = 1; + region->bo = buffer; + region->tiling = tiling; + + _DBG("%s <-- %p\n", __FUNCTION__, region); + return region; +} + +struct intel_region * +intel_region_alloc(struct intel_screen *screen, + uint32_t tiling, + GLuint cpp, GLuint width, GLuint height, + bool expect_accelerated_upload) +{ + drm_intel_bo *buffer; + unsigned long flags = 0; + unsigned long aligned_pitch; + struct intel_region *region; + + if (expect_accelerated_upload) + flags |= BO_ALLOC_FOR_RENDER; + + buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "region", + width, height, cpp, + &tiling, &aligned_pitch, flags); + if (buffer == NULL) + return NULL; + + region = intel_region_alloc_internal(screen, cpp, width, height, + aligned_pitch, tiling, buffer); + if (region == NULL) { + drm_intel_bo_unreference(buffer); + return NULL; + } + + return region; +} + +bool +intel_region_flink(struct intel_region *region, uint32_t *name) +{ + if (region->name == 0) { + if (drm_intel_bo_flink(region->bo, ®ion->name)) + return false; + } + + *name = region->name; + + return true; +} + +struct intel_region * +intel_region_alloc_for_handle(struct intel_screen *screen, + GLuint cpp, + GLuint width, GLuint height, GLuint pitch, + GLuint handle, const char *name) +{ + struct intel_region *region; + drm_intel_bo *buffer; + int ret; + uint32_t bit_6_swizzle, 
tiling; + + buffer = intel_bo_gem_create_from_name(screen->bufmgr, name, handle); + if (buffer == NULL) + return NULL; + ret = drm_intel_bo_get_tiling(buffer, &tiling, &bit_6_swizzle); + if (ret != 0) { + fprintf(stderr, "Couldn't get tiling of buffer %d (%s): %s\n", + handle, name, strerror(-ret)); + drm_intel_bo_unreference(buffer); + return NULL; + } + + region = intel_region_alloc_internal(screen, cpp, + width, height, pitch, tiling, buffer); + if (region == NULL) { + drm_intel_bo_unreference(buffer); + return NULL; + } + + region->name = handle; + + return region; +} + +struct intel_region * +intel_region_alloc_for_fd(struct intel_screen *screen, + GLuint cpp, + GLuint width, GLuint height, GLuint pitch, + int fd, const char *name) +{ + struct intel_region *region; + drm_intel_bo *buffer; + int ret; + uint32_t bit_6_swizzle, tiling; + + buffer = drm_intel_bo_gem_create_from_prime(screen->bufmgr, + fd, height * pitch); + if (buffer == NULL) + return NULL; + ret = drm_intel_bo_get_tiling(buffer, &tiling, &bit_6_swizzle); + if (ret != 0) { + fprintf(stderr, "Couldn't get tiling of buffer (%s): %s\n", + name, strerror(-ret)); + drm_intel_bo_unreference(buffer); + return NULL; + } + + region = intel_region_alloc_internal(screen, cpp, + width, height, pitch, tiling, buffer); + if (region == NULL) { + drm_intel_bo_unreference(buffer); + return NULL; + } + + return region; +} + +void +intel_region_reference(struct intel_region **dst, struct intel_region *src) +{ + _DBG("%s: %p(%d) -> %p(%d)\n", __FUNCTION__, + *dst, *dst ? (*dst)->refcount : 0, src, src ? 
src->refcount : 0); + + if (src != *dst) { + if (*dst) + intel_region_release(dst); + + if (src) + src->refcount++; + *dst = src; + } +} + +void +intel_region_release(struct intel_region **region_handle) +{ + struct intel_region *region = *region_handle; + + if (region == NULL) { + _DBG("%s NULL\n", __FUNCTION__); + return; + } + + _DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1); + + ASSERT(region->refcount > 0); + region->refcount--; + + if (region->refcount == 0) { + drm_intel_bo_unreference(region->bo); + + free(region); + } + *region_handle = NULL; +} + +/** + * This function computes masks that may be used to select the bits of the X + * and Y coordinates that indicate the offset within a tile. If the region is + * untiled, the masks are set to 0. + */ +void +intel_region_get_tile_masks(struct intel_region *region, + uint32_t *mask_x, uint32_t *mask_y, + bool map_stencil_as_y_tiled) +{ + int cpp = region->cpp; + uint32_t tiling = region->tiling; + + if (map_stencil_as_y_tiled) + tiling = I915_TILING_Y; + + switch (tiling) { + default: + assert(false); + case I915_TILING_NONE: + *mask_x = *mask_y = 0; + break; + case I915_TILING_X: + *mask_x = 512 / cpp - 1; + *mask_y = 7; + break; + case I915_TILING_Y: + *mask_x = 128 / cpp - 1; + *mask_y = 31; + break; + } +} + +/** + * Compute the offset (in bytes) from the start of the region to the given x + * and y coordinate. For tiled regions, caller must ensure that x and y are + * multiples of the tile size. + */ +uint32_t +intel_region_get_aligned_offset(struct intel_region *region, uint32_t x, + uint32_t y, bool map_stencil_as_y_tiled) +{ + int cpp = region->cpp; + uint32_t pitch = region->pitch; + uint32_t tiling = region->tiling; + + if (map_stencil_as_y_tiled) { + tiling = I915_TILING_Y; + + /* When mapping a W-tiled stencil buffer as Y-tiled, each 64-high W-tile + * gets transformed into a 32-high Y-tile. 
Accordingly, the pitch of + * the resulting region is twice the pitch of the original region, since + * each row in the Y-tiled view corresponds to two rows in the actual + * W-tiled surface. So we need to correct the pitch before computing + * the offsets. + */ + pitch *= 2; + } + + switch (tiling) { + default: + assert(false); + case I915_TILING_NONE: + return y * pitch + x * cpp; + case I915_TILING_X: + assert((x % (512 / cpp)) == 0); + assert((y % 8) == 0); + return y * pitch + x / (512 / cpp) * 4096; + case I915_TILING_Y: + assert((x % (128 / cpp)) == 0); + assert((y % 32) == 0); + return y * pitch + x / (128 / cpp) * 4096; + } +} diff --git a/src/mesa/drivers/dri/i965/intel_regions.h b/src/mesa/drivers/dri/i965/intel_regions.h new file mode 100644 index 00000000000..1fb6b275772 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_regions.h @@ -0,0 +1,161 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_REGIONS_H +#define INTEL_REGIONS_H + +/** @file intel_regions.h + * + * Structure definitions and prototypes for intel_region handling, + * which is the basic structure for rectangular collections of pixels + * stored in a drm_intel_bo. + */ + +#include <stdbool.h> +#include <xf86drm.h> + +#include "main/mtypes.h" +#include "intel_bufmgr.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct intel_context; +struct intel_screen; +struct intel_buffer_object; + +/** + * A layer on top of the bufmgr buffers that adds a few useful things: + * + * - Refcounting for local buffer references. + * - Refcounting for buffer maps + * - Buffer dimensions - pitch and height. + * - Blitter commands for copying 2D regions between buffers. (really???) + */ +struct intel_region +{ + drm_intel_bo *bo; /**< buffer manager's buffer */ + GLuint refcount; /**< Reference count for region */ + GLuint cpp; /**< bytes per pixel */ + GLuint width; /**< in pixels */ + GLuint height; /**< in pixels */ + GLuint pitch; /**< in bytes */ + + uint32_t tiling; /**< Which tiling mode the region is in */ + + uint32_t name; /**< Global name for the bo */ +}; + + +/* Allocate a refcounted region. Pointers to regions should only be + * copied by calling intel_region_reference(), and dropped again with + * intel_region_release(). 
+ */ +struct intel_region *intel_region_alloc(struct intel_screen *screen, + uint32_t tiling, + GLuint cpp, GLuint width, + GLuint height, + bool expect_accelerated_upload); + +struct intel_region * +intel_region_alloc_for_handle(struct intel_screen *screen, + GLuint cpp, + GLuint width, GLuint height, GLuint pitch, + unsigned int handle, const char *name); + +struct intel_region * +intel_region_alloc_for_fd(struct intel_screen *screen, + GLuint cpp, + GLuint width, GLuint height, GLuint pitch, + int fd, const char *name); + +bool +intel_region_flink(struct intel_region *region, uint32_t *name); + +void intel_region_reference(struct intel_region **dst, + struct intel_region *src); + +void intel_region_release(struct intel_region **ib); + +void intel_recreate_static_regions(struct intel_context *intel); + +void +intel_region_get_tile_masks(struct intel_region *region, + uint32_t *mask_x, uint32_t *mask_y, + bool map_stencil_as_y_tiled); + +uint32_t +intel_region_get_aligned_offset(struct intel_region *region, uint32_t x, + uint32_t y, bool map_stencil_as_y_tiled); + +/** + * Used with images created with image_from_names + * to help support planar images. + */ +struct intel_image_format { + int fourcc; + int components; + int nplanes; + struct { + int buffer_index; + int width_shift; + int height_shift; + uint32_t dri_format; + int cpp; + } planes[3]; +}; + +struct __DRIimageRec { + struct intel_region *region; + GLenum internal_format; + uint32_t dri_format; + GLuint format; + uint32_t offset; + + /* + * Need to save these here between calls to + * image_from_names and calls to image_from_planar. 
+ */ + uint32_t strides[3]; + uint32_t offsets[3]; + struct intel_image_format *planar_format; + + /* particular miptree level */ + GLuint width; + GLuint height; + GLuint tile_x; + GLuint tile_y; + bool has_depthstencil; + + void *data; +}; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/mesa/drivers/dri/i965/intel_resolve_map.c b/src/mesa/drivers/dri/i965/intel_resolve_map.c index 77e50fbaea4..04b5c942432 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_resolve_map.c +++ b/src/mesa/drivers/dri/i965/intel_resolve_map.c @@ -1 +1,111 @@ -../intel/intel_resolve_map.c
\ No newline at end of file +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "intel_resolve_map.h" + +#include <assert.h> +#include <stdlib.h> + +/** + * \brief Set that the miptree slice at (level, layer) needs a resolve. + * + * If a map element already exists with the given key, then the value is + * changed to the given value of \c need. 
+ */ +void +intel_resolve_map_set(struct intel_resolve_map *head, + uint32_t level, + uint32_t layer, + enum gen6_hiz_op need) +{ + struct intel_resolve_map **tail = &head->next; + struct intel_resolve_map *prev = head; + struct intel_resolve_map *elem; + + /* Update in place if (level, layer) is already in the list; otherwise the + * walk leaves \c tail at the terminating next pointer and \c prev at the + * last node (or at \c head for an empty list). + */ + while (*tail) { + if ((*tail)->level == level && (*tail)->layer == layer) { + (*tail)->need = need; + return; + } + prev = *tail; + tail = &(*tail)->next; + } + + /* Fill in the new element completely before linking it, so that a failed + * allocation leaves the list untouched instead of dereferencing NULL. + * The function has no way to report the failure; the requested resolve + * is simply not recorded in that case. + */ + elem = malloc(sizeof(*elem)); + if (elem == NULL) + return; + + elem->prev = prev; + elem->next = NULL; + elem->level = level; + elem->layer = layer; + elem->need = need; + *tail = elem; +} + +/** + * \brief Get an element from the map. + * \return null if element is not contained in map. + */ +struct intel_resolve_map* +intel_resolve_map_get(struct intel_resolve_map *head, + uint32_t level, + uint32_t layer) +{ + struct intel_resolve_map *item = head->next; + + while (item) { + if (item->level == level && item->layer == layer) + break; + else + item = item->next; + } + + return item; +} + +/** + * \brief Remove and free an element from the map. + */ +void +intel_resolve_map_remove(struct intel_resolve_map *elem) +{ + if (elem->prev) + elem->prev->next = elem->next; + if (elem->next) + elem->next->prev = elem->prev; + free(elem); +} + +/** + * \brief Remove and free all elements of the map. 
+ */ +void +intel_resolve_map_clear(struct intel_resolve_map *head) +{ + struct intel_resolve_map *next = head->next; + struct intel_resolve_map *trash; + + while (next) { + trash = next; + next = next->next; + free(trash); + } + + head->next = NULL; +} diff --git a/src/mesa/drivers/dri/i965/intel_resolve_map.h b/src/mesa/drivers/dri/i965/intel_resolve_map.h new file mode 100644 index 00000000000..8504271f5cb --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_resolve_map.h @@ -0,0 +1,104 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#pragma once + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * For an overview of the HiZ operations, see the following sections of the + * Sandy Bridge PRM, Volume 1, Part 2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve + * + * Of these, two get entered in the resolve map as needing to be done to the + * buffer: depth resolve and hiz resolve. + */ +enum gen6_hiz_op { + GEN6_HIZ_OP_DEPTH_CLEAR, + GEN6_HIZ_OP_DEPTH_RESOLVE, + GEN6_HIZ_OP_HIZ_RESOLVE, + GEN6_HIZ_OP_NONE, +}; + +/** + * \brief Map of miptree slices to needed resolves. + * + * The map is implemented as a linear doubly-linked list. + * + * In the intel_resolve_map*() functions, the \c head argument is not + * inspected for its data. It only serves as an anchor for the list. + * + * \par Design Discussion + * + * There are two possible ways to record which miptree slices need + * resolves. 1) Maintain a flag for every miptree slice in the texture, + * likely in intel_mipmap_level::slice, or 2) maintain a list of only + * those slices that need a resolve. + * + * Immediately before drawing, a full depth resolve is performed on each + * enabled depth texture. If design 1 were chosen, then at each draw call + * it would be necessary to iterate over each miptree slice of each + * enabled depth texture in order to query if each slice needed a resolve. + * In the worst case, this would require 2^16 iterations: 16 texture + * units, 16 miplevels, and 256 depth layers (assuming maximums for OpenGL + * 2.1). + * + * By choosing design 2, the number of iterations is exactly the minimum + * necessary. 
+ */ +struct intel_resolve_map { + uint32_t level; + uint32_t layer; + enum gen6_hiz_op need; + + struct intel_resolve_map *next; + struct intel_resolve_map *prev; +}; + +void +intel_resolve_map_set(struct intel_resolve_map *head, + uint32_t level, + uint32_t layer, + enum gen6_hiz_op need); + +struct intel_resolve_map* +intel_resolve_map_get(struct intel_resolve_map *head, + uint32_t level, + uint32_t layer); + +void +intel_resolve_map_remove(struct intel_resolve_map *elem); + +void +intel_resolve_map_clear(struct intel_resolve_map *head); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index f2db48272b9..60a69a60648 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1 +1,1409 @@ -../intel/intel_screen.c
\ No newline at end of file +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include <errno.h> +#include <time.h> +#include "main/glheader.h" +#include "main/context.h" +#include "main/framebuffer.h" +#include "main/renderbuffer.h" +#include "main/texobj.h" +#include "main/hash.h" +#include "main/fbobject.h" +#include "main/version.h" +#include "swrast/s_renderbuffer.h" + +#include "utils.h" +#include "xmlpool.h" + +PUBLIC const char __driConfigOptions[] = + DRI_CONF_BEGIN + DRI_CONF_SECTION_PERFORMANCE + DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_ALWAYS_SYNC) + /* Options correspond to DRI_CONF_BO_REUSE_DISABLED, + * DRI_CONF_BO_REUSE_ALL + */ + DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 1, "0:1") + DRI_CONF_DESC_BEGIN(en, "Buffer object reuse") + DRI_CONF_ENUM(0, "Disable buffer object reuse") + DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects") + DRI_CONF_DESC_END + DRI_CONF_OPT_END + + DRI_CONF_OPT_BEGIN_B(hiz, "true") + DRI_CONF_DESC(en, "Enable Hierarchical Z on gen6+") + DRI_CONF_OPT_END + + DRI_CONF_OPT_BEGIN_B(early_z, "false") + DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).") + DRI_CONF_OPT_END + + DRI_CONF_SECTION_END + DRI_CONF_SECTION_QUALITY + DRI_CONF_FORCE_S3TC_ENABLE("false") + DRI_CONF_ALLOW_LARGE_TEXTURES(2) + DRI_CONF_SECTION_END + DRI_CONF_SECTION_DEBUG + DRI_CONF_NO_RAST("false") + DRI_CONF_ALWAYS_FLUSH_BATCH("false") + DRI_CONF_ALWAYS_FLUSH_CACHE("false") + DRI_CONF_DISABLE_THROTTLING("false") + DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN("false") + DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS("false") + DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED("false") + + DRI_CONF_OPT_BEGIN_B(shader_precompile, "true") + DRI_CONF_DESC(en, "Perform code generation at shader link time.") + DRI_CONF_OPT_END + DRI_CONF_SECTION_END +DRI_CONF_END; + +const GLuint __driNConfigOptions = 14; + +#include "intel_batchbuffer.h" +#include "intel_buffers.h" +#include "intel_bufmgr.h" +#include "intel_chipset.h" +#include "intel_fbo.h" +#include 
"intel_mipmap_tree.h" +#include "intel_screen.h" +#include "intel_tex.h" +#include "intel_regions.h" + +#ifndef I915 +#include "brw_context.h" +#endif + +#include "i915_drm.h" + +#ifdef USE_NEW_INTERFACE +static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; +#endif /*USE_NEW_INTERFACE */ + +/** + * For debugging purposes, this returns a time in seconds. + */ +double +get_time(void) +{ + struct timespec tp; + + clock_gettime(CLOCK_MONOTONIC, &tp); + + return tp.tv_sec + tp.tv_nsec / 1000000000.0; +} + +void +aub_dump_bmp(struct gl_context *ctx) +{ + struct gl_framebuffer *fb = ctx->DrawBuffer; + + for (int i = 0; i < fb->_NumColorDrawBuffers; i++) { + struct intel_renderbuffer *irb = + intel_renderbuffer(fb->_ColorDrawBuffers[i]); + + if (irb && irb->mt) { + enum aub_dump_bmp_format format; + + switch (irb->Base.Base.Format) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + format = AUB_DUMP_BMP_FORMAT_ARGB_8888; + break; + default: + continue; + } + + assert(irb->mt->region->pitch % irb->mt->region->cpp == 0); + drm_intel_gem_bo_aub_dump_bmp(irb->mt->region->bo, + irb->draw_x, + irb->draw_y, + irb->Base.Base.Width, + irb->Base.Base.Height, + format, + irb->mt->region->pitch, + 0); + } + } +} + +static const __DRItexBufferExtension intelTexBufferExtension = { + .base = { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION }, + + .setTexBuffer = intelSetTexBuffer, + .setTexBuffer2 = intelSetTexBuffer2, + .releaseTexBuffer = NULL, +}; + +static void +intelDRI2Flush(__DRIdrawable *drawable) +{ + GET_CURRENT_CONTEXT(ctx); + struct intel_context *intel = intel_context(ctx); + if (intel == NULL) + return; + + if (intel->gen < 4) + INTEL_FIREVERTICES(intel); + + intel_resolve_for_dri2_flush(intel, drawable); + intel->need_throttle = true; + + if (intel->batch.used) + intel_batchbuffer_flush(intel); + + if (INTEL_DEBUG & DEBUG_AUB) { + aub_dump_bmp(ctx); + } +} + +static const struct __DRI2flushExtensionRec intelFlushExtension = { + .base = { __DRI2_FLUSH, 3 }, + 
+ + .flush = intelDRI2Flush, + .invalidate = dri2InvalidateDrawable, +}; + +/* Table of the fourcc codes accepted when importing images. Each entry is + * { fourcc, components, nplanes, planes[] } and each plane is + * { buffer_index, width_shift, height_shift, dri_format, cpp }, as declared + * by struct intel_image_format. The shifts are applied to the parent + * image's width and height when a plane is extracted by + * intel_from_planar(). + */ +static struct intel_image_format intel_image_formats[] = { + { __DRI_IMAGE_FOURCC_ARGB8888, __DRI_IMAGE_COMPONENTS_RGBA, 1, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } }, + + { __DRI_IMAGE_FOURCC_XRGB8888, __DRI_IMAGE_COMPONENTS_RGB, 1, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB8888, 4 }, } }, + + { __DRI_IMAGE_FOURCC_YUV410, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 1, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 }, + { 2, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 } } }, + + { __DRI_IMAGE_FOURCC_YUV411, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 1, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 2, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 } } }, + + { __DRI_IMAGE_FOURCC_YUV420, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 1, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 }, + { 2, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 } } }, + + { __DRI_IMAGE_FOURCC_YUV422, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 1, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 2, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 } } }, + + { __DRI_IMAGE_FOURCC_YUV444, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 1, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 2, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 } } }, + + { __DRI_IMAGE_FOURCC_NV12, __DRI_IMAGE_COMPONENTS_Y_UV, 2, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 1, 1, 1, __DRI_IMAGE_FORMAT_GR88, 2 } } }, + + { __DRI_IMAGE_FOURCC_NV16, __DRI_IMAGE_COMPONENTS_Y_UV, 2, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 1, 1, 0, __DRI_IMAGE_FORMAT_GR88, 2 } } }, + + /* For YUYV buffers, we set up two overlapping DRI images and treat + * them as planar buffers in the compositors. Plane 0 is GR88 and + * samples YU or YV pairs and places Y into the R component, while + * plane 1 is ARGB and samples YUYV clusters and places U into the + * G component and V into A. 
This lets the + * texture sampler interpolate the Y components correctly when + * sampling from plane 0, and interpolate U and V correctly when + * sampling from plane 1. */ + { __DRI_IMAGE_FOURCC_YUYV, __DRI_IMAGE_COMPONENTS_Y_XUXV, 2, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 }, + { 0, 1, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } } +}; + +static __DRIimage * +intel_allocate_image(int dri_format, void *loaderPrivate) +{ + __DRIimage *image; + + image = calloc(1, sizeof *image); + if (image == NULL) + return NULL; + + image->dri_format = dri_format; + image->offset = 0; + + switch (dri_format) { + case __DRI_IMAGE_FORMAT_RGB565: + image->format = MESA_FORMAT_RGB565; + break; + case __DRI_IMAGE_FORMAT_XRGB8888: + image->format = MESA_FORMAT_XRGB8888; + break; + case __DRI_IMAGE_FORMAT_ARGB8888: + image->format = MESA_FORMAT_ARGB8888; + break; + case __DRI_IMAGE_FORMAT_ABGR8888: + image->format = MESA_FORMAT_RGBA8888_REV; + break; + case __DRI_IMAGE_FORMAT_XBGR8888: + image->format = MESA_FORMAT_RGBX8888_REV; + break; + case __DRI_IMAGE_FORMAT_R8: + image->format = MESA_FORMAT_R8; + break; + case __DRI_IMAGE_FORMAT_GR88: + image->format = MESA_FORMAT_GR88; + break; + case __DRI_IMAGE_FORMAT_NONE: + image->format = MESA_FORMAT_NONE; + break; + default: + free(image); + return NULL; + } + + image->internal_format = _mesa_get_format_base_format(image->format); + image->data = loaderPrivate; + + return image; +} + +/** + * Sets up a DRIImage structure to point to our shared image in a region + */ +static void +intel_setup_image_from_mipmap_tree(struct intel_context *intel, __DRIimage *image, + struct intel_mipmap_tree *mt, GLuint level, + GLuint zoffset) +{ + unsigned int draw_x, draw_y; + uint32_t mask_x, mask_y; + + intel_miptree_make_shareable(intel, mt); + + intel_miptree_check_level_layer(mt, level, zoffset); + + intel_region_get_tile_masks(mt->region, &mask_x, &mask_y, false); + intel_miptree_get_image_offset(mt, level, zoffset, &draw_x, &draw_y); + + image->width = 
mt->level[level].width; + image->height = mt->level[level].height; + image->tile_x = draw_x & mask_x; + image->tile_y = draw_y & mask_y; + + image->offset = intel_region_get_aligned_offset(mt->region, + draw_x & ~mask_x, + draw_y & ~mask_y, + false); + + intel_region_reference(&image->region, mt->region); +} + +/** + * Initializes the size fields of an image that covers its whole region: + * width and height are copied from the region and the intra-tile offsets + * are zero. Also clears has_depthstencil. + */ +static void +intel_setup_image_from_dimensions(__DRIimage *image) +{ + image->width = image->region->width; + image->height = image->region->height; + image->tile_x = 0; + image->tile_y = 0; + image->has_depthstencil = false; +} + +/** + * Translates a Mesa format into the matching __DRI_IMAGE_FORMAT_* code. + * + * Formats without a case below yield MESA_FORMAT_NONE, which callers in + * this file compare against to detect an unsupported format. + * + * NOTE(review): intel_allocate_image() maps __DRI_IMAGE_FORMAT_GR88 to + * MESA_FORMAT_GR88, whereas the reverse mapping here starts from + * MESA_FORMAT_RG88, and MESA_FORMAT_RGBX8888_REV (XBGR8888) has no reverse + * case -- confirm whether this asymmetry is intended. + */ +static inline uint32_t +intel_dri_format(GLuint format) +{ + switch (format) { + case MESA_FORMAT_RGB565: + return __DRI_IMAGE_FORMAT_RGB565; + case MESA_FORMAT_XRGB8888: + return __DRI_IMAGE_FORMAT_XRGB8888; + case MESA_FORMAT_ARGB8888: + return __DRI_IMAGE_FORMAT_ARGB8888; + case MESA_FORMAT_RGBA8888_REV: + return __DRI_IMAGE_FORMAT_ABGR8888; + case MESA_FORMAT_R8: + return __DRI_IMAGE_FORMAT_R8; + case MESA_FORMAT_RG88: + return __DRI_IMAGE_FORMAT_GR88; + } + + return MESA_FORMAT_NONE; +} + +/** + * Creates a __DRIimage wrapping the existing buffer identified by \c name + * (handed to intel_region_alloc_for_handle()). + * + * \c pitch is multiplied by the format's bytes per pixel before it becomes + * the region pitch, so it is presumably given in pixels -- TODO confirm + * against the loader. An image whose Mesa format is MESA_FORMAT_NONE is + * treated as 1 byte per pixel. + * + * \return the new image, or NULL if the image or its region cannot be + * created. + */ +static __DRIimage * +intel_create_image_from_name(__DRIscreen *screen, + int width, int height, int format, + int name, int pitch, void *loaderPrivate) +{ + struct intel_screen *intelScreen = screen->driverPrivate; + __DRIimage *image; + int cpp; + + image = intel_allocate_image(format, loaderPrivate); + if (image == NULL) + return NULL; + + if (image->format == MESA_FORMAT_NONE) + cpp = 1; + else + cpp = _mesa_get_format_bytes(image->format); + image->region = intel_region_alloc_for_handle(intelScreen, + cpp, width, height, + pitch * cpp, name, "image"); + if (image->region == NULL) { + free(image); + return NULL; + } + + intel_setup_image_from_dimensions(image); + + return image; +} + +static __DRIimage * +intel_create_image_from_renderbuffer(__DRIcontext *context, + int renderbuffer, void *loaderPrivate) +{ + __DRIimage *image; + struct intel_context *intel = context->driverPrivate; + struct gl_renderbuffer *rb; + struct 
intel_renderbuffer *irb; + + rb = _mesa_lookup_renderbuffer(&intel->ctx, renderbuffer); + if (!rb) { + _mesa_error(&intel->ctx, + GL_INVALID_OPERATION, "glRenderbufferExternalMESA"); + return NULL; + } + + irb = intel_renderbuffer(rb); + intel_miptree_make_shareable(intel, irb->mt); + image = calloc(1, sizeof *image); + if (image == NULL) + return NULL; + + image->internal_format = rb->InternalFormat; + image->format = rb->Format; + image->offset = 0; + image->data = loaderPrivate; + intel_region_reference(&image->region, irb->mt->region); + intel_setup_image_from_dimensions(image); + image->dri_format = intel_dri_format(image->format); + image->has_depthstencil = irb->mt->stencil_mt? true : false; + + rb->NeedsFinishRenderTexture = true; + return image; +} + +/** + * Creates a __DRIimage referencing one slice of a GL texture. + * + * \c zoffset selects the cube face when \c target is GL_TEXTURE_CUBE_MAP and + * the depth slice when it is GL_TEXTURE_3D. The texture must be complete + * and \c level must lie between its base and maximum level. + * + * \return the new image, or NULL with the reason stored in \c *error. + */ +static __DRIimage * +intel_create_image_from_texture(__DRIcontext *context, int target, + unsigned texture, int zoffset, + int level, + unsigned *error, + void *loaderPrivate) +{ + __DRIimage *image; + struct intel_context *intel = context->driverPrivate; + struct gl_texture_object *obj; + struct intel_texture_object *iobj; + GLuint face = 0; + + obj = _mesa_lookup_texture(&intel->ctx, texture); + if (!obj || obj->Target != target) { + *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; + return NULL; + } + + if (target == GL_TEXTURE_CUBE_MAP) + face = zoffset; + + _mesa_test_texobj_completeness(&intel->ctx, obj); + iobj = intel_texture_object(obj); + if (!obj->_BaseComplete || (level > 0 && !obj->_MipmapComplete)) { + *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; + return NULL; + } + + if (level < obj->BaseLevel || level > obj->_MaxLevel) { + *error = __DRI_IMAGE_ERROR_BAD_MATCH; + return NULL; + } + + /* Valid slices are 0 .. Depth - 1, so zoffset == Depth must be rejected + * as well; otherwise a slice one past the end of the level is used. + */ + if (target == GL_TEXTURE_3D && obj->Image[face][level]->Depth <= zoffset) { + *error = __DRI_IMAGE_ERROR_BAD_MATCH; + return NULL; + } + image = calloc(1, sizeof *image); + if (image == NULL) { + *error = __DRI_IMAGE_ERROR_BAD_ALLOC; + return NULL; + } + + image->internal_format = obj->Image[face][level]->InternalFormat; + 
image->format = obj->Image[face][level]->TexFormat; + image->data = loaderPrivate; + intel_setup_image_from_mipmap_tree(intel, image, iobj->mt, level, zoffset); + image->dri_format = intel_dri_format(image->format); + image->has_depthstencil = iobj->mt->stencil_mt? true : false; + if (image->dri_format == MESA_FORMAT_NONE) { + /* intel_setup_image_from_mipmap_tree() took a reference on the + * miptree's region; drop it before freeing the image so the region + * is not leaked. + */ + *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; + intel_region_release(&image->region); + free(image); + return NULL; + } + + *error = __DRI_IMAGE_ERROR_SUCCESS; + return image; +} + +/** + * Drops the image's reference on its region and frees the image itself. + */ +static void +intel_destroy_image(__DRIimage *image) +{ + intel_region_release(&image->region); + free(image); +} + +/** + * Allocates a new image of the given size and DRI format. + * + * Images are X-tiled, except cursor images (__DRI_IMAGE_USE_CURSOR), which + * must be exactly 64x64 and are allocated untiled. + * + * \return the new image, or NULL if the request is rejected or an + * allocation fails. + */ +static __DRIimage * +intel_create_image(__DRIscreen *screen, + int width, int height, int format, + unsigned int use, + void *loaderPrivate) +{ + __DRIimage *image; + struct intel_screen *intelScreen = screen->driverPrivate; + uint32_t tiling; + int cpp; + + tiling = I915_TILING_X; + if (use & __DRI_IMAGE_USE_CURSOR) { + if (width != 64 || height != 64) + return NULL; + tiling = I915_TILING_NONE; + } + + image = intel_allocate_image(format, loaderPrivate); + if (image == NULL) + return NULL; + + cpp = _mesa_get_format_bytes(image->format); + image->region = + intel_region_alloc(intelScreen, tiling, cpp, width, height, true); + if (image->region == NULL) { + free(image); + return NULL; + } + + intel_setup_image_from_dimensions(image); + + return image; +} + +/** + * Stores the attribute \c attrib of \c image in \c *value. + * + * \return true on success, false if the attribute is unknown or its value + * cannot be obtained. + */ +static GLboolean +intel_query_image(__DRIimage *image, int attrib, int *value) +{ + switch (attrib) { + case __DRI_IMAGE_ATTRIB_STRIDE: + *value = image->region->pitch; + return true; + case __DRI_IMAGE_ATTRIB_HANDLE: + *value = image->region->bo->handle; + return true; + case __DRI_IMAGE_ATTRIB_NAME: + return intel_region_flink(image->region, (uint32_t *) value); + case __DRI_IMAGE_ATTRIB_FORMAT: + *value = image->dri_format; + return true; + case __DRI_IMAGE_ATTRIB_WIDTH: + *value = image->region->width; + return true; + case __DRI_IMAGE_ATTRIB_HEIGHT: + *value = image->region->height; + return true; + case __DRI_IMAGE_ATTRIB_COMPONENTS: 
+ if (image->planar_format == NULL) + return false; + *value = image->planar_format->components; + return true; + case __DRI_IMAGE_ATTRIB_FD: + if (drm_intel_bo_gem_export_to_prime(image->region->bo, value) == 0) + return true; + return false; + default: + return false; + } +} + +static __DRIimage * +intel_dup_image(__DRIimage *orig_image, void *loaderPrivate) +{ + __DRIimage *image; + + image = calloc(1, sizeof *image); + if (image == NULL) + return NULL; + + intel_region_reference(&image->region, orig_image->region); + if (image->region == NULL) { + free(image); + return NULL; + } + + image->internal_format = orig_image->internal_format; + image->planar_format = orig_image->planar_format; + image->dri_format = orig_image->dri_format; + image->format = orig_image->format; + image->offset = orig_image->offset; + image->width = orig_image->width; + image->height = orig_image->height; + image->tile_x = orig_image->tile_x; + image->tile_y = orig_image->tile_y; + image->has_depthstencil = orig_image->has_depthstencil; + image->data = loaderPrivate; + + memcpy(image->strides, orig_image->strides, sizeof(image->strides)); + memcpy(image->offsets, orig_image->offsets, sizeof(image->offsets)); + + return image; +} + +static GLboolean +intel_validate_usage(__DRIimage *image, unsigned int use) +{ + if (use & __DRI_IMAGE_USE_CURSOR) { + if (image->region->width != 64 || image->region->height != 64) + return GL_FALSE; + } + + return GL_TRUE; +} + +static __DRIimage * +intel_create_image_from_names(__DRIscreen *screen, + int width, int height, int fourcc, + int *names, int num_names, + int *strides, int *offsets, + void *loaderPrivate) +{ + struct intel_image_format *f = NULL; + __DRIimage *image; + int i, index; + + if (screen == NULL || names == NULL || num_names != 1) + return NULL; + + for (i = 0; i < ARRAY_SIZE(intel_image_formats); i++) { + if (intel_image_formats[i].fourcc == fourcc) { + f = &intel_image_formats[i]; + } + } + + if (f == NULL) + return NULL; + + image = 
intel_create_image_from_name(screen, width, height, + __DRI_IMAGE_FORMAT_NONE, + names[0], strides[0], + loaderPrivate); + + if (image == NULL) + return NULL; + + image->planar_format = f; + for (i = 0; i < f->nplanes; i++) { + index = f->planes[i].buffer_index; + image->offsets[index] = offsets[index]; + image->strides[index] = strides[index]; + } + + return image; +} + +/** + * Creates a planar parent image from a buffer passed as a file descriptor + * (imported with intel_region_alloc_for_fd()). + * + * Exactly one fd is accepted and \c fourcc must be listed in + * intel_image_formats. The per-plane strides and offsets are saved in the + * image for later use by intel_from_planar(). + * + * \return the new image, or NULL on invalid arguments or allocation + * failure. + */ +static __DRIimage * +intel_create_image_from_fds(__DRIscreen *screen, + int width, int height, int fourcc, + int *fds, int num_fds, int *strides, int *offsets, + void *loaderPrivate) +{ + struct intel_screen *intelScreen = screen->driverPrivate; + struct intel_image_format *f = NULL; + __DRIimage *image; + int i, index; + + if (fds == NULL || num_fds != 1) + return NULL; + + for (i = 0; i < ARRAY_SIZE(intel_image_formats); i++) { + if (intel_image_formats[i].fourcc == fourcc) { + f = &intel_image_formats[i]; + } + } + + if (f == NULL) + return NULL; + + image = intel_allocate_image(__DRI_IMAGE_FORMAT_NONE, loaderPrivate); + if (image == NULL) + return NULL; + + image->region = intel_region_alloc_for_fd(intelScreen, + 1, width, height, + strides[0], fds[0], "image"); + if (image->region == NULL) { + free(image); + return NULL; + } + + image->planar_format = f; + for (i = 0; i < f->nplanes; i++) { + index = f->planes[i].buffer_index; + image->offsets[index] = offsets[index]; + image->strides[index] = strides[index]; + } + + return image; +} + + +/** + * Creates an image for plane \c plane of the planar image \c parent, using + * the strides and offsets saved when the parent was created. + * + * \return the new image, or NULL if \c parent is not a planar image, + * \c plane is out of range, or the plane cannot be set up. + */ +static __DRIimage * +intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate) +{ + int width, height, offset, stride, dri_format, index; + struct intel_image_format *f; + uint32_t mask_x, mask_y; + __DRIimage *image; + + if (parent == NULL || parent->planar_format == NULL) + return NULL; + + f = parent->planar_format; + + /* plane indexes f->planes[], so reject negative values as well as values + * past the last plane of the format. + */ + if (plane < 0 || plane >= f->nplanes) + return NULL; + + width = parent->region->width >> f->planes[plane].width_shift; + height = parent->region->height >> f->planes[plane].height_shift; + dri_format = f->planes[plane].dri_format; + index 
= f->planes[plane].buffer_index; + offset = parent->offsets[index]; + stride = parent->strides[index]; + + image = intel_allocate_image(dri_format, loaderPrivate); + if (image == NULL) + return NULL; + + if (offset + height * stride > parent->region->bo->size) { + _mesa_warning(NULL, "intel_create_sub_image: subimage out of bounds"); + free(image); + return NULL; + } + + image->region = calloc(sizeof(*image->region), 1); + if (image->region == NULL) { + free(image); + return NULL; + } + + image->region->cpp = _mesa_get_format_bytes(image->format); + image->region->width = width; + image->region->height = height; + image->region->pitch = stride; + image->region->refcount = 1; + image->region->bo = parent->region->bo; + drm_intel_bo_reference(image->region->bo); + image->region->tiling = parent->region->tiling; + image->offset = offset; + intel_setup_image_from_dimensions(image); + + intel_region_get_tile_masks(image->region, &mask_x, &mask_y, false); + if (offset & mask_x) + _mesa_warning(NULL, + "intel_create_sub_image: offset not on tile boundary"); + + return image; +} + +static struct __DRIimageExtensionRec intelImageExtension = { + .base = { __DRI_IMAGE, 7 }, + + .createImageFromName = intel_create_image_from_name, + .createImageFromRenderbuffer = intel_create_image_from_renderbuffer, + .destroyImage = intel_destroy_image, + .createImage = intel_create_image, + .queryImage = intel_query_image, + .dupImage = intel_dup_image, + .validateUsage = intel_validate_usage, + .createImageFromNames = intel_create_image_from_names, + .fromPlanar = intel_from_planar, + .createImageFromTexture = intel_create_image_from_texture, + .createImageFromFds = intel_create_image_from_fds +}; + +static const __DRIextension *intelScreenExtensions[] = { + &intelTexBufferExtension.base, + &intelFlushExtension.base, + &intelImageExtension.base, + &dri2ConfigQueryExtension.base, + NULL +}; + +static bool +intel_get_param(__DRIscreen *psp, int param, int *value) +{ + int ret; + struct 
drm_i915_getparam gp; + + memset(&gp, 0, sizeof(gp)); + gp.param = param; + gp.value = value; + + ret = drmCommandWriteRead(psp->fd, DRM_I915_GETPARAM, &gp, sizeof(gp)); + if (ret) { + if (ret != -EINVAL) + _mesa_warning(NULL, "drm_i915_getparam: %d", ret); + return false; + } + + return true; +} + +static bool +intel_get_boolean(__DRIscreen *psp, int param) +{ + int value = 0; + return intel_get_param(psp, param, &value) && value; +} + +static void +intelDestroyScreen(__DRIscreen * sPriv) +{ + struct intel_screen *intelScreen = sPriv->driverPrivate; + + dri_bufmgr_destroy(intelScreen->bufmgr); + driDestroyOptionInfo(&intelScreen->optionCache); + + free(intelScreen); + sPriv->driverPrivate = NULL; +} + + +/** + * This is called when we need to set up GL rendering to a new X window. + */ +static GLboolean +intelCreateBuffer(__DRIscreen * driScrnPriv, + __DRIdrawable * driDrawPriv, + const struct gl_config * mesaVis, GLboolean isPixmap) +{ + struct intel_renderbuffer *rb; + struct intel_screen *screen = (struct intel_screen*) driScrnPriv->driverPrivate; + gl_format rgbFormat; + unsigned num_samples = intel_quantize_num_samples(screen, mesaVis->samples); + struct gl_framebuffer *fb; + + if (isPixmap) + return false; + + fb = CALLOC_STRUCT(gl_framebuffer); + if (!fb) + return false; + + _mesa_initialize_window_framebuffer(fb, mesaVis); + + if (mesaVis->redBits == 5) + rgbFormat = MESA_FORMAT_RGB565; + else if (mesaVis->sRGBCapable) + rgbFormat = MESA_FORMAT_SARGB8; + else if (mesaVis->alphaBits == 0) + rgbFormat = MESA_FORMAT_XRGB8888; + else { + if (screen->gen >= 4) { + rgbFormat = MESA_FORMAT_SARGB8; + fb->Visual.sRGBCapable = true; + } else { + rgbFormat = MESA_FORMAT_ARGB8888; + } + + } + + /* setup the hardware-based renderbuffers */ + rb = intel_create_renderbuffer(rgbFormat, num_samples); + _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &rb->Base.Base); + + if (mesaVis->doubleBufferMode) { + rb = intel_create_renderbuffer(rgbFormat, num_samples); + 
_mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &rb->Base.Base); + } + + /* + * Assert here that the gl_config has an expected depth/stencil bit + * combination: one of d24/s8, d16/s0, d0/s0. (See intelInitScreen2(), + * which constructs the advertised configs.) + */ + if (mesaVis->depthBits == 24) { + assert(mesaVis->stencilBits == 8); + + if (screen->hw_has_separate_stencil) { + rb = intel_create_private_renderbuffer(MESA_FORMAT_X8_Z24, + num_samples); + _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base.Base); + rb = intel_create_private_renderbuffer(MESA_FORMAT_S8, + num_samples); + _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &rb->Base.Base); + } else { + /* + * Use combined depth/stencil. Note that the renderbuffer is + * attached to two attachment points. + */ + rb = intel_create_private_renderbuffer(MESA_FORMAT_S8_Z24, + num_samples); + _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base.Base); + _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &rb->Base.Base); + } + } + else if (mesaVis->depthBits == 16) { + assert(mesaVis->stencilBits == 0); + rb = intel_create_private_renderbuffer(MESA_FORMAT_Z16, + num_samples); + _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base.Base); + } + else { + assert(mesaVis->depthBits == 0); + assert(mesaVis->stencilBits == 0); + } + + /* now add any/all software-based renderbuffers we may need */ + _swrast_add_soft_renderbuffers(fb, + false, /* never sw color */ + false, /* never sw depth */ + false, /* never sw stencil */ + mesaVis->accumRedBits > 0, + false, /* never sw alpha */ + false /* never sw aux */ ); + driDrawPriv->driverPrivate = fb; + + return true; +} + +static void +intelDestroyBuffer(__DRIdrawable * driDrawPriv) +{ + struct gl_framebuffer *fb = driDrawPriv->driverPrivate; + + _mesa_reference_framebuffer(&fb, NULL); +} + +/* There are probably better ways to do this, such as an + * init-designated function to register chipids and createcontext + * functions. 
+ */ +extern bool +i830CreateContext(int api, + const struct gl_config *mesaVis, + __DRIcontext *driContextPriv, + unsigned major_version, + unsigned minor_version, + unsigned *error, + void *sharedContextPrivate); + +extern bool +i915CreateContext(int api, + const struct gl_config *mesaVis, + __DRIcontext *driContextPriv, + unsigned major_version, + unsigned minor_version, + unsigned *error, + void *sharedContextPrivate); +extern bool +brwCreateContext(int api, + const struct gl_config *mesaVis, + __DRIcontext *driContextPriv, + unsigned major_version, + unsigned minor_version, + uint32_t flags, + unsigned *error, + void *sharedContextPrivate); + +static GLboolean +intelCreateContext(gl_api api, + const struct gl_config * mesaVis, + __DRIcontext * driContextPriv, + unsigned major_version, + unsigned minor_version, + uint32_t flags, + unsigned *error, + void *sharedContextPrivate) +{ + bool success = false; + +#ifdef I915 + __DRIscreen *sPriv = driContextPriv->driScreenPriv; + struct intel_screen *intelScreen = sPriv->driverPrivate; + + if (IS_9XX(intelScreen->deviceID)) { + success = i915CreateContext(api, mesaVis, driContextPriv, + major_version, minor_version, error, + sharedContextPrivate); + } else { + intelScreen->no_vbo = true; + success = i830CreateContext(api, mesaVis, driContextPriv, + major_version, minor_version, error, + sharedContextPrivate); + } +#else + success = brwCreateContext(api, mesaVis, + driContextPriv, + major_version, minor_version, flags, + error, sharedContextPrivate); +#endif + + if (success) + return true; + + if (driContextPriv->driverPrivate != NULL) + intelDestroyContext(driContextPriv); + + return false; +} + +static bool +intel_init_bufmgr(struct intel_screen *intelScreen) +{ + __DRIscreen *spriv = intelScreen->driScrnPriv; + + intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL; + + intelScreen->bufmgr = intel_bufmgr_gem_init(spriv->fd, BATCH_SZ); + if (intelScreen->bufmgr == NULL) { + fprintf(stderr, "[%s:%u] Error initializing 
buffer manager.\n",
+              __func__, __LINE__);
+      return false;
+   }
+
+   drm_intel_bufmgr_gem_enable_fenced_relocs(intelScreen->bufmgr);
+
+   if (!intel_get_boolean(spriv, I915_PARAM_HAS_RELAXED_DELTA)) {
+      fprintf(stderr, "[%s: %u] Kernel 2.6.39 required.\n", __func__, __LINE__);
+      return false;
+   }
+
+   return true;
+}
+
+/**
+ * Override intel_screen.hw_has_separate_stencil with environment variable
+ * INTEL_SEPARATE_STENCIL.
+ *
+ * Valid values for INTEL_SEPARATE_STENCIL are "0" and "1".  If an invalid
+ * value is encountered, a warning is emitted and INTEL_SEPARATE_STENCIL
+ * is ignored.  If the variable is unset, the screen is left untouched.
+ */
+static void
+intel_override_separate_stencil(struct intel_screen *screen)
+{
+   const char *s = getenv("INTEL_SEPARATE_STENCIL");
+   if (!s) {
+      return;
+   } else if (!strncmp("0", s, 2)) {
+      /* Comparing 2 bytes includes the terminator, so only exactly "0"
+       * (or "1" below) matches.
+       */
+      screen->hw_has_separate_stencil = false;
+   } else if (!strncmp("1", s, 2)) {
+      screen->hw_has_separate_stencil = true;
+   } else {
+      /* Terminate the line so the warning does not run into whatever is
+       * printed to stderr next.
+       */
+      fprintf(stderr,
+              "warning: env variable INTEL_SEPARATE_STENCIL=\"%s\" has "
+              "invalid value and is ignored\n", s);
+   }
+}
+
+static bool
+intel_detect_swizzling(struct intel_screen *screen)
+{
+   drm_intel_bo *buffer;
+   unsigned long flags = 0;
+   unsigned long aligned_pitch;
+   uint32_t tiling = I915_TILING_X;
+   uint32_t swizzle_mode = 0;
+
+   buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "swizzle test",
+                                     64, 64, 4,
+                                     &tiling, &aligned_pitch, flags);
+   if (buffer == NULL)
+      return false;
+
+   drm_intel_bo_get_tiling(buffer, &tiling, &swizzle_mode);
+   drm_intel_bo_unreference(buffer);
+
+   if (swizzle_mode == I915_BIT_6_SWIZZLE_NONE)
+      return false;
+   else
+      return true;
+}
+
+static __DRIconfig**
+intel_screen_make_configs(__DRIscreen *dri_screen)
+{
+   static const gl_format formats[] = {
+      MESA_FORMAT_RGB565,
+      MESA_FORMAT_ARGB8888
+   };
+
+   /* GLX_SWAP_COPY_OML is not supported due to page flipping. 
*/ + static const GLenum back_buffer_modes[] = { + GLX_SWAP_UNDEFINED_OML, GLX_NONE, + }; + + static const uint8_t singlesample_samples[1] = {0}; + static const uint8_t multisample_samples[2] = {4, 8}; + + struct intel_screen *screen = dri_screen->driverPrivate; + uint8_t depth_bits[4], stencil_bits[4]; + __DRIconfig **configs = NULL; + + /* Generate singlesample configs without accumulation buffer. */ + for (int i = 0; i < ARRAY_SIZE(formats); i++) { + __DRIconfig **new_configs; + int num_depth_stencil_bits = 2; + + /* Starting with DRI2 protocol version 1.1 we can request a depth/stencil + * buffer that has a different number of bits per pixel than the color + * buffer, gen >= 6 supports this. + */ + depth_bits[0] = 0; + stencil_bits[0] = 0; + + if (formats[i] == MESA_FORMAT_RGB565) { + depth_bits[1] = 16; + stencil_bits[1] = 0; + if (screen->gen >= 6) { + depth_bits[2] = 24; + stencil_bits[2] = 8; + num_depth_stencil_bits = 3; + } + } else { + depth_bits[1] = 24; + stencil_bits[1] = 8; + } + + new_configs = driCreateConfigs(formats[i], + depth_bits, + stencil_bits, + num_depth_stencil_bits, + back_buffer_modes, 2, + singlesample_samples, 1, + false); + configs = driConcatConfigs(configs, new_configs); + } + + /* Generate the minimum possible set of configs that include an + * accumulation buffer. + */ + for (int i = 0; i < ARRAY_SIZE(formats); i++) { + __DRIconfig **new_configs; + + if (formats[i] == MESA_FORMAT_RGB565) { + depth_bits[0] = 16; + stencil_bits[0] = 0; + } else { + depth_bits[0] = 24; + stencil_bits[0] = 8; + } + + new_configs = driCreateConfigs(formats[i], + depth_bits, stencil_bits, 1, + back_buffer_modes, 1, + singlesample_samples, 1, + true); + configs = driConcatConfigs(configs, new_configs); + } + + /* Generate multisample configs. + * + * This loop breaks early, and hence is a no-op, on gen < 6. + * + * Multisample configs must follow the singlesample configs in order to + * work around an X server bug present in 1.12. 
The X server chooses to + * associate the first listed RGBA888-Z24S8 config, regardless of its + * sample count, with the 32-bit depth visual used for compositing. + * + * Only doublebuffer configs with GLX_SWAP_UNDEFINED_OML behavior are + * supported. Singlebuffer configs are not supported because no one wants + * them. + */ + for (int i = 0; i < ARRAY_SIZE(formats); i++) { + if (screen->gen < 6) + break; + + __DRIconfig **new_configs; + const int num_depth_stencil_bits = 2; + int num_msaa_modes = 0; + + depth_bits[0] = 0; + stencil_bits[0] = 0; + + if (formats[i] == MESA_FORMAT_RGB565) { + depth_bits[1] = 16; + stencil_bits[1] = 0; + } else { + depth_bits[1] = 24; + stencil_bits[1] = 8; + } + + if (screen->gen >= 7) + num_msaa_modes = 2; + else if (screen->gen == 6) + num_msaa_modes = 1; + + new_configs = driCreateConfigs(formats[i], + depth_bits, + stencil_bits, + num_depth_stencil_bits, + back_buffer_modes, 1, + multisample_samples, + num_msaa_modes, + false); + configs = driConcatConfigs(configs, new_configs); + } + + if (configs == NULL) { + fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__, + __LINE__); + return NULL; + } + + return configs; +} + +static void +set_max_gl_versions(struct intel_screen *screen) +{ + int gl_version_override = _mesa_get_gl_version_override(); + + switch (screen->gen) { + case 7: + screen->max_gl_core_version = 31; + screen->max_gl_compat_version = 30; + screen->max_gl_es1_version = 11; + screen->max_gl_es2_version = 30; + break; + case 6: + screen->max_gl_core_version = 31; + screen->max_gl_compat_version = 30; + screen->max_gl_es1_version = 11; + screen->max_gl_es2_version = 30; + break; + case 5: + case 4: + screen->max_gl_core_version = 0; + screen->max_gl_compat_version = 21; + screen->max_gl_es1_version = 11; + screen->max_gl_es2_version = 20; + break; + case 3: { + screen->max_gl_core_version = 0; + screen->max_gl_es1_version = 11; + screen->max_gl_compat_version = 21; + screen->max_gl_es2_version = 20; + + 
break;
+   }
+   case 2:
+      screen->max_gl_core_version = 0;
+      screen->max_gl_compat_version = 13;
+      screen->max_gl_es1_version = 11;
+      screen->max_gl_es2_version = 0;
+      break;
+   default:
+      assert(!"unrecognized intel_screen::gen");
+      break;
+   }
+
+   if (gl_version_override >= 31) {
+      screen->max_gl_core_version = MAX2(screen->max_gl_core_version,
+                                         gl_version_override);
+   } else {
+      screen->max_gl_compat_version = MAX2(screen->max_gl_compat_version,
+                                           gl_version_override);
+   }
+
+#ifndef FEATURE_ES1
+   screen->max_gl_es1_version = 0;
+#endif
+
+#ifndef FEATURE_ES2
+   screen->max_gl_es2_version = 0;
+#endif
+}
+
+/**
+ * This is the driver specific part of the createNewScreen entry point.
+ * Called when using DRI2.
+ *
+ * Allocates the intel_screen private area, initializes the buffer manager,
+ * detects the hardware generation and its capabilities, and publishes the
+ * supported APIs and screen extensions on \c psp.
+ *
+ * \return the struct gl_config supported by this driver, or NULL on failure
+ */
+static const
+__DRIconfig **intelInitScreen2(__DRIscreen *psp)
+{
+   struct intel_screen *intelScreen;
+
+   if (psp->dri2.loader->base.version <= 2 ||
+       psp->dri2.loader->getBuffersWithFormat == NULL) {
+      fprintf(stderr,
+              "\nERROR!  DRI2 loader with getBuffersWithFormat() "
+              "support required\n");
+      /* This function returns a pointer, so fail with NULL, not false. */
+      return NULL;
+   }
+
+   /* Allocate the private area */
+   intelScreen = calloc(1, sizeof *intelScreen);
+   if (!intelScreen) {
+      fprintf(stderr, "\nERROR!  Allocating private area failed\n");
+      return NULL;
+   }
+   /* parse information in __driConfigOptions */
+   driParseOptionInfo(&intelScreen->optionCache,
+                      __driConfigOptions, __driNConfigOptions);
+
+   intelScreen->driScrnPriv = psp;
+   psp->driverPrivate = (void *) intelScreen;
+
+   if (!intel_init_bufmgr(intelScreen)) {
+      /* Undo the setup above so a failed init neither leaks the private
+       * area nor leaves psp pointing at a half-initialized screen.
+       * intel_init_bufmgr() can fail after the buffer manager has been
+       * created, so release that as well when it is set.
+       */
+      if (intelScreen->bufmgr != NULL)
+         dri_bufmgr_destroy(intelScreen->bufmgr);
+      driDestroyOptionInfo(&intelScreen->optionCache);
+      free(intelScreen);
+      psp->driverPrivate = NULL;
+      return NULL;
+   }
+
+   intelScreen->deviceID = drm_intel_bufmgr_gem_get_devid(intelScreen->bufmgr);
+
+   if (IS_GEN7(intelScreen->deviceID)) {
+      intelScreen->gen = 7;
+   } else if (IS_GEN6(intelScreen->deviceID)) {
+      intelScreen->gen = 6;
+   } else if (IS_GEN5(intelScreen->deviceID)) {
+      intelScreen->gen = 5;
+   } else if (IS_965(intelScreen->deviceID)) {
+      intelScreen->gen = 4;
+   } else if (IS_9XX(intelScreen->deviceID)) {
+      intelScreen->gen = 3;
+   } else {
+      intelScreen->gen = 2;
+   }
+
+   intelScreen->hw_has_separate_stencil = intelScreen->gen >= 6;
+   intelScreen->hw_must_use_separate_stencil = intelScreen->gen >= 7;
+
+   /* If the kernel cannot answer the LLC query, assume gen6+ has LLC. */
+   int has_llc = 0;
+   bool success = intel_get_param(intelScreen->driScrnPriv, I915_PARAM_HAS_LLC,
+                                  &has_llc);
+   if (success && has_llc)
+      intelScreen->hw_has_llc = true;
+   else if (!success && intelScreen->gen >= 6)
+      intelScreen->hw_has_llc = true;
+
+   intel_override_separate_stencil(intelScreen);
+
+   intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen);
+
+   set_max_gl_versions(intelScreen);
+
+   psp->api_mask = (1 << __DRI_API_OPENGL);
+   if (intelScreen->max_gl_core_version > 0)
+      psp->api_mask |= (1 << __DRI_API_OPENGL_CORE);
+   if (intelScreen->max_gl_es1_version > 0)
+      psp->api_mask |= (1 << __DRI_API_GLES);
+   if (intelScreen->max_gl_es2_version > 0)
+      psp->api_mask |= (1 << __DRI_API_GLES2);
+   if (intelScreen->max_gl_es2_version >= 30)
+      psp->api_mask |= (1 << __DRI_API_GLES3);
+
+   psp->extensions = intelScreenExtensions;
+
+   return (const __DRIconfig**) intel_screen_make_configs(psp);
+}
+
+struct intel_buffer {
+   __DRIbuffer base;
+   struct intel_region *region;
+};
+
+static __DRIbuffer *
+intelAllocateBuffer(__DRIscreen 
*screen, + unsigned attachment, unsigned format, + int width, int height) +{ + struct intel_buffer *intelBuffer; + struct intel_screen *intelScreen = screen->driverPrivate; + + assert(attachment == __DRI_BUFFER_FRONT_LEFT || + attachment == __DRI_BUFFER_BACK_LEFT); + + intelBuffer = calloc(1, sizeof *intelBuffer); + if (intelBuffer == NULL) + return NULL; + + /* The front and back buffers are color buffers, which are X tiled. */ + intelBuffer->region = intel_region_alloc(intelScreen, + I915_TILING_X, + format / 8, + width, + height, + true); + + if (intelBuffer->region == NULL) { + free(intelBuffer); + return NULL; + } + + intel_region_flink(intelBuffer->region, &intelBuffer->base.name); + + intelBuffer->base.attachment = attachment; + intelBuffer->base.cpp = intelBuffer->region->cpp; + intelBuffer->base.pitch = intelBuffer->region->pitch; + + return &intelBuffer->base; +} + +static void +intelReleaseBuffer(__DRIscreen *screen, __DRIbuffer *buffer) +{ + struct intel_buffer *intelBuffer = (struct intel_buffer *) buffer; + + intel_region_release(&intelBuffer->region); + free(intelBuffer); +} + + +const struct __DriverAPIRec driDriverAPI = { + .InitScreen = intelInitScreen2, + .DestroyScreen = intelDestroyScreen, + .CreateContext = intelCreateContext, + .DestroyContext = intelDestroyContext, + .CreateBuffer = intelCreateBuffer, + .DestroyBuffer = intelDestroyBuffer, + .MakeCurrent = intelMakeCurrent, + .UnbindContext = intelUnbindContext, + .AllocateBuffer = intelAllocateBuffer, + .ReleaseBuffer = intelReleaseBuffer +}; + +/* This is the table of extensions that the loader will dlsym() for. 
*/ +PUBLIC const __DRIextension *__driDriverExtensions[] = { + &driCoreExtension.base, + &driDRI2Extension.base, + NULL +}; diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h new file mode 100644 index 00000000000..188e2c09c99 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_screen.h @@ -0,0 +1,89 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef _INTEL_INIT_H_ +#define _INTEL_INIT_H_ + +#include <stdbool.h> +#include <sys/time.h> +#include "dri_util.h" +#include "intel_bufmgr.h" +#include "i915_drm.h" +#include "xmlconfig.h" + +struct intel_screen +{ + int deviceID; + int gen; + + int max_gl_core_version; + int max_gl_compat_version; + int max_gl_es1_version; + int max_gl_es2_version; + + __DRIscreen *driScrnPriv; + + bool no_hw; + + /* + * The hardware hiz and separate stencil fields are needed in intel_screen, + * rather than solely in intel_context, because glXCreatePbuffer and + * glXCreatePixmap are not passed a GLXContext. + */ + bool hw_has_separate_stencil; + bool hw_must_use_separate_stencil; + + bool hw_has_llc; + bool hw_has_swizzling; + + bool no_vbo; + dri_bufmgr *bufmgr; + + /** + * A unique ID for shader programs. + */ + unsigned program_id; + + /** + * Configuration cache with default values for all contexts + */ + driOptionCache optionCache; +}; + +extern void intelDestroyContext(__DRIcontext * driContextPriv); + +extern GLboolean intelUnbindContext(__DRIcontext * driContextPriv); + +extern GLboolean +intelMakeCurrent(__DRIcontext * driContextPriv, + __DRIdrawable * driDrawPriv, + __DRIdrawable * driReadPriv); + +double get_time(void); +void aub_dump_bmp(struct gl_context *ctx); + +#endif diff --git a/src/mesa/drivers/dri/i965/intel_state.c b/src/mesa/drivers/dri/i965/intel_state.c index 519672fc359..6a817cdf3f6 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_state.c +++ b/src/mesa/drivers/dri/i965/intel_state.c @@ -1 +1,195 @@ -../intel/intel_state.c
\ No newline at end of file +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "main/glheader.h" +#include "main/context.h" +#include "main/macros.h" +#include "main/enums.h" +#include "main/colormac.h" +#include "main/dd.h" + +#include "intel_screen.h" +#include "intel_context.h" + +int +intel_translate_shadow_compare_func(GLenum func) +{ + switch (func) { + case GL_NEVER: + return COMPAREFUNC_ALWAYS; + case GL_LESS: + return COMPAREFUNC_LEQUAL; + case GL_LEQUAL: + return COMPAREFUNC_LESS; + case GL_GREATER: + return COMPAREFUNC_GEQUAL; + case GL_GEQUAL: + return COMPAREFUNC_GREATER; + case GL_NOTEQUAL: + return COMPAREFUNC_EQUAL; + case GL_EQUAL: + return COMPAREFUNC_NOTEQUAL; + case GL_ALWAYS: + return COMPAREFUNC_NEVER; + } + + fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func); + return COMPAREFUNC_NEVER; +} + +int +intel_translate_compare_func(GLenum func) +{ + switch (func) { + case GL_NEVER: + return COMPAREFUNC_NEVER; + case GL_LESS: + return COMPAREFUNC_LESS; + case GL_LEQUAL: + return COMPAREFUNC_LEQUAL; + case GL_GREATER: + return COMPAREFUNC_GREATER; + case GL_GEQUAL: + return COMPAREFUNC_GEQUAL; + case GL_NOTEQUAL: + return COMPAREFUNC_NOTEQUAL; + case GL_EQUAL: + return COMPAREFUNC_EQUAL; + case GL_ALWAYS: + return COMPAREFUNC_ALWAYS; + } + + fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func); + return COMPAREFUNC_ALWAYS; +} + +int +intel_translate_stencil_op(GLenum op) +{ + switch (op) { + case GL_KEEP: + return STENCILOP_KEEP; + case GL_ZERO: + return STENCILOP_ZERO; + case GL_REPLACE: + return STENCILOP_REPLACE; + case GL_INCR: + return STENCILOP_INCRSAT; + case GL_DECR: + return STENCILOP_DECRSAT; + case GL_INCR_WRAP: + return STENCILOP_INCR; + case GL_DECR_WRAP: + return STENCILOP_DECR; + case GL_INVERT: + return STENCILOP_INVERT; + default: + return STENCILOP_ZERO; + } +} + +int +intel_translate_blend_factor(GLenum factor) +{ + switch (factor) { + case GL_ZERO: + return BLENDFACT_ZERO; + case 
GL_SRC_ALPHA: + return BLENDFACT_SRC_ALPHA; + case GL_ONE: + return BLENDFACT_ONE; + case GL_SRC_COLOR: + return BLENDFACT_SRC_COLR; + case GL_ONE_MINUS_SRC_COLOR: + return BLENDFACT_INV_SRC_COLR; + case GL_DST_COLOR: + return BLENDFACT_DST_COLR; + case GL_ONE_MINUS_DST_COLOR: + return BLENDFACT_INV_DST_COLR; + case GL_ONE_MINUS_SRC_ALPHA: + return BLENDFACT_INV_SRC_ALPHA; + case GL_DST_ALPHA: + return BLENDFACT_DST_ALPHA; + case GL_ONE_MINUS_DST_ALPHA: + return BLENDFACT_INV_DST_ALPHA; + case GL_SRC_ALPHA_SATURATE: + return BLENDFACT_SRC_ALPHA_SATURATE; + case GL_CONSTANT_COLOR: + return BLENDFACT_CONST_COLOR; + case GL_ONE_MINUS_CONSTANT_COLOR: + return BLENDFACT_INV_CONST_COLOR; + case GL_CONSTANT_ALPHA: + return BLENDFACT_CONST_ALPHA; + case GL_ONE_MINUS_CONSTANT_ALPHA: + return BLENDFACT_INV_CONST_ALPHA; + } + + fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, factor); + return BLENDFACT_ZERO; +} + +int +intel_translate_logic_op(GLenum opcode) +{ + switch (opcode) { + case GL_CLEAR: + return LOGICOP_CLEAR; + case GL_AND: + return LOGICOP_AND; + case GL_AND_REVERSE: + return LOGICOP_AND_RVRSE; + case GL_COPY: + return LOGICOP_COPY; + case GL_COPY_INVERTED: + return LOGICOP_COPY_INV; + case GL_AND_INVERTED: + return LOGICOP_AND_INV; + case GL_NOOP: + return LOGICOP_NOOP; + case GL_XOR: + return LOGICOP_XOR; + case GL_OR: + return LOGICOP_OR; + case GL_OR_INVERTED: + return LOGICOP_OR_INV; + case GL_NOR: + return LOGICOP_NOR; + case GL_EQUIV: + return LOGICOP_EQUIV; + case GL_INVERT: + return LOGICOP_INV; + case GL_OR_REVERSE: + return LOGICOP_OR_RVRSE; + case GL_NAND: + return LOGICOP_NAND; + case GL_SET: + return LOGICOP_SET; + default: + return LOGICOP_SET; + } +} diff --git a/src/mesa/drivers/dri/i965/intel_syncobj.c b/src/mesa/drivers/dri/i965/intel_syncobj.c index 0b2e56ab246..9657d9af959 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_syncobj.c +++ b/src/mesa/drivers/dri/i965/intel_syncobj.c @@ -1 +1,124 @@ -../intel/intel_syncobj.c
\ No newline at end of file +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +/** @file intel_syncobj.c + * + * Support for ARB_sync + * + * ARB_sync is implemented by flushing the current batchbuffer and keeping a + * reference on it. We can then check for completion or wait for completion + * using the normal buffer object mechanisms. This does mean that if an + * application is using many sync objects, it will emit small batchbuffers + * which may end up being a significant overhead. In other tests of removing + * gratuitous batchbuffer syncs in Mesa, it hasn't appeared to be a significant + * performance bottleneck, though. 
+ */
+
+#include "main/simple_list.h"
+#include "main/imports.h"
+
+#include "intel_context.h"
+#include "intel_batchbuffer.h"
+#include "intel_reg.h"
+
+/**
+ * Driver hook for NewSyncObject: allocate a zero-initialized
+ * intel_sync_object.  \c ctx and \c id are not used.
+ *
+ * \return the embedded gl_sync_object, or NULL if the allocation fails.
+ */
+static struct gl_sync_object *
+intel_new_sync_object(struct gl_context *ctx, GLuint id)
+{
+   struct intel_sync_object *sync;
+
+   sync = calloc(1, sizeof(struct intel_sync_object));
+   /* Do not form &sync->Base from a NULL pointer; report the failure. */
+   if (sync == NULL)
+      return NULL;
+
+   return &sync->Base;
+}
+
+static void
+intel_delete_sync_object(struct gl_context *ctx, struct gl_sync_object *s)
+{
+   struct intel_sync_object *sync = (struct intel_sync_object *)s;
+
+   drm_intel_bo_unreference(sync->bo);
+   free(sync);
+}
+
+static void
+intel_fence_sync(struct gl_context *ctx, struct gl_sync_object *s,
+                 GLenum condition, GLbitfield flags)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_sync_object *sync = (struct intel_sync_object *)s;
+
+   assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);
+   intel_batchbuffer_emit_mi_flush(intel);
+
+   sync->bo = intel->batch.bo;
+   drm_intel_bo_reference(sync->bo);
+
+   intel_flush(ctx);
+}
+
+static void intel_client_wait_sync(struct gl_context *ctx, struct gl_sync_object *s,
+                                   GLbitfield flags, GLuint64 timeout)
+{
+   struct intel_sync_object *sync = (struct intel_sync_object *)s;
+
+   if (sync->bo && drm_intel_gem_bo_wait(sync->bo, timeout) == 0) {
+      s->StatusFlag = 1;
+      drm_intel_bo_unreference(sync->bo);
+      sync->bo = NULL;
+   }
+}
+
+/* We have nothing to do for WaitSync.  Our GL command stream is sequential,
+ * so given that the sync object has already flushed the batchbuffer,
+ * any batchbuffers coming after this waitsync will naturally not occur until
+ * the previous one is done. 
+ */ +static void intel_server_wait_sync(struct gl_context *ctx, struct gl_sync_object *s, + GLbitfield flags, GLuint64 timeout) +{ +} + +static void intel_check_sync(struct gl_context *ctx, struct gl_sync_object *s) +{ + struct intel_sync_object *sync = (struct intel_sync_object *)s; + + if (sync->bo && !drm_intel_bo_busy(sync->bo)) { + drm_intel_bo_unreference(sync->bo); + sync->bo = NULL; + s->StatusFlag = 1; + } +} + +void intel_init_syncobj_functions(struct dd_function_table *functions) +{ + functions->NewSyncObject = intel_new_sync_object; + functions->DeleteSyncObject = intel_delete_sync_object; + functions->FenceSync = intel_fence_sync; + functions->CheckSync = intel_check_sync; + functions->ClientWaitSync = intel_client_wait_sync; + functions->ServerWaitSync = intel_server_wait_sync; +} diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c index d77ce749a3e..24f13dfee89 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_tex.c +++ b/src/mesa/drivers/dri/i965/intel_tex.c @@ -1 +1,189 @@ -../intel/intel_tex.c
\ No newline at end of file
+#include "swrast/swrast.h"
+#include "main/renderbuffer.h"
+#include "main/texobj.h"
+#include "main/teximage.h"
+#include "main/mipmap.h"
+#include "drivers/common/meta.h"
+#include "intel_context.h"
+#include "intel_mipmap_tree.h"
+#include "intel_tex.h"
+#include "intel_fbo.h"
+
+#define FILE_DEBUG_FLAG DEBUG_TEXTURE
+
+static struct gl_texture_image *
+intelNewTextureImage(struct gl_context * ctx)
+{
+   DBG("%s\n", __FUNCTION__);
+   (void) ctx;
+   return (struct gl_texture_image *) CALLOC_STRUCT(intel_texture_image);
+}
+
+static void
+intelDeleteTextureImage(struct gl_context * ctx, struct gl_texture_image *img)
+{
+   /* nothing special (yet) for intel_texture_image */
+   _mesa_delete_texture_image(ctx, img);
+}
+
+
+static struct gl_texture_object *
+intelNewTextureObject(struct gl_context * ctx, GLuint name, GLenum target)
+{
+   struct intel_texture_object *obj = CALLOC_STRUCT(intel_texture_object);
+
+   (void) ctx;
+
+   DBG("%s\n", __FUNCTION__);
+
+   if (obj == NULL)
+      return NULL;
+
+   _mesa_initialize_texture_object(&obj->base, name, target);
+
+   obj->needs_validate = true;
+
+   return &obj->base;
+}
+
+static void
+intelDeleteTextureObject(struct gl_context *ctx,
+                         struct gl_texture_object *texObj)
+{
+   struct intel_texture_object *intelObj = intel_texture_object(texObj);
+
+   intel_miptree_release(&intelObj->mt);
+   _mesa_delete_texture_object(ctx, texObj);
+}
+
+/**
+ * Driver hook for AllocTextureImageBuffer: give \c image miptree storage.
+ *
+ * Any storage the image already has is freed first.  If the texture
+ * object's current miptree matches the image it is shared; otherwise a new
+ * miptree is created for the image and also becomes the object's miptree.
+ * The object is flagged as needing validation on success.
+ *
+ * \return true on success, false if the sample count cannot be supported
+ *         or storage could not be set up.
+ */
+static GLboolean
+intel_alloc_texture_image_buffer(struct gl_context *ctx,
+                                 struct gl_texture_image *image)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_image *intel_image = intel_texture_image(image);
+   struct gl_texture_object *texobj = image->TexObject;
+   struct intel_texture_object *intel_texobj = intel_texture_object(texobj);
+
+   assert(image->Border == 0);
+
+   /* Quantize sample count */
+   if (image->NumSamples) {
+      image->NumSamples = intel_quantize_num_samples(intel->intelScreen, image->NumSamples);
+      if (!image->NumSamples)
+         return false;
+   }
+
+   /* Because the driver uses AllocTextureImageBuffer() internally, it may end
+    * up mismatched with FreeTextureImageBuffer(), but that is safe to call
+    * multiple times.
+    */
+   ctx->Driver.FreeTextureImageBuffer(ctx, image);
+
+   if (!_swrast_init_texture_image(image))
+      return false;
+
+   if (intel_texobj->mt &&
+       intel_miptree_match_image(intel_texobj->mt, image)) {
+      intel_miptree_reference(&intel_image->mt, intel_texobj->mt);
+      DBG("%s: alloc obj %p level %d %dx%dx%d using object's miptree %p\n",
+          __FUNCTION__, texobj, image->Level,
+          image->Width, image->Height, image->Depth, intel_texobj->mt);
+   } else {
+      intel_image->mt = intel_miptree_create_for_teximage(intel, intel_texobj,
+                                                          intel_image,
+                                                          false);
+      /* NOTE(review): miptree creation presumably yields NULL when storage
+       * cannot be allocated -- confirm against
+       * intel_miptree_create_for_teximage().  Report that as a failure
+       * rather than returning success for an image with no miptree, which
+       * the map path asserts on.
+       */
+      if (intel_image->mt == NULL)
+         return false;
+
+      /* Even if the object currently has a mipmap tree associated
+       * with it, this one is a more likely candidate to represent the
+       * whole object since our level didn't fit what was there
+       * before, and any lower levels would fit into our miptree.
+       */
+      intel_miptree_reference(&intel_texobj->mt, intel_image->mt);
+
+      DBG("%s: alloc obj %p level %d %dx%dx%d using new miptree %p\n",
+          __FUNCTION__, texobj, image->Level,
+          image->Width, image->Height, image->Depth, intel_image->mt);
+   }
+
+   intel_texobj->needs_validate = true;
+
+   return true;
+}
+
+static void
+intel_free_texture_image_buffer(struct gl_context * ctx,
+                                struct gl_texture_image *texImage)
+{
+   struct intel_texture_image *intelImage = intel_texture_image(texImage);
+
+   DBG("%s\n", __FUNCTION__);
+
+   intel_miptree_release(&intelImage->mt);
+
+   _swrast_free_texture_image_buffer(ctx, texImage);
+}
+
+/**
+ * Map texture memory/buffer into user space.
+ * Note: the region of interest parameters are ignored here. 
+ * \param mode bitmask of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT + * \param mapOut returns start of mapping of region of interest + * \param rowStrideOut returns row stride in bytes + */ +static void +intel_map_texture_image(struct gl_context *ctx, + struct gl_texture_image *tex_image, + GLuint slice, + GLuint x, GLuint y, GLuint w, GLuint h, + GLbitfield mode, + GLubyte **map, + GLint *stride) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_image *intel_image = intel_texture_image(tex_image); + struct intel_mipmap_tree *mt = intel_image->mt; + + /* Our texture data is always stored in a miptree. */ + assert(mt); + + /* Check that our caller wasn't confused about how to map a 1D texture. */ + assert(tex_image->TexObject->Target != GL_TEXTURE_1D_ARRAY || + h == 1); + + /* intel_miptree_map operates on a unified "slice" number that references the + * cube face, since it's all just slices to the miptree code. + */ + if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP) + slice = tex_image->Face; + + intel_miptree_map(intel, mt, tex_image->Level, slice, x, y, w, h, mode, + (void **)map, stride); +} + +static void +intel_unmap_texture_image(struct gl_context *ctx, + struct gl_texture_image *tex_image, GLuint slice) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_image *intel_image = intel_texture_image(tex_image); + struct intel_mipmap_tree *mt = intel_image->mt; + + if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP) + slice = tex_image->Face; + + intel_miptree_unmap(intel, mt, tex_image->Level, slice); +} + +void +intelInitTextureFuncs(struct dd_function_table *functions) +{ + functions->NewTextureObject = intelNewTextureObject; + functions->NewTextureImage = intelNewTextureImage; + functions->DeleteTextureImage = intelDeleteTextureImage; + functions->DeleteTexture = intelDeleteTextureObject; + functions->AllocTextureImageBuffer = intel_alloc_texture_image_buffer; + functions->FreeTextureImageBuffer = 
intel_free_texture_image_buffer; + functions->MapTextureImage = intel_map_texture_image; + functions->UnmapTextureImage = intel_unmap_texture_image; +} diff --git a/src/mesa/drivers/dri/i965/intel_tex.h b/src/mesa/drivers/dri/i965/intel_tex.h new file mode 100644 index 00000000000..a08fdf4aa71 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_tex.h @@ -0,0 +1,82 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef INTELTEX_INC +#define INTELTEX_INC + +#include "main/mtypes.h" +#include "main/formats.h" +#include "intel_context.h" + +struct intel_renderbuffer; + +void intelInitTextureFuncs(struct dd_function_table *functions); + +void intelInitTextureImageFuncs(struct dd_function_table *functions); + +void intelInitTextureSubImageFuncs(struct dd_function_table *functions); + +void intelInitTextureCopyImageFuncs(struct dd_function_table *functions); + +void intelSetTexBuffer(__DRIcontext *pDRICtx, + GLint target, __DRIdrawable *pDraw); +void intelSetTexBuffer2(__DRIcontext *pDRICtx, + GLint target, GLint format, __DRIdrawable *pDraw); + +struct intel_mipmap_tree * +intel_miptree_create_for_teximage(struct intel_context *intel, + struct intel_texture_object *intelObj, + struct intel_texture_image *intelImage, + bool expect_accelerated_upload); + +GLuint intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit); + +void intel_tex_map_level_images(struct intel_context *intel, + struct intel_texture_object *intelObj, + int level, + GLbitfield mode); + +void intel_tex_unmap_level_images(struct intel_context *intel, + struct intel_texture_object *intelObj, + int level); + +bool +intel_tex_image_s8z24_create_renderbuffers(struct intel_context *intel, + struct intel_texture_image *image); + +bool +intel_texsubimage_tiled_memcpy(struct gl_context *ctx, + GLuint dims, + struct gl_texture_image *texImage, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, + const GLvoid *pixels, + const struct gl_pixelstore_attrib *packing, + bool for_glTexImage); + +#endif diff --git a/src/mesa/drivers/dri/i965/intel_tex_copy.c b/src/mesa/drivers/dri/i965/intel_tex_copy.c index 87196c5d1ed..d018cece1cb 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_tex_copy.c +++ b/src/mesa/drivers/dri/i965/intel_tex_copy.c @@ -1 +1,132 @@ 
-../intel/intel_tex_copy.c
\ No newline at end of file +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "main/mtypes.h" +#include "main/enums.h" +#include "main/image.h" +#include "main/teximage.h" +#include "main/texstate.h" +#include "main/fbobject.h" + +#include "drivers/common/meta.h" + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_mipmap_tree.h" +#include "intel_regions.h" +#include "intel_fbo.h" +#include "intel_tex.h" +#include "intel_blit.h" +#ifndef I915 +#include "brw_context.h" +#endif + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE + + +static bool +intel_copy_texsubimage(struct intel_context *intel, + struct intel_texture_image *intelImage, + GLint dstx, GLint dsty, GLint slice, + struct intel_renderbuffer *irb, + GLint x, GLint y, GLsizei width, GLsizei height) +{ + const GLenum internalFormat = intelImage->base.Base.InternalFormat; + + intel_prepare_render(intel); + + /* glCopyTexSubImage() can be called on a multisampled renderbuffer (if + * that renderbuffer is associated with the window system framebuffer), + * however the hardware blitter can't handle this case, so fall back to + * meta (which can, since it uses ReadPixels). + */ + if (irb->Base.Base.NumSamples != 0) + return false; + + /* glCopyTexSubImage() can't be called on a multisampled texture. 
*/ + assert(intelImage->base.Base.NumSamples == 0); + + if (!intelImage->mt || !irb || !irb->mt) { + if (unlikely(INTEL_DEBUG & DEBUG_PERF)) + fprintf(stderr, "%s fail %p %p (0x%08x)\n", + __FUNCTION__, intelImage->mt, irb, internalFormat); + return false; + } + + /* blit from src buffer to texture */ + if (!intel_miptree_blit(intel, + irb->mt, irb->mt_level, irb->mt_layer, + x, y, irb->Base.Base.Name == 0, + intelImage->mt, intelImage->base.Base.Level, + intelImage->base.Base.Face + slice, + dstx, dsty, false, + width, height, GL_COPY)) { + return false; + } + + return true; +} + + +static void +intelCopyTexSubImage(struct gl_context *ctx, GLuint dims, + struct gl_texture_image *texImage, + GLint xoffset, GLint yoffset, GLint slice, + struct gl_renderbuffer *rb, + GLint x, GLint y, + GLsizei width, GLsizei height) +{ + struct intel_context *intel = intel_context(ctx); + +#ifndef I915 + /* Try BLORP first. It can handle almost everything. */ + if (brw_blorp_copytexsubimage(intel, rb, texImage, slice, x, y, + xoffset, yoffset, width, height)) + return; +#endif + + /* Next, try the BLT engine. */ + if (intel_copy_texsubimage(intel, + intel_texture_image(texImage), + xoffset, yoffset, slice, + intel_renderbuffer(rb), x, y, width, height)) { + return; + } + + /* Finally, fall back to meta. This will likely be slow. */ + perf_debug("%s - fallback to swrast\n", __FUNCTION__); + _mesa_meta_CopyTexSubImage(ctx, dims, texImage, + xoffset, yoffset, slice, + rb, x, y, width, height); +} + + +void +intelInitTextureCopyImageFuncs(struct dd_function_table *functions) +{ + functions->CopyTexSubImage = intelCopyTexSubImage; +} diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index 567abe4974e..b91b2b5dccb 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -1 +1,399 @@ -../intel/intel_tex_image.c
\ No newline at end of file + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/mtypes.h" +#include "main/enums.h" +#include "main/bufferobj.h" +#include "main/context.h" +#include "main/formats.h" +#include "main/image.h" +#include "main/pbo.h" +#include "main/renderbuffer.h" +#include "main/texcompress.h" +#include "main/texgetimage.h" +#include "main/texobj.h" +#include "main/teximage.h" +#include "main/texstore.h" + +#include "intel_context.h" +#include "intel_mipmap_tree.h" +#include "intel_buffer_objects.h" +#include "intel_batchbuffer.h" +#include "intel_tex.h" +#include "intel_blit.h" +#include "intel_fbo.h" + +#ifndef I915 +#include "brw_context.h" +#endif + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE + +/* Work back from the specified level of the image to the baselevel and create a + * miptree of that size. + */ +struct intel_mipmap_tree * +intel_miptree_create_for_teximage(struct intel_context *intel, + struct intel_texture_object *intelObj, + struct intel_texture_image *intelImage, + bool expect_accelerated_upload) +{ + GLuint firstLevel; + GLuint lastLevel; + int width, height, depth; + GLuint i; + + intel_miptree_get_dimensions_for_image(&intelImage->base.Base, + &width, &height, &depth); + + DBG("%s\n", __FUNCTION__); + + if (intelImage->base.Base.Level > intelObj->base.BaseLevel && + (width == 1 || + (intelObj->base.Target != GL_TEXTURE_1D && height == 1) || + (intelObj->base.Target == GL_TEXTURE_3D && depth == 1))) { + /* For this combination, we're at some lower mipmap level and + * some important dimension is 1. We can't extrapolate up to a + * likely base level width/height/depth for a full mipmap stack + * from this info, so just allocate this one level. + */ + firstLevel = intelImage->base.Base.Level; + lastLevel = intelImage->base.Base.Level; + } else { + /* If this image disrespects BaseLevel, allocate from level zero. + * Usually BaseLevel == 0, so it's unlikely to happen. 
+ */ + if (intelImage->base.Base.Level < intelObj->base.BaseLevel) + firstLevel = 0; + else + firstLevel = intelObj->base.BaseLevel; + + /* Figure out image dimensions at start level. */ + for (i = intelImage->base.Base.Level; i > firstLevel; i--) { + width <<= 1; + if (height != 1) + height <<= 1; + if (depth != 1) + depth <<= 1; + } + + /* Guess a reasonable value for lastLevel. This is probably going + * to be wrong fairly often and might mean that we have to look at + * resizable buffers, or require that buffers implement lazy + * pagetable arrangements. + */ + if ((intelObj->base.Sampler.MinFilter == GL_NEAREST || + intelObj->base.Sampler.MinFilter == GL_LINEAR) && + intelImage->base.Base.Level == firstLevel && + (intel->gen < 4 || firstLevel == 0)) { + lastLevel = firstLevel; + } else { + lastLevel = (firstLevel + + _mesa_get_tex_max_num_levels(intelObj->base.Target, + width, height, depth) - 1); + } + } + + return intel_miptree_create(intel, + intelObj->base.Target, + intelImage->base.Base.TexFormat, + firstLevel, + lastLevel, + width, + height, + depth, + expect_accelerated_upload, + intelImage->base.Base.NumSamples, + INTEL_MIPTREE_TILING_ANY); +} + +/* XXX: Do this for TexSubImage also: + */ +static bool +try_pbo_upload(struct gl_context *ctx, + struct gl_texture_image *image, + const struct gl_pixelstore_attrib *unpack, + GLenum format, GLenum type, const void *pixels) +{ + struct intel_texture_image *intelImage = intel_texture_image(image); + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj); + GLuint src_offset; + drm_intel_bo *src_buffer; + + if (!_mesa_is_bufferobj(unpack->BufferObj)) + return false; + + DBG("trying pbo upload\n"); + + if (intel->ctx._ImageTransferState || + unpack->SkipPixels || unpack->SkipRows) { + DBG("%s: image transfer\n", __FUNCTION__); + return false; + } + + ctx->Driver.AllocTextureImageBuffer(ctx, image); + + if (!intelImage->mt) { + DBG("%s: no 
miptree\n", __FUNCTION__); + return false; + } + + if (!_mesa_format_matches_format_and_type(intelImage->mt->format, + format, type, false)) { + DBG("%s: format mismatch (upload to %s with format 0x%x, type 0x%x)\n", + __FUNCTION__, _mesa_get_format_name(intelImage->mt->format), + format, type); + return false; + } + + if (image->TexObject->Target == GL_TEXTURE_1D_ARRAY || + image->TexObject->Target == GL_TEXTURE_2D_ARRAY) { + DBG("%s: no support for array textures\n", __FUNCTION__); + return false; + } + + src_buffer = intel_bufferobj_source(intel, pbo, 64, &src_offset); + /* note: potential 64-bit ptr to 32-bit int cast */ + src_offset += (GLuint) (unsigned long) pixels; + + int src_stride = + _mesa_image_row_stride(unpack, image->Width, format, type); + + struct intel_mipmap_tree *pbo_mt = + intel_miptree_create_for_bo(intel, + src_buffer, + intelImage->mt->format, + src_offset, + image->Width, image->Height, + src_stride, I915_TILING_NONE); + if (!pbo_mt) + return false; + + if (!intel_miptree_blit(intel, + pbo_mt, 0, 0, + 0, 0, false, + intelImage->mt, image->Level, image->Face, + 0, 0, false, + image->Width, image->Height, GL_COPY)) { + DBG("%s: blit failed\n", __FUNCTION__); + return false; + } + + intel_miptree_release(&pbo_mt); + + DBG("%s: success\n", __FUNCTION__); + return true; +} + +static void +intelTexImage(struct gl_context * ctx, + GLuint dims, + struct gl_texture_image *texImage, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *unpack) +{ + bool ok; + + DBG("%s target %s level %d %dx%dx%d\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(texImage->TexObject->Target), + texImage->Level, texImage->Width, texImage->Height, texImage->Depth); + + ok = intel_texsubimage_tiled_memcpy(ctx, dims, texImage, + 0, 0, 0, /*x,y,z offsets*/ + texImage->Width, + texImage->Height, + texImage->Depth, + format, type, pixels, unpack, + true /*for_glTexImage*/); + if (ok) + return; + + /* Attempt to use the blitter for PBO image 
uploads. + */ + if (dims <= 2 && + try_pbo_upload(ctx, texImage, unpack, format, type, pixels)) { + return; + } + + DBG("%s: upload image %dx%dx%d pixels %p\n", + __FUNCTION__, texImage->Width, texImage->Height, texImage->Depth, + pixels); + + _mesa_store_teximage(ctx, dims, texImage, + format, type, pixels, unpack); +} + + +/** + * Binds a region to a texture image, like it was uploaded by glTexImage2D(). + * + * Used for GLX_EXT_texture_from_pixmap and EGL image extensions, + */ +static void +intel_set_texture_image_region(struct gl_context *ctx, + struct gl_texture_image *image, + struct intel_region *region, + GLenum target, + GLenum internalFormat, + gl_format format, + uint32_t offset, + GLuint width, + GLuint height, + GLuint tile_x, + GLuint tile_y) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_image *intel_image = intel_texture_image(image); + struct gl_texture_object *texobj = image->TexObject; + struct intel_texture_object *intel_texobj = intel_texture_object(texobj); + bool has_surface_tile_offset = false; + uint32_t draw_x, draw_y; + + _mesa_init_teximage_fields(&intel->ctx, image, + width, height, 1, + 0, internalFormat, format); + + ctx->Driver.FreeTextureImageBuffer(ctx, image); + + intel_image->mt = intel_miptree_create_layout(intel, target, image->TexFormat, + 0, 0, + width, height, 1, + true, 0 /* num_samples */); + if (intel_image->mt == NULL) + return; + intel_region_reference(&intel_image->mt->region, region); + intel_image->mt->total_width = width; + intel_image->mt->total_height = height; + intel_image->mt->level[0].slice[0].x_offset = tile_x; + intel_image->mt->level[0].slice[0].y_offset = tile_y; + + intel_miptree_get_tile_offsets(intel_image->mt, 0, 0, &draw_x, &draw_y); +#ifndef I915 + has_surface_tile_offset = brw_context(ctx)->has_surface_tile_offset; +#endif + + /* From "OES_EGL_image" error reporting. 
We report GL_INVALID_OPERATION + * for EGL images from non-tile aligned sufaces in gen4 hw and earlier which has + * trouble resolving back to destination image due to alignment issues. + */ + if (!has_surface_tile_offset && + (draw_x != 0 || draw_y != 0)) { + _mesa_error(ctx, GL_INVALID_OPERATION, __func__); + intel_miptree_release(&intel_image->mt); + return; + } + + intel_texobj->needs_validate = true; + + intel_image->mt->offset = offset; + assert(region->pitch % region->cpp == 0); + intel_image->base.RowStride = region->pitch / region->cpp; + + /* Immediately validate the image to the object. */ + intel_miptree_reference(&intel_texobj->mt, intel_image->mt); +} + +void +intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, + GLint texture_format, + __DRIdrawable *dPriv) +{ + struct gl_framebuffer *fb = dPriv->driverPrivate; + struct intel_context *intel = pDRICtx->driverPrivate; + struct gl_context *ctx = &intel->ctx; + struct intel_texture_object *intelObj; + struct intel_renderbuffer *rb; + struct gl_texture_object *texObj; + struct gl_texture_image *texImage; + int level = 0, internalFormat = 0; + gl_format texFormat = MESA_FORMAT_NONE; + + texObj = _mesa_get_current_tex_object(ctx, target); + intelObj = intel_texture_object(texObj); + + if (!intelObj) + return; + + if (dPriv->lastStamp != dPriv->dri2.stamp || + !pDRICtx->driScreenPriv->dri2.useInvalidate) + intel_update_renderbuffers(pDRICtx, dPriv); + + rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); + /* If the region isn't set, then intel_update_renderbuffers was unable + * to get the buffers for the drawable. 
+ */ + if (!rb || !rb->mt) + return; + + if (rb->mt->cpp == 4) { + if (texture_format == __DRI_TEXTURE_FORMAT_RGB) { + internalFormat = GL_RGB; + texFormat = MESA_FORMAT_XRGB8888; + } + else { + internalFormat = GL_RGBA; + texFormat = MESA_FORMAT_ARGB8888; + } + } else if (rb->mt->cpp == 2) { + internalFormat = GL_RGB; + texFormat = MESA_FORMAT_RGB565; + } + + _mesa_lock_texture(&intel->ctx, texObj); + texImage = _mesa_get_tex_image(ctx, texObj, target, level); + intel_miptree_make_shareable(intel, rb->mt); + intel_set_texture_image_region(ctx, texImage, rb->mt->region, target, + internalFormat, texFormat, 0, + rb->mt->region->width, + rb->mt->region->height, + 0, 0); + _mesa_unlock_texture(&intel->ctx, texObj); +} + +void +intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) +{ + /* The old interface didn't have the format argument, so copy our + * implementation's behavior at the time. + */ + intelSetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv); +} + +static void +intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage, + GLeglImageOES image_handle) +{ + struct intel_context *intel = intel_context(ctx); + __DRIscreen *screen; + __DRIimage *image; + + screen = intel->intelScreen->driScrnPriv; + image = screen->dri2.image->lookupEGLImage(screen, image_handle, + screen->loaderPrivate); + if (image == NULL) + return; + + /* Disallow depth/stencil textures: we don't have a way to pass the + * separate stencil miptree of a GL_DEPTH_STENCIL texture through. 
+ */ + if (image->has_depthstencil) { + _mesa_error(ctx, GL_INVALID_OPERATION, __func__); + return; + } + + intel_set_texture_image_region(ctx, texImage, image->region, + target, image->internal_format, + image->format, image->offset, + image->width, image->height, + image->tile_x, image->tile_y); +} + +void +intelInitTextureImageFuncs(struct dd_function_table *functions) +{ + functions->TexImage = intelTexImage; + functions->EGLImageTargetTexture2D = intel_image_target_texture_2d; +} diff --git a/src/mesa/drivers/dri/i965/intel_tex_layout.c b/src/mesa/drivers/dri/i965/intel_tex_layout.c index fe61b441945..fbb6520e7a1 120000..100644 --- a/src/mesa/drivers/dri/i965/intel_tex_layout.c +++ b/src/mesa/drivers/dri/i965/intel_tex_layout.c @@ -1 +1,214 @@ -../intel/intel_tex_layout.c
\ No newline at end of file +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + * Michel Dänzer <[email protected]> + */ + +#include "intel_mipmap_tree.h" +#include "intel_tex_layout.h" +#include "intel_context.h" + +#include "main/image.h" +#include "main/macros.h" + +static unsigned int +intel_horizontal_texture_alignment_unit(struct intel_context *intel, + gl_format format) +{ + /** + * From the "Alignment Unit Size" section of various specs, namely: + * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4 + * - i965 and G45 PRMs: Volume 1, Section 6.17.3.4. 
+ * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4 + * - BSpec (for Ivybridge and slight variations in separate stencil) + * + * +----------------------------------------------------------------------+ + * | | alignment unit width ("i") | + * | Surface Property |-----------------------------| + * | | 915 | 965 | ILK | SNB | IVB | + * +----------------------------------------------------------------------+ + * | YUV 4:2:2 format | 8 | 4 | 4 | 4 | 4 | + * | BC1-5 compressed format (DXTn/S3TC) | 4 | 4 | 4 | 4 | 4 | + * | FXT1 compressed format | 8 | 8 | 8 | 8 | 8 | + * | Depth Buffer (16-bit) | 4 | 4 | 4 | 4 | 8 | + * | Depth Buffer (other) | 4 | 4 | 4 | 4 | 4 | + * | Separate Stencil Buffer | N/A | N/A | 8 | 8 | 8 | + * | All Others | 4 | 4 | 4 | 4 | 4 | + * +----------------------------------------------------------------------+ + * + * On IVB+, non-special cases can be overridden by setting the SURFACE_STATE + * "Surface Horizontal Alignment" field to HALIGN_4 or HALIGN_8. + */ + if (_mesa_is_format_compressed(format)) { + /* The hardware alignment requirements for compressed textures + * happen to match the block boundaries. + */ + unsigned int i, j; + _mesa_get_format_block_size(format, &i, &j); + return i; + } + + if (format == MESA_FORMAT_S8) + return 8; + + /* The depth alignment requirements in the table above are for rendering to + * depth miplevels using the LOD control fields. We don't use LOD control + * fields, and instead use page offsets plus intra-tile x/y offsets, which + * require that the low 3 bits are zero. To reduce the number of x/y + * offset workaround blits we do, align the X to 8, which depth texturing + * can handle (sadly, it can't handle 8 in the Y direction). 
+ */ + if (intel->gen >= 7 && + _mesa_get_format_base_format(format) == GL_DEPTH_COMPONENT) + return 8; + + return 4; +} + +static unsigned int +intel_vertical_texture_alignment_unit(struct intel_context *intel, + gl_format format) +{ + /** + * From the "Alignment Unit Size" section of various specs, namely: + * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4 + * - i965 and G45 PRMs: Volume 1, Section 6.17.3.4. + * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4 + * - BSpec (for Ivybridge and slight variations in separate stencil) + * + * +----------------------------------------------------------------------+ + * | | alignment unit height ("j") | + * | Surface Property |-----------------------------| + * | | 915 | 965 | ILK | SNB | IVB | + * +----------------------------------------------------------------------+ + * | BC1-5 compressed format (DXTn/S3TC) | 4 | 4 | 4 | 4 | 4 | + * | FXT1 compressed format | 4 | 4 | 4 | 4 | 4 | + * | Depth Buffer | 2 | 2 | 2 | 4 | 4 | + * | Separate Stencil Buffer | N/A | N/A | N/A | 4 | 8 | + * | Multisampled (4x or 8x) render target | N/A | N/A | N/A | 4 | 4 | + * | All Others | 2 | 2 | 2 | 2 | 2 | + * +----------------------------------------------------------------------+ + * + * On SNB+, non-special cases can be overridden by setting the SURFACE_STATE + * "Surface Vertical Alignment" field to VALIGN_2 or VALIGN_4. + * + * We currently don't support multisampling. + */ + if (_mesa_is_format_compressed(format)) + return 4; + + if (format == MESA_FORMAT_S8) + return intel->gen >= 7 ? 
8 : 4; + + GLenum base_format = _mesa_get_format_base_format(format); + + if (intel->gen >= 6 && + (base_format == GL_DEPTH_COMPONENT || + base_format == GL_DEPTH_STENCIL)) { + return 4; + } + + return 2; +} + +void +intel_get_texture_alignment_unit(struct intel_context *intel, + gl_format format, + unsigned int *w, unsigned int *h) +{ + *w = intel_horizontal_texture_alignment_unit(intel, format); + *h = intel_vertical_texture_alignment_unit(intel, format); +} + +void i945_miptree_layout_2d(struct intel_mipmap_tree *mt) +{ + GLuint level; + GLuint x = 0; + GLuint y = 0; + GLuint width = mt->physical_width0; + GLuint height = mt->physical_height0; + GLuint depth = mt->physical_depth0; /* number of array layers. */ + + mt->total_width = mt->physical_width0; + + if (mt->compressed) { + mt->total_width = ALIGN(mt->physical_width0, mt->align_w); + } + + /* May need to adjust width to accomodate the placement of + * the 2nd mipmap. This occurs when the alignment + * constraints of mipmap placement push the right edge of the + * 2nd mipmap out past the width of its parent. 
+ */ + if (mt->first_level != mt->last_level) { + GLuint mip1_width; + + if (mt->compressed) { + mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) + + ALIGN(minify(mt->physical_width0, 2), mt->align_w); + } else { + mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) + + minify(mt->physical_width0, 2); + } + + if (mip1_width > mt->total_width) { + mt->total_width = mip1_width; + } + } + + mt->total_height = 0; + + for ( level = mt->first_level ; level <= mt->last_level ; level++ ) { + GLuint img_height; + + intel_miptree_set_level_info(mt, level, x, y, width, + height, depth); + + img_height = ALIGN(height, mt->align_h); + if (mt->compressed) + img_height /= mt->align_h; + + /* Because the images are packed better, the final offset + * might not be the maximal one: + */ + mt->total_height = MAX2(mt->total_height, y + img_height); + + /* Layout_below: step right after second mipmap. + */ + if (level == mt->first_level + 1) { + x += ALIGN(width, mt->align_w); + } + else { + y += img_height; + } + + width = minify(width, 1); + height = minify(height, 1); + } +} diff --git a/src/mesa/drivers/dri/i965/intel_tex_layout.h b/src/mesa/drivers/dri/i965/intel_tex_layout.h new file mode 100644 index 00000000000..f353cf4eb43 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_tex_layout.h @@ -0,0 +1,40 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + * Michel Dänzer <[email protected]> + */ + +#include "main/macros.h" + +extern void i945_miptree_layout_2d(struct intel_mipmap_tree *mt); + +void +intel_get_texture_alignment_unit(struct intel_context *intel, + gl_format format, + unsigned int *w, unsigned int *h); diff --git a/src/mesa/drivers/dri/i965/intel_tex_obj.h b/src/mesa/drivers/dri/i965/intel_tex_obj.h new file mode 100644 index 00000000000..e30dd8ae7a3 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_tex_obj.h @@ -0,0 +1,84 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef _INTEL_TEX_OBJ_H
+#define _INTEL_TEX_OBJ_H
+
+#include "swrast/s_context.h"
+
+/**
+ * Driver-side texture object.
+ *
+ * The core gl_texture_object is embedded as the first member ("base"), which
+ * is what allows intel_texture_object() below to convert a
+ * gl_texture_object pointer into a pointer to this structure with a plain
+ * cast.
+ */
+struct intel_texture_object
+{
+   struct gl_texture_object base;
+
+   /* This is a mirror of base._MaxLevel, updated at validate time,
+    * except that we don't bother with the non-base levels for
+    * non-mipmapped textures.
+    */
+   unsigned int _MaxLevel;
+
+   /* On validation any active images held in main memory or in other
+    * regions will be copied to this region and the old storage freed.
+    */
+   struct intel_mipmap_tree *mt;
+
+   /**
+    * Set when mipmap trees in the texture images of this texture object
+    * might not all be the mipmap tree above.
+    */
+   bool needs_validate;
+};
+
+
+/**
+ * intel_texture_image is a subclass of swrast_texture_image because we
+ * sometimes fall back to using the swrast module for software rendering.
+ *
+ * The swrast image is embedded as the first member ("base"); "mt" is the
+ * only driver-specific addition.
+ */
+struct intel_texture_image
+{
+   struct swrast_texture_image base;
+
+   /* If intelImage->mt != NULL, image data is stored here.
+    * Else if intelImage->base.Buffer != NULL, image is stored there.
+    * Else there is no image data.
+    */
+   struct intel_mipmap_tree *mt;
+};
+/**
+ * Downcast a core texture object to the driver's intel_texture_object.
+ *
+ * This is a bare pointer cast with no checking; it is only meaningful when
+ * \p obj really is the "base" member of an intel_texture_object.
+ */
+static INLINE struct intel_texture_object *
+intel_texture_object(struct gl_texture_object *obj)
+{
+   return (struct intel_texture_object *) obj;
+}
+/**
+ * Downcast a core texture image to the driver's intel_texture_image.
+ *
+ * This is a bare pointer cast with no checking.
+ * NOTE(review): presumably relies on swrast_texture_image beginning with its
+ * gl_texture_image, so that all three structures share one address --
+ * confirm against swrast/s_context.h.
+ */
+static INLINE struct intel_texture_image *
+intel_texture_image(struct gl_texture_image *img)
+{
+   return (struct intel_texture_image *) img;
+}
+
+#endif /* _INTEL_TEX_OBJ_H */
diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
index b3a8a3d7ca7..f936e9b17e6 120000..100644
--- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -1 +1,335 @@
-../intel/intel_tex_subimage.c
\ No newline at end of file + +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ **************************************************************************/
+
+#include "main/bufferobj.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/pbo.h"
+#include "main/texobj.h"
+#include "main/texstore.h"
+#include "main/texcompress.h"
+#include "main/enums.h"
+
+#include "intel_batchbuffer.h"
+#include "intel_context.h"
+#include "intel_tex.h"
+#include "intel_mipmap_tree.h"
+#include "intel_blit.h"
+
+#define FILE_DEBUG_FLAG DEBUG_TEXTURE
+
+/**
+ * \brief Upload a 2D subimage through a temporary miptree and a blit.
+ *
+ * The path is only taken when all of the following hold: the image has a
+ * miptree, that miptree is not Y-tiled, the target is GL_TEXTURE_2D, the
+ * hardware generation is below 6 and the destination buffer object is
+ * currently busy.  The pixels are converted with _mesa_texstore() into a
+ * freshly created miptree (requested with INTEL_MIPTREE_TILING_NONE) and
+ * then copied into place with intel_miptree_blit().
+ *
+ * \return true if the subimage was uploaded by the blit; false if the path
+ *         is not applicable or failed, in which case the caller is expected
+ *         to upload the data some other way.  Allocation, mapping and
+ *         texstore failures additionally raise GL_OUT_OF_MEMORY.
+ */
+static bool
+intel_blit_texsubimage(struct gl_context * ctx,
+                       struct gl_texture_image *texImage,
+                       GLint xoffset, GLint yoffset,
+                       GLint width, GLint height,
+                       GLenum format, GLenum type, const void *pixels,
+                       const struct gl_pixelstore_attrib *packing)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_image *intelImage = intel_texture_image(texImage);
+
+   /* Declared up front so that "goto err" never jumps over an initialized
+    * declaration.
+    */
+   struct intel_mipmap_tree *temp_mt = NULL;
+   GLubyte *dst;
+   bool stored;
+   bool ret;
+
+   /* Try to do a blit upload of the subimage if the texture is
+    * currently busy.
+    */
+   if (!intelImage->mt)
+      return false;
+
+   /* The blitter can't handle Y tiling */
+   if (intelImage->mt->region->tiling == I915_TILING_Y)
+      return false;
+
+   if (texImage->TexObject->Target != GL_TEXTURE_2D)
+      return false;
+
+   /* On gen6, it's probably not worth swapping to the blit ring to do
+    * this because of all the overhead involved.
+    */
+   if (intel->gen >= 6)
+      return false;
+
+   if (!drm_intel_bo_busy(intelImage->mt->region->bo))
+      return false;
+
+   DBG("BLT subimage %s target %s level %d offset %d,%d %dx%d\n",
+       __FUNCTION__,
+       _mesa_lookup_enum_by_nr(texImage->TexObject->Target),
+       texImage->Level, xoffset, yoffset, width, height);
+
+   pixels = _mesa_validate_pbo_teximage(ctx, 2, width, height, 1,
+                                        format, type, pixels, packing,
+                                        "glTexSubImage");
+   if (!pixels)
+      return false;
+
+   temp_mt = intel_miptree_create(intel, GL_TEXTURE_2D, texImage->TexFormat,
+                                  0, 0,
+                                  width, height, 1,
+                                  false, 0, INTEL_MIPTREE_TILING_NONE);
+   if (!temp_mt)
+      goto err;
+
+   dst = intel_miptree_map_raw(intel, temp_mt);
+   if (!dst)
+      goto err;
+
+   stored = _mesa_texstore(ctx, 2, texImage->_BaseFormat,
+                           texImage->TexFormat,
+                           temp_mt->region->pitch,
+                           &dst,
+                           width, height, 1,
+                           format, type, pixels, packing);
+
+   intel_miptree_unmap_raw(intel, temp_mt);
+
+   /* If the conversion failed, the temporary miptree does not hold the
+    * caller's pixels.  Blitting it would overwrite the destination with
+    * garbage and report success, so take the common failure path instead;
+    * it raises GL_OUT_OF_MEMORY exactly once.
+    */
+   if (!stored)
+      goto err;
+
+   ret = intel_miptree_blit(intel,
+                            temp_mt, 0, 0,
+                            0, 0, false,
+                            intelImage->mt, texImage->Level, texImage->Face,
+                            xoffset, yoffset, false,
+                            width, height, GL_COPY);
+   assert(ret);
+
+   intel_miptree_release(&temp_mt);
+   _mesa_unmap_teximage_pbo(ctx, packing);
+
+   return ret;
+
+err:
+   _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
+   intel_miptree_release(&temp_mt);
+   _mesa_unmap_teximage_pbo(ctx, packing);
+   return false;
+}
+
+/**
+ * \brief A fast path for glTexImage and glTexSubImage.
+ *
+ * \param for_glTexImage Was this called from glTexImage or glTexSubImage?
+ *                       When true, the image storage is allocated through
+ *                       ctx->Driver.AllocTextureImageBuffer() before the
+ *                       tiling check.
+ *
+ * \return true if the pixels were copied by this function; false if the
+ *         fast path does not apply or the buffer object could not be mapped,
+ *         in which case nothing has been copied.
+ *
+ * The dims, zoffset and depth parameters are accepted for signature
+ * compatibility but are not read; the function bails out unless the target
+ * is GL_TEXTURE_2D.
+ *
+ * This fast path is taken when the hardware natively supports the texture
+ * format (such as GL_BGRA) and when the texture memory is X-tiled. It uploads
+ * the texture data by mapping the texture memory without a GTT fence, thus
+ * acquiring a tiled view of the memory, and then memcpy'ing successive
+ * subspans within each tile.
+ *
+ * This is a performance win over the conventional texture upload path because
+ * it avoids the performance penalty of writing through the write-combine
+ * buffer. In the conventional texture upload path,
+ * texstore.c:store_texsubimage(), the texture memory is mapped through a GTT
+ * fence, thus acquiring a linear view of the memory, then each row in the
+ * image is memcpy'd. In this fast path, we replace each row's memcpy with
+ * a sequence of memcpy's over each bit6 swizzle span in the row.
+ *
+ * This fast path's use case is Google Chrome's paint rectangles. Chrome (as
+ * of version 21) renders each page as a tiling of 256x256 GL_BGRA textures.
+ * Each page's content is initially uploaded with glTexImage2D and damaged
+ * regions are updated with glTexSubImage2D. On some workloads, the
+ * performance gain of this fastpath on Sandybridge is over 5x.
+ */
+bool
+intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
+                               GLuint dims,
+                               struct gl_texture_image *texImage,
+                               GLint xoffset, GLint yoffset, GLint zoffset,
+                               GLsizei width, GLsizei height, GLsizei depth,
+                               GLenum format, GLenum type,
+                               const GLvoid *pixels,
+                               const struct gl_pixelstore_attrib *packing,
+                               bool for_glTexImage)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_image *image = intel_texture_image(texImage);
+
+   /* The miptree's buffer. */
+   drm_intel_bo *bo;
+
+   int error = 0;
+
+   /* This fastpath is restricted to a specific texture type: level 0 of
+    * a 2D BGRA texture. It could be generalized to support more types by
+    * varying the arithmetic loop below.
+    *
+    * The packing restrictions guarantee that the source rows are tightly
+    * packed, which the copy loop relies on: it walks "pixels" linearly.
+    */
+   if (!intel->has_llc ||
+       format != GL_BGRA ||
+       type != GL_UNSIGNED_BYTE ||
+       texImage->TexFormat != MESA_FORMAT_ARGB8888 ||
+       texImage->TexObject->Target != GL_TEXTURE_2D ||
+       texImage->Level != 0 ||
+       pixels == NULL ||
+       _mesa_is_bufferobj(packing->BufferObj) ||
+       packing->Alignment > 4 ||
+       packing->SkipPixels > 0 ||
+       packing->SkipRows > 0 ||
+       (packing->RowLength != 0 && packing->RowLength != width) ||
+       packing->SwapBytes ||
+       packing->LsbFirst ||
+       packing->Invert)
+      return false;
+
+   if (for_glTexImage)
+      ctx->Driver.AllocTextureImageBuffer(ctx, texImage);
+
+   if (!image->mt ||
+       image->mt->region->tiling != I915_TILING_X) {
+      /* The algorithm below is written only for X-tiled memory. */
+      return false;
+   }
+
+   /* Since we are going to write raw data to the miptree, we need to resolve
+    * any pending fast color clears before we start.
+    */
+   intel_miptree_resolve_color(intel, image->mt);
+
+   bo = image->mt->region->bo;
+
+   if (drm_intel_bo_references(intel->batch.bo, bo)) {
+      perf_debug("Flushing before mapping a referenced bo.\n");
+      intel_batchbuffer_flush(intel);
+   }
+
+   if (unlikely(intel->perf_debug)) {
+      if (drm_intel_bo_busy(bo)) {
+         perf_debug("Mapping a busy BO, causing a stall on the GPU.\n");
+      }
+   }
+
+   error = drm_intel_bo_map(bo, true /*write_enable*/);
+   if (error || bo->virtual == NULL) {
+      DBG("%s: failed to map bo\n", __FUNCTION__);
+      return false;
+   }
+
+   /* We postponed printing this message until having committed to executing
+    * the function.
+    */
+   DBG("%s: level=%d offset=(%d,%d) (w,h)=(%d,%d)\n",
+       __FUNCTION__, texImage->Level, xoffset, yoffset, width, height);
+
+   /* In the tiling algorithm below, some variables are in units of pixels,
+    * others are in units of bytes, and others (such as height) are unitless.
+    * Each variable name is suffixed with its units.
+    */
+
+   const uint32_t x_max_pixels = xoffset + width;
+   const uint32_t y_max_pixels = yoffset + height;
+
+   const uint32_t tile_size_bytes = 4096;
+
+   const uint32_t tile_width_bytes = 512;
+   const uint32_t tile_width_pixels = 128;
+
+   const uint32_t tile_height = 8;
+
+   const uint32_t cpp = 4; /* chars per pixel of GL_BGRA */
+   const uint32_t swizzle_width_pixels = 16;
+
+   const uint32_t stride_bytes = image->mt->region->pitch;
+   const uint32_t width_tiles = stride_bytes / tile_width_bytes;
+
+   for (uint32_t y_pixels = yoffset; y_pixels < y_max_pixels; ++y_pixels) {
+      /* Offset of the start of this row: whole tile rows above it, plus the
+       * row's position inside its tile.
+       */
+      const uint32_t y_offset_bytes = (y_pixels / tile_height) * width_tiles * tile_size_bytes
+                                    + (y_pixels % tile_height) * tile_width_bytes;
+
+      for (uint32_t x_pixels = xoffset; x_pixels < x_max_pixels; x_pixels += swizzle_width_pixels) {
+         const uint32_t x_offset_bytes = (x_pixels / tile_width_pixels) * tile_size_bytes
+                                       + (x_pixels % tile_width_pixels) * cpp;
+
+         intptr_t offset_bytes = y_offset_bytes + x_offset_bytes;
+         if (intel->has_swizzling) {
+#if 0
+            /* Clear, unoptimized version. */
+            bool bit6 = (offset_bytes >> 6) & 1;
+            bool bit9 = (offset_bytes >> 9) & 1;
+            bool bit10 = (offset_bytes >> 10) & 1;
+
+            if (bit9 ^ bit10)
+               offset_bytes ^= (1 << 6);
+#else
+            /* Optimized, obfuscated version: shifting by 3 and by 4 moves
+             * bit 9 and bit 10 onto bit 6, so their XOR toggles bit 6.
+             */
+            offset_bytes ^= ((offset_bytes >> 3) ^ (offset_bytes >> 4))
+                          & (1 << 6);
+#endif
+         }
+
+         /* Copy up to the end of the current swizzle span, or to the end of
+          * the row, whichever comes first.
+          */
+         const uint32_t swizzle_bound_pixels = ALIGN(x_pixels + 1, swizzle_width_pixels);
+         const uint32_t memcpy_bound_pixels = MIN2(x_max_pixels, swizzle_bound_pixels);
+         const uint32_t copy_size = cpp * (memcpy_bound_pixels - x_pixels);
+
+         memcpy(bo->virtual + offset_bytes, pixels, copy_size);
+         pixels += copy_size;
+
+         /* Round down to the span start so the loop increment lands on the
+          * start of the next span even when xoffset is unaligned.
+          */
+         x_pixels -= (x_pixels % swizzle_width_pixels);
+      }
+   }
+
+   drm_intel_bo_unmap(bo);
+   return true;
+}
+
+/**
+ * \brief Driver hook for glTexSubImage.
+ *
+ * Tries the tiled-memcpy fast path first, then (for 2D uploads only) the
+ * blit path, and finally falls back to _mesa_store_texsubimage().
+ */
+static void
+intelTexSubImage(struct gl_context * ctx,
+                 GLuint dims,
+                 struct gl_texture_image *texImage,
+                 GLint xoffset, GLint yoffset, GLint zoffset,
+                 GLsizei width, GLsizei height, GLsizei depth,
+                 GLenum format, GLenum type,
+                 const GLvoid * pixels,
+                 const struct gl_pixelstore_attrib *packing)
+{
+   bool ok;
+
+   ok = intel_texsubimage_tiled_memcpy(ctx, dims, texImage,
+                                       xoffset, yoffset, zoffset,
+                                       width, height, depth,
+                                       format, type, pixels, packing,
+                                       false /*for_glTexImage*/);
+   if (ok)
+      return;
+
+   /* The intel_blit_texsubimage() function only handles 2D images */
+   if (dims != 2 || !intel_blit_texsubimage(ctx, texImage,
+                                            xoffset, yoffset,
+                                            width, height,
+                                            format, type, pixels, packing)) {
+      _mesa_store_texsubimage(ctx, dims, texImage,
+                              xoffset, yoffset, zoffset,
+                              width, height, depth,
+                              format, type, pixels, packing);
+   }
+}
+
+/**
+ * Install this file's TexSubImage implementation in the driver function
+ * table.
+ */
+void
+intelInitTextureSubImageFuncs(struct dd_function_table *functions)
+{
+   functions->TexSubImage = intelTexSubImage;
+}
diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c
index 41a75674c27..a8a8647eb3c 120000..100644
--- a/src/mesa/drivers/dri/i965/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c
@@ -1 +1,141 @@
-../intel/intel_tex_validate.c
\ No newline at end of file
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/samplerobj.h"
+#include "main/texobj.h"
+
+#include "intel_context.h"
+#include "intel_mipmap_tree.h"
+#include "intel_blit.h"
+#include "intel_tex.h"
+#include "intel_tex_layout.h"
+
+#define FILE_DEBUG_FLAG DEBUG_TEXTURE
+
+/**
+ * When validating, we only care about the texture images that could
+ * be seen, so for non-mipmapped modes we want to ignore everything
+ * but BaseLevel.
+ *
+ * With a GL_NEAREST or GL_LINEAR minification filter the maximum level is
+ * taken to be the object's BaseLevel; otherwise it is the core object's
+ * _MaxLevel.  If the result differs from the cached intelObj->_MaxLevel, the
+ * cache is updated and the object is flagged as needing validation.
+ */
+static void
+intel_update_max_level(struct intel_texture_object *intelObj,
+                       struct gl_sampler_object *sampler)
+{
+   struct gl_texture_object *tObj = &intelObj->base;
+   int maxlevel;
+
+   if (sampler->MinFilter == GL_NEAREST ||
+       sampler->MinFilter == GL_LINEAR) {
+      maxlevel = tObj->BaseLevel;
+   } else {
+      maxlevel = tObj->_MaxLevel;
+   }
+
+   if (intelObj->_MaxLevel != maxlevel) {
+      intelObj->_MaxLevel = maxlevel;
+      intelObj->needs_validate = true;
+   }
+}
+
+/*
+ * Make sure the texture currently bound to texture unit "unit" has a single
+ * miptree holding levels BaseLevel.._MaxLevel, and that every image in that
+ * range lives in it.
+ *
+ * Buffer textures (GL_TEXTURE_BUFFER) and objects that are not flagged
+ * needs_validate return immediately.  Otherwise an existing tree that does
+ * not match the first image, does not start at BaseLevel, or ends before
+ * _MaxLevel is released; a new tree is created if there is none; and every
+ * image whose miptree is not the object's tree is copied into it with
+ * intel_miptree_copy_teximage().
+ *
+ * Returns true on success (including the early-outs) and false only when a
+ * new miptree could not be created.  The declared return type is GLuint
+ * even though the values returned are true/false.
+ */
+GLuint
+intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
+{
+   struct gl_context *ctx = &intel->ctx;
+   struct gl_texture_object *tObj = intel->ctx.Texture.Unit[unit]._Current;
+   struct intel_texture_object *intelObj = intel_texture_object(tObj);
+   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
+   GLuint face, i;
+   GLuint nr_faces = 0;
+   struct intel_texture_image *firstImage;
+   int width, height, depth;
+
+   /* TBOs require no validation -- they always just point to their BO. */
+   if (tObj->Target == GL_TEXTURE_BUFFER)
+      return true;
+
+   /* We know/require this is true by now:
+    */
+   assert(intelObj->base._BaseComplete);
+
+   /* What levels must the tree include at a minimum?
+    */
+   intel_update_max_level(intelObj, sampler);
+   if (intelObj->mt && intelObj->mt->first_level != tObj->BaseLevel)
+      intelObj->needs_validate = true;
+
+   if (!intelObj->needs_validate)
+      return true;
+
+   firstImage = intel_texture_image(tObj->Image[0][tObj->BaseLevel]);
+
+   /* Check tree can hold all active levels.  Check tree matches
+    * target, imageFormat, etc.
+    *
+    * For pre-gen4, we have to match first_level == tObj->BaseLevel,
+    * because we don't have the control that gen4 does to make min/mag
+    * determination happen at a nonzero (hardware) baselevel.  Because
+    * of that, we just always relayout on baselevel change.
+    */
+   if (intelObj->mt &&
+       (!intel_miptree_match_image(intelObj->mt, &firstImage->base.Base) ||
+        intelObj->mt->first_level != tObj->BaseLevel ||
+        intelObj->mt->last_level < intelObj->_MaxLevel)) {
+      intel_miptree_release(&intelObj->mt);
+   }
+
+
+   /* May need to create a new tree:
+    */
+   if (!intelObj->mt) {
+      intel_miptree_get_dimensions_for_image(&firstImage->base.Base,
+                                             &width, &height, &depth);
+
+      perf_debug("Creating new %s %dx%dx%d %d..%d miptree to handle finalized "
+                 "texture miptree.\n",
+                 _mesa_get_format_name(firstImage->base.Base.TexFormat),
+                 width, height, depth, tObj->BaseLevel, intelObj->_MaxLevel);
+
+      intelObj->mt = intel_miptree_create(intel,
+                                          intelObj->base.Target,
+                                          firstImage->base.Base.TexFormat,
+                                          tObj->BaseLevel,
+                                          intelObj->_MaxLevel,
+                                          width,
+                                          height,
+                                          depth,
+                                          true,
+                                          0 /* num_samples */,
+                                          INTEL_MIPTREE_TILING_ANY);
+      if (!intelObj->mt)
+         return false;
+   }
+
+   /* Pull in any images not in the object's tree:
+    */
+   nr_faces = _mesa_num_tex_faces(intelObj->base.Target);
+   for (face = 0; face < nr_faces; face++) {
+      for (i = tObj->BaseLevel; i <= intelObj->_MaxLevel; i++) {
+         struct intel_texture_image *intelImage =
+            intel_texture_image(intelObj->base.Image[face][i]);
+         /* skip too small size mipmap: a missing image ends the scan of
+          * this face's remaining levels
+          */
+         if (intelImage == NULL)
+            break;
+
+         if (intelObj->mt != intelImage->mt) {
+            intel_miptree_copy_teximage(intel, intelImage, intelObj->mt,
+                                        false /* invalidate */);
+         }
+
+         /* After we're done, we'd better agree that our layout is
+          * appropriate, or we'll end up hitting this function again on the
+          * next draw
+          */
+         assert(intel_miptree_match_image(intelObj->mt, &intelImage->base.Base));
+      }
+   }
+
+   intelObj->needs_validate = false;
+
+   return true;
+}
|