diff options
Diffstat (limited to 'src/mesa')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_bufmgr.h | 341 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_bufmgr.c | 374 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_bufmgr_gem.c | 3819 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_bufmgr_priv.h | 325 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_chipset.h | 469 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/libdrm_lists.h | 118 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/libdrm_macros.h | 87 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/uthash.h | 1074 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/xf86atomic.h | 117 |
9 files changed, 6724 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h b/src/mesa/drivers/dri/i965/brw_bufmgr.h new file mode 100644 index 00000000000..693472a527f --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h @@ -0,0 +1,341 @@ +/* + * Copyright © 2008-2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +/** + * @file intel_bufmgr.h + * + * Public definitions of Intel-specific bufmgr functions. + */ + +#ifndef INTEL_BUFMGR_H +#define INTEL_BUFMGR_H + +#include <stdio.h> +#include <stdint.h> +#include <stdio.h> + +#if defined(__cplusplus) +extern "C" { +#endif + +struct drm_clip_rect; + +typedef struct _drm_intel_bufmgr drm_intel_bufmgr; +typedef struct _drm_intel_context drm_intel_context; +typedef struct _drm_intel_bo drm_intel_bo; + +struct _drm_intel_bo { + /** + * Size in bytes of the buffer object. + * + * The size may be larger than the size originally requested for the + * allocation, such as being aligned to page size. + */ + unsigned long size; + + /** + * Alignment requirement for object + * + * Used for GTT mapping & pinning the object. + */ + unsigned long align; + + /** + * Deprecated field containing (possibly the low 32-bits of) the last + * seen virtual card address. Use offset64 instead. + */ + unsigned long offset; + + /** + * Virtual address for accessing the buffer data. Only valid while + * mapped. + */ +#ifdef __cplusplus + void *virt; +#else + void *virtual; +#endif + + /** Buffer manager context associated with this buffer object */ + drm_intel_bufmgr *bufmgr; + + /** + * MM-specific handle for accessing object + */ + int handle; + + /** + * Last seen card virtual address (offset from the beginning of the + * aperture) for the object. This should be used to fill relocation + * entries when calling drm_intel_bo_emit_reloc() + */ + uint64_t offset64; +}; + +enum aub_dump_bmp_format { + AUB_DUMP_BMP_FORMAT_8BIT = 1, + AUB_DUMP_BMP_FORMAT_ARGB_4444 = 4, + AUB_DUMP_BMP_FORMAT_ARGB_0888 = 6, + AUB_DUMP_BMP_FORMAT_ARGB_8888 = 7, +}; + +typedef struct _drm_intel_aub_annotation { + uint32_t type; + uint32_t subtype; + uint32_t ending_offset; +} drm_intel_aub_annotation; + +#define BO_ALLOC_FOR_RENDER (1<<0) + +drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name, + unsigned long size, unsigned int alignment); +drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, + const char *name, + unsigned long size, + unsigned int alignment); +drm_intel_bo *drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, + const char *name, + void *addr, uint32_t tiling_mode, + uint32_t stride, unsigned long size, + unsigned long flags); +drm_intel_bo *drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, + const char *name, + int x, int y, int cpp, + uint32_t *tiling_mode, + unsigned long *pitch, + unsigned long flags); +void drm_intel_bo_reference(drm_intel_bo *bo); +void drm_intel_bo_unreference(drm_intel_bo *bo); +int drm_intel_bo_map(drm_intel_bo *bo, int write_enable); +int drm_intel_bo_unmap(drm_intel_bo *bo); + +int drm_intel_bo_subdata(drm_intel_bo *bo, unsigned long offset, + unsigned long size, const void *data); +int drm_intel_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, + unsigned long size, void *data); +void drm_intel_bo_wait_rendering(drm_intel_bo *bo); + +void drm_intel_bufmgr_set_debug(drm_intel_bufmgr *bufmgr, int enable_debug); +void drm_intel_bufmgr_destroy(drm_intel_bufmgr *bufmgr); +int drm_intel_bo_exec(drm_intel_bo *bo, int used, + struct drm_clip_rect *cliprects, int num_cliprects, int DR4); +int drm_intel_bo_mrb_exec(drm_intel_bo *bo, int used, + struct drm_clip_rect *cliprects, int num_cliprects, int DR4, + unsigned int flags); +int drm_intel_bufmgr_check_aperture_space(drm_intel_bo ** bo_array, int count); + +int drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain); +int drm_intel_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, + uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain); +int drm_intel_bo_pin(drm_intel_bo *bo, uint32_t alignment); +int drm_intel_bo_unpin(drm_intel_bo *bo); +int drm_intel_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, + uint32_t stride); +int drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, + uint32_t * swizzle_mode); +int drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name); +int drm_intel_bo_busy(drm_intel_bo *bo); +int drm_intel_bo_madvise(drm_intel_bo *bo, int madv); +int drm_intel_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable); +int drm_intel_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset); + +int drm_intel_bo_disable_reuse(drm_intel_bo *bo); +int drm_intel_bo_is_reusable(drm_intel_bo *bo); +int drm_intel_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo); + +/* drm_intel_bufmgr_gem.c */ +drm_intel_bufmgr *drm_intel_bufmgr_gem_init(int fd, int batch_size); +drm_intel_bo *drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, + const char *name, + unsigned int handle); +void drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr); +void drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr); +void drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, + int limit); +int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo); +int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo); +int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo); + +#define HAVE_DRM_INTEL_GEM_BO_DISABLE_IMPLICIT_SYNC 1 +int drm_intel_bufmgr_gem_can_disable_implicit_sync(drm_intel_bufmgr *bufmgr); +void drm_intel_gem_bo_disable_implicit_sync(drm_intel_bo *bo); +void drm_intel_gem_bo_enable_implicit_sync(drm_intel_bo *bo); + +void *drm_intel_gem_bo_map__cpu(drm_intel_bo *bo); +void *drm_intel_gem_bo_map__gtt(drm_intel_bo *bo); +void *drm_intel_gem_bo_map__wc(drm_intel_bo *bo); + +int drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo); +void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start); +void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable); + +void +drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr, + const char *filename); +void drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable); +void drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, + int x1, int y1, int width, int height, + enum aub_dump_bmp_format format, + int pitch, int offset); +void +drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo, + drm_intel_aub_annotation *annotations, + unsigned count); + +int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id); + +int drm_intel_get_aperture_sizes(int fd, size_t *mappable, size_t *total); +int drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr); +int drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns); + +drm_intel_context *drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr); +int drm_intel_gem_context_get_id(drm_intel_context *ctx, + uint32_t *ctx_id); +void drm_intel_gem_context_destroy(drm_intel_context *ctx); +int drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, + int used, unsigned int flags); +int drm_intel_gem_bo_fence_exec(drm_intel_bo *bo, + drm_intel_context *ctx, + int used, + int in_fence, + int *out_fence, + unsigned int flags); + +int drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd); +drm_intel_bo *drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, + int prime_fd, int size); + +/* drm_intel_bufmgr_fake.c */ +drm_intel_bufmgr *drm_intel_bufmgr_fake_init(int fd, + unsigned long low_offset, + void *low_virtual, + unsigned long size, + volatile unsigned int + *last_dispatch); +void drm_intel_bufmgr_fake_set_last_dispatch(drm_intel_bufmgr *bufmgr, + volatile unsigned int + *last_dispatch); +void drm_intel_bufmgr_fake_set_exec_callback(drm_intel_bufmgr *bufmgr, + int (*exec) (drm_intel_bo *bo, + unsigned int used, + void *priv), + void *priv); +void drm_intel_bufmgr_fake_set_fence_callback(drm_intel_bufmgr *bufmgr, + unsigned int (*emit) (void *priv), + void (*wait) (unsigned int fence, + void *priv), + void *priv); +drm_intel_bo *drm_intel_bo_fake_alloc_static(drm_intel_bufmgr *bufmgr, + const char *name, + unsigned long offset, + unsigned long size, void *virt); +void drm_intel_bo_fake_disable_backing_store(drm_intel_bo *bo, + void (*invalidate_cb) (drm_intel_bo + * bo, + void *ptr), + void *ptr); + +void drm_intel_bufmgr_fake_contended_lock_take(drm_intel_bufmgr *bufmgr); +void drm_intel_bufmgr_fake_evict_all(drm_intel_bufmgr *bufmgr); + +struct drm_intel_decode *drm_intel_decode_context_alloc(uint32_t devid); +void drm_intel_decode_context_free(struct drm_intel_decode *ctx); +void drm_intel_decode_set_batch_pointer(struct drm_intel_decode *ctx, + void *data, uint32_t hw_offset, + int count); +void drm_intel_decode_set_dump_past_end(struct drm_intel_decode *ctx, + int dump_past_end); +void drm_intel_decode_set_head_tail(struct drm_intel_decode *ctx, + uint32_t head, uint32_t tail); +void drm_intel_decode_set_output_file(struct drm_intel_decode *ctx, FILE *out); +void drm_intel_decode(struct drm_intel_decode *ctx); + +int drm_intel_reg_read(drm_intel_bufmgr *bufmgr, + uint32_t offset, + uint64_t *result); + +int drm_intel_get_reset_stats(drm_intel_context *ctx, + uint32_t *reset_count, + uint32_t *active, + uint32_t *pending); + +int drm_intel_get_subslice_total(int fd, unsigned int *subslice_total); +int drm_intel_get_eu_total(int fd, unsigned int *eu_total); + +int drm_intel_get_pooled_eu(int fd); +int drm_intel_get_min_eu_in_pool(int fd); + +/** @{ Compatibility defines to keep old code building despite the symbol rename + * from dri_* to drm_intel_* + */ +#define dri_bo drm_intel_bo +#define dri_bufmgr drm_intel_bufmgr +#define dri_bo_alloc drm_intel_bo_alloc +#define dri_bo_reference drm_intel_bo_reference +#define dri_bo_unreference drm_intel_bo_unreference +#define dri_bo_map drm_intel_bo_map +#define dri_bo_unmap drm_intel_bo_unmap +#define dri_bo_subdata drm_intel_bo_subdata +#define dri_bo_get_subdata drm_intel_bo_get_subdata +#define dri_bo_wait_rendering drm_intel_bo_wait_rendering +#define dri_bufmgr_set_debug drm_intel_bufmgr_set_debug +#define dri_bufmgr_destroy drm_intel_bufmgr_destroy +#define dri_bo_exec drm_intel_bo_exec +#define dri_bufmgr_check_aperture_space drm_intel_bufmgr_check_aperture_space +#define dri_bo_emit_reloc(reloc_bo, read, write, target_offset, \ + reloc_offset, target_bo) \ + drm_intel_bo_emit_reloc(reloc_bo, reloc_offset, \ + target_bo, target_offset, \ + read, write); +#define dri_bo_pin drm_intel_bo_pin +#define dri_bo_unpin drm_intel_bo_unpin +#define dri_bo_get_tiling drm_intel_bo_get_tiling +#define dri_bo_set_tiling(bo, mode) drm_intel_bo_set_tiling(bo, mode, 0) +#define dri_bo_flink drm_intel_bo_flink +#define intel_bufmgr_gem_init drm_intel_bufmgr_gem_init +#define intel_bo_gem_create_from_name drm_intel_bo_gem_create_from_name +#define intel_bufmgr_gem_enable_reuse drm_intel_bufmgr_gem_enable_reuse +#define intel_bufmgr_fake_init drm_intel_bufmgr_fake_init +#define intel_bufmgr_fake_set_last_dispatch drm_intel_bufmgr_fake_set_last_dispatch +#define intel_bufmgr_fake_set_exec_callback drm_intel_bufmgr_fake_set_exec_callback +#define intel_bufmgr_fake_set_fence_callback drm_intel_bufmgr_fake_set_fence_callback +#define intel_bo_fake_alloc_static drm_intel_bo_fake_alloc_static +#define intel_bo_fake_disable_backing_store drm_intel_bo_fake_disable_backing_store +#define intel_bufmgr_fake_contended_lock_take drm_intel_bufmgr_fake_contended_lock_take +#define intel_bufmgr_fake_evict_all drm_intel_bufmgr_fake_evict_all + +/** @{ */ + +#if defined(__cplusplus) +} +#endif + +#endif /* INTEL_BUFMGR_H */ diff --git a/src/mesa/drivers/dri/i965/intel_bufmgr.c b/src/mesa/drivers/dri/i965/intel_bufmgr.c new file mode 100644 index 00000000000..a285340039f --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_bufmgr.c @@ -0,0 +1,374 @@ +/* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <string.h> +#include <stdlib.h> +#include <stdint.h> +#include <assert.h> +#include <errno.h> +#include <drm.h> +#include <i915_drm.h> +#include <pciaccess.h> +#include "libdrm_macros.h" +#include "intel_bufmgr.h" +#include "intel_bufmgr_priv.h" +#include "xf86drm.h" + +/** @file intel_bufmgr.c + * + * Convenience functions for buffer management methods. + */ + +drm_intel_bo * +drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name, + unsigned long size, unsigned int alignment) +{ + return bufmgr->bo_alloc(bufmgr, name, size, alignment); +} + +drm_intel_bo * +drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, const char *name, + unsigned long size, unsigned int alignment) +{ + return bufmgr->bo_alloc_for_render(bufmgr, name, size, alignment); +} + +drm_intel_bo * +drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, + const char *name, void *addr, + uint32_t tiling_mode, + uint32_t stride, + unsigned long size, + unsigned long flags) +{ + if (bufmgr->bo_alloc_userptr) + return bufmgr->bo_alloc_userptr(bufmgr, name, addr, tiling_mode, + stride, size, flags); + return NULL; +} + +drm_intel_bo * +drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, + int x, int y, int cpp, uint32_t *tiling_mode, + unsigned long *pitch, unsigned long flags) +{ + return bufmgr->bo_alloc_tiled(bufmgr, name, x, y, cpp, + tiling_mode, pitch, flags); +} + +void +drm_intel_bo_reference(drm_intel_bo *bo) +{ + bo->bufmgr->bo_reference(bo); +} + +void +drm_intel_bo_unreference(drm_intel_bo *bo) +{ + if (bo == NULL) + return; + + bo->bufmgr->bo_unreference(bo); +} + +int +drm_intel_bo_map(drm_intel_bo *buf, int write_enable) +{ + return buf->bufmgr->bo_map(buf, write_enable); +} + +int +drm_intel_bo_unmap(drm_intel_bo *buf) +{ + return buf->bufmgr->bo_unmap(buf); +} + +int +drm_intel_bo_subdata(drm_intel_bo *bo, unsigned long offset, + unsigned long size, const void *data) +{ + return bo->bufmgr->bo_subdata(bo, offset, size, data); +} + +int +drm_intel_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, + unsigned long size, void *data) +{ + int ret; + if (bo->bufmgr->bo_get_subdata) + return bo->bufmgr->bo_get_subdata(bo, offset, size, data); + + if (size == 0 || data == NULL) + return 0; + + ret = drm_intel_bo_map(bo, 0); + if (ret) + return ret; + memcpy(data, (unsigned char *)bo->virtual + offset, size); + drm_intel_bo_unmap(bo); + return 0; +} + +void +drm_intel_bo_wait_rendering(drm_intel_bo *bo) +{ + bo->bufmgr->bo_wait_rendering(bo); +} + +void +drm_intel_bufmgr_destroy(drm_intel_bufmgr *bufmgr) +{ + bufmgr->destroy(bufmgr); +} + +int +drm_intel_bo_exec(drm_intel_bo *bo, int used, + drm_clip_rect_t * cliprects, int num_cliprects, int DR4) +{ + return bo->bufmgr->bo_exec(bo, used, cliprects, num_cliprects, DR4); +} + +int +drm_intel_bo_mrb_exec(drm_intel_bo *bo, int used, + drm_clip_rect_t *cliprects, int num_cliprects, int DR4, + unsigned int rings) +{ + if (bo->bufmgr->bo_mrb_exec) + return bo->bufmgr->bo_mrb_exec(bo, used, + cliprects, num_cliprects, DR4, + rings); + + switch (rings) { + case I915_EXEC_DEFAULT: + case I915_EXEC_RENDER: + return bo->bufmgr->bo_exec(bo, used, + cliprects, num_cliprects, DR4); + default: + return -ENODEV; + } +} + +void +drm_intel_bufmgr_set_debug(drm_intel_bufmgr *bufmgr, int enable_debug) +{ + bufmgr->debug = enable_debug; +} + +int +drm_intel_bufmgr_check_aperture_space(drm_intel_bo ** bo_array, int count) +{ + return bo_array[0]->bufmgr->check_aperture_space(bo_array, count); +} + +int +drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name) +{ + if (bo->bufmgr->bo_flink) + return bo->bufmgr->bo_flink(bo, name); + + return -ENODEV; +} + +int +drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain) +{ + return bo->bufmgr->bo_emit_reloc(bo, offset, + target_bo, target_offset, + read_domains, write_domain); +} + +/* For fence registers, not GL fences */ +int +drm_intel_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain) +{ + return bo->bufmgr->bo_emit_reloc_fence(bo, offset, + target_bo, target_offset, + read_domains, write_domain); +} + + +int +drm_intel_bo_pin(drm_intel_bo *bo, uint32_t alignment) +{ + if (bo->bufmgr->bo_pin) + return bo->bufmgr->bo_pin(bo, alignment); + + return -ENODEV; +} + +int +drm_intel_bo_unpin(drm_intel_bo *bo) +{ + if (bo->bufmgr->bo_unpin) + return bo->bufmgr->bo_unpin(bo); + + return -ENODEV; +} + +int +drm_intel_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, + uint32_t stride) +{ + if (bo->bufmgr->bo_set_tiling) + return bo->bufmgr->bo_set_tiling(bo, tiling_mode, stride); + + *tiling_mode = I915_TILING_NONE; + return 0; +} + +int +drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, + uint32_t * swizzle_mode) +{ + if (bo->bufmgr->bo_get_tiling) + return bo->bufmgr->bo_get_tiling(bo, tiling_mode, swizzle_mode); + + *tiling_mode = I915_TILING_NONE; + *swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + return 0; +} + +int +drm_intel_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset) +{ + if (bo->bufmgr->bo_set_softpin_offset) + return bo->bufmgr->bo_set_softpin_offset(bo, offset); + + return -ENODEV; +} + +int +drm_intel_bo_disable_reuse(drm_intel_bo *bo) +{ + if (bo->bufmgr->bo_disable_reuse) + return bo->bufmgr->bo_disable_reuse(bo); + return 0; +} + +int +drm_intel_bo_is_reusable(drm_intel_bo *bo) +{ + if (bo->bufmgr->bo_is_reusable) + return bo->bufmgr->bo_is_reusable(bo); + return 0; +} + +int +drm_intel_bo_busy(drm_intel_bo *bo) +{ + if (bo->bufmgr->bo_busy) + return bo->bufmgr->bo_busy(bo); + return 0; +} + +int +drm_intel_bo_madvise(drm_intel_bo *bo, int madv) +{ + if (bo->bufmgr->bo_madvise) + return bo->bufmgr->bo_madvise(bo, madv); + return -1; +} + +int +drm_intel_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable) +{ + if (bo->bufmgr->bo_use_48b_address_range) { + bo->bufmgr->bo_use_48b_address_range(bo, enable); + return 0; + } + + return -ENODEV; +} + +int +drm_intel_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) +{ + return bo->bufmgr->bo_references(bo, target_bo); +} + +int +drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) +{ + if (bufmgr->get_pipe_from_crtc_id) + return bufmgr->get_pipe_from_crtc_id(bufmgr, crtc_id); + return -1; +} + +static size_t +drm_intel_probe_agp_aperture_size(int fd) +{ + struct pci_device *pci_dev; + size_t size = 0; + int ret; + + ret = pci_system_init(); + if (ret) + goto err; + + /* XXX handle multiple adaptors? */ + pci_dev = pci_device_find_by_slot(0, 0, 2, 0); + if (pci_dev == NULL) + goto err; + + ret = pci_device_probe(pci_dev); + if (ret) + goto err; + + size = pci_dev->regions[2].size; +err: + pci_system_cleanup (); + return size; +} + +int +drm_intel_get_aperture_sizes(int fd, size_t *mappable, size_t *total) +{ + + struct drm_i915_gem_get_aperture aperture; + int ret; + + ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + if (ret) + return ret; + + *mappable = 0; + /* XXX add a query for the kernel value? */ + if (*mappable == 0) + *mappable = drm_intel_probe_agp_aperture_size(fd); + if (*mappable == 0) + *mappable = 64 * 1024 * 1024; /* minimum possible value */ + *total = aperture.aper_size; + return 0; +} diff --git a/src/mesa/drivers/dri/i965/intel_bufmgr_gem.c b/src/mesa/drivers/dri/i965/intel_bufmgr_gem.c new file mode 100644 index 00000000000..5a591b2e9cf --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_bufmgr_gem.c @@ -0,0 +1,3819 @@ +/************************************************************************** + * + * Copyright © 2007 Red Hat Inc. + * Copyright © 2007-2012 Intel Corporation + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ +/* + * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> + * Keith Whitwell <keithw-at-tungstengraphics-dot-com> + * Eric Anholt <[email protected]> + * Dave Airlie <[email protected]> + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <xf86drm.h> +#include <xf86atomic.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <assert.h> +#include <pthread.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <stdbool.h> + +#include "errno.h" +#ifndef ETIME +#define ETIME ETIMEDOUT +#endif +#include "libdrm_macros.h" +#include "libdrm_lists.h" +#include "intel_bufmgr.h" +#include "intel_bufmgr_priv.h" +#include "intel_chipset.h" +#include "string.h" + +#include "i915_drm.h" +#include "uthash.h" + +#ifdef HAVE_VALGRIND +#include <valgrind.h> +#include <memcheck.h> +#define VG(x) x +#else +#define VG(x) +#endif + +#define memclear(s) memset(&s, 0, sizeof(s)) + +#define DBG(...) do { \ + if (bufmgr_gem->bufmgr.debug) \ + fprintf(stderr, __VA_ARGS__); \ +} while (0) + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define MAX2(A, B) ((A) > (B) ? (A) : (B)) + +/** + * upper_32_bits - return bits 32-63 of a number + * @n: the number we're accessing + * + * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress + * the "right shift count >= width of type" warning when that quantity is + * 32-bits. + */ +#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16)) + +/** + * lower_32_bits - return bits 0-31 of a number + * @n: the number we're accessing + */ +#define lower_32_bits(n) ((__u32)(n)) + +typedef struct _drm_intel_bo_gem drm_intel_bo_gem; + +struct drm_intel_gem_bo_bucket { + drmMMListHead head; + unsigned long size; +}; + +typedef struct _drm_intel_bufmgr_gem { + drm_intel_bufmgr bufmgr; + + atomic_t refcount; + + int fd; + + int max_relocs; + + pthread_mutex_t lock; + + struct drm_i915_gem_exec_object *exec_objects; + struct drm_i915_gem_exec_object2 *exec2_objects; + drm_intel_bo **exec_bos; + int exec_size; + int exec_count; + + /** Array of lists of cached gem objects of power-of-two sizes */ + struct drm_intel_gem_bo_bucket cache_bucket[14 * 4]; + int num_buckets; + time_t time; + + drmMMListHead managers; + + drm_intel_bo_gem *name_table; + drm_intel_bo_gem *handle_table; + + drmMMListHead vma_cache; + int vma_count, vma_open, vma_max; + + uint64_t gtt_size; + int available_fences; + int pci_device; + int gen; + unsigned int has_bsd : 1; + unsigned int has_blt : 1; + unsigned int has_relaxed_fencing : 1; + unsigned int has_llc : 1; + unsigned int has_wait_timeout : 1; + unsigned int bo_reuse : 1; + unsigned int no_exec : 1; + unsigned int has_vebox : 1; + unsigned int has_exec_async : 1; + bool fenced_relocs; + + struct { + void *ptr; + uint32_t handle; + } userptr_active; + +} drm_intel_bufmgr_gem; + +#define DRM_INTEL_RELOC_FENCE (1<<0) + +typedef struct _drm_intel_reloc_target_info { + drm_intel_bo *bo; + int flags; +} drm_intel_reloc_target; + +struct _drm_intel_bo_gem { + drm_intel_bo bo; + + atomic_t refcount; + uint32_t gem_handle; + const char *name; + + /** + * Kenel-assigned global name for this object + * + * List contains both flink named and prime fd'd objects + */ + unsigned int global_name; + + UT_hash_handle handle_hh; + UT_hash_handle name_hh; + + /** + * Index of the buffer within the validation list while preparing a + * batchbuffer execution. + */ + int validate_index; + + /** + * Current tiling mode + */ + uint32_t tiling_mode; + uint32_t swizzle_mode; + unsigned long stride; + + unsigned long kflags; + + time_t free_time; + + /** Array passed to the DRM containing relocation information. */ + struct drm_i915_gem_relocation_entry *relocs; + /** + * Array of info structs corresponding to relocs[i].target_handle etc + */ + drm_intel_reloc_target *reloc_target_info; + /** Number of entries in relocs */ + int reloc_count; + /** Array of BOs that are referenced by this buffer and will be softpinned */ + drm_intel_bo **softpin_target; + /** Number softpinned BOs that are referenced by this buffer */ + int softpin_target_count; + /** Maximum amount of softpinned BOs that are referenced by this buffer */ + int softpin_target_size; + + /** Mapped address for the buffer, saved across map/unmap cycles */ + void *mem_virtual; + /** GTT virtual address for the buffer, saved across map/unmap cycles */ + void *gtt_virtual; + /** WC CPU address for the buffer, saved across map/unmap cycles */ + void *wc_virtual; + /** + * Virtual address of the buffer allocated by user, used for userptr + * objects only. + */ + void *user_virtual; + int map_count; + drmMMListHead vma_list; + + /** BO cache list */ + drmMMListHead head; + + /** + * Boolean of whether this BO and its children have been included in + * the current drm_intel_bufmgr_check_aperture_space() total. + */ + bool included_in_check_aperture; + + /** + * Boolean of whether this buffer has been used as a relocation + * target and had its size accounted for, and thus can't have any + * further relocations added to it. + */ + bool used_as_reloc_target; + + /** + * Boolean of whether we have encountered an error whilst building the relocation tree. + */ + bool has_error; + + /** + * Boolean of whether this buffer can be re-used + */ + bool reusable; + + /** + * Boolean of whether the GPU is definitely not accessing the buffer. + * + * This is only valid when reusable, since non-reusable + * buffers are those that have been shared with other + * processes, so we don't know their state. + */ + bool idle; + + /** + * Boolean of whether this buffer was allocated with userptr + */ + bool is_userptr; + + /** + * Size in bytes of this buffer and its relocation descendents. + * + * Used to avoid costly tree walking in + * drm_intel_bufmgr_check_aperture in the common case. + */ + int reloc_tree_size; + + /** + * Number of potential fence registers required by this buffer and its + * relocations. + */ + int reloc_tree_fences; + + /** Flags that we may need to do the SW_FINISH ioctl on unmap. */ + bool mapped_cpu_write; +}; + +static unsigned int +drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count); + +static unsigned int +drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count); + +static int +drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, + uint32_t * swizzle_mode); + +static int +drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, + uint32_t tiling_mode, + uint32_t stride); + +static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, + time_t time); + +static void drm_intel_gem_bo_unreference(drm_intel_bo *bo); + +static void drm_intel_gem_bo_free(drm_intel_bo *bo); + +static inline drm_intel_bo_gem *to_bo_gem(drm_intel_bo *bo) +{ + return (drm_intel_bo_gem *)bo; +} + +static unsigned long +drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, + uint32_t *tiling_mode) +{ + unsigned long min_size, max_size; + unsigned long i; + + if (*tiling_mode == I915_TILING_NONE) + return size; + + /* 965+ just need multiples of page size for tiling */ + if (bufmgr_gem->gen >= 4) + return ROUND_UP_TO(size, 4096); + + /* Older chips need powers of two, of at least 512k or 1M */ + if (bufmgr_gem->gen == 3) { + min_size = 1024*1024; + max_size = 128*1024*1024; + } else { + min_size = 512*1024; + max_size = 64*1024*1024; + } + + if (size > max_size) { + *tiling_mode = I915_TILING_NONE; + return size; + } + + /* Do we need to allocate every page for the fence? */ + if (bufmgr_gem->has_relaxed_fencing) + return ROUND_UP_TO(size, 4096); + + for (i = min_size; i < size; i <<= 1) + ; + + return i; +} + +/* + * Round a given pitch up to the minimum required for X tiling on a + * given chip. We use 512 as the minimum to allow for a later tiling + * change. + */ +static unsigned long +drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem, + unsigned long pitch, uint32_t *tiling_mode) +{ + unsigned long tile_width; + unsigned long i; + + /* If untiled, then just align it so that we can do rendering + * to it with the 3D engine. + */ + if (*tiling_mode == I915_TILING_NONE) + return ALIGN(pitch, 64); + + if (*tiling_mode == I915_TILING_X + || (IS_915(bufmgr_gem->pci_device) + && *tiling_mode == I915_TILING_Y)) + tile_width = 512; + else + tile_width = 128; + + /* 965 is flexible */ + if (bufmgr_gem->gen >= 4) + return ROUND_UP_TO(pitch, tile_width); + + /* The older hardware has a maximum pitch of 8192 with tiled + * surfaces, so fallback to untiled if it's too large. + */ + if (pitch > 8192) { + *tiling_mode = I915_TILING_NONE; + return ALIGN(pitch, 64); + } + + /* Pre-965 needs power of two tile width */ + for (i = tile_width; i < pitch; i <<= 1) + ; + + return i; +} + +static struct drm_intel_gem_bo_bucket * +drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem, + unsigned long size) +{ + int i; + + for (i = 0; i < bufmgr_gem->num_buckets; i++) { + struct drm_intel_gem_bo_bucket *bucket = + &bufmgr_gem->cache_bucket[i]; + if (bucket->size >= size) { + return bucket; + } + } + + return NULL; +} + +static void +drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) +{ + int i, j; + + for (i = 0; i < bufmgr_gem->exec_count; i++) { + drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) { + DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle, + bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "", + bo_gem->name); + continue; + } + + for (j = 0; j < bo_gem->reloc_count; j++) { + drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo; + drm_intel_bo_gem *target_gem = + (drm_intel_bo_gem *) target_bo; + + DBG("%2d: %d %s(%s)@0x%08x %08x -> " + "%d (%s)@0x%08x %08x + 0x%08x\n", + i, + bo_gem->gem_handle, + bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "", + bo_gem->name, + upper_32_bits(bo_gem->relocs[j].offset), + lower_32_bits(bo_gem->relocs[j].offset), + target_gem->gem_handle, + target_gem->name, + upper_32_bits(target_bo->offset64), + lower_32_bits(target_bo->offset64), + bo_gem->relocs[j].delta); + } + + for (j = 0; j < bo_gem->softpin_target_count; j++) { + drm_intel_bo *target_bo = bo_gem->softpin_target[j]; + drm_intel_bo_gem *target_gem = + (drm_intel_bo_gem *) target_bo; + DBG("%2d: %d %s(%s) -> " + "%d *(%s)@0x%08x %08x\n", + i, + bo_gem->gem_handle, + bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "", + bo_gem->name, + target_gem->gem_handle, + target_gem->name, + upper_32_bits(target_bo->offset64), + lower_32_bits(target_bo->offset64)); + } + } +} + +static inline void +drm_intel_gem_bo_reference(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + atomic_inc(&bo_gem->refcount); +} + +/** + * Adds the given buffer to the list of buffers to be validated (moved into the + * appropriate memory type) with the next batch submission. + * + * If a buffer is validated multiple times in a batch submission, it ends up + * with the intersection of the memory type flags and the union of the + * access flags. + */ +static void +drm_intel_add_validate_buffer(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int index; + + if (bo_gem->validate_index != -1) + return; + + /* Extend the array of validation entries as necessary. */ + if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { + int new_size = bufmgr_gem->exec_size * 2; + + if (new_size == 0) + new_size = 5; + + bufmgr_gem->exec_objects = + realloc(bufmgr_gem->exec_objects, + sizeof(*bufmgr_gem->exec_objects) * new_size); + bufmgr_gem->exec_bos = + realloc(bufmgr_gem->exec_bos, + sizeof(*bufmgr_gem->exec_bos) * new_size); + bufmgr_gem->exec_size = new_size; + } + + index = bufmgr_gem->exec_count; + bo_gem->validate_index = index; + /* Fill in array entry */ + bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle; + bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count; + bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs; + bufmgr_gem->exec_objects[index].alignment = bo->align; + bufmgr_gem->exec_objects[index].offset = 0; + bufmgr_gem->exec_bos[index] = bo; + bufmgr_gem->exec_count++; +} + +static void +drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; + int index; + unsigned long flags; + + flags = 0; + if (need_fence) + flags |= EXEC_OBJECT_NEEDS_FENCE; + + if (bo_gem->validate_index != -1) { + bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags; + return; + } + + /* Extend the array of validation entries as necessary. */ + if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { + int new_size = bufmgr_gem->exec_size * 2; + + if (new_size == 0) + new_size = 5; + + bufmgr_gem->exec2_objects = + realloc(bufmgr_gem->exec2_objects, + sizeof(*bufmgr_gem->exec2_objects) * new_size); + bufmgr_gem->exec_bos = + realloc(bufmgr_gem->exec_bos, + sizeof(*bufmgr_gem->exec_bos) * new_size); + bufmgr_gem->exec_size = new_size; + } + + index = bufmgr_gem->exec_count; + bo_gem->validate_index = index; + /* Fill in array entry */ + bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle; + bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count; + bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; + bufmgr_gem->exec2_objects[index].alignment = bo->align; + bufmgr_gem->exec2_objects[index].offset = bo->offset64; + bufmgr_gem->exec2_objects[index].flags = bo_gem->kflags | flags; + bufmgr_gem->exec2_objects[index].rsvd1 = 0; + bufmgr_gem->exec2_objects[index].rsvd2 = 0; + bufmgr_gem->exec_bos[index] = bo; + bufmgr_gem->exec_count++; +} + +#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ + sizeof(uint32_t)) + +static void +drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, + drm_intel_bo_gem *bo_gem, + unsigned int alignment) +{ + unsigned int size; + + assert(!bo_gem->used_as_reloc_target); + + /* The older chipsets are far-less flexible in terms of tiling, + * and require tiled buffer to be size aligned in the aperture. + * This means that in the worst possible case we will need a hole + * twice as large as the object in order for it to fit into the + * aperture. Optimal packing is for wimps. + */ + size = bo_gem->bo.size; + if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) { + unsigned int min_size; + + if (bufmgr_gem->has_relaxed_fencing) { + if (bufmgr_gem->gen == 3) + min_size = 1024*1024; + else + min_size = 512*1024; + + while (min_size < size) + min_size *= 2; + } else + min_size = size; + + /* Account for worst-case alignment. */ + alignment = MAX2(alignment, min_size); + } + + bo_gem->reloc_tree_size = size + alignment; +} + +static int +drm_intel_setup_reloc_list(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + unsigned int max_relocs = bufmgr_gem->max_relocs; + + if (bo->size / 4 < max_relocs) + max_relocs = bo->size / 4; + + bo_gem->relocs = malloc(max_relocs * + sizeof(struct drm_i915_gem_relocation_entry)); + bo_gem->reloc_target_info = malloc(max_relocs * + sizeof(drm_intel_reloc_target)); + if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) { + bo_gem->has_error = true; + + free (bo_gem->relocs); + bo_gem->relocs = NULL; + + free (bo_gem->reloc_target_info); + bo_gem->reloc_target_info = NULL; + + return 1; + } + + return 0; +} + +static int +drm_intel_gem_bo_busy(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_busy busy; + int ret; + + if (bo_gem->reusable && bo_gem->idle) + return false; + + memclear(busy); + busy.handle = bo_gem->gem_handle; + + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); + if (ret == 0) { + bo_gem->idle = !busy.busy; + return busy.busy; + } else { + return false; + } + return (ret == 0 && busy.busy); +} + +static int +drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, + drm_intel_bo_gem *bo_gem, int state) +{ + struct drm_i915_gem_madvise madv; + + memclear(madv); + madv.handle = bo_gem->gem_handle; + madv.madv = state; + madv.retained = 1; + drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); + + return madv.retained; +} + +static int +drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv) +{ + return drm_intel_gem_bo_madvise_internal + ((drm_intel_bufmgr_gem *) bo->bufmgr, + (drm_intel_bo_gem *) bo, + madv); +} + +/* drop the oldest entries that have been purged by the kernel */ +static void +drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem, + struct drm_intel_gem_bo_bucket *bucket) +{ + while (!DRMLISTEMPTY(&bucket->head)) { + drm_intel_bo_gem *bo_gem; + + bo_gem = DRMLISTENTRY(drm_intel_bo_gem, + bucket->head.next, head); + if (drm_intel_gem_bo_madvise_internal + (bufmgr_gem, bo_gem, I915_MADV_DONTNEED)) + break; + + DRMLISTDEL(&bo_gem->head); + drm_intel_gem_bo_free(&bo_gem->bo); + } +} + +static drm_intel_bo * +drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, + const char *name, + unsigned long size, + unsigned long flags, + uint32_t tiling_mode, + unsigned long stride, + unsigned int alignment) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + drm_intel_bo_gem *bo_gem; + unsigned int page_size = getpagesize(); + int ret; + struct drm_intel_gem_bo_bucket *bucket; + bool alloc_from_cache; + unsigned long bo_size; + bool for_render = false; + + if (flags & BO_ALLOC_FOR_RENDER) + for_render = true; + + /* Round the allocated size up to a power of two number of pages. */ + bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size); + + /* If we don't have caching at this size, don't actually round the + * allocation up. + */ + if (bucket == NULL) { + bo_size = size; + if (bo_size < page_size) + bo_size = page_size; + } else { + bo_size = bucket->size; + } + + pthread_mutex_lock(&bufmgr_gem->lock); + /* Get a buffer out of the cache if available */ +retry: + alloc_from_cache = false; + if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) { + if (for_render) { + /* Allocate new render-target BOs from the tail (MRU) + * of the list, as it will likely be hot in the GPU + * cache and in the aperture for us. + */ + bo_gem = DRMLISTENTRY(drm_intel_bo_gem, + bucket->head.prev, head); + DRMLISTDEL(&bo_gem->head); + alloc_from_cache = true; + bo_gem->bo.align = alignment; + } else { + assert(alignment == 0); + /* For non-render-target BOs (where we're probably + * going to map it first thing in order to fill it + * with data), check if the last BO in the cache is + * unbusy, and only reuse in that case. Otherwise, + * allocating a new buffer is probably faster than + * waiting for the GPU to finish. + */ + bo_gem = DRMLISTENTRY(drm_intel_bo_gem, + bucket->head.next, head); + if (!drm_intel_gem_bo_busy(&bo_gem->bo)) { + alloc_from_cache = true; + DRMLISTDEL(&bo_gem->head); + } + } + + if (alloc_from_cache) { + if (!drm_intel_gem_bo_madvise_internal + (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) { + drm_intel_gem_bo_free(&bo_gem->bo); + drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem, + bucket); + goto retry; + } + + if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, + tiling_mode, + stride)) { + drm_intel_gem_bo_free(&bo_gem->bo); + goto retry; + } + } + } + + if (!alloc_from_cache) { + struct drm_i915_gem_create create; + + bo_gem = calloc(1, sizeof(*bo_gem)); + if (!bo_gem) + goto err; + + /* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized + list (vma_list), so better set the list head here */ + DRMINITLISTHEAD(&bo_gem->vma_list); + + bo_gem->bo.size = bo_size; + + memclear(create); + create.size = bo_size; + + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_CREATE, + &create); + if (ret != 0) { + free(bo_gem); + goto err; + } + + bo_gem->gem_handle = create.handle; + HASH_ADD(handle_hh, bufmgr_gem->handle_table, + gem_handle, sizeof(bo_gem->gem_handle), + bo_gem); + + bo_gem->bo.handle = bo_gem->gem_handle; + bo_gem->bo.bufmgr = bufmgr; + bo_gem->bo.align = alignment; + + bo_gem->tiling_mode = I915_TILING_NONE; + bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + bo_gem->stride = 0; + + if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, + tiling_mode, + stride)) + goto err_free; + } + + bo_gem->name = name; + atomic_set(&bo_gem->refcount, 1); + bo_gem->validate_index = -1; + bo_gem->reloc_tree_fences = 0; + bo_gem->used_as_reloc_target = false; + bo_gem->has_error = false; + bo_gem->reusable = true; + + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment); + pthread_mutex_unlock(&bufmgr_gem->lock); + + DBG("bo_create: buf %d (%s) %ldb\n", + bo_gem->gem_handle, bo_gem->name, size); + + return &bo_gem->bo; + +err_free: + drm_intel_gem_bo_free(&bo_gem->bo); +err: + pthread_mutex_unlock(&bufmgr_gem->lock); + return NULL; +} + +static drm_intel_bo * +drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, + const char *name, + unsigned long size, + unsigned int alignment) +{ + return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, + BO_ALLOC_FOR_RENDER, + I915_TILING_NONE, 0, + alignment); +} + +static drm_intel_bo * +drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, + const char *name, + unsigned long size, + unsigned int alignment) +{ + return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0, + I915_TILING_NONE, 0, 0); +} + +static drm_intel_bo * +drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, + int x, int y, int cpp, uint32_t *tiling_mode, + unsigned long *pitch, unsigned long flags) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + unsigned long size, stride; + uint32_t tiling; + + do { + unsigned long aligned_y, height_alignment; + + tiling = *tiling_mode; + + /* If we're tiled, our allocations are in 8 or 32-row blocks, + * so failure to align our height means that we won't allocate + * enough pages. + * + * If we're untiled, we still have to align to 2 rows high + * because the data port accesses 2x2 blocks even if the + * bottom row isn't to be rendered, so failure to align means + * we could walk off the end of the GTT and fault. This is + * documented on 965, and may be the case on older chipsets + * too so we try to be careful. + */ + aligned_y = y; + height_alignment = 2; + + if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE) + height_alignment = 16; + else if (tiling == I915_TILING_X + || (IS_915(bufmgr_gem->pci_device) + && tiling == I915_TILING_Y)) + height_alignment = 8; + else if (tiling == I915_TILING_Y) + height_alignment = 32; + aligned_y = ALIGN(y, height_alignment); + + stride = x * cpp; + stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode); + size = stride * aligned_y; + size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode); + } while (*tiling_mode != tiling); + *pitch = stride; + + if (tiling == I915_TILING_NONE) + stride = 0; + + return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags, + tiling, stride, 0); +} + +static drm_intel_bo * +drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, + const char *name, + void *addr, + uint32_t tiling_mode, + uint32_t stride, + unsigned long size, + unsigned long flags) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + drm_intel_bo_gem *bo_gem; + int ret; + struct drm_i915_gem_userptr userptr; + + /* Tiling with userptr surfaces is not supported + * on all hardware so refuse it for time being. + */ + if (tiling_mode != I915_TILING_NONE) + return NULL; + + bo_gem = calloc(1, sizeof(*bo_gem)); + if (!bo_gem) + return NULL; + + atomic_set(&bo_gem->refcount, 1); + DRMINITLISTHEAD(&bo_gem->vma_list); + + bo_gem->bo.size = size; + + memclear(userptr); + userptr.user_ptr = (__u64)((unsigned long)addr); + userptr.user_size = size; + userptr.flags = flags; + + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_USERPTR, + &userptr); + if (ret != 0) { + DBG("bo_create_userptr: " + "ioctl failed with user ptr %p size 0x%lx, " + "user flags 0x%lx\n", addr, size, flags); + free(bo_gem); + return NULL; + } + + pthread_mutex_lock(&bufmgr_gem->lock); + + bo_gem->gem_handle = userptr.handle; + bo_gem->bo.handle = bo_gem->gem_handle; + bo_gem->bo.bufmgr = bufmgr; + bo_gem->is_userptr = true; + bo_gem->bo.virtual = addr; + /* Save the address provided by user */ + bo_gem->user_virtual = addr; + bo_gem->tiling_mode = I915_TILING_NONE; + bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + bo_gem->stride = 0; + + HASH_ADD(handle_hh, bufmgr_gem->handle_table, + gem_handle, sizeof(bo_gem->gem_handle), + bo_gem); + + bo_gem->name = name; + bo_gem->validate_index = -1; + bo_gem->reloc_tree_fences = 0; + bo_gem->used_as_reloc_target = false; + bo_gem->has_error = false; + bo_gem->reusable = false; + + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); + pthread_mutex_unlock(&bufmgr_gem->lock); + + DBG("bo_create_userptr: " + "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n", + addr, bo_gem->gem_handle, bo_gem->name, + size, stride, tiling_mode); + + return &bo_gem->bo; +} + +static bool +has_userptr(drm_intel_bufmgr_gem *bufmgr_gem) +{ + int ret; + void *ptr; + long pgsz; + struct drm_i915_gem_userptr userptr; + + pgsz = sysconf(_SC_PAGESIZE); + assert(pgsz > 0); + + ret = posix_memalign(&ptr, pgsz, pgsz); + if (ret) { + DBG("Failed to get a page (%ld) for userptr detection!\n", + pgsz); + return false; + } + + memclear(userptr); + userptr.user_ptr = (__u64)(unsigned long)ptr; + userptr.user_size = pgsz; + +retry: + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); + if (ret) { + if (errno == ENODEV && userptr.flags == 0) { + userptr.flags = I915_USERPTR_UNSYNCHRONIZED; + goto retry; + } + free(ptr); + return false; + } + + /* We don't release the userptr bo here as we want to keep the + * kernel mm tracking alive for our lifetime. The first time we + * create a userptr object the kernel has to install a mmu_notifer + * which is a heavyweight operation (e.g. it requires taking all + * mm_locks and stop_machine()). + */ + + bufmgr_gem->userptr_active.ptr = ptr; + bufmgr_gem->userptr_active.handle = userptr.handle; + + return true; +} + +static drm_intel_bo * +check_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, + const char *name, + void *addr, + uint32_t tiling_mode, + uint32_t stride, + unsigned long size, + unsigned long flags) +{ + if (has_userptr((drm_intel_bufmgr_gem *)bufmgr)) + bufmgr->bo_alloc_userptr = drm_intel_gem_bo_alloc_userptr; + else + bufmgr->bo_alloc_userptr = NULL; + + return drm_intel_bo_alloc_userptr(bufmgr, name, addr, + tiling_mode, stride, size, flags); +} + +/** + * Returns a drm_intel_bo wrapping the given buffer object handle. + * + * This can be used when one application needs to pass a buffer object + * to another. + */ +drm_intel_bo * +drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, + const char *name, + unsigned int handle) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + drm_intel_bo_gem *bo_gem; + int ret; + struct drm_gem_open open_arg; + struct drm_i915_gem_get_tiling get_tiling; + + /* At the moment most applications only have a few named bo. + * For instance, in a DRI client only the render buffers passed + * between X and the client are named. And since X returns the + * alternating names for the front/back buffer a linear search + * provides a sufficiently fast match. + */ + pthread_mutex_lock(&bufmgr_gem->lock); + HASH_FIND(name_hh, bufmgr_gem->name_table, + &handle, sizeof(handle), bo_gem); + if (bo_gem) { + drm_intel_gem_bo_reference(&bo_gem->bo); + goto out; + } + + memclear(open_arg); + open_arg.name = handle; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_GEM_OPEN, + &open_arg); + if (ret != 0) { + DBG("Couldn't reference %s handle 0x%08x: %s\n", + name, handle, strerror(errno)); + bo_gem = NULL; + goto out; + } + /* Now see if someone has used a prime handle to get this + * object from the kernel before by looking through the list + * again for a matching gem_handle + */ + HASH_FIND(handle_hh, bufmgr_gem->handle_table, + &open_arg.handle, sizeof(open_arg.handle), bo_gem); + if (bo_gem) { + drm_intel_gem_bo_reference(&bo_gem->bo); + goto out; + } + + bo_gem = calloc(1, sizeof(*bo_gem)); + if (!bo_gem) + goto out; + + atomic_set(&bo_gem->refcount, 1); + DRMINITLISTHEAD(&bo_gem->vma_list); + + bo_gem->bo.size = open_arg.size; + bo_gem->bo.offset = 0; + bo_gem->bo.offset64 = 0; + bo_gem->bo.virtual = NULL; + bo_gem->bo.bufmgr = bufmgr; + bo_gem->name = name; + bo_gem->validate_index = -1; + bo_gem->gem_handle = open_arg.handle; + bo_gem->bo.handle = open_arg.handle; + bo_gem->global_name = handle; + bo_gem->reusable = false; + + HASH_ADD(handle_hh, bufmgr_gem->handle_table, + gem_handle, sizeof(bo_gem->gem_handle), bo_gem); + HASH_ADD(name_hh, bufmgr_gem->name_table, + global_name, sizeof(bo_gem->global_name), bo_gem); + + memclear(get_tiling); + get_tiling.handle = bo_gem->gem_handle; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_GET_TILING, + &get_tiling); + if (ret != 0) + goto err_unref; + + bo_gem->tiling_mode = get_tiling.tiling_mode; + bo_gem->swizzle_mode = get_tiling.swizzle_mode; + /* XXX stride is unknown */ + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); + DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); + +out: + pthread_mutex_unlock(&bufmgr_gem->lock); + return &bo_gem->bo; + +err_unref: + drm_intel_gem_bo_free(&bo_gem->bo); + pthread_mutex_unlock(&bufmgr_gem->lock); + return NULL; +} + +static void +drm_intel_gem_bo_free(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_gem_close close; + int ret; + + DRMLISTDEL(&bo_gem->vma_list); + if (bo_gem->mem_virtual) { + VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0)); + drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size); + bufmgr_gem->vma_count--; + } + if (bo_gem->wc_virtual) { + VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0)); + drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size); + bufmgr_gem->vma_count--; + } + if (bo_gem->gtt_virtual) { + drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); + bufmgr_gem->vma_count--; + } + + if (bo_gem->global_name) + HASH_DELETE(name_hh, bufmgr_gem->name_table, bo_gem); + HASH_DELETE(handle_hh, bufmgr_gem->handle_table, bo_gem); + + /* Close this object */ + memclear(close); + close.handle = bo_gem->gem_handle; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); + if (ret != 0) { + DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", + bo_gem->gem_handle, bo_gem->name, strerror(errno)); + } + free(bo); +} + +static void +drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo) +{ +#if HAVE_VALGRIND + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (bo_gem->mem_virtual) + VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size); + + if (bo_gem->wc_virtual) + VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size); + + if (bo_gem->gtt_virtual) + VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size); +#endif +} + +/** Frees all cached buffers significantly older than @time. */ +static void +drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) +{ + int i; + + if (bufmgr_gem->time == time) + return; + + for (i = 0; i < bufmgr_gem->num_buckets; i++) { + struct drm_intel_gem_bo_bucket *bucket = + &bufmgr_gem->cache_bucket[i]; + + while (!DRMLISTEMPTY(&bucket->head)) { + drm_intel_bo_gem *bo_gem; + + bo_gem = DRMLISTENTRY(drm_intel_bo_gem, + bucket->head.next, head); + if (time - bo_gem->free_time <= 1) + break; + + DRMLISTDEL(&bo_gem->head); + + drm_intel_gem_bo_free(&bo_gem->bo); + } + } + + bufmgr_gem->time = time; +} + +static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem) +{ + int limit; + + DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__, + bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max); + + if (bufmgr_gem->vma_max < 0) + return; + + /* We may need to evict a few entries in order to create new mmaps */ + limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open; + if (limit < 0) + limit = 0; + + while (bufmgr_gem->vma_count > limit) { + drm_intel_bo_gem *bo_gem; + + bo_gem = DRMLISTENTRY(drm_intel_bo_gem, + bufmgr_gem->vma_cache.next, + vma_list); + assert(bo_gem->map_count == 0); + DRMLISTDELINIT(&bo_gem->vma_list); + + if (bo_gem->mem_virtual) { + drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size); + bo_gem->mem_virtual = NULL; + bufmgr_gem->vma_count--; + } + if (bo_gem->wc_virtual) { + drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size); + bo_gem->wc_virtual = NULL; + bufmgr_gem->vma_count--; + } + if (bo_gem->gtt_virtual) { + drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); + bo_gem->gtt_virtual = NULL; + bufmgr_gem->vma_count--; + } + } +} + +static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem, + drm_intel_bo_gem *bo_gem) +{ + bufmgr_gem->vma_open--; + DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache); + if (bo_gem->mem_virtual) + bufmgr_gem->vma_count++; + if (bo_gem->wc_virtual) + bufmgr_gem->vma_count++; + if (bo_gem->gtt_virtual) + bufmgr_gem->vma_count++; + drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); +} + +static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem, + drm_intel_bo_gem *bo_gem) +{ + bufmgr_gem->vma_open++; + DRMLISTDEL(&bo_gem->vma_list); + if (bo_gem->mem_virtual) + bufmgr_gem->vma_count--; + if (bo_gem->wc_virtual) + bufmgr_gem->vma_count--; + if (bo_gem->gtt_virtual) + bufmgr_gem->vma_count--; + drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); +} + +static void +drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_intel_gem_bo_bucket *bucket; + int i; + + /* Unreference all the target buffers */ + for (i = 0; i < bo_gem->reloc_count; i++) { + if (bo_gem->reloc_target_info[i].bo != bo) { + drm_intel_gem_bo_unreference_locked_timed(bo_gem-> + reloc_target_info[i].bo, + time); + } + } + for (i = 0; i < bo_gem->softpin_target_count; i++) + drm_intel_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i], + time); + bo_gem->kflags = 0; + bo_gem->reloc_count = 0; + bo_gem->used_as_reloc_target = false; + bo_gem->softpin_target_count = 0; + + DBG("bo_unreference final: %d (%s)\n", + bo_gem->gem_handle, bo_gem->name); + + /* release memory associated with this object */ + if (bo_gem->reloc_target_info) { + free(bo_gem->reloc_target_info); + bo_gem->reloc_target_info = NULL; + } + if (bo_gem->relocs) { + free(bo_gem->relocs); + bo_gem->relocs = NULL; + } + if (bo_gem->softpin_target) { + free(bo_gem->softpin_target); + bo_gem->softpin_target = NULL; + bo_gem->softpin_target_size = 0; + } + + /* Clear any left-over mappings */ + if (bo_gem->map_count) { + DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count); + bo_gem->map_count = 0; + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + } + + bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); + /* Put the buffer into our internal cache for reuse if we can. */ + if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && + drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, + I915_MADV_DONTNEED)) { + bo_gem->free_time = time; + + bo_gem->name = NULL; + bo_gem->validate_index = -1; + + DRMLISTADDTAIL(&bo_gem->head, &bucket->head); + } else { + drm_intel_gem_bo_free(bo); + } +} + +static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, + time_t time) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + assert(atomic_read(&bo_gem->refcount) > 0); + if (atomic_dec_and_test(&bo_gem->refcount)) + drm_intel_gem_bo_unreference_final(bo, time); +} + +static void drm_intel_gem_bo_unreference(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + assert(atomic_read(&bo_gem->refcount) > 0); + + if (atomic_add_unless(&bo_gem->refcount, -1, 1)) { + drm_intel_bufmgr_gem *bufmgr_gem = + (drm_intel_bufmgr_gem *) bo->bufmgr; + struct timespec time; + + clock_gettime(CLOCK_MONOTONIC, &time); + + pthread_mutex_lock(&bufmgr_gem->lock); + + if (atomic_dec_and_test(&bo_gem->refcount)) { + drm_intel_gem_bo_unreference_final(bo, time.tv_sec); + drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec); + } + + pthread_mutex_unlock(&bufmgr_gem->lock); + } +} + +static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_set_domain set_domain; + int ret; + + if (bo_gem->is_userptr) { + /* Return the same user ptr */ + bo->virtual = bo_gem->user_virtual; + return 0; + } + + pthread_mutex_lock(&bufmgr_gem->lock); + + if (bo_gem->map_count++ == 0) + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); + + if (!bo_gem->mem_virtual) { + struct drm_i915_gem_mmap mmap_arg; + + DBG("bo_map: %d (%s), map_count=%d\n", + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); + + memclear(mmap_arg); + mmap_arg.handle = bo_gem->gem_handle; + mmap_arg.size = bo->size; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_MMAP, + &mmap_arg); + if (ret != 0) { + ret = -errno; + DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", + __FILE__, __LINE__, bo_gem->gem_handle, + bo_gem->name, strerror(errno)); + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + pthread_mutex_unlock(&bufmgr_gem->lock); + return ret; + } + VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); + bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; + } + DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, + bo_gem->mem_virtual); + bo->virtual = bo_gem->mem_virtual; + + memclear(set_domain); + set_domain.handle = bo_gem->gem_handle; + set_domain.read_domains = I915_GEM_DOMAIN_CPU; + if (write_enable) + set_domain.write_domain = I915_GEM_DOMAIN_CPU; + else + set_domain.write_domain = 0; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_SET_DOMAIN, + &set_domain); + if (ret != 0) { + DBG("%s:%d: Error setting to CPU domain %d: %s\n", + __FILE__, __LINE__, bo_gem->gem_handle, + strerror(errno)); + } + + if (write_enable) + bo_gem->mapped_cpu_write = true; + + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size)); + pthread_mutex_unlock(&bufmgr_gem->lock); + + return 0; +} + +static int +map_gtt(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int ret; + + if (bo_gem->is_userptr) + return -EINVAL; + + if (bo_gem->map_count++ == 0) + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); + + /* Get a mapping of the buffer if we haven't before. */ + if (bo_gem->gtt_virtual == NULL) { + struct drm_i915_gem_mmap_gtt mmap_arg; + + DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); + + memclear(mmap_arg); + mmap_arg.handle = bo_gem->gem_handle; + + /* Get the fake offset back... */ + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_MMAP_GTT, + &mmap_arg); + if (ret != 0) { + ret = -errno; + DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n", + __FILE__, __LINE__, + bo_gem->gem_handle, bo_gem->name, + strerror(errno)); + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + return ret; + } + + /* and mmap it */ + bo_gem->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE, + MAP_SHARED, bufmgr_gem->fd, + mmap_arg.offset); + if (bo_gem->gtt_virtual == MAP_FAILED) { + bo_gem->gtt_virtual = NULL; + ret = -errno; + DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", + __FILE__, __LINE__, + bo_gem->gem_handle, bo_gem->name, + strerror(errno)); + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + return ret; + } + } + + bo->virtual = bo_gem->gtt_virtual; + + DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, + bo_gem->gtt_virtual); + + return 0; +} + +int +drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_set_domain set_domain; + int ret; + + pthread_mutex_lock(&bufmgr_gem->lock); + + ret = map_gtt(bo); + if (ret) { + pthread_mutex_unlock(&bufmgr_gem->lock); + return ret; + } + + /* Now move it to the GTT domain so that the GPU and CPU + * caches are flushed and the GPU isn't actively using the + * buffer. + * + * The pagefault handler does this domain change for us when + * it has unbound the BO from the GTT, but it's up to us to + * tell it when we're about to use things if we had done + * rendering and it still happens to be bound to the GTT. + */ + memclear(set_domain); + set_domain.handle = bo_gem->gem_handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = I915_GEM_DOMAIN_GTT; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_SET_DOMAIN, + &set_domain); + if (ret != 0) { + DBG("%s:%d: Error setting domain %d: %s\n", + __FILE__, __LINE__, bo_gem->gem_handle, + strerror(errno)); + } + + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); + pthread_mutex_unlock(&bufmgr_gem->lock); + + return 0; +} + +/** + * Performs a mapping of the buffer object like the normal GTT + * mapping, but avoids waiting for the GPU to be done reading from or + * rendering to the buffer. + * + * This is used in the implementation of GL_ARB_map_buffer_range: The + * user asks to create a buffer, then does a mapping, fills some + * space, runs a drawing command, then asks to map it again without + * synchronizing because it guarantees that it won't write over the + * data that the GPU is busy using (or, more specifically, that if it + * does write over the data, it acknowledges that rendering is + * undefined). + */ + +int +drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; +#ifdef HAVE_VALGRIND + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; +#endif + int ret; + + /* If the CPU cache isn't coherent with the GTT, then use a + * regular synchronized mapping. The problem is that we don't + * track where the buffer was last used on the CPU side in + * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so + * we would potentially corrupt the buffer even when the user + * does reasonable things. + */ + if (!bufmgr_gem->has_llc) + return drm_intel_gem_bo_map_gtt(bo); + + pthread_mutex_lock(&bufmgr_gem->lock); + + ret = map_gtt(bo); + if (ret == 0) { + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); + } + + pthread_mutex_unlock(&bufmgr_gem->lock); + + return ret; +} + +static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int ret = 0; + + if (bo == NULL) + return 0; + + if (bo_gem->is_userptr) + return 0; + + bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + + pthread_mutex_lock(&bufmgr_gem->lock); + + if (bo_gem->map_count <= 0) { + DBG("attempted to unmap an unmapped bo\n"); + pthread_mutex_unlock(&bufmgr_gem->lock); + /* Preserve the old behaviour of just treating this as a + * no-op rather than reporting the error. + */ + return 0; + } + + if (bo_gem->mapped_cpu_write) { + struct drm_i915_gem_sw_finish sw_finish; + + /* Cause a flush to happen if the buffer's pinned for + * scanout, so the results show up in a timely manner. + * Unlike GTT set domains, this only does work if the + * buffer should be scanout-related. + */ + memclear(sw_finish); + sw_finish.handle = bo_gem->gem_handle; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_SW_FINISH, + &sw_finish); + ret = ret == -1 ? -errno : 0; + + bo_gem->mapped_cpu_write = false; + } + + /* We need to unmap after every innovation as we cannot track + * an open vma for every bo as that will exhaust the system + * limits and cause later failures. + */ + if (--bo_gem->map_count == 0) { + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + bo->virtual = NULL; + } + pthread_mutex_unlock(&bufmgr_gem->lock); + + return ret; +} + +int +drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) +{ + return drm_intel_gem_bo_unmap(bo); +} + +static int +drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, + unsigned long size, const void *data) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_pwrite pwrite; + int ret; + + if (bo_gem->is_userptr) + return -EINVAL; + + memclear(pwrite); + pwrite.handle = bo_gem->gem_handle; + pwrite.offset = offset; + pwrite.size = size; + pwrite.data_ptr = (uint64_t) (uintptr_t) data; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_PWRITE, + &pwrite); + if (ret != 0) { + ret = -errno; + DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", + __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, + (int)size, strerror(errno)); + } + + return ret; +} + +static int +drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; + int ret; + + memclear(get_pipe_from_crtc_id); + get_pipe_from_crtc_id.crtc_id = crtc_id; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, + &get_pipe_from_crtc_id); + if (ret != 0) { + /* We return -1 here to signal that we don't + * know which pipe is associated with this crtc. + * This lets the caller know that this information + * isn't available; using the wrong pipe for + * vblank waiting can cause the chipset to lock up + */ + return -1; + } + + return get_pipe_from_crtc_id.pipe; +} + +static int +drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, + unsigned long size, void *data) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_pread pread; + int ret; + + if (bo_gem->is_userptr) + return -EINVAL; + + memclear(pread); + pread.handle = bo_gem->gem_handle; + pread.offset = offset; + pread.size = size; + pread.data_ptr = (uint64_t) (uintptr_t) data; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_PREAD, + &pread); + if (ret != 0) { + ret = -errno; + DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", + __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, + (int)size, strerror(errno)); + } + + return ret; +} + +/** Waits for all GPU rendering with the object to have completed. */ +static void +drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo) +{ + drm_intel_gem_bo_start_gtt_access(bo, 1); +} + +/** + * Waits on a BO for the given amount of time. + * + * @bo: buffer object to wait for + * @timeout_ns: amount of time to wait in nanoseconds. + * If value is less than 0, an infinite wait will occur. + * + * Returns 0 if the wait was successful ie. the last batch referencing the + * object has completed within the allotted time. Otherwise some negative return + * value describes the error. Of particular interest is -ETIME when the wait has + * failed to yield the desired result. + * + * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows + * the operation to give up after a certain amount of time. Another subtle + * difference is the internal locking semantics are different (this variant does + * not hold the lock for the duration of the wait). This makes the wait subject + * to a larger userspace race window. + * + * The implementation shall wait until the object is no longer actively + * referenced within a batch buffer at the time of the call. The wait will + * not guarantee that the buffer is re-issued via another thread, or an flinked + * handle. Userspace must make sure this race does not occur if such precision + * is important. + * + * Note that some kernels have broken the inifite wait for negative values + * promise, upgrade to latest stable kernels if this is the case. + */ +int +drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_wait wait; + int ret; + + if (!bufmgr_gem->has_wait_timeout) { + DBG("%s:%d: Timed wait is not supported. Falling back to " + "infinite wait\n", __FILE__, __LINE__); + if (timeout_ns) { + drm_intel_gem_bo_wait_rendering(bo); + return 0; + } else { + return drm_intel_gem_bo_busy(bo) ? -ETIME : 0; + } + } + + memclear(wait); + wait.bo_handle = bo_gem->gem_handle; + wait.timeout_ns = timeout_ns; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); + if (ret == -1) + return -errno; + + return ret; +} + +/** + * Sets the object to the GTT read and possibly write domain, used by the X + * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt(). + * + * In combination with drm_intel_gem_bo_pin() and manual fence management, we + * can do tiled pixmaps this way. + */ +void +drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_set_domain set_domain; + int ret; + + memclear(set_domain); + set_domain.handle = bo_gem->gem_handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_SET_DOMAIN, + &set_domain); + if (ret != 0) { + DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", + __FILE__, __LINE__, bo_gem->gem_handle, + set_domain.read_domains, set_domain.write_domain, + strerror(errno)); + } +} + +static void +drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + struct drm_gem_close close_bo; + int i, ret; + + free(bufmgr_gem->exec2_objects); + free(bufmgr_gem->exec_objects); + free(bufmgr_gem->exec_bos); + + pthread_mutex_destroy(&bufmgr_gem->lock); + + /* Free any cached buffer objects we were going to reuse */ + for (i = 0; i < bufmgr_gem->num_buckets; i++) { + struct drm_intel_gem_bo_bucket *bucket = + &bufmgr_gem->cache_bucket[i]; + drm_intel_bo_gem *bo_gem; + + while (!DRMLISTEMPTY(&bucket->head)) { + bo_gem = DRMLISTENTRY(drm_intel_bo_gem, + bucket->head.next, head); + DRMLISTDEL(&bo_gem->head); + + drm_intel_gem_bo_free(&bo_gem->bo); + } + } + + /* Release userptr bo kept hanging around for optimisation. */ + if (bufmgr_gem->userptr_active.ptr) { + memclear(close_bo); + close_bo.handle = bufmgr_gem->userptr_active.handle; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo); + free(bufmgr_gem->userptr_active.ptr); + if (ret) + fprintf(stderr, + "Failed to release test userptr object! (%d) " + "i915 kernel driver may not be sane!\n", errno); + } + + free(bufmgr); +} + +/** + * Adds the target buffer to the validation list and adds the relocation + * to the reloc_buffer's relocation list. + * + * The relocation entry at the given offset must already contain the + * precomputed relocation value, because the kernel will optimize out + * the relocation entry write when the buffer hasn't moved from the + * last known offset in target_bo. + */ +static int +do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain, + bool need_fence) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; + bool fenced_command; + + if (bo_gem->has_error) + return -ENOMEM; + + if (target_bo_gem->has_error) { + bo_gem->has_error = true; + return -ENOMEM; + } + + /* We never use HW fences for rendering on 965+ */ + if (bufmgr_gem->gen >= 4) + need_fence = false; + + fenced_command = need_fence; + if (target_bo_gem->tiling_mode == I915_TILING_NONE) + need_fence = false; + + /* Create a new relocation list if needed */ + if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo)) + return -ENOMEM; + + /* Check overflow */ + assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); + + /* Check args */ + assert(offset <= bo->size - 4); + assert((write_domain & (write_domain - 1)) == 0); + + /* An object needing a fence is a tiled buffer, so it won't have + * relocs to other buffers. + */ + if (need_fence) { + assert(target_bo_gem->reloc_count == 0); + target_bo_gem->reloc_tree_fences = 1; + } + + /* Make sure that we're not adding a reloc to something whose size has + * already been accounted for. + */ + assert(!bo_gem->used_as_reloc_target); + if (target_bo_gem != bo_gem) { + target_bo_gem->used_as_reloc_target = true; + bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; + bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; + } + + bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; + if (target_bo != bo) + drm_intel_gem_bo_reference(target_bo); + if (fenced_command) + bo_gem->reloc_target_info[bo_gem->reloc_count].flags = + DRM_INTEL_RELOC_FENCE; + else + bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; + + bo_gem->relocs[bo_gem->reloc_count].offset = offset; + bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; + bo_gem->relocs[bo_gem->reloc_count].target_handle = + target_bo_gem->gem_handle; + bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; + bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; + bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64; + bo_gem->reloc_count++; + + return 0; +} + +static void +drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (enable) + bo_gem->kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; + else + bo_gem->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS; +} + +static int +drm_intel_gem_bo_add_softpin_target(drm_intel_bo *bo, drm_intel_bo *target_bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; + if (bo_gem->has_error) + return -ENOMEM; + + if (target_bo_gem->has_error) { + bo_gem->has_error = true; + return -ENOMEM; + } + + if (!(target_bo_gem->kflags & EXEC_OBJECT_PINNED)) + return -EINVAL; + if (target_bo_gem == bo_gem) + return -EINVAL; + + if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) { + int new_size = bo_gem->softpin_target_size * 2; + if (new_size == 0) + new_size = bufmgr_gem->max_relocs; + + bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size * + sizeof(drm_intel_bo *)); + if (!bo_gem->softpin_target) + return -ENOMEM; + + bo_gem->softpin_target_size = new_size; + } + bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo; + drm_intel_gem_bo_reference(target_bo); + bo_gem->softpin_target_count++; + + return 0; +} + +static int +drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo; + + if (target_bo_gem->kflags & EXEC_OBJECT_PINNED) + return drm_intel_gem_bo_add_softpin_target(bo, target_bo); + else + return do_bo_emit_reloc(bo, offset, target_bo, target_offset, + read_domains, write_domain, + !bufmgr_gem->fenced_relocs); +} + +static int +drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, + uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain) +{ + return do_bo_emit_reloc(bo, offset, target_bo, target_offset, + read_domains, write_domain, true); +} + +int +drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + return bo_gem->reloc_count; +} + +/** + * Removes existing relocation entries in the BO after "start". + * + * This allows a user to avoid a two-step process for state setup with + * counting up all the buffer objects and doing a + * drm_intel_bufmgr_check_aperture_space() before emitting any of the + * relocations for the state setup. Instead, save the state of the + * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the + * state, and then check if it still fits in the aperture. + * + * Any further drm_intel_bufmgr_check_aperture_space() queries + * involving this buffer in the tree are undefined after this call. + * + * This also removes all softpinned targets being referenced by the BO. + */ +void +drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int i; + struct timespec time; + + clock_gettime(CLOCK_MONOTONIC, &time); + + assert(bo_gem->reloc_count >= start); + + /* Unreference the cleared target buffers */ + pthread_mutex_lock(&bufmgr_gem->lock); + + for (i = start; i < bo_gem->reloc_count; i++) { + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo; + if (&target_bo_gem->bo != bo) { + bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences; + drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, + time.tv_sec); + } + } + bo_gem->reloc_count = start; + + for (i = 0; i < bo_gem->softpin_target_count; i++) { + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->softpin_target[i]; + drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec); + } + bo_gem->softpin_target_count = 0; + + pthread_mutex_unlock(&bufmgr_gem->lock); + +} + +/** + * Walk the tree of relocations rooted at BO and accumulate the list of + * validations to be performed and update the relocation buffers with + * index values into the validation list. + */ +static void +drm_intel_gem_bo_process_reloc(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int i; + + if (bo_gem->relocs == NULL) + return; + + for (i = 0; i < bo_gem->reloc_count; i++) { + drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; + + if (target_bo == bo) + continue; + + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + + /* Continue walking the tree depth-first. */ + drm_intel_gem_bo_process_reloc(target_bo); + + /* Add the target to the validate list */ + drm_intel_add_validate_buffer(target_bo); + } +} + +static void +drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; + int i; + + if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) + return; + + for (i = 0; i < bo_gem->reloc_count; i++) { + drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; + int need_fence; + + if (target_bo == bo) + continue; + + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + + /* Continue walking the tree depth-first. */ + drm_intel_gem_bo_process_reloc2(target_bo); + + need_fence = (bo_gem->reloc_target_info[i].flags & + DRM_INTEL_RELOC_FENCE); + + /* Add the target to the validate list */ + drm_intel_add_validate_buffer2(target_bo, need_fence); + } + + for (i = 0; i < bo_gem->softpin_target_count; i++) { + drm_intel_bo *target_bo = bo_gem->softpin_target[i]; + + if (target_bo == bo) + continue; + + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + drm_intel_gem_bo_process_reloc2(target_bo); + drm_intel_add_validate_buffer2(target_bo, false); + } +} + + +static void +drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) +{ + int i; + + for (i = 0; i < bufmgr_gem->exec_count; i++) { + drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + /* Update the buffer offset */ + if (bufmgr_gem->exec_objects[i].offset != bo->offset64) { + DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", + bo_gem->gem_handle, bo_gem->name, + upper_32_bits(bo->offset64), + lower_32_bits(bo->offset64), + upper_32_bits(bufmgr_gem->exec_objects[i].offset), + lower_32_bits(bufmgr_gem->exec_objects[i].offset)); + bo->offset64 = bufmgr_gem->exec_objects[i].offset; + bo->offset = bufmgr_gem->exec_objects[i].offset; + } + } +} + +static void +drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) +{ + int i; + + for (i = 0; i < bufmgr_gem->exec_count; i++) { + drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; + + /* Update the buffer offset */ + if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) { + /* If we're seeing softpinned object here it means that the kernel + * has relocated our object... Indicating a programming error + */ + assert(!(bo_gem->kflags & EXEC_OBJECT_PINNED)); + DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", + bo_gem->gem_handle, bo_gem->name, + upper_32_bits(bo->offset64), + lower_32_bits(bo->offset64), + upper_32_bits(bufmgr_gem->exec2_objects[i].offset), + lower_32_bits(bufmgr_gem->exec2_objects[i].offset)); + bo->offset64 = bufmgr_gem->exec2_objects[i].offset; + bo->offset = bufmgr_gem->exec2_objects[i].offset; + } + } +} + +void +drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, + int x1, int y1, int width, int height, + enum aub_dump_bmp_format format, + int pitch, int offset) +{ +} + +static int +drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, + drm_clip_rect_t * cliprects, int num_cliprects, int DR4) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + struct drm_i915_gem_execbuffer execbuf; + int ret, i; + + if (to_bo_gem(bo)->has_error) + return -ENOMEM; + + pthread_mutex_lock(&bufmgr_gem->lock); + /* Update indices and set up the validate list. */ + drm_intel_gem_bo_process_reloc(bo); + + /* Add the batch buffer to the validation list. There are no + * relocations pointing to it. + */ + drm_intel_add_validate_buffer(bo); + + memclear(execbuf); + execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; + execbuf.buffer_count = bufmgr_gem->exec_count; + execbuf.batch_start_offset = 0; + execbuf.batch_len = used; + execbuf.cliprects_ptr = (uintptr_t) cliprects; + execbuf.num_cliprects = num_cliprects; + execbuf.DR1 = 0; + execbuf.DR4 = DR4; + + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_EXECBUFFER, + &execbuf); + if (ret != 0) { + ret = -errno; + if (errno == ENOSPC) { + DBG("Execbuffer fails to pin. " + "Estimate: %u. Actual: %u. Available: %u\n", + drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, + bufmgr_gem-> + exec_count), + drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, + bufmgr_gem-> + exec_count), + (unsigned int)bufmgr_gem->gtt_size); + } + } + drm_intel_update_buffer_offsets(bufmgr_gem); + + if (bufmgr_gem->bufmgr.debug) + drm_intel_gem_dump_validation_list(bufmgr_gem); + + for (i = 0; i < bufmgr_gem->exec_count; i++) { + drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]); + + bo_gem->idle = false; + + /* Disconnect the buffer from the validate list */ + bo_gem->validate_index = -1; + bufmgr_gem->exec_bos[i] = NULL; + } + bufmgr_gem->exec_count = 0; + pthread_mutex_unlock(&bufmgr_gem->lock); + + return ret; +} + +static int +do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, + drm_clip_rect_t *cliprects, int num_cliprects, int DR4, + int in_fence, int *out_fence, + unsigned int flags) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; + struct drm_i915_gem_execbuffer2 execbuf; + int ret = 0; + int i; + + if (to_bo_gem(bo)->has_error) + return -ENOMEM; + + switch (flags & 0x7) { + default: + return -EINVAL; + case I915_EXEC_BLT: + if (!bufmgr_gem->has_blt) + return -EINVAL; + break; + case I915_EXEC_BSD: + if (!bufmgr_gem->has_bsd) + return -EINVAL; + break; + case I915_EXEC_VEBOX: + if (!bufmgr_gem->has_vebox) + return -EINVAL; + break; + case I915_EXEC_RENDER: + case I915_EXEC_DEFAULT: + break; + } + + pthread_mutex_lock(&bufmgr_gem->lock); + /* Update indices and set up the validate list. */ + drm_intel_gem_bo_process_reloc2(bo); + + /* Add the batch buffer to the validation list. There are no relocations + * pointing to it. + */ + drm_intel_add_validate_buffer2(bo, 0); + + memclear(execbuf); + execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; + execbuf.buffer_count = bufmgr_gem->exec_count; + execbuf.batch_start_offset = 0; + execbuf.batch_len = used; + execbuf.cliprects_ptr = (uintptr_t)cliprects; + execbuf.num_cliprects = num_cliprects; + execbuf.DR1 = 0; + execbuf.DR4 = DR4; + execbuf.flags = flags; + if (ctx == NULL) + i915_execbuffer2_set_context_id(execbuf, 0); + else + i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id); + execbuf.rsvd2 = 0; + if (in_fence != -1) { + execbuf.rsvd2 = in_fence; + execbuf.flags |= I915_EXEC_FENCE_IN; + } + if (out_fence != NULL) { + *out_fence = -1; + execbuf.flags |= I915_EXEC_FENCE_OUT; + } + + if (bufmgr_gem->no_exec) + goto skip_execution; + + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, + &execbuf); + if (ret != 0) { + ret = -errno; + if (ret == -ENOSPC) { + DBG("Execbuffer fails to pin. " + "Estimate: %u. Actual: %u. Available: %u\n", + drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, + bufmgr_gem->exec_count), + drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, + bufmgr_gem->exec_count), + (unsigned int) bufmgr_gem->gtt_size); + } + } + drm_intel_update_buffer_offsets2(bufmgr_gem); + + if (ret == 0 && out_fence != NULL) + *out_fence = execbuf.rsvd2 >> 32; + +skip_execution: + if (bufmgr_gem->bufmgr.debug) + drm_intel_gem_dump_validation_list(bufmgr_gem); + + for (i = 0; i < bufmgr_gem->exec_count; i++) { + drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]); + + bo_gem->idle = false; + + /* Disconnect the buffer from the validate list */ + bo_gem->validate_index = -1; + bufmgr_gem->exec_bos[i] = NULL; + } + bufmgr_gem->exec_count = 0; + pthread_mutex_unlock(&bufmgr_gem->lock); + + return ret; +} + +static int +drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, + drm_clip_rect_t *cliprects, int num_cliprects, + int DR4) +{ + return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, + -1, NULL, I915_EXEC_RENDER); +} + +static int +drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, + drm_clip_rect_t *cliprects, int num_cliprects, int DR4, + unsigned int flags) +{ + return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, + -1, NULL, flags); +} + +int +drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, + int used, unsigned int flags) +{ + return do_exec2(bo, used, ctx, NULL, 0, 0, -1, NULL, flags); +} + +int +drm_intel_gem_bo_fence_exec(drm_intel_bo *bo, + drm_intel_context *ctx, + int used, + int in_fence, + int *out_fence, + unsigned int flags) +{ + return do_exec2(bo, used, ctx, NULL, 0, 0, in_fence, out_fence, flags); +} + +static int +drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_pin pin; + int ret; + + memclear(pin); + pin.handle = bo_gem->gem_handle; + pin.alignment = alignment; + + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_PIN, + &pin); + if (ret != 0) + return -errno; + + bo->offset64 = pin.offset; + bo->offset = pin.offset; + return 0; +} + +static int +drm_intel_gem_bo_unpin(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_unpin unpin; + int ret; + + memclear(unpin); + unpin.handle = bo_gem->gem_handle; + + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); + if (ret != 0) + return -errno; + + return 0; +} + +static int +drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, + uint32_t tiling_mode, + uint32_t stride) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_set_tiling set_tiling; + int ret; + + if (bo_gem->global_name == 0 && + tiling_mode == bo_gem->tiling_mode && + stride == bo_gem->stride) + return 0; + + memset(&set_tiling, 0, sizeof(set_tiling)); + do { + /* set_tiling is slightly broken and overwrites the + * input on the error path, so we have to open code + * rmIoctl. + */ + set_tiling.handle = bo_gem->gem_handle; + set_tiling.tiling_mode = tiling_mode; + set_tiling.stride = stride; + + ret = ioctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_SET_TILING, + &set_tiling); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + if (ret == -1) + return -errno; + + bo_gem->tiling_mode = set_tiling.tiling_mode; + bo_gem->swizzle_mode = set_tiling.swizzle_mode; + bo_gem->stride = set_tiling.stride; + return 0; +} + +static int +drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, + uint32_t stride) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int ret; + + /* Tiling with userptr surfaces is not supported + * on all hardware so refuse it for time being. + */ + if (bo_gem->is_userptr) + return -EINVAL; + + /* Linear buffers have no stride. By ensuring that we only ever use + * stride 0 with linear buffers, we simplify our code. + */ + if (*tiling_mode == I915_TILING_NONE) + stride = 0; + + ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride); + if (ret == 0) + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); + + *tiling_mode = bo_gem->tiling_mode; + return ret; +} + +static int +drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, + uint32_t * swizzle_mode) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + *tiling_mode = bo_gem->tiling_mode; + *swizzle_mode = bo_gem->swizzle_mode; + return 0; +} + +static int +drm_intel_gem_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + bo->offset64 = offset; + bo->offset = offset; + bo_gem->kflags |= EXEC_OBJECT_PINNED; + + return 0; +} + +drm_intel_bo * +drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + int ret; + uint32_t handle; + drm_intel_bo_gem *bo_gem; + struct drm_i915_gem_get_tiling get_tiling; + + pthread_mutex_lock(&bufmgr_gem->lock); + ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle); + if (ret) { + DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno)); + pthread_mutex_unlock(&bufmgr_gem->lock); + return NULL; + } + + /* + * See if the kernel has already returned this buffer to us. Just as + * for named buffers, we must not create two bo's pointing at the same + * kernel object + */ + HASH_FIND(handle_hh, bufmgr_gem->handle_table, + &handle, sizeof(handle), bo_gem); + if (bo_gem) { + drm_intel_gem_bo_reference(&bo_gem->bo); + goto out; + } + + bo_gem = calloc(1, sizeof(*bo_gem)); + if (!bo_gem) + goto out; + + atomic_set(&bo_gem->refcount, 1); + DRMINITLISTHEAD(&bo_gem->vma_list); + + /* Determine size of bo. The fd-to-handle ioctl really should + * return the size, but it doesn't. If we have kernel 3.12 or + * later, we can lseek on the prime fd to get the size. Older + * kernels will just fail, in which case we fall back to the + * provided (estimated or guess size). */ + ret = lseek(prime_fd, 0, SEEK_END); + if (ret != -1) + bo_gem->bo.size = ret; + else + bo_gem->bo.size = size; + + bo_gem->bo.handle = handle; + bo_gem->bo.bufmgr = bufmgr; + + bo_gem->gem_handle = handle; + HASH_ADD(handle_hh, bufmgr_gem->handle_table, + gem_handle, sizeof(bo_gem->gem_handle), bo_gem); + + bo_gem->name = "prime"; + bo_gem->validate_index = -1; + bo_gem->reloc_tree_fences = 0; + bo_gem->used_as_reloc_target = false; + bo_gem->has_error = false; + bo_gem->reusable = false; + + memclear(get_tiling); + get_tiling.handle = bo_gem->gem_handle; + if (drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_GET_TILING, + &get_tiling)) + goto err; + + bo_gem->tiling_mode = get_tiling.tiling_mode; + bo_gem->swizzle_mode = get_tiling.swizzle_mode; + /* XXX stride is unknown */ + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); + +out: + pthread_mutex_unlock(&bufmgr_gem->lock); + return &bo_gem->bo; + +err: + drm_intel_gem_bo_free(&bo_gem->bo); + pthread_mutex_unlock(&bufmgr_gem->lock); + return NULL; +} + +int +drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle, + DRM_CLOEXEC, prime_fd) != 0) + return -errno; + + bo_gem->reusable = false; + + return 0; +} + +static int +drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (!bo_gem->global_name) { + struct drm_gem_flink flink; + + memclear(flink); + flink.handle = bo_gem->gem_handle; + if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink)) + return -errno; + + pthread_mutex_lock(&bufmgr_gem->lock); + if (!bo_gem->global_name) { + bo_gem->global_name = flink.name; + bo_gem->reusable = false; + + HASH_ADD(name_hh, bufmgr_gem->name_table, + global_name, sizeof(bo_gem->global_name), + bo_gem); + } + pthread_mutex_unlock(&bufmgr_gem->lock); + } + + *name = bo_gem->global_name; + return 0; +} + +/** + * Enables unlimited caching of buffer objects for reuse. + * + * This is potentially very memory expensive, as the cache at each bucket + * size is only bounded by how many buffers of that size we've managed to have + * in flight at once. + */ +void +drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + + bufmgr_gem->bo_reuse = true; +} + +/** + * Disables implicit synchronisation before executing the bo + * + * This will cause rendering corruption unless you correctly manage explicit + * fences for all rendering involving this buffer - including use by others. + * Disabling the implicit serialisation is only required if that serialisation + * is too coarse (for example, you have split the buffer into many + * non-overlapping regions and are sharing the whole buffer between concurrent + * independent command streams). + * + * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC, + * which can be checked using drm_intel_bufmgr_can_disable_implicit_sync, + * or subsequent execbufs involving the bo will generate EINVAL. + */ +void +drm_intel_gem_bo_disable_implicit_sync(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + bo_gem->kflags |= EXEC_OBJECT_ASYNC; +} + +/** + * Enables implicit synchronisation before executing the bo + * + * This is the default behaviour of the kernel, to wait upon prior writes + * completing on the object before rendering with it, or to wait for prior + * reads to complete before writing into the object. + * drm_intel_gem_bo_disable_implicit_sync() can stop this behaviour, telling + * the kernel never to insert a stall before using the object. Then this + * function can be used to restore the implicit sync before subsequent + * rendering. + */ +void +drm_intel_gem_bo_enable_implicit_sync(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + bo_gem->kflags &= ~EXEC_OBJECT_ASYNC; +} + +/** + * Query whether the kernel supports disabling of its implicit synchronisation + * before execbuf. See drm_intel_gem_bo_disable_implicit_sync() + */ +int +drm_intel_bufmgr_gem_can_disable_implicit_sync(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + + return bufmgr_gem->has_exec_async; +} + +/** + * Enable use of fenced reloc type. + * + * New code should enable this to avoid unnecessary fence register + * allocation. If this option is not enabled, all relocs will have fence + * register allocated. + */ +void +drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + + if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2) + bufmgr_gem->fenced_relocs = true; +} + +/** + * Return the additional aperture space required by the tree of buffer objects + * rooted at bo. + */ +static int +drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int i; + int total = 0; + + if (bo == NULL || bo_gem->included_in_check_aperture) + return 0; + + total += bo->size; + bo_gem->included_in_check_aperture = true; + + for (i = 0; i < bo_gem->reloc_count; i++) + total += + drm_intel_gem_bo_get_aperture_space(bo_gem-> + reloc_target_info[i].bo); + + return total; +} + +/** + * Count the number of buffers in this list that need a fence reg + * + * If the count is greater than the number of available regs, we'll have + * to ask the caller to resubmit a batch with fewer tiled buffers. + * + * This function over-counts if the same buffer is used multiple times. + */ +static unsigned int +drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count) +{ + int i; + unsigned int total = 0; + + for (i = 0; i < count; i++) { + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; + + if (bo_gem == NULL) + continue; + + total += bo_gem->reloc_tree_fences; + } + return total; +} + +/** + * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready + * for the next drm_intel_bufmgr_check_aperture_space() call. + */ +static void +drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int i; + + if (bo == NULL || !bo_gem->included_in_check_aperture) + return; + + bo_gem->included_in_check_aperture = false; + + for (i = 0; i < bo_gem->reloc_count; i++) + drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> + reloc_target_info[i].bo); +} + +/** + * Return a conservative estimate for the amount of aperture required + * for a collection of buffers. This may double-count some buffers. + */ +static unsigned int +drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) +{ + int i; + unsigned int total = 0; + + for (i = 0; i < count; i++) { + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; + if (bo_gem != NULL) + total += bo_gem->reloc_tree_size; + } + return total; +} + +/** + * Return the amount of aperture needed for a collection of buffers. + * This avoids double counting any buffers, at the cost of looking + * at every buffer in the set. + */ +static unsigned int +drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) +{ + int i; + unsigned int total = 0; + + for (i = 0; i < count; i++) { + total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); + /* For the first buffer object in the array, we get an + * accurate count back for its reloc_tree size (since nothing + * had been flagged as being counted yet). We can save that + * value out as a more conservative reloc_tree_size that + * avoids double-counting target buffers. Since the first + * buffer happens to usually be the batch buffer in our + * callers, this can pull us back from doing the tree + * walk on every new batch emit. + */ + if (i == 0) { + drm_intel_bo_gem *bo_gem = + (drm_intel_bo_gem *) bo_array[i]; + bo_gem->reloc_tree_size = total; + } + } + + for (i = 0; i < count; i++) + drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]); + return total; +} + +/** + * Return -1 if the batchbuffer should be flushed before attempting to + * emit rendering referencing the buffers pointed to by bo_array. + * + * This is required because if we try to emit a batchbuffer with relocations + * to a tree of buffers that won't simultaneously fit in the aperture, + * the rendering will return an error at a point where the software is not + * prepared to recover from it. + * + * However, we also want to emit the batchbuffer significantly before we reach + * the limit, as a series of batchbuffers each of which references buffers + * covering almost all of the aperture means that at each emit we end up + * waiting to evict a buffer from the last rendering, and we get synchronous + * performance. By emitting smaller batchbuffers, we eat some CPU overhead to + * get better parallelism. + */ +static int +drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) +{ + drm_intel_bufmgr_gem *bufmgr_gem = + (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr; + unsigned int total = 0; + unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4; + int total_fences; + + /* Check for fence reg constraints if necessary */ + if (bufmgr_gem->available_fences) { + total_fences = drm_intel_gem_total_fences(bo_array, count); + if (total_fences > bufmgr_gem->available_fences) + return -ENOSPC; + } + + total = drm_intel_gem_estimate_batch_space(bo_array, count); + + if (total > threshold) + total = drm_intel_gem_compute_batch_space(bo_array, count); + + if (total > threshold) { + DBG("check_space: overflowed available aperture, " + "%dkb vs %dkb\n", + total / 1024, (int)bufmgr_gem->gtt_size / 1024); + return -ENOSPC; + } else { + DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024, + (int)bufmgr_gem->gtt_size / 1024); + return 0; + } +} + +/* + * Disable buffer reuse for objects which are shared with the kernel + * as scanout buffers + */ +static int +drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + bo_gem->reusable = false; + return 0; +} + +static int +drm_intel_gem_bo_is_reusable(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + return bo_gem->reusable; +} + +static int +_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int i; + + for (i = 0; i < bo_gem->reloc_count; i++) { + if (bo_gem->reloc_target_info[i].bo == target_bo) + return 1; + if (bo == bo_gem->reloc_target_info[i].bo) + continue; + if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo, + target_bo)) + return 1; + } + + for (i = 0; i< bo_gem->softpin_target_count; i++) { + if (bo_gem->softpin_target[i] == target_bo) + return 1; + if (_drm_intel_gem_bo_references(bo_gem->softpin_target[i], target_bo)) + return 1; + } + + return 0; +} + +/** Return true if target_bo is referenced by bo's relocation tree. */ +static int +drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) +{ + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; + + if (bo == NULL || target_bo == NULL) + return 0; + if (target_bo_gem->used_as_reloc_target) + return _drm_intel_gem_bo_references(bo, target_bo); + return 0; +} + +static void +add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size) +{ + unsigned int i = bufmgr_gem->num_buckets; + + assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket)); + + DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head); + bufmgr_gem->cache_bucket[i].size = size; + bufmgr_gem->num_buckets++; +} + +static void +init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem) +{ + unsigned long size, cache_max_size = 64 * 1024 * 1024; + + /* OK, so power of two buckets was too wasteful of memory. + * Give 3 other sizes between each power of two, to hopefully + * cover things accurately enough. (The alternative is + * probably to just go for exact matching of sizes, and assume + * that for things like composited window resize the tiled + * width/height alignment and rounding of sizes to pages will + * get us useful cache hit rates anyway) + */ + add_bucket(bufmgr_gem, 4096); + add_bucket(bufmgr_gem, 4096 * 2); + add_bucket(bufmgr_gem, 4096 * 3); + + /* Initialize the linked lists for BO reuse cache. */ + for (size = 4 * 4096; size <= cache_max_size; size *= 2) { + add_bucket(bufmgr_gem, size); + + add_bucket(bufmgr_gem, size + size * 1 / 4); + add_bucket(bufmgr_gem, size + size * 2 / 4); + add_bucket(bufmgr_gem, size + size * 3 / 4); + } +} + +void +drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + + bufmgr_gem->vma_max = limit; + + drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); +} + +static int +parse_devid_override(const char *devid_override) +{ + static const struct { + const char *name; + int pci_id; + } name_map[] = { + { "brw", PCI_CHIP_I965_GM }, + { "g4x", PCI_CHIP_GM45_GM }, + { "ilk", PCI_CHIP_ILD_G }, + { "snb", PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS }, + { "ivb", PCI_CHIP_IVYBRIDGE_S_GT2 }, + { "hsw", PCI_CHIP_HASWELL_CRW_E_GT3 }, + { "byt", PCI_CHIP_VALLEYVIEW_3 }, + { "bdw", 0x1620 | BDW_ULX }, + { "skl", PCI_CHIP_SKYLAKE_DT_GT2 }, + { "kbl", PCI_CHIP_KABYLAKE_DT_GT2 }, + }; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(name_map); i++) { + if (!strcmp(name_map[i].name, devid_override)) + return name_map[i].pci_id; + } + + return strtod(devid_override, NULL); +} + +/** + * Get the PCI ID for the device. This can be overridden by setting the + * INTEL_DEVID_OVERRIDE environment variable to the desired ID. + */ +static int +get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem) +{ + char *devid_override; + int devid = 0; + int ret; + drm_i915_getparam_t gp; + + if (geteuid() == getuid()) { + devid_override = getenv("INTEL_DEVID_OVERRIDE"); + if (devid_override) { + bufmgr_gem->no_exec = true; + return parse_devid_override(devid_override); + } + } + + memclear(gp); + gp.param = I915_PARAM_CHIPSET_ID; + gp.value = &devid; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret) { + fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); + fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); + } + return devid; +} + +int +drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + + return bufmgr_gem->pci_device; +} + +/** + * Sets the AUB filename. + * + * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump() + * for it to have any effect. + */ +void +drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr, + const char *filename) +{ +} + +/** + * Sets up AUB dumping. + * + * This is a trace file format that can be used with the simulator. + * Packets are emitted in a format somewhat like GPU command packets. + * You can set up a GTT and upload your objects into the referenced + * space, then send off batchbuffers and get BMPs out the other end. + */ +void +drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable) +{ + fprintf(stderr, "libdrm aub dumping is deprecated.\n\n" + "Use intel_aubdump from intel-gpu-tools instead. Install intel-gpu-tools,\n" + "then run (for example)\n\n" + "\t$ intel_aubdump --output=trace.aub glxgears -geometry 500x500\n\n" + "See the intel_aubdump man page for more details.\n"); +} + +drm_intel_context * +drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + struct drm_i915_gem_context_create create; + drm_intel_context *context = NULL; + int ret; + + context = calloc(1, sizeof(*context)); + if (!context) + return NULL; + + memclear(create); + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); + if (ret != 0) { + DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", + strerror(errno)); + free(context); + return NULL; + } + + context->ctx_id = create.ctx_id; + context->bufmgr = bufmgr; + + return context; +} + +int +drm_intel_gem_context_get_id(drm_intel_context *ctx, uint32_t *ctx_id) +{ + if (ctx == NULL) + return -EINVAL; + + *ctx_id = ctx->ctx_id; + + return 0; +} + +void +drm_intel_gem_context_destroy(drm_intel_context *ctx) +{ + drm_intel_bufmgr_gem *bufmgr_gem; + struct drm_i915_gem_context_destroy destroy; + int ret; + + if (ctx == NULL) + return; + + memclear(destroy); + + bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; + destroy.ctx_id = ctx->ctx_id; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, + &destroy); + if (ret != 0) + fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n", + strerror(errno)); + + free(ctx); +} + +int +drm_intel_get_reset_stats(drm_intel_context *ctx, + uint32_t *reset_count, + uint32_t *active, + uint32_t *pending) +{ + drm_intel_bufmgr_gem *bufmgr_gem; + struct drm_i915_reset_stats stats; + int ret; + + if (ctx == NULL) + return -EINVAL; + + memclear(stats); + + bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; + stats.ctx_id = ctx->ctx_id; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GET_RESET_STATS, + &stats); + if (ret == 0) { + if (reset_count != NULL) + *reset_count = stats.reset_count; + + if (active != NULL) + *active = stats.batch_active; + + if (pending != NULL) + *pending = stats.batch_pending; + } + + return ret; +} + +int +drm_intel_reg_read(drm_intel_bufmgr *bufmgr, + uint32_t offset, + uint64_t *result) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + struct drm_i915_reg_read reg_read; + int ret; + + memclear(reg_read); + reg_read.offset = offset; + + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, ®_read); + + *result = reg_read.val; + return ret; +} + +int +drm_intel_get_subslice_total(int fd, unsigned int *subslice_total) +{ + drm_i915_getparam_t gp; + int ret; + + memclear(gp); + gp.value = (int*)subslice_total; + gp.param = I915_PARAM_SUBSLICE_TOTAL; + ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret) + return -errno; + + return 0; +} + +int +drm_intel_get_eu_total(int fd, unsigned int *eu_total) +{ + drm_i915_getparam_t gp; + int ret; + + memclear(gp); + gp.value = (int*)eu_total; + gp.param = I915_PARAM_EU_TOTAL; + ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret) + return -errno; + + return 0; +} + +int +drm_intel_get_pooled_eu(int fd) +{ + drm_i915_getparam_t gp; + int ret = -1; + + memclear(gp); + gp.param = I915_PARAM_HAS_POOLED_EU; + gp.value = &ret; + if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) + return -errno; + + return ret; +} + +int +drm_intel_get_min_eu_in_pool(int fd) +{ + drm_i915_getparam_t gp; + int ret = -1; + + memclear(gp); + gp.param = I915_PARAM_MIN_EU_IN_POOL; + gp.value = &ret; + if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) + return -errno; + + return ret; +} + +/** + * Annotate the given bo for use in aub dumping. + * + * \param annotations is an array of drm_intel_aub_annotation objects + * describing the type of data in various sections of the bo. Each + * element of the array specifies the type and subtype of a section of + * the bo, and the past-the-end offset of that section. The elements + * of \c annotations must be sorted so that ending_offset is + * increasing. + * + * \param count is the number of elements in the \c annotations array. + * If \c count is zero, then \c annotations will not be dereferenced. + * + * Annotations are copied into a private data structure, so caller may + * re-use the memory pointed to by \c annotations after the call + * returns. + * + * Annotations are stored for the lifetime of the bo; to reset to the + * default state (no annotations), call this function with a \c count + * of zero. + */ +void +drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo, + drm_intel_aub_annotation *annotations, + unsigned count) +{ +} + +static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER; +static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list }; + +static drm_intel_bufmgr_gem * +drm_intel_bufmgr_gem_find(int fd) +{ + drm_intel_bufmgr_gem *bufmgr_gem; + + DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) { + if (bufmgr_gem->fd == fd) { + atomic_inc(&bufmgr_gem->refcount); + return bufmgr_gem; + } + } + + return NULL; +} + +static void +drm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + + if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) { + pthread_mutex_lock(&bufmgr_list_mutex); + + if (atomic_dec_and_test(&bufmgr_gem->refcount)) { + DRMLISTDEL(&bufmgr_gem->managers); + drm_intel_bufmgr_gem_destroy(bufmgr); + } + + pthread_mutex_unlock(&bufmgr_list_mutex); + } +} + +void *drm_intel_gem_bo_map__gtt(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (bo_gem->gtt_virtual) + return bo_gem->gtt_virtual; + + if (bo_gem->is_userptr) + return NULL; + + pthread_mutex_lock(&bufmgr_gem->lock); + if (bo_gem->gtt_virtual == NULL) { + struct drm_i915_gem_mmap_gtt mmap_arg; + void *ptr; + + DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); + + if (bo_gem->map_count++ == 0) + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); + + memclear(mmap_arg); + mmap_arg.handle = bo_gem->gem_handle; + + /* Get the fake offset back... */ + ptr = MAP_FAILED; + if (drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_MMAP_GTT, + &mmap_arg) == 0) { + /* and mmap it */ + ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE, + MAP_SHARED, bufmgr_gem->fd, + mmap_arg.offset); + } + if (ptr == MAP_FAILED) { + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + ptr = NULL; + } + + bo_gem->gtt_virtual = ptr; + } + pthread_mutex_unlock(&bufmgr_gem->lock); + + return bo_gem->gtt_virtual; +} + +void *drm_intel_gem_bo_map__cpu(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (bo_gem->mem_virtual) + return bo_gem->mem_virtual; + + if (bo_gem->is_userptr) { + /* Return the same user ptr */ + return bo_gem->user_virtual; + } + + pthread_mutex_lock(&bufmgr_gem->lock); + if (!bo_gem->mem_virtual) { + struct drm_i915_gem_mmap mmap_arg; + + if (bo_gem->map_count++ == 0) + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); + + DBG("bo_map: %d (%s), map_count=%d\n", + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); + + memclear(mmap_arg); + mmap_arg.handle = bo_gem->gem_handle; + mmap_arg.size = bo->size; + if (drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_MMAP, + &mmap_arg)) { + DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", + __FILE__, __LINE__, bo_gem->gem_handle, + bo_gem->name, strerror(errno)); + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + } else { + VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); + bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; + } + } + pthread_mutex_unlock(&bufmgr_gem->lock); + + return bo_gem->mem_virtual; +} + +void *drm_intel_gem_bo_map__wc(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (bo_gem->wc_virtual) + return bo_gem->wc_virtual; + + if (bo_gem->is_userptr) + return NULL; + + pthread_mutex_lock(&bufmgr_gem->lock); + if (!bo_gem->wc_virtual) { + struct drm_i915_gem_mmap mmap_arg; + + if (bo_gem->map_count++ == 0) + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); + + DBG("bo_map: %d (%s), map_count=%d\n", + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); + + memclear(mmap_arg); + mmap_arg.handle = bo_gem->gem_handle; + mmap_arg.size = bo->size; + mmap_arg.flags = I915_MMAP_WC; + if (drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_MMAP, + &mmap_arg)) { + DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", + __FILE__, __LINE__, bo_gem->gem_handle, + bo_gem->name, strerror(errno)); + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + } else { + VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); + bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; + } + } + pthread_mutex_unlock(&bufmgr_gem->lock); + + return bo_gem->wc_virtual; +} + +/** + * Initializes the GEM buffer manager, which uses the kernel to allocate, map, + * and manage map buffer objections. + * + * \param fd File descriptor of the opened DRM device. + */ +drm_intel_bufmgr * +drm_intel_bufmgr_gem_init(int fd, int batch_size) +{ + drm_intel_bufmgr_gem *bufmgr_gem; + struct drm_i915_gem_get_aperture aperture; + drm_i915_getparam_t gp; + int ret, tmp; + bool exec2 = false; + + pthread_mutex_lock(&bufmgr_list_mutex); + + bufmgr_gem = drm_intel_bufmgr_gem_find(fd); + if (bufmgr_gem) + goto exit; + + bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); + if (bufmgr_gem == NULL) + goto exit; + + bufmgr_gem->fd = fd; + atomic_set(&bufmgr_gem->refcount, 1); + + if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) { + free(bufmgr_gem); + bufmgr_gem = NULL; + goto exit; + } + + memclear(aperture); + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_GET_APERTURE, + &aperture); + + if (ret == 0) + bufmgr_gem->gtt_size = aperture.aper_available_size; + else { + fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n", + strerror(errno)); + bufmgr_gem->gtt_size = 128 * 1024 * 1024; + fprintf(stderr, "Assuming %dkB available aperture size.\n" + "May lead to reduced performance or incorrect " + "rendering.\n", + (int)bufmgr_gem->gtt_size / 1024); + } + + bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem); + + if (IS_GEN2(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 2; + else if (IS_GEN3(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 3; + else if (IS_GEN4(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 4; + else if (IS_GEN5(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 5; + else if (IS_GEN6(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 6; + else if (IS_GEN7(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 7; + else if (IS_GEN8(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 8; + else if (IS_GEN9(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 9; + else { + free(bufmgr_gem); + bufmgr_gem = NULL; + goto exit; + } + + if (IS_GEN3(bufmgr_gem->pci_device) && + bufmgr_gem->gtt_size > 256*1024*1024) { + /* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't + * be used for tiled blits. To simplify the accounting, just + * subtract the unmappable part (fixed to 256MB on all known + * gen3 devices) if the kernel advertises it. */ + bufmgr_gem->gtt_size -= 256*1024*1024; + } + + memclear(gp); + gp.value = &tmp; + + gp.param = I915_PARAM_HAS_EXECBUF2; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (!ret) + exec2 = true; + + gp.param = I915_PARAM_HAS_BSD; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_bsd = ret == 0; + + gp.param = I915_PARAM_HAS_BLT; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_blt = ret == 0; + + gp.param = I915_PARAM_HAS_RELAXED_FENCING; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_relaxed_fencing = ret == 0; + + gp.param = I915_PARAM_HAS_EXEC_ASYNC; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_exec_async = ret == 0; + + bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr; + + gp.param = I915_PARAM_HAS_WAIT_TIMEOUT; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_wait_timeout = ret == 0; + + gp.param = I915_PARAM_HAS_LLC; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret != 0) { + /* Kernel does not supports HAS_LLC query, fallback to GPU + * generation detection and assume that we have LLC on GEN6/7 + */ + bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) | + IS_GEN7(bufmgr_gem->pci_device)); + } else + bufmgr_gem->has_llc = *gp.value; + + gp.param = I915_PARAM_HAS_VEBOX; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0); + + gp.param = I915_PARAM_HAS_EXEC_SOFTPIN; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret == 0 && *gp.value > 0) + bufmgr_gem->bufmgr.bo_set_softpin_offset = drm_intel_gem_bo_set_softpin_offset; + + if (bufmgr_gem->gen < 4) { + gp.param = I915_PARAM_NUM_FENCES_AVAIL; + gp.value = &bufmgr_gem->available_fences; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret) { + fprintf(stderr, "get fences failed: %d [%d]\n", ret, + errno); + fprintf(stderr, "param: %d, val: %d\n", gp.param, + *gp.value); + bufmgr_gem->available_fences = 0; + } else { + /* XXX The kernel reports the total number of fences, + * including any that may be pinned. + * + * We presume that there will be at least one pinned + * fence for the scanout buffer, but there may be more + * than one scanout and the user may be manually + * pinning buffers. Let's move to execbuffer2 and + * thereby forget the insanity of using fences... + */ + bufmgr_gem->available_fences -= 2; + if (bufmgr_gem->available_fences < 0) + bufmgr_gem->available_fences = 0; + } + } + + if (bufmgr_gem->gen >= 8) { + gp.param = I915_PARAM_HAS_ALIASING_PPGTT; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret == 0 && *gp.value == 3) + bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range; + } + + /* Let's go with one relocation per every 2 dwords (but round down a bit + * since a power of two will mean an extra page allocation for the reloc + * buffer). + * + * Every 4 was too few for the blender benchmark. + */ + bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; + + bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc; + bufmgr_gem->bufmgr.bo_alloc_for_render = + drm_intel_gem_bo_alloc_for_render; + bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled; + bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference; + bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference; + bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map; + bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap; + bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata; + bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata; + bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering; + bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc; + bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence; + bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin; + bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin; + bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling; + bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling; + bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink; + /* Use the new one if available */ + if (exec2) { + bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2; + bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2; + } else + bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec; + bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy; + bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise; + bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_unref; + bufmgr_gem->bufmgr.debug = 0; + bufmgr_gem->bufmgr.check_aperture_space = + drm_intel_gem_check_aperture_space; + bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse; + bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable; + bufmgr_gem->bufmgr.get_pipe_from_crtc_id = + drm_intel_gem_get_pipe_from_crtc_id; + bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references; + + init_cache_buckets(bufmgr_gem); + + DRMINITLISTHEAD(&bufmgr_gem->vma_cache); + bufmgr_gem->vma_max = -1; /* unlimited by default */ + + DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list); + +exit: + pthread_mutex_unlock(&bufmgr_list_mutex); + + return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL; +} diff --git a/src/mesa/drivers/dri/i965/intel_bufmgr_priv.h b/src/mesa/drivers/dri/i965/intel_bufmgr_priv.h new file mode 100644 index 00000000000..7e360a0b23d --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_bufmgr_priv.h @@ -0,0 +1,325 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +/** + * @file intel_bufmgr_priv.h + * + * Private definitions of Intel-specific bufmgr functions and structures. + */ + +#ifndef INTEL_BUFMGR_PRIV_H +#define INTEL_BUFMGR_PRIV_H + +/** + * Context for a buffer manager instance. + * + * Contains public methods followed by private storage for the buffer manager. + */ +struct _drm_intel_bufmgr { + /** + * Allocate a buffer object. + * + * Buffer objects are not necessarily initially mapped into CPU virtual + * address space or graphics device aperture. They must be mapped + * using bo_map() or drm_intel_gem_bo_map_gtt() to be used by the CPU. + */ + drm_intel_bo *(*bo_alloc) (drm_intel_bufmgr *bufmgr, const char *name, + unsigned long size, unsigned int alignment); + + /** + * Allocate a buffer object, hinting that it will be used as a + * render target. + * + * This is otherwise the same as bo_alloc. + */ + drm_intel_bo *(*bo_alloc_for_render) (drm_intel_bufmgr *bufmgr, + const char *name, + unsigned long size, + unsigned int alignment); + + /** + * Allocate a buffer object from an existing user accessible + * address malloc'd with the provided size. + * Alignment is used when mapping to the gtt. + * Flags may be I915_VMAP_READ_ONLY or I915_USERPTR_UNSYNCHRONIZED + */ + drm_intel_bo *(*bo_alloc_userptr)(drm_intel_bufmgr *bufmgr, + const char *name, void *addr, + uint32_t tiling_mode, uint32_t stride, + unsigned long size, + unsigned long flags); + + /** + * Allocate a tiled buffer object. + * + * Alignment for tiled objects is set automatically; the 'flags' + * argument provides a hint about how the object will be used initially. + * + * Valid tiling formats are: + * I915_TILING_NONE + * I915_TILING_X + * I915_TILING_Y + * + * Note the tiling format may be rejected; callers should check the + * 'tiling_mode' field on return, as well as the pitch value, which + * may have been rounded up to accommodate for tiling restrictions. + */ + drm_intel_bo *(*bo_alloc_tiled) (drm_intel_bufmgr *bufmgr, + const char *name, + int x, int y, int cpp, + uint32_t *tiling_mode, + unsigned long *pitch, + unsigned long flags); + + /** Takes a reference on a buffer object */ + void (*bo_reference) (drm_intel_bo *bo); + + /** + * Releases a reference on a buffer object, freeing the data if + * no references remain. + */ + void (*bo_unreference) (drm_intel_bo *bo); + + /** + * Maps the buffer into userspace. + * + * This function will block waiting for any existing execution on the + * buffer to complete, first. The resulting mapping is available at + * buf->virtual. + */ + int (*bo_map) (drm_intel_bo *bo, int write_enable); + + /** + * Reduces the refcount on the userspace mapping of the buffer + * object. + */ + int (*bo_unmap) (drm_intel_bo *bo); + + /** + * Write data into an object. + * + * This is an optional function, if missing, + * drm_intel_bo will map/memcpy/unmap. + */ + int (*bo_subdata) (drm_intel_bo *bo, unsigned long offset, + unsigned long size, const void *data); + + /** + * Read data from an object + * + * This is an optional function, if missing, + * drm_intel_bo will map/memcpy/unmap. + */ + int (*bo_get_subdata) (drm_intel_bo *bo, unsigned long offset, + unsigned long size, void *data); + + /** + * Waits for rendering to an object by the GPU to have completed. + * + * This is not required for any access to the BO by bo_map, + * bo_subdata, etc. It is merely a way for the driver to implement + * glFinish. + */ + void (*bo_wait_rendering) (drm_intel_bo *bo); + + /** + * Tears down the buffer manager instance. + */ + void (*destroy) (drm_intel_bufmgr *bufmgr); + + /** + * Indicate if the buffer can be placed anywhere in the full ppgtt + * address range (2^48). + * + * Any resource used with flat/heapless (0x00000000-0xfffff000) + * General State Heap (GSH) or Intructions State Heap (ISH) must + * be in a 32-bit range. 48-bit range will only be used when explicitly + * requested. + * + * \param bo Buffer to set the use_48b_address_range flag. + * \param enable The flag value. + */ + void (*bo_use_48b_address_range) (drm_intel_bo *bo, uint32_t enable); + + /** + * Add relocation entry in reloc_buf, which will be updated with the + * target buffer's real offset on on command submission. + * + * Relocations remain in place for the lifetime of the buffer object. + * + * \param bo Buffer to write the relocation into. + * \param offset Byte offset within reloc_bo of the pointer to + * target_bo. + * \param target_bo Buffer whose offset should be written into the + * relocation entry. + * \param target_offset Constant value to be added to target_bo's + * offset in relocation entry. + * \param read_domains GEM read domains which the buffer will be + * read into by the command that this relocation + * is part of. + * \param write_domains GEM read domains which the buffer will be + * dirtied in by the command that this + * relocation is part of. + */ + int (*bo_emit_reloc) (drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain); + int (*bo_emit_reloc_fence)(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, + uint32_t target_offset, + uint32_t read_domains, + uint32_t write_domain); + + /** Executes the command buffer pointed to by bo. */ + int (*bo_exec) (drm_intel_bo *bo, int used, + drm_clip_rect_t *cliprects, int num_cliprects, + int DR4); + + /** Executes the command buffer pointed to by bo on the selected + * ring buffer + */ + int (*bo_mrb_exec) (drm_intel_bo *bo, int used, + drm_clip_rect_t *cliprects, int num_cliprects, + int DR4, unsigned flags); + + /** + * Pin a buffer to the aperture and fix the offset until unpinned + * + * \param buf Buffer to pin + * \param alignment Required alignment for aperture, in bytes + */ + int (*bo_pin) (drm_intel_bo *bo, uint32_t alignment); + + /** + * Unpin a buffer from the aperture, allowing it to be removed + * + * \param buf Buffer to unpin + */ + int (*bo_unpin) (drm_intel_bo *bo); + + /** + * Ask that the buffer be placed in tiling mode + * + * \param buf Buffer to set tiling mode for + * \param tiling_mode desired, and returned tiling mode + */ + int (*bo_set_tiling) (drm_intel_bo *bo, uint32_t * tiling_mode, + uint32_t stride); + + /** + * Get the current tiling (and resulting swizzling) mode for the bo. + * + * \param buf Buffer to get tiling mode for + * \param tiling_mode returned tiling mode + * \param swizzle_mode returned swizzling mode + */ + int (*bo_get_tiling) (drm_intel_bo *bo, uint32_t * tiling_mode, + uint32_t * swizzle_mode); + + /** + * Set the offset at which this buffer will be softpinned + * \param bo Buffer to set the softpin offset for + * \param offset Softpin offset + */ + int (*bo_set_softpin_offset) (drm_intel_bo *bo, uint64_t offset); + + /** + * Create a visible name for a buffer which can be used by other apps + * + * \param buf Buffer to create a name for + * \param name Returned name + */ + int (*bo_flink) (drm_intel_bo *bo, uint32_t * name); + + /** + * Returns 1 if mapping the buffer for write could cause the process + * to block, due to the object being active in the GPU. + */ + int (*bo_busy) (drm_intel_bo *bo); + + /** + * Specify the volatility of the buffer. + * \param bo Buffer to create a name for + * \param madv The purgeable status + * + * Use I915_MADV_DONTNEED to mark the buffer as purgeable, and it will be + * reclaimed under memory pressure. If you subsequently require the buffer, + * then you must pass I915_MADV_WILLNEED to mark the buffer as required. + * + * Returns 1 if the buffer was retained, or 0 if it was discarded whilst + * marked as I915_MADV_DONTNEED. + */ + int (*bo_madvise) (drm_intel_bo *bo, int madv); + + int (*check_aperture_space) (drm_intel_bo ** bo_array, int count); + + /** + * Disable buffer reuse for buffers which will be shared in some way, + * as with scanout buffers. When the buffer reference count goes to + * zero, it will be freed and not placed in the reuse list. + * + * \param bo Buffer to disable reuse for + */ + int (*bo_disable_reuse) (drm_intel_bo *bo); + + /** + * Query whether a buffer is reusable. + * + * \param bo Buffer to query + */ + int (*bo_is_reusable) (drm_intel_bo *bo); + + /** + * + * Return the pipe associated with a crtc_id so that vblank + * synchronization can use the correct data in the request. + * This is only supported for KMS and gem at this point, when + * unsupported, this function returns -1 and leaves the decision + * of what to do in that case to the caller + * + * \param bufmgr the associated buffer manager + * \param crtc_id the crtc identifier + */ + int (*get_pipe_from_crtc_id) (drm_intel_bufmgr *bufmgr, int crtc_id); + + /** Returns true if target_bo is in the relocation tree rooted at bo. */ + int (*bo_references) (drm_intel_bo *bo, drm_intel_bo *target_bo); + + /**< Enables verbose debugging printouts */ + int debug; +}; + +struct _drm_intel_context { + unsigned int ctx_id; + struct _drm_intel_bufmgr *bufmgr; +}; + +#define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) +#define ROUND_UP_TO(x, y) (((x) + (y) - 1) / (y) * (y)) +#define ROUND_UP_TO_MB(x) ROUND_UP_TO((x), 1024*1024) + +#endif /* INTEL_BUFMGR_PRIV_H */ diff --git a/src/mesa/drivers/dri/i965/intel_chipset.h b/src/mesa/drivers/dri/i965/intel_chipset.h new file mode 100644 index 00000000000..41fc0da061c --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_chipset.h @@ -0,0 +1,469 @@ +/* + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_CHIPSET_H +#define _INTEL_CHIPSET_H + +#define PCI_CHIP_I810 0x7121 +#define PCI_CHIP_I810_DC100 0x7123 +#define PCI_CHIP_I810_E 0x7125 +#define PCI_CHIP_I815 0x1132 + +#define PCI_CHIP_I830_M 0x3577 +#define PCI_CHIP_845_G 0x2562 +#define PCI_CHIP_I855_GM 0x3582 +#define PCI_CHIP_I865_G 0x2572 + +#define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_E7221_G 0x258A +#define PCI_CHIP_I915_GM 0x2592 +#define PCI_CHIP_I945_G 0x2772 +#define PCI_CHIP_I945_GM 0x27A2 +#define PCI_CHIP_I945_GME 0x27AE + +#define PCI_CHIP_Q35_G 0x29B2 +#define PCI_CHIP_G33_G 0x29C2 +#define PCI_CHIP_Q33_G 0x29D2 + +#define PCI_CHIP_IGD_GM 0xA011 +#define PCI_CHIP_IGD_G 0xA001 + +#define IS_IGDGM(devid) ((devid) == PCI_CHIP_IGD_GM) +#define IS_IGDG(devid) ((devid) == PCI_CHIP_IGD_G) +#define IS_IGD(devid) (IS_IGDG(devid) || IS_IGDGM(devid)) + +#define PCI_CHIP_I965_G 0x29A2 +#define PCI_CHIP_I965_Q 0x2992 +#define PCI_CHIP_I965_G_1 0x2982 +#define PCI_CHIP_I946_GZ 0x2972 +#define PCI_CHIP_I965_GM 0x2A02 +#define PCI_CHIP_I965_GME 0x2A12 + +#define PCI_CHIP_GM45_GM 0x2A42 + +#define PCI_CHIP_IGD_E_G 0x2E02 +#define PCI_CHIP_Q45_G 0x2E12 +#define PCI_CHIP_G45_G 0x2E22 +#define PCI_CHIP_G41_G 0x2E32 + +#define PCI_CHIP_ILD_G 0x0042 +#define PCI_CHIP_ILM_G 0x0046 + +#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 /* desktop */ +#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112 +#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122 +#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 /* mobile */ +#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116 +#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126 +#define PCI_CHIP_SANDYBRIDGE_S 0x010A /* server */ + +#define PCI_CHIP_IVYBRIDGE_GT1 0x0152 /* desktop */ +#define PCI_CHIP_IVYBRIDGE_GT2 0x0162 +#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* mobile */ +#define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166 +#define PCI_CHIP_IVYBRIDGE_S 0x015a /* server */ +#define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a /* server */ + +#define PCI_CHIP_HASWELL_GT1 0x0402 /* Desktop */ +#define PCI_CHIP_HASWELL_GT2 0x0412 +#define PCI_CHIP_HASWELL_GT3 0x0422 +#define PCI_CHIP_HASWELL_M_GT1 0x0406 /* Mobile */ +#define PCI_CHIP_HASWELL_M_GT2 0x0416 +#define PCI_CHIP_HASWELL_M_GT3 0x0426 +#define PCI_CHIP_HASWELL_S_GT1 0x040A /* Server */ +#define PCI_CHIP_HASWELL_S_GT2 0x041A +#define PCI_CHIP_HASWELL_S_GT3 0x042A +#define PCI_CHIP_HASWELL_B_GT1 0x040B /* Reserved */ +#define PCI_CHIP_HASWELL_B_GT2 0x041B +#define PCI_CHIP_HASWELL_B_GT3 0x042B +#define PCI_CHIP_HASWELL_E_GT1 0x040E /* Reserved */ +#define PCI_CHIP_HASWELL_E_GT2 0x041E +#define PCI_CHIP_HASWELL_E_GT3 0x042E +#define PCI_CHIP_HASWELL_SDV_GT1 0x0C02 /* Desktop */ +#define PCI_CHIP_HASWELL_SDV_GT2 0x0C12 +#define PCI_CHIP_HASWELL_SDV_GT3 0x0C22 +#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0C06 /* Mobile */ +#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0C16 +#define PCI_CHIP_HASWELL_SDV_M_GT3 0x0C26 +#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0C0A /* Server */ +#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0C1A +#define PCI_CHIP_HASWELL_SDV_S_GT3 0x0C2A +#define PCI_CHIP_HASWELL_SDV_B_GT1 0x0C0B /* Reserved */ +#define PCI_CHIP_HASWELL_SDV_B_GT2 0x0C1B +#define PCI_CHIP_HASWELL_SDV_B_GT3 0x0C2B +#define PCI_CHIP_HASWELL_SDV_E_GT1 0x0C0E /* Reserved */ +#define PCI_CHIP_HASWELL_SDV_E_GT2 0x0C1E +#define PCI_CHIP_HASWELL_SDV_E_GT3 0x0C2E +#define PCI_CHIP_HASWELL_ULT_GT1 0x0A02 /* Desktop */ +#define PCI_CHIP_HASWELL_ULT_GT2 0x0A12 +#define PCI_CHIP_HASWELL_ULT_GT3 0x0A22 +#define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06 /* Mobile */ +#define PCI_CHIP_HASWELL_ULT_M_GT2 0x0A16 +#define PCI_CHIP_HASWELL_ULT_M_GT3 0x0A26 +#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A /* Server */ +#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A +#define PCI_CHIP_HASWELL_ULT_S_GT3 0x0A2A +#define PCI_CHIP_HASWELL_ULT_B_GT1 0x0A0B /* Reserved */ +#define PCI_CHIP_HASWELL_ULT_B_GT2 0x0A1B +#define PCI_CHIP_HASWELL_ULT_B_GT3 0x0A2B +#define PCI_CHIP_HASWELL_ULT_E_GT1 0x0A0E /* Reserved */ +#define PCI_CHIP_HASWELL_ULT_E_GT2 0x0A1E +#define PCI_CHIP_HASWELL_ULT_E_GT3 0x0A2E +#define PCI_CHIP_HASWELL_CRW_GT1 0x0D02 /* Desktop */ +#define PCI_CHIP_HASWELL_CRW_GT2 0x0D12 +#define PCI_CHIP_HASWELL_CRW_GT3 0x0D22 +#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D06 /* Mobile */ +#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D16 +#define PCI_CHIP_HASWELL_CRW_M_GT3 0x0D26 +#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D0A /* Server */ +#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D1A +#define PCI_CHIP_HASWELL_CRW_S_GT3 0x0D2A +#define PCI_CHIP_HASWELL_CRW_B_GT1 0x0D0B /* Reserved */ +#define PCI_CHIP_HASWELL_CRW_B_GT2 0x0D1B +#define PCI_CHIP_HASWELL_CRW_B_GT3 0x0D2B +#define PCI_CHIP_HASWELL_CRW_E_GT1 0x0D0E /* Reserved */ +#define PCI_CHIP_HASWELL_CRW_E_GT2 0x0D1E +#define PCI_CHIP_HASWELL_CRW_E_GT3 0x0D2E +#define BDW_SPARE 0x2 +#define BDW_ULT 0x6 +#define BDW_SERVER 0xa +#define BDW_IRIS 0xb +#define BDW_WORKSTATION 0xd +#define BDW_ULX 0xe + +#define PCI_CHIP_VALLEYVIEW_PO 0x0f30 /* VLV PO board */ +#define PCI_CHIP_VALLEYVIEW_1 0x0f31 +#define PCI_CHIP_VALLEYVIEW_2 0x0f32 +#define PCI_CHIP_VALLEYVIEW_3 0x0f33 + +#define PCI_CHIP_CHERRYVIEW_0 0x22b0 +#define PCI_CHIP_CHERRYVIEW_1 0x22b1 +#define PCI_CHIP_CHERRYVIEW_2 0x22b2 +#define PCI_CHIP_CHERRYVIEW_3 0x22b3 + +#define PCI_CHIP_SKYLAKE_DT_GT1 0x1902 +#define PCI_CHIP_SKYLAKE_ULT_GT1 0x1906 +#define PCI_CHIP_SKYLAKE_SRV_GT1 0x190A /* Reserved */ +#define PCI_CHIP_SKYLAKE_H_GT1 0x190B +#define PCI_CHIP_SKYLAKE_ULX_GT1 0x190E /* Reserved */ +#define PCI_CHIP_SKYLAKE_DT_GT2 0x1912 +#define PCI_CHIP_SKYLAKE_FUSED0_GT2 0x1913 /* Reserved */ +#define PCI_CHIP_SKYLAKE_FUSED1_GT2 0x1915 /* Reserved */ +#define PCI_CHIP_SKYLAKE_ULT_GT2 0x1916 +#define PCI_CHIP_SKYLAKE_FUSED2_GT2 0x1917 /* Reserved */ +#define PCI_CHIP_SKYLAKE_SRV_GT2 0x191A /* Reserved */ +#define PCI_CHIP_SKYLAKE_HALO_GT2 0x191B +#define PCI_CHIP_SKYLAKE_WKS_GT2 0x191D +#define PCI_CHIP_SKYLAKE_ULX_GT2 0x191E +#define PCI_CHIP_SKYLAKE_MOBILE_GT2 0x1921 /* Reserved */ +#define PCI_CHIP_SKYLAKE_ULT_GT3_0 0x1923 +#define PCI_CHIP_SKYLAKE_ULT_GT3_1 0x1926 +#define PCI_CHIP_SKYLAKE_ULT_GT3_2 0x1927 +#define PCI_CHIP_SKYLAKE_SRV_GT4 0x192A +#define PCI_CHIP_SKYLAKE_HALO_GT3 0x192B /* Reserved */ +#define PCI_CHIP_SKYLAKE_SRV_GT3 0x192D +#define PCI_CHIP_SKYLAKE_DT_GT4 0x1932 +#define PCI_CHIP_SKYLAKE_SRV_GT4X 0x193A +#define PCI_CHIP_SKYLAKE_H_GT4 0x193B +#define PCI_CHIP_SKYLAKE_WKS_GT4 0x193D + +#define PCI_CHIP_KABYLAKE_ULT_GT2 0x5916 +#define PCI_CHIP_KABYLAKE_ULT_GT1_5 0x5913 +#define PCI_CHIP_KABYLAKE_ULT_GT1 0x5906 +#define PCI_CHIP_KABYLAKE_ULT_GT3_0 0x5923 +#define PCI_CHIP_KABYLAKE_ULT_GT3_1 0x5926 +#define PCI_CHIP_KABYLAKE_ULT_GT3_2 0x5927 +#define PCI_CHIP_KABYLAKE_ULT_GT2F 0x5921 +#define PCI_CHIP_KABYLAKE_ULX_GT1_5 0x5915 +#define PCI_CHIP_KABYLAKE_ULX_GT1 0x590E +#define PCI_CHIP_KABYLAKE_ULX_GT2 0x591E +#define PCI_CHIP_KABYLAKE_DT_GT2 0x5912 +#define PCI_CHIP_KABYLAKE_DT_GT1_5 0x5917 +#define PCI_CHIP_KABYLAKE_DT_GT1 0x5902 +#define PCI_CHIP_KABYLAKE_HALO_GT2 0x591B +#define PCI_CHIP_KABYLAKE_HALO_GT4 0x593B +#define PCI_CHIP_KABYLAKE_HALO_GT1_0 0x5908 +#define PCI_CHIP_KABYLAKE_HALO_GT1_1 0x590B +#define PCI_CHIP_KABYLAKE_SRV_GT2 0x591A +#define PCI_CHIP_KABYLAKE_SRV_GT1 0x590A +#define PCI_CHIP_KABYLAKE_WKS_GT2 0x591D + +#define PCI_CHIP_BROXTON_0 0x0A84 +#define PCI_CHIP_BROXTON_1 0x1A84 +#define PCI_CHIP_BROXTON_2 0x5A84 +#define PCI_CHIP_BROXTON_3 0x1A85 +#define PCI_CHIP_BROXTON_4 0x5A85 + +#define PCI_CHIP_GLK 0x3184 +#define PCI_CHIP_GLK_2X6 0x3185 + +#define IS_MOBILE(devid) ((devid) == PCI_CHIP_I855_GM || \ + (devid) == PCI_CHIP_I915_GM || \ + (devid) == PCI_CHIP_I945_GM || \ + (devid) == PCI_CHIP_I945_GME || \ + (devid) == PCI_CHIP_I965_GM || \ + (devid) == PCI_CHIP_I965_GME || \ + (devid) == PCI_CHIP_GM45_GM || IS_IGD(devid) || \ + (devid) == PCI_CHIP_IVYBRIDGE_M_GT1 || \ + (devid) == PCI_CHIP_IVYBRIDGE_M_GT2) + +#define IS_G45(devid) ((devid) == PCI_CHIP_IGD_E_G || \ + (devid) == PCI_CHIP_Q45_G || \ + (devid) == PCI_CHIP_G45_G || \ + (devid) == PCI_CHIP_G41_G) +#define IS_GM45(devid) ((devid) == PCI_CHIP_GM45_GM) +#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) + +#define IS_ILD(devid) ((devid) == PCI_CHIP_ILD_G) +#define IS_ILM(devid) ((devid) == PCI_CHIP_ILM_G) + +#define IS_915(devid) ((devid) == PCI_CHIP_I915_G || \ + (devid) == PCI_CHIP_E7221_G || \ + (devid) == PCI_CHIP_I915_GM) + +#define IS_945GM(devid) ((devid) == PCI_CHIP_I945_GM || \ + (devid) == PCI_CHIP_I945_GME) + +#define IS_945(devid) ((devid) == PCI_CHIP_I945_G || \ + (devid) == PCI_CHIP_I945_GM || \ + (devid) == PCI_CHIP_I945_GME || \ + IS_G33(devid)) + +#define IS_G33(devid) ((devid) == PCI_CHIP_G33_G || \ + (devid) == PCI_CHIP_Q33_G || \ + (devid) == PCI_CHIP_Q35_G || IS_IGD(devid)) + +#define IS_GEN2(devid) ((devid) == PCI_CHIP_I830_M || \ + (devid) == PCI_CHIP_845_G || \ + (devid) == PCI_CHIP_I855_GM || \ + (devid) == PCI_CHIP_I865_G) + +#define IS_GEN3(devid) (IS_945(devid) || IS_915(devid)) + +#define IS_GEN4(devid) ((devid) == PCI_CHIP_I965_G || \ + (devid) == PCI_CHIP_I965_Q || \ + (devid) == PCI_CHIP_I965_G_1 || \ + (devid) == PCI_CHIP_I965_GM || \ + (devid) == PCI_CHIP_I965_GME || \ + (devid) == PCI_CHIP_I946_GZ || \ + IS_G4X(devid)) + +#define IS_GEN5(devid) (IS_ILD(devid) || IS_ILM(devid)) + +#define IS_GEN6(devid) ((devid) == PCI_CHIP_SANDYBRIDGE_GT1 || \ + (devid) == PCI_CHIP_SANDYBRIDGE_GT2 || \ + (devid) == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \ + (devid) == PCI_CHIP_SANDYBRIDGE_M_GT1 || \ + (devid) == PCI_CHIP_SANDYBRIDGE_M_GT2 || \ + (devid) == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \ + (devid) == PCI_CHIP_SANDYBRIDGE_S) + +#define IS_GEN7(devid) (IS_IVYBRIDGE(devid) || \ + IS_HASWELL(devid) || \ + IS_VALLEYVIEW(devid)) + +#define IS_IVYBRIDGE(devid) ((devid) == PCI_CHIP_IVYBRIDGE_GT1 || \ + (devid) == PCI_CHIP_IVYBRIDGE_GT2 || \ + (devid) == PCI_CHIP_IVYBRIDGE_M_GT1 || \ + (devid) == PCI_CHIP_IVYBRIDGE_M_GT2 || \ + (devid) == PCI_CHIP_IVYBRIDGE_S || \ + (devid) == PCI_CHIP_IVYBRIDGE_S_GT2) + +#define IS_VALLEYVIEW(devid) ((devid) == PCI_CHIP_VALLEYVIEW_PO || \ + (devid) == PCI_CHIP_VALLEYVIEW_1 || \ + (devid) == PCI_CHIP_VALLEYVIEW_2 || \ + (devid) == PCI_CHIP_VALLEYVIEW_3) + +#define IS_HSW_GT1(devid) ((devid) == PCI_CHIP_HASWELL_GT1 || \ + (devid) == PCI_CHIP_HASWELL_M_GT1 || \ + (devid) == PCI_CHIP_HASWELL_S_GT1 || \ + (devid) == PCI_CHIP_HASWELL_B_GT1 || \ + (devid) == PCI_CHIP_HASWELL_E_GT1 || \ + (devid) == PCI_CHIP_HASWELL_SDV_GT1 || \ + (devid) == PCI_CHIP_HASWELL_SDV_M_GT1 || \ + (devid) == PCI_CHIP_HASWELL_SDV_S_GT1 || \ + (devid) == PCI_CHIP_HASWELL_SDV_B_GT1 || \ + (devid) == PCI_CHIP_HASWELL_SDV_E_GT1 || \ + (devid) == PCI_CHIP_HASWELL_ULT_GT1 || \ + (devid) == PCI_CHIP_HASWELL_ULT_M_GT1 || \ + (devid) == PCI_CHIP_HASWELL_ULT_S_GT1 || \ + (devid) == PCI_CHIP_HASWELL_ULT_B_GT1 || \ + (devid) == PCI_CHIP_HASWELL_ULT_E_GT1 || \ + (devid) == PCI_CHIP_HASWELL_CRW_GT1 || \ + (devid) == PCI_CHIP_HASWELL_CRW_M_GT1 || \ + (devid) == PCI_CHIP_HASWELL_CRW_S_GT1 || \ + (devid) == PCI_CHIP_HASWELL_CRW_B_GT1 || \ + (devid) == PCI_CHIP_HASWELL_CRW_E_GT1) +#define IS_HSW_GT2(devid) ((devid) == PCI_CHIP_HASWELL_GT2 || \ + (devid) == PCI_CHIP_HASWELL_M_GT2 || \ + (devid) == PCI_CHIP_HASWELL_S_GT2 || \ + (devid) == PCI_CHIP_HASWELL_B_GT2 || \ + (devid) == PCI_CHIP_HASWELL_E_GT2 || \ + (devid) == PCI_CHIP_HASWELL_SDV_GT2 || \ + (devid) == PCI_CHIP_HASWELL_SDV_M_GT2 || \ + (devid) == PCI_CHIP_HASWELL_SDV_S_GT2 || \ + (devid) == PCI_CHIP_HASWELL_SDV_B_GT2 || \ + (devid) == PCI_CHIP_HASWELL_SDV_E_GT2 || \ + (devid) == PCI_CHIP_HASWELL_ULT_GT2 || \ + (devid) == PCI_CHIP_HASWELL_ULT_M_GT2 || \ + (devid) == PCI_CHIP_HASWELL_ULT_S_GT2 || \ + (devid) == PCI_CHIP_HASWELL_ULT_B_GT2 || \ + (devid) == PCI_CHIP_HASWELL_ULT_E_GT2 || \ + (devid) == PCI_CHIP_HASWELL_CRW_GT2 || \ + (devid) == PCI_CHIP_HASWELL_CRW_M_GT2 || \ + (devid) == PCI_CHIP_HASWELL_CRW_S_GT2 || \ + (devid) == PCI_CHIP_HASWELL_CRW_B_GT2 || \ + (devid) == PCI_CHIP_HASWELL_CRW_E_GT2) +#define IS_HSW_GT3(devid) ((devid) == PCI_CHIP_HASWELL_GT3 || \ + (devid) == PCI_CHIP_HASWELL_M_GT3 || \ + (devid) == PCI_CHIP_HASWELL_S_GT3 || \ + (devid) == PCI_CHIP_HASWELL_B_GT3 || \ + (devid) == PCI_CHIP_HASWELL_E_GT3 || \ + (devid) == PCI_CHIP_HASWELL_SDV_GT3 || \ + (devid) == PCI_CHIP_HASWELL_SDV_M_GT3 || \ + (devid) == PCI_CHIP_HASWELL_SDV_S_GT3 || \ + (devid) == PCI_CHIP_HASWELL_SDV_B_GT3 || \ + (devid) == PCI_CHIP_HASWELL_SDV_E_GT3 || \ + (devid) == PCI_CHIP_HASWELL_ULT_GT3 || \ + (devid) == PCI_CHIP_HASWELL_ULT_M_GT3 || \ + (devid) == PCI_CHIP_HASWELL_ULT_S_GT3 || \ + (devid) == PCI_CHIP_HASWELL_ULT_B_GT3 || \ + (devid) == PCI_CHIP_HASWELL_ULT_E_GT3 || \ + (devid) == PCI_CHIP_HASWELL_CRW_GT3 || \ + (devid) == PCI_CHIP_HASWELL_CRW_M_GT3 || \ + (devid) == PCI_CHIP_HASWELL_CRW_S_GT3 || \ + (devid) == PCI_CHIP_HASWELL_CRW_B_GT3 || \ + (devid) == PCI_CHIP_HASWELL_CRW_E_GT3) + +#define IS_HASWELL(devid) (IS_HSW_GT1(devid) || \ + IS_HSW_GT2(devid) || \ + IS_HSW_GT3(devid)) + +#define IS_BROADWELL(devid) (((devid & 0xff00) != 0x1600) ? 0 : \ + (((devid & 0x00f0) >> 4) > 3) ? 0 : \ + ((devid & 0x000f) == BDW_SPARE) ? 1 : \ + ((devid & 0x000f) == BDW_ULT) ? 1 : \ + ((devid & 0x000f) == BDW_IRIS) ? 1 : \ + ((devid & 0x000f) == BDW_SERVER) ? 1 : \ + ((devid & 0x000f) == BDW_WORKSTATION) ? 1 : \ + ((devid & 0x000f) == BDW_ULX) ? 1 : 0) + +#define IS_CHERRYVIEW(devid) ((devid) == PCI_CHIP_CHERRYVIEW_0 || \ + (devid) == PCI_CHIP_CHERRYVIEW_1 || \ + (devid) == PCI_CHIP_CHERRYVIEW_2 || \ + (devid) == PCI_CHIP_CHERRYVIEW_3) + +#define IS_GEN8(devid) (IS_BROADWELL(devid) || \ + IS_CHERRYVIEW(devid)) + +#define IS_SKL_GT1(devid) ((devid) == PCI_CHIP_SKYLAKE_DT_GT1 || \ + (devid) == PCI_CHIP_SKYLAKE_ULT_GT1 || \ + (devid) == PCI_CHIP_SKYLAKE_SRV_GT1 || \ + (devid) == PCI_CHIP_SKYLAKE_H_GT1 || \ + (devid) == PCI_CHIP_SKYLAKE_ULX_GT1) + +#define IS_SKL_GT2(devid) ((devid) == PCI_CHIP_SKYLAKE_DT_GT2 || \ + (devid) == PCI_CHIP_SKYLAKE_FUSED0_GT2 || \ + (devid) == PCI_CHIP_SKYLAKE_FUSED1_GT2 || \ + (devid) == PCI_CHIP_SKYLAKE_ULT_GT2 || \ + (devid) == PCI_CHIP_SKYLAKE_FUSED2_GT2 || \ + (devid) == PCI_CHIP_SKYLAKE_SRV_GT2 || \ + (devid) == PCI_CHIP_SKYLAKE_HALO_GT2 || \ + (devid) == PCI_CHIP_SKYLAKE_WKS_GT2 || \ + (devid) == PCI_CHIP_SKYLAKE_ULX_GT2 || \ + (devid) == PCI_CHIP_SKYLAKE_MOBILE_GT2) + +#define IS_SKL_GT3(devid) ((devid) == PCI_CHIP_SKYLAKE_ULT_GT3_0 || \ + (devid) == PCI_CHIP_SKYLAKE_ULT_GT3_1 || \ + (devid) == PCI_CHIP_SKYLAKE_ULT_GT3_2 || \ + (devid) == PCI_CHIP_SKYLAKE_HALO_GT3 || \ + (devid) == PCI_CHIP_SKYLAKE_SRV_GT3) + +#define IS_SKL_GT4(devid) ((devid) == PCI_CHIP_SKYLAKE_SRV_GT4 || \ + (devid) == PCI_CHIP_SKYLAKE_DT_GT4 || \ + (devid) == PCI_CHIP_SKYLAKE_SRV_GT4X || \ + (devid) == PCI_CHIP_SKYLAKE_H_GT4 || \ + (devid) == PCI_CHIP_SKYLAKE_WKS_GT4) + +#define IS_KBL_GT1(devid) ((devid) == PCI_CHIP_KABYLAKE_ULT_GT1_5 || \ + (devid) == PCI_CHIP_KABYLAKE_ULX_GT1_5 || \ + (devid) == PCI_CHIP_KABYLAKE_DT_GT1_5 || \ + (devid) == PCI_CHIP_KABYLAKE_ULT_GT1 || \ + (devid) == PCI_CHIP_KABYLAKE_ULX_GT1 || \ + (devid) == PCI_CHIP_KABYLAKE_DT_GT1 || \ + (devid) == PCI_CHIP_KABYLAKE_HALO_GT1_0 || \ + (devid) == PCI_CHIP_KABYLAKE_HALO_GT1_1 || \ + (devid) == PCI_CHIP_KABYLAKE_SRV_GT1) + +#define IS_KBL_GT2(devid) ((devid) == PCI_CHIP_KABYLAKE_ULT_GT2 || \ + (devid) == PCI_CHIP_KABYLAKE_ULT_GT2F || \ + (devid) == PCI_CHIP_KABYLAKE_ULX_GT2 || \ + (devid) == PCI_CHIP_KABYLAKE_DT_GT2 || \ + (devid) == PCI_CHIP_KABYLAKE_HALO_GT2 || \ + (devid) == PCI_CHIP_KABYLAKE_SRV_GT2 || \ + (devid) == PCI_CHIP_KABYLAKE_WKS_GT2) + +#define IS_KBL_GT3(devid) ((devid) == PCI_CHIP_KABYLAKE_ULT_GT3_0 || \ + (devid) == PCI_CHIP_KABYLAKE_ULT_GT3_1 || \ + (devid) == PCI_CHIP_KABYLAKE_ULT_GT3_2) + +#define IS_KBL_GT4(devid) ((devid) == PCI_CHIP_KABYLAKE_HALO_GT4) + +#define IS_KABYLAKE(devid) (IS_KBL_GT1(devid) || \ + IS_KBL_GT2(devid) || \ + IS_KBL_GT3(devid) || \ + IS_KBL_GT4(devid)) + +#define IS_SKYLAKE(devid) (IS_SKL_GT1(devid) || \ + IS_SKL_GT2(devid) || \ + IS_SKL_GT3(devid) || \ + IS_SKL_GT4(devid)) + +#define IS_BROXTON(devid) ((devid) == PCI_CHIP_BROXTON_0 || \ + (devid) == PCI_CHIP_BROXTON_1 || \ + (devid) == PCI_CHIP_BROXTON_2 || \ + (devid) == PCI_CHIP_BROXTON_3 || \ + (devid) == PCI_CHIP_BROXTON_4) + +#define IS_GEMINILAKE(devid) ((devid) == PCI_CHIP_GLK || \ + (devid) == PCI_CHIP_GLK_2X6) + +#define IS_GEN9(devid) (IS_SKYLAKE(devid) || \ + IS_BROXTON(devid) || \ + IS_KABYLAKE(devid) || \ + IS_GEMINILAKE(devid)) + +#define IS_9XX(dev) (IS_GEN3(dev) || \ + IS_GEN4(dev) || \ + IS_GEN5(dev) || \ + IS_GEN6(dev) || \ + IS_GEN7(dev) || \ + IS_GEN8(dev) || \ + IS_GEN9(dev)) + + +#endif /* _INTEL_CHIPSET_H */ diff --git a/src/mesa/drivers/dri/i965/libdrm_lists.h b/src/mesa/drivers/dri/i965/libdrm_lists.h new file mode 100644 index 00000000000..8926d8d1a67 --- /dev/null +++ b/src/mesa/drivers/dri/i965/libdrm_lists.h @@ -0,0 +1,118 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND. USA. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +/* + * List macros heavily inspired by the Linux kernel + * list handling. No list looping yet. + */ + +#include <stddef.h> + +typedef struct _drmMMListHead +{ + struct _drmMMListHead *prev; + struct _drmMMListHead *next; +} drmMMListHead; + +#define DRMINITLISTHEAD(__item) \ + do{ \ + (__item)->prev = (__item); \ + (__item)->next = (__item); \ + } while (0) + +#define DRMLISTADD(__item, __list) \ + do { \ + (__item)->prev = (__list); \ + (__item)->next = (__list)->next; \ + (__list)->next->prev = (__item); \ + (__list)->next = (__item); \ + } while (0) + +#define DRMLISTADDTAIL(__item, __list) \ + do { \ + (__item)->next = (__list); \ + (__item)->prev = (__list)->prev; \ + (__list)->prev->next = (__item); \ + (__list)->prev = (__item); \ + } while(0) + +#define DRMLISTDEL(__item) \ + do { \ + (__item)->prev->next = (__item)->next; \ + (__item)->next->prev = (__item)->prev; \ + } while(0) + +#define DRMLISTDELINIT(__item) \ + do { \ + (__item)->prev->next = (__item)->next; \ + (__item)->next->prev = (__item)->prev; \ + (__item)->next = (__item); \ + (__item)->prev = (__item); \ + } while(0) + +#define DRMLISTENTRY(__type, __item, __field) \ + ((__type *)(((char *) (__item)) - offsetof(__type, __field))) + +#define DRMLISTEMPTY(__item) ((__item)->next == (__item)) + +#define DRMLISTSINGLE(__list) \ + (!DRMLISTEMPTY(__list) && ((__list)->next == (__list)->prev)) + +#define DRMLISTFOREACH(__item, __list) \ + for ((__item) = (__list)->next; \ + (__item) != (__list); (__item) = (__item)->next) + +#define DRMLISTFOREACHSAFE(__item, __temp, __list) \ + for ((__item) = (__list)->next, (__temp) = (__item)->next; \ + (__item) != (__list); \ + (__item) = (__temp), (__temp) = (__item)->next) + +#define DRMLISTFOREACHSAFEREVERSE(__item, __temp, __list) \ + for ((__item) = (__list)->prev, (__temp) = (__item)->prev; \ + (__item) != (__list); \ + (__item) = (__temp), (__temp) = (__item)->prev) + +#define DRMLISTFOREACHENTRY(__item, __list, __head) \ + for ((__item) = DRMLISTENTRY(typeof(*__item), (__list)->next, __head); \ + &(__item)->__head != (__list); \ + (__item) = DRMLISTENTRY(typeof(*__item), \ + (__item)->__head.next, __head)) + +#define DRMLISTFOREACHENTRYSAFE(__item, __temp, __list, __head) \ + for ((__item) = DRMLISTENTRY(typeof(*__item), (__list)->next, __head), \ + (__temp) = DRMLISTENTRY(typeof(*__item), \ + (__item)->__head.next, __head); \ + &(__item)->__head != (__list); \ + (__item) = (__temp), \ + (__temp) = DRMLISTENTRY(typeof(*__item), \ + (__temp)->__head.next, __head)) + +#define DRMLISTJOIN(__list, __join) if (!DRMLISTEMPTY(__list)) { \ + (__list)->next->prev = (__join); \ + (__list)->prev->next = (__join)->next; \ + (__join)->next->prev = (__list)->prev; \ + (__join)->next = (__list)->next; \ +} diff --git a/src/mesa/drivers/dri/i965/libdrm_macros.h b/src/mesa/drivers/dri/i965/libdrm_macros.h new file mode 100644 index 00000000000..639d09047ef --- /dev/null +++ b/src/mesa/drivers/dri/i965/libdrm_macros.h @@ -0,0 +1,87 @@ +/* + * Copyright © 2014 NVIDIA Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef LIBDRM_LIBDRM_H +#define LIBDRM_LIBDRM_H + +#if defined(HAVE_VISIBILITY) +# define drm_private __attribute__((visibility("hidden"))) +#else +# define drm_private +#endif + + +/** + * Static (compile-time) assertion. + * Basically, use COND to dimension an array. If COND is false/zero the + * array size will be -1 and we'll get a compilation error. + */ +#define STATIC_ASSERT(COND) \ + do { \ + (void) sizeof(char [1 - 2*!(COND)]); \ + } while (0) + + +#include <sys/mman.h> + +#if defined(ANDROID) && !defined(__LP64__) +#include <errno.h> /* for EINVAL */ + +extern void *__mmap2(void *, size_t, int, int, int, size_t); + +static inline void *drm_mmap(void *addr, size_t length, int prot, int flags, + int fd, loff_t offset) +{ + /* offset must be aligned to 4096 (not necessarily the page size) */ + if (offset & 4095) { + errno = EINVAL; + return MAP_FAILED; + } + + return __mmap2(addr, length, prot, flags, fd, (size_t) (offset >> 12)); +} + +# define drm_munmap(addr, length) \ + munmap(addr, length) + + +#else + +/* assume large file support exists */ +# define drm_mmap(addr, length, prot, flags, fd, offset) \ + mmap(addr, length, prot, flags, fd, offset) + + +static inline int drm_munmap(void *addr, size_t length) +{ + /* Copied from configure code generated by AC_SYS_LARGEFILE */ +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + \ + (((off_t) 1 << 31) << 31)) + STATIC_ASSERT(LARGE_OFF_T % 2147483629 == 721 && + LARGE_OFF_T % 2147483647 == 1); +#undef LARGE_OFF_T + + return munmap(addr, length); +} +#endif + +#endif diff --git a/src/mesa/drivers/dri/i965/uthash.h b/src/mesa/drivers/dri/i965/uthash.h new file mode 100644 index 00000000000..45d1f9fc12a --- /dev/null +++ b/src/mesa/drivers/dri/i965/uthash.h @@ -0,0 +1,1074 @@ +/* +Copyright (c) 2003-2016, Troy D. Hanson http://troydhanson.github.com/uthash/ +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef UTHASH_H +#define UTHASH_H + +#define UTHASH_VERSION 2.0.1 + +#include <string.h> /* memcmp,strlen */ +#include <stddef.h> /* ptrdiff_t */ +#include <stdlib.h> /* exit() */ + +/* These macros use decltype or the earlier __typeof GNU extension. + As decltype is only available in newer compilers (VS2010 or gcc 4.3+ + when compiling c++ source) this code uses whatever method is needed + or, for VS2008 where neither is available, uses casting workarounds. */ +#if defined(_MSC_VER) /* MS compiler */ +#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ +#define DECLTYPE(x) (decltype(x)) +#else /* VS2008 or older (or VS2010 in C mode) */ +#define NO_DECLTYPE +#define DECLTYPE(x) +#endif +#elif defined(__BORLANDC__) || defined(__LCC__) || defined(__WATCOMC__) +#define NO_DECLTYPE +#define DECLTYPE(x) +#else /* GNU, Sun and other compilers */ +#define DECLTYPE(x) (__typeof(x)) +#endif + +#ifdef NO_DECLTYPE +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + char **_da_dst = (char**)(&(dst)); \ + *_da_dst = (char*)(src); \ +} while (0) +#else +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + (dst) = DECLTYPE(dst)(src); \ +} while (0) +#endif + +/* a number of the hash function use uint32_t which isn't defined on Pre VS2010 */ +#if defined(_WIN32) +#if defined(_MSC_VER) && _MSC_VER >= 1600 +#include <stdint.h> +#elif defined(__WATCOMC__) || defined(__MINGW32__) || defined(__CYGWIN__) +#include <stdint.h> +#else +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; +#endif +#elif defined(__GNUC__) && !defined(__VXWORKS__) +#include <stdint.h> +#else +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; +#endif + +#ifndef uthash_fatal +#define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ +#endif +#ifndef uthash_malloc +#define uthash_malloc(sz) malloc(sz) /* malloc fcn */ +#endif +#ifndef uthash_free +#define uthash_free(ptr,sz) free(ptr) /* free fcn */ +#endif +#ifndef uthash_strlen +#define uthash_strlen(s) strlen(s) +#endif +#ifndef uthash_memcmp +#define uthash_memcmp(a,b,n) memcmp(a,b,n) +#endif + +#ifndef uthash_noexpand_fyi +#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ +#endif +#ifndef uthash_expand_fyi +#define uthash_expand_fyi(tbl) /* can be defined to log expands */ +#endif + +/* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS 32U /* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */ +#define HASH_BKT_CAPACITY_THRESH 10U /* expand when bucket count reaches */ + +/* calculate the element whose hash handle address is hhp */ +#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) +/* calculate the hash handle from element address elp */ +#define HH_FROM_ELMT(tbl,elp) ((UT_hash_handle *)(((char*)(elp)) + ((tbl)->hho))) + +#define HASH_VALUE(keyptr,keylen,hashv) \ +do { \ + HASH_FCN(keyptr, keylen, hashv); \ +} while (0) + +#define HASH_FIND_BYHASHVALUE(hh,head,keyptr,keylen,hashval,out) \ +do { \ + (out) = NULL; \ + if (head) { \ + unsigned _hf_bkt; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _hf_bkt); \ + if (HASH_BLOOM_TEST((head)->hh.tbl, hashval) != 0) { \ + HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], keyptr, keylen, hashval, out); \ + } \ + } \ +} while (0) + +#define HASH_FIND(hh,head,keyptr,keylen,out) \ +do { \ + unsigned _hf_hashv; \ + HASH_VALUE(keyptr, keylen, _hf_hashv); \ + HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, _hf_hashv, out); \ +} while (0) + +#ifdef HASH_BLOOM +#define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM) +#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8UL) + (((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 1UL : 0UL) +#define HASH_BLOOM_MAKE(tbl) \ +do { \ + (tbl)->bloom_nbits = HASH_BLOOM; \ + (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ + if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ + memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ + (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ +} while (0) + +#define HASH_BLOOM_FREE(tbl) \ +do { \ + uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ +} while (0) + +#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8U] |= (1U << ((idx)%8U))) +#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8U] & (1U << ((idx)%8U))) + +#define HASH_BLOOM_ADD(tbl,hashv) \ + HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U))) + +#define HASH_BLOOM_TEST(tbl,hashv) \ + HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U))) + +#else +#define HASH_BLOOM_MAKE(tbl) +#define HASH_BLOOM_FREE(tbl) +#define HASH_BLOOM_ADD(tbl,hashv) +#define HASH_BLOOM_TEST(tbl,hashv) (1) +#define HASH_BLOOM_BYTELEN 0U +#endif + +#define HASH_MAKE_TABLE(hh,head) \ +do { \ + (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ + sizeof(UT_hash_table)); \ + if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ + (head)->hh.tbl->tail = &((head)->hh); \ + (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ + (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ + (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ + (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl->buckets, 0, \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_MAKE((head)->hh.tbl); \ + (head)->hh.tbl->signature = HASH_SIGNATURE; \ +} while (0) + +#define HASH_REPLACE_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,replaced,cmpfcn) \ +do { \ + (replaced) = NULL; \ + HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ + if (replaced) { \ + HASH_DELETE(hh, head, replaced); \ + } \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn); \ +} while (0) + +#define HASH_REPLACE_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add,replaced) \ +do { \ + (replaced) = NULL; \ + HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ + if (replaced) { \ + HASH_DELETE(hh, head, replaced); \ + } \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add); \ +} while (0) + +#define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \ +do { \ + unsigned _hr_hashv; \ + HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ + HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced); \ +} while (0) + +#define HASH_REPLACE_INORDER(hh,head,fieldname,keylen_in,add,replaced,cmpfcn) \ +do { \ + unsigned _hr_hashv; \ + HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ + HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced, cmpfcn); \ +} while (0) + +#define HASH_APPEND_LIST(hh, head, add) \ +do { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ + (head)->hh.tbl->tail->next = (add); \ + (head)->hh.tbl->tail = &((add)->hh); \ +} while (0) + +#define HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh,head,keyptr,keylen_in,hashval,add,cmpfcn) \ +do { \ + unsigned _ha_bkt; \ + (add)->hh.hashv = (hashval); \ + (add)->hh.key = (char*) (keyptr); \ + (add)->hh.keylen = (unsigned) (keylen_in); \ + if (!(head)) { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = NULL; \ + (head) = (add); \ + HASH_MAKE_TABLE(hh, head); \ + } else { \ + struct UT_hash_handle *_hs_iter = &(head)->hh; \ + (add)->hh.tbl = (head)->hh.tbl; \ + do { \ + if (cmpfcn(DECLTYPE(head) ELMT_FROM_HH((head)->hh.tbl, _hs_iter), add) > 0) \ + break; \ + } while ((_hs_iter = _hs_iter->next)); \ + if (_hs_iter) { \ + (add)->hh.next = _hs_iter; \ + if (((add)->hh.prev = _hs_iter->prev)) { \ + HH_FROM_ELMT((head)->hh.tbl, _hs_iter->prev)->next = (add); \ + } else { \ + (head) = (add); \ + } \ + _hs_iter->prev = (add); \ + } else { \ + HASH_APPEND_LIST(hh, head, add); \ + } \ + } \ + (head)->hh.tbl->num_items++; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], &(add)->hh); \ + HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ + HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ + HASH_FSCK(hh, head); \ +} while (0) + +#define HASH_ADD_KEYPTR_INORDER(hh,head,keyptr,keylen_in,add,cmpfcn) \ +do { \ + unsigned _hs_hashv; \ + HASH_VALUE(keyptr, keylen_in, _hs_hashv); \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in, _hs_hashv, add, cmpfcn); \ +} while (0) + +#define HASH_ADD_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,cmpfcn) \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn) + +#define HASH_ADD_INORDER(hh,head,fieldname,keylen_in,add,cmpfcn) \ + HASH_ADD_KEYPTR_INORDER(hh, head, &((add)->fieldname), keylen_in, add, cmpfcn) + +#define HASH_ADD_KEYPTR_BYHASHVALUE(hh,head,keyptr,keylen_in,hashval,add) \ +do { \ + unsigned _ha_bkt; \ + (add)->hh.hashv = (hashval); \ + (add)->hh.key = (char*) (keyptr); \ + (add)->hh.keylen = (unsigned) (keylen_in); \ + if (!(head)) { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = NULL; \ + (head) = (add); \ + HASH_MAKE_TABLE(hh, head); \ + } else { \ + (add)->hh.tbl = (head)->hh.tbl; \ + HASH_APPEND_LIST(hh, head, add); \ + } \ + (head)->hh.tbl->num_items++; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], &(add)->hh); \ + HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ + HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ + HASH_FSCK(hh, head); \ +} while (0) + +#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ +do { \ + unsigned _ha_hashv; \ + HASH_VALUE(keyptr, keylen_in, _ha_hashv); \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, _ha_hashv, add); \ +} while (0) + +#define HASH_ADD_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add) \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add) + +#define HASH_ADD(hh,head,fieldname,keylen_in,add) \ + HASH_ADD_KEYPTR(hh, head, &((add)->fieldname), keylen_in, add) + +#define HASH_TO_BKT(hashv,num_bkts,bkt) \ +do { \ + bkt = ((hashv) & ((num_bkts) - 1U)); \ +} while (0) + +/* delete "delptr" from the hash table. + * "the usual" patch-up process for the app-order doubly-linked-list. + * The use of _hd_hh_del below deserves special explanation. + * These used to be expressed using (delptr) but that led to a bug + * if someone used the same symbol for the head and deletee, like + * HASH_DELETE(hh,users,users); + * We want that to work, but by changing the head (users) below + * we were forfeiting our ability to further refer to the deletee (users) + * in the patch-up process. Solution: use scratch space to + * copy the deletee pointer, then the latter references are via that + * scratch pointer rather than through the repointed (users) symbol. + */ +#define HASH_DELETE(hh,head,delptr) \ +do { \ + struct UT_hash_handle *_hd_hh_del; \ + if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + head = NULL; \ + } else { \ + unsigned _hd_bkt; \ + _hd_hh_del = &((delptr)->hh); \ + if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \ + (head)->hh.tbl->tail = \ + (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ + (head)->hh.tbl->hho); \ + } \ + if ((delptr)->hh.prev != NULL) { \ + ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ + (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ + } else { \ + DECLTYPE_ASSIGN(head,(delptr)->hh.next); \ + } \ + if (_hd_hh_del->next != NULL) { \ + ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next + \ + (head)->hh.tbl->hho))->prev = \ + _hd_hh_del->prev; \ + } \ + HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ + HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ + (head)->hh.tbl->num_items--; \ + } \ + HASH_FSCK(hh,head); \ +} while (0) + + +/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ +#define HASH_FIND_STR(head,findstr,out) \ + HASH_FIND(hh,head,findstr,(unsigned)uthash_strlen(findstr),out) +#define HASH_ADD_STR(head,strfield,add) \ + HASH_ADD(hh,head,strfield[0],(unsigned)uthash_strlen(add->strfield),add) +#define HASH_REPLACE_STR(head,strfield,add,replaced) \ + HASH_REPLACE(hh,head,strfield[0],(unsigned)uthash_strlen(add->strfield),add,replaced) +#define HASH_FIND_INT(head,findint,out) \ + HASH_FIND(hh,head,findint,sizeof(int),out) +#define HASH_ADD_INT(head,intfield,add) \ + HASH_ADD(hh,head,intfield,sizeof(int),add) +#define HASH_REPLACE_INT(head,intfield,add,replaced) \ + HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced) +#define HASH_FIND_PTR(head,findptr,out) \ + HASH_FIND(hh,head,findptr,sizeof(void *),out) +#define HASH_ADD_PTR(head,ptrfield,add) \ + HASH_ADD(hh,head,ptrfield,sizeof(void *),add) +#define HASH_REPLACE_PTR(head,ptrfield,add,replaced) \ + HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced) +#define HASH_DEL(head,delptr) \ + HASH_DELETE(hh,head,delptr) + +/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. + * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. + */ +#ifdef HASH_DEBUG +#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) +#define HASH_FSCK(hh,head) \ +do { \ + struct UT_hash_handle *_thh; \ + if (head) { \ + unsigned _bkt_i; \ + unsigned _count; \ + char *_prev; \ + _count = 0; \ + for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ + unsigned _bkt_count = 0; \ + _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ + _prev = NULL; \ + while (_thh) { \ + if (_prev != (char*)(_thh->hh_prev)) { \ + HASH_OOPS("invalid hh_prev %p, actual %p\n", \ + _thh->hh_prev, _prev ); \ + } \ + _bkt_count++; \ + _prev = (char*)(_thh); \ + _thh = _thh->hh_next; \ + } \ + _count += _bkt_count; \ + if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ + HASH_OOPS("invalid bucket count %u, actual %u\n", \ + (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ + } \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("invalid hh item count %u, actual %u\n", \ + (head)->hh.tbl->num_items, _count ); \ + } \ + /* traverse hh in app order; check next/prev integrity, count */ \ + _count = 0; \ + _prev = NULL; \ + _thh = &(head)->hh; \ + while (_thh) { \ + _count++; \ + if (_prev !=(char*)(_thh->prev)) { \ + HASH_OOPS("invalid prev %p, actual %p\n", \ + _thh->prev, _prev ); \ + } \ + _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ + _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \ + (head)->hh.tbl->hho) : NULL ); \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("invalid app item count %u, actual %u\n", \ + (head)->hh.tbl->num_items, _count ); \ + } \ + } \ +} while (0) +#else +#define HASH_FSCK(hh,head) +#endif + +/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to + * the descriptor to which this macro is defined for tuning the hash function. + * The app can #include <unistd.h> to get the prototype for write(2). */ +#ifdef HASH_EMIT_KEYS +#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ +do { \ + unsigned _klen = fieldlen; \ + write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ + write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); \ +} while (0) +#else +#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) +#endif + +/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ +#ifdef HASH_FUNCTION +#define HASH_FCN HASH_FUNCTION +#else +#define HASH_FCN HASH_JEN +#endif + +/* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33. */ +#define HASH_BER(key,keylen,hashv) \ +do { \ + unsigned _hb_keylen=(unsigned)keylen; \ + const unsigned char *_hb_key=(const unsigned char*)(key); \ + (hashv) = 0; \ + while (_hb_keylen-- != 0U) { \ + (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; \ + } \ +} while (0) + + +/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at + * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ +#define HASH_SAX(key,keylen,hashv) \ +do { \ + unsigned _sx_i; \ + const unsigned char *_hs_key=(const unsigned char*)(key); \ + hashv = 0; \ + for(_sx_i=0; _sx_i < keylen; _sx_i++) { \ + hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ + } \ +} while (0) +/* FNV-1a variation */ +#define HASH_FNV(key,keylen,hashv) \ +do { \ + unsigned _fn_i; \ + const unsigned char *_hf_key=(const unsigned char*)(key); \ + hashv = 2166136261U; \ + for(_fn_i=0; _fn_i < keylen; _fn_i++) { \ + hashv = hashv ^ _hf_key[_fn_i]; \ + hashv = hashv * 16777619U; \ + } \ +} while (0) + +#define HASH_OAT(key,keylen,hashv) \ +do { \ + unsigned _ho_i; \ + const unsigned char *_ho_key=(const unsigned char*)(key); \ + hashv = 0; \ + for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ + hashv += _ho_key[_ho_i]; \ + hashv += (hashv << 10); \ + hashv ^= (hashv >> 6); \ + } \ + hashv += (hashv << 3); \ + hashv ^= (hashv >> 11); \ + hashv += (hashv << 15); \ +} while (0) + +#define HASH_JEN_MIX(a,b,c) \ +do { \ + a -= b; a -= c; a ^= ( c >> 13 ); \ + b -= c; b -= a; b ^= ( a << 8 ); \ + c -= a; c -= b; c ^= ( b >> 13 ); \ + a -= b; a -= c; a ^= ( c >> 12 ); \ + b -= c; b -= a; b ^= ( a << 16 ); \ + c -= a; c -= b; c ^= ( b >> 5 ); \ + a -= b; a -= c; a ^= ( c >> 3 ); \ + b -= c; b -= a; b ^= ( a << 10 ); \ + c -= a; c -= b; c ^= ( b >> 15 ); \ +} while (0) + +#define HASH_JEN(key,keylen,hashv) \ +do { \ + unsigned _hj_i,_hj_j,_hj_k; \ + unsigned const char *_hj_key=(unsigned const char*)(key); \ + hashv = 0xfeedbeefu; \ + _hj_i = _hj_j = 0x9e3779b9u; \ + _hj_k = (unsigned)(keylen); \ + while (_hj_k >= 12U) { \ + _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ + + ( (unsigned)_hj_key[2] << 16 ) \ + + ( (unsigned)_hj_key[3] << 24 ) ); \ + _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ + + ( (unsigned)_hj_key[6] << 16 ) \ + + ( (unsigned)_hj_key[7] << 24 ) ); \ + hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ + + ( (unsigned)_hj_key[10] << 16 ) \ + + ( (unsigned)_hj_key[11] << 24 ) ); \ + \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + \ + _hj_key += 12; \ + _hj_k -= 12U; \ + } \ + hashv += (unsigned)(keylen); \ + switch ( _hj_k ) { \ + case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */ \ + case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); /* FALLTHROUGH */ \ + case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); /* FALLTHROUGH */ \ + case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); /* FALLTHROUGH */ \ + case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); /* FALLTHROUGH */ \ + case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); /* FALLTHROUGH */ \ + case 5: _hj_j += _hj_key[4]; /* FALLTHROUGH */ \ + case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); /* FALLTHROUGH */ \ + case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); /* FALLTHROUGH */ \ + case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); /* FALLTHROUGH */ \ + case 1: _hj_i += _hj_key[0]; \ + } \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ +} while (0) + +/* The Paul Hsieh hash function */ +#undef get16bits +#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ + || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) +#define get16bits(d) (*((const uint16_t *) (d))) +#endif + +#if !defined (get16bits) +#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ + +(uint32_t)(((const uint8_t *)(d))[0]) ) +#endif +#define HASH_SFH(key,keylen,hashv) \ +do { \ + unsigned const char *_sfh_key=(unsigned const char*)(key); \ + uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen; \ + \ + unsigned _sfh_rem = _sfh_len & 3U; \ + _sfh_len >>= 2; \ + hashv = 0xcafebabeu; \ + \ + /* Main loop */ \ + for (;_sfh_len > 0U; _sfh_len--) { \ + hashv += get16bits (_sfh_key); \ + _sfh_tmp = ((uint32_t)(get16bits (_sfh_key+2)) << 11) ^ hashv; \ + hashv = (hashv << 16) ^ _sfh_tmp; \ + _sfh_key += 2U*sizeof (uint16_t); \ + hashv += hashv >> 11; \ + } \ + \ + /* Handle end cases */ \ + switch (_sfh_rem) { \ + case 3: hashv += get16bits (_sfh_key); \ + hashv ^= hashv << 16; \ + hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)]) << 18; \ + hashv += hashv >> 11; \ + break; \ + case 2: hashv += get16bits (_sfh_key); \ + hashv ^= hashv << 11; \ + hashv += hashv >> 17; \ + break; \ + case 1: hashv += *_sfh_key; \ + hashv ^= hashv << 10; \ + hashv += hashv >> 1; \ + } \ + \ + /* Force "avalanching" of final 127 bits */ \ + hashv ^= hashv << 3; \ + hashv += hashv >> 5; \ + hashv ^= hashv << 4; \ + hashv += hashv >> 17; \ + hashv ^= hashv << 25; \ + hashv += hashv >> 6; \ +} while (0) + +#ifdef HASH_USING_NO_STRICT_ALIASING +/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads. + * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. + * MurmurHash uses the faster approach only on CPU's where we know it's safe. + * + * Note the preprocessor built-in defines can be emitted using: + * + * gcc -m64 -dM -E - < /dev/null (on gcc) + * cc -## a.c (where a.c is a simple test file) (Sun Studio) + */ +#if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)) +#define MUR_GETBLOCK(p,i) p[i] +#else /* non intel */ +#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 3UL) == 0UL) +#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 3UL) == 1UL) +#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 3UL) == 2UL) +#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 3UL) == 3UL) +#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL)) +#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__)) +#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24)) +#define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16)) +#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8)) +#else /* assume little endian non-intel */ +#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24)) +#define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16)) +#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8)) +#endif +#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \ + (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \ + (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \ + MUR_ONE_THREE(p)))) +#endif +#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) +#define MUR_FMIX(_h) \ +do { \ + _h ^= _h >> 16; \ + _h *= 0x85ebca6bu; \ + _h ^= _h >> 13; \ + _h *= 0xc2b2ae35u; \ + _h ^= _h >> 16; \ +} while (0) + +#define HASH_MUR(key,keylen,hashv) \ +do { \ + const uint8_t *_mur_data = (const uint8_t*)(key); \ + const int _mur_nblocks = (int)(keylen) / 4; \ + uint32_t _mur_h1 = 0xf88D5353u; \ + uint32_t _mur_c1 = 0xcc9e2d51u; \ + uint32_t _mur_c2 = 0x1b873593u; \ + uint32_t _mur_k1 = 0; \ + const uint8_t *_mur_tail; \ + const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+(_mur_nblocks*4)); \ + int _mur_i; \ + for(_mur_i = -_mur_nblocks; _mur_i!=0; _mur_i++) { \ + _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \ + _mur_k1 *= _mur_c1; \ + _mur_k1 = MUR_ROTL32(_mur_k1,15); \ + _mur_k1 *= _mur_c2; \ + \ + _mur_h1 ^= _mur_k1; \ + _mur_h1 = MUR_ROTL32(_mur_h1,13); \ + _mur_h1 = (_mur_h1*5U) + 0xe6546b64u; \ + } \ + _mur_tail = (const uint8_t*)(_mur_data + (_mur_nblocks*4)); \ + _mur_k1=0; \ + switch((keylen) & 3U) { \ + case 3: _mur_k1 ^= (uint32_t)_mur_tail[2] << 16; /* FALLTHROUGH */ \ + case 2: _mur_k1 ^= (uint32_t)_mur_tail[1] << 8; /* FALLTHROUGH */ \ + case 1: _mur_k1 ^= (uint32_t)_mur_tail[0]; \ + _mur_k1 *= _mur_c1; \ + _mur_k1 = MUR_ROTL32(_mur_k1,15); \ + _mur_k1 *= _mur_c2; \ + _mur_h1 ^= _mur_k1; \ + } \ + _mur_h1 ^= (uint32_t)(keylen); \ + MUR_FMIX(_mur_h1); \ + hashv = _mur_h1; \ +} while (0) +#endif /* HASH_USING_NO_STRICT_ALIASING */ + +/* iterate over items in a known bucket to find desired item */ +#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,hashval,out) \ +do { \ + if ((head).hh_head != NULL) { \ + DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (head).hh_head)); \ + } else { \ + (out) = NULL; \ + } \ + while ((out) != NULL) { \ + if ((out)->hh.hashv == (hashval) && (out)->hh.keylen == (keylen_in)) { \ + if (uthash_memcmp((out)->hh.key, keyptr, keylen_in) == 0) { \ + break; \ + } \ + } \ + if ((out)->hh.hh_next != NULL) { \ + DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (out)->hh.hh_next)); \ + } else { \ + (out) = NULL; \ + } \ + } \ +} while (0) + +/* add an item to a bucket */ +#define HASH_ADD_TO_BKT(head,addhh) \ +do { \ + head.count++; \ + (addhh)->hh_next = head.hh_head; \ + (addhh)->hh_prev = NULL; \ + if (head.hh_head != NULL) { (head).hh_head->hh_prev = (addhh); } \ + (head).hh_head=addhh; \ + if ((head.count >= ((head.expand_mult+1U) * HASH_BKT_CAPACITY_THRESH)) \ + && ((addhh)->tbl->noexpand != 1U)) { \ + HASH_EXPAND_BUCKETS((addhh)->tbl); \ + } \ +} while (0) + +/* remove an item from a given bucket */ +#define HASH_DEL_IN_BKT(hh,head,hh_del) \ + (head).count--; \ + if ((head).hh_head == hh_del) { \ + (head).hh_head = hh_del->hh_next; \ + } \ + if (hh_del->hh_prev) { \ + hh_del->hh_prev->hh_next = hh_del->hh_next; \ + } \ + if (hh_del->hh_next) { \ + hh_del->hh_next->hh_prev = hh_del->hh_prev; \ + } + +/* Bucket expansion has the effect of doubling the number of buckets + * and redistributing the items into the new buckets. Ideally the + * items will distribute more or less evenly into the new buckets + * (the extent to which this is true is a measure of the quality of + * the hash function as it applies to the key domain). + * + * With the items distributed into more buckets, the chain length + * (item count) in each bucket is reduced. Thus by expanding buckets + * the hash keeps a bound on the chain length. This bounded chain + * length is the essence of how a hash provides constant time lookup. + * + * The calculation of tbl->ideal_chain_maxlen below deserves some + * explanation. First, keep in mind that we're calculating the ideal + * maximum chain length based on the *new* (doubled) bucket count. + * In fractions this is just n/b (n=number of items,b=new num buckets). + * Since the ideal chain length is an integer, we want to calculate + * ceil(n/b). We don't depend on floating point arithmetic in this + * hash, so to calculate ceil(n/b) with integers we could write + * + * ceil(n/b) = (n/b) + ((n%b)?1:0) + * + * and in fact a previous version of this hash did just that. + * But now we have improved things a bit by recognizing that b is + * always a power of two. We keep its base 2 log handy (call it lb), + * so now we can write this with a bit shift and logical AND: + * + * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) + * + */ +#define HASH_EXPAND_BUCKETS(tbl) \ +do { \ + unsigned _he_bkt; \ + unsigned _he_bkt_i; \ + struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ + UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ + _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ + 2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \ + memset(_he_new_buckets, 0, \ + 2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + tbl->ideal_chain_maxlen = \ + (tbl->num_items >> (tbl->log2_num_buckets+1U)) + \ + (((tbl->num_items & ((tbl->num_buckets*2U)-1U)) != 0U) ? 1U : 0U); \ + tbl->nonideal_items = 0; \ + for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \ + { \ + _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \ + while (_he_thh != NULL) { \ + _he_hh_nxt = _he_thh->hh_next; \ + HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2U, _he_bkt); \ + _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \ + if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \ + tbl->nonideal_items++; \ + _he_newbkt->expand_mult = _he_newbkt->count / \ + tbl->ideal_chain_maxlen; \ + } \ + _he_thh->hh_prev = NULL; \ + _he_thh->hh_next = _he_newbkt->hh_head; \ + if (_he_newbkt->hh_head != NULL) { _he_newbkt->hh_head->hh_prev = \ + _he_thh; } \ + _he_newbkt->hh_head = _he_thh; \ + _he_thh = _he_hh_nxt; \ + } \ + } \ + uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ + tbl->num_buckets *= 2U; \ + tbl->log2_num_buckets++; \ + tbl->buckets = _he_new_buckets; \ + tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \ + (tbl->ineff_expands+1U) : 0U; \ + if (tbl->ineff_expands > 1U) { \ + tbl->noexpand=1; \ + uthash_noexpand_fyi(tbl); \ + } \ + uthash_expand_fyi(tbl); \ +} while (0) + + +/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ +/* Note that HASH_SORT assumes the hash handle name to be hh. + * HASH_SRT was added to allow the hash handle name to be passed in. */ +#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) +#define HASH_SRT(hh,head,cmpfcn) \ +do { \ + unsigned _hs_i; \ + unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ + struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ + if (head != NULL) { \ + _hs_insize = 1; \ + _hs_looping = 1; \ + _hs_list = &((head)->hh); \ + while (_hs_looping != 0U) { \ + _hs_p = _hs_list; \ + _hs_list = NULL; \ + _hs_tail = NULL; \ + _hs_nmerges = 0; \ + while (_hs_p != NULL) { \ + _hs_nmerges++; \ + _hs_q = _hs_p; \ + _hs_psize = 0; \ + for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \ + _hs_psize++; \ + _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + if (! (_hs_q) ) { break; } \ + } \ + _hs_qsize = _hs_insize; \ + while ((_hs_psize > 0U) || ((_hs_qsize > 0U) && (_hs_q != NULL))) {\ + if (_hs_psize == 0U) { \ + _hs_e = _hs_q; \ + _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_qsize--; \ + } else if ( (_hs_qsize == 0U) || (_hs_q == NULL) ) { \ + _hs_e = _hs_p; \ + if (_hs_p != NULL){ \ + _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ? \ + ((void*)((char*)(_hs_p->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + } \ + _hs_psize--; \ + } else if (( \ + cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \ + DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \ + ) <= 0) { \ + _hs_e = _hs_p; \ + if (_hs_p != NULL){ \ + _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ? \ + ((void*)((char*)(_hs_p->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + } \ + _hs_psize--; \ + } else { \ + _hs_e = _hs_q; \ + _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_qsize--; \ + } \ + if ( _hs_tail != NULL ) { \ + _hs_tail->next = ((_hs_e != NULL) ? \ + ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \ + } else { \ + _hs_list = _hs_e; \ + } \ + if (_hs_e != NULL) { \ + _hs_e->prev = ((_hs_tail != NULL) ? \ + ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \ + } \ + _hs_tail = _hs_e; \ + } \ + _hs_p = _hs_q; \ + } \ + if (_hs_tail != NULL){ \ + _hs_tail->next = NULL; \ + } \ + if ( _hs_nmerges <= 1U ) { \ + _hs_looping=0; \ + (head)->hh.tbl->tail = _hs_tail; \ + DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ + } \ + _hs_insize *= 2U; \ + } \ + HASH_FSCK(hh,head); \ + } \ +} while (0) + +/* This function selects items from one hash into another hash. + * The end result is that the selected items have dual presence + * in both hashes. There is no copy of the items made; rather + * they are added into the new hash through a secondary hash + * hash handle that must be present in the structure. */ +#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ +do { \ + unsigned _src_bkt, _dst_bkt; \ + void *_last_elt=NULL, *_elt; \ + UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ + ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ + if (src != NULL) { \ + for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ + for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ + _src_hh != NULL; \ + _src_hh = _src_hh->hh_next) { \ + _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ + if (cond(_elt)) { \ + _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ + _dst_hh->key = _src_hh->key; \ + _dst_hh->keylen = _src_hh->keylen; \ + _dst_hh->hashv = _src_hh->hashv; \ + _dst_hh->prev = _last_elt; \ + _dst_hh->next = NULL; \ + if (_last_elt_hh != NULL) { _last_elt_hh->next = _elt; } \ + if (dst == NULL) { \ + DECLTYPE_ASSIGN(dst,_elt); \ + HASH_MAKE_TABLE(hh_dst,dst); \ + } else { \ + _dst_hh->tbl = (dst)->hh_dst.tbl; \ + } \ + HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ + HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \ + (dst)->hh_dst.tbl->num_items++; \ + _last_elt = _elt; \ + _last_elt_hh = _dst_hh; \ + } \ + } \ + } \ + } \ + HASH_FSCK(hh_dst,dst); \ +} while (0) + +#define HASH_CLEAR(hh,head) \ +do { \ + if (head != NULL) { \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head)=NULL; \ + } \ +} while (0) + +#define HASH_OVERHEAD(hh,head) \ + ((head != NULL) ? ( \ + (size_t)(((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \ + ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \ + sizeof(UT_hash_table) + \ + (HASH_BLOOM_BYTELEN))) : 0U) + +#ifdef NO_DECLTYPE +#define HASH_ITER(hh,head,el,tmp) \ +for(((el)=(head)), ((*(char**)(&(tmp)))=(char*)((head!=NULL)?(head)->hh.next:NULL)); \ + (el) != NULL; ((el)=(tmp)), ((*(char**)(&(tmp)))=(char*)((tmp!=NULL)?(tmp)->hh.next:NULL))) +#else +#define HASH_ITER(hh,head,el,tmp) \ +for(((el)=(head)), ((tmp)=DECLTYPE(el)((head!=NULL)?(head)->hh.next:NULL)); \ + (el) != NULL; ((el)=(tmp)), ((tmp)=DECLTYPE(el)((tmp!=NULL)?(tmp)->hh.next:NULL))) +#endif + +/* obtain a count of items in the hash */ +#define HASH_COUNT(head) HASH_CNT(hh,head) +#define HASH_CNT(hh,head) ((head != NULL)?((head)->hh.tbl->num_items):0U) + +typedef struct UT_hash_bucket { + struct UT_hash_handle *hh_head; + unsigned count; + + /* expand_mult is normally set to 0. In this situation, the max chain length + * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If + * the bucket's chain exceeds this length, bucket expansion is triggered). + * However, setting expand_mult to a non-zero value delays bucket expansion + * (that would be triggered by additions to this particular bucket) + * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. + * (The multiplier is simply expand_mult+1). The whole idea of this + * multiplier is to reduce bucket expansions, since they are expensive, in + * situations where we know that a particular bucket tends to be overused. + * It is better to let its chain length grow to a longer yet-still-bounded + * value, than to do an O(n) bucket expansion too often. + */ + unsigned expand_mult; + +} UT_hash_bucket; + +/* random signature used only to find hash tables in external analysis */ +#define HASH_SIGNATURE 0xa0111fe1u +#define HASH_BLOOM_SIGNATURE 0xb12220f2u + +typedef struct UT_hash_table { + UT_hash_bucket *buckets; + unsigned num_buckets, log2_num_buckets; + unsigned num_items; + struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ + ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ + + /* in an ideal situation (all buckets used equally), no bucket would have + * more than ceil(#items/#buckets) items. that's the ideal chain length. */ + unsigned ideal_chain_maxlen; + + /* nonideal_items is the number of items in the hash whose chain position + * exceeds the ideal chain maxlen. these items pay the penalty for an uneven + * hash distribution; reaching them in a chain traversal takes >ideal steps */ + unsigned nonideal_items; + + /* ineffective expands occur when a bucket doubling was performed, but + * afterward, more than half the items in the hash had nonideal chain + * positions. If this happens on two consecutive expansions we inhibit any + * further expansion, as it's not helping; this happens when the hash + * function isn't a good fit for the key domain. When expansion is inhibited + * the hash will still work, albeit no longer in constant time. */ + unsigned ineff_expands, noexpand; + + uint32_t signature; /* used only to find hash tables in external analysis */ +#ifdef HASH_BLOOM + uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ + uint8_t *bloom_bv; + uint8_t bloom_nbits; +#endif + +} UT_hash_table; + +typedef struct UT_hash_handle { + struct UT_hash_table *tbl; + void *prev; /* prev element in app order */ + void *next; /* next element in app order */ + struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ + struct UT_hash_handle *hh_next; /* next hh in bucket order */ + void *key; /* ptr to enclosing struct's key */ + unsigned keylen; /* enclosing struct's key len */ + unsigned hashv; /* result of hash-fcn(key) */ +} UT_hash_handle; + +#endif /* UTHASH_H */ diff --git a/src/mesa/drivers/dri/i965/xf86atomic.h b/src/mesa/drivers/dri/i965/xf86atomic.h new file mode 100644 index 00000000000..922b37da625 --- /dev/null +++ b/src/mesa/drivers/dri/i965/xf86atomic.h @@ -0,0 +1,117 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Chris Wilson <[email protected]> + * + */ + +/** + * @file xf86atomics.h + * + * Private definitions for atomic operations + */ + +#ifndef LIBDRM_ATOMICS_H +#define LIBDRM_ATOMICS_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#if HAVE_LIBDRM_ATOMIC_PRIMITIVES + +#define HAS_ATOMIC_OPS 1 + +typedef struct { + int atomic; +} atomic_t; + +# define atomic_read(x) ((x)->atomic) +# define atomic_set(x, val) ((x)->atomic = (val)) +# define atomic_inc(x) ((void) __sync_fetch_and_add (&(x)->atomic, 1)) +# define atomic_inc_return(x) (__sync_add_and_fetch (&(x)->atomic, 1)) +# define atomic_dec_and_test(x) (__sync_add_and_fetch (&(x)->atomic, -1) == 0) +# define atomic_add(x, v) ((void) __sync_add_and_fetch(&(x)->atomic, (v))) +# define atomic_dec(x, v) ((void) __sync_sub_and_fetch(&(x)->atomic, (v))) +# define atomic_cmpxchg(x, oldv, newv) __sync_val_compare_and_swap (&(x)->atomic, oldv, newv) + +#endif + +#if HAVE_LIB_ATOMIC_OPS +#include <atomic_ops.h> + +#define HAS_ATOMIC_OPS 1 + +typedef struct { + AO_t atomic; +} atomic_t; + +# define atomic_read(x) AO_load_full(&(x)->atomic) +# define atomic_set(x, val) AO_store_full(&(x)->atomic, (val)) +# define atomic_inc(x) ((void) AO_fetch_and_add1_full(&(x)->atomic)) +# define atomic_inc_return(x) (AO_fetch_and_add1_full(&(x)->atomic) + 1) +# define atomic_add(x, v) ((void) AO_fetch_and_add_full(&(x)->atomic, (v))) +# define atomic_dec(x, v) ((void) AO_fetch_and_add_full(&(x)->atomic, -(v))) +# define atomic_dec_and_test(x) (AO_fetch_and_sub1_full(&(x)->atomic) == 1) +# define atomic_cmpxchg(x, oldv, newv) AO_compare_and_swap_full(&(x)->atomic, oldv, newv) + +#endif + +#if (defined(__sun) || defined(__NetBSD__)) && !defined(HAS_ATOMIC_OPS) /* Solaris & OpenSolaris & NetBSD */ + +#include <sys/atomic.h> +#define HAS_ATOMIC_OPS 1 + +#if defined(__NetBSD__) +#define LIBDRM_ATOMIC_TYPE int +#else +#define LIBDRM_ATOMIC_TYPE uint_t +#endif + +typedef struct { LIBDRM_ATOMIC_TYPE atomic; } atomic_t; + +# define atomic_read(x) (int) ((x)->atomic) +# define atomic_set(x, val) ((x)->atomic = (LIBDRM_ATOMIC_TYPE)(val)) +# define atomic_inc(x) (atomic_inc_uint (&(x)->atomic)) +# define atomic_inc_return(x) (atomic_inc_uint_nv(&(x)->atomic)) +# define atomic_dec_and_test(x) (atomic_dec_uint_nv(&(x)->atomic) == 0) +# define atomic_add(x, v) (atomic_add_int(&(x)->atomic, (v))) +# define atomic_dec(x, v) (atomic_add_int(&(x)->atomic, -(v))) +# define atomic_cmpxchg(x, oldv, newv) atomic_cas_uint (&(x)->atomic, oldv, newv) + +#endif + +#if ! HAS_ATOMIC_OPS +#error libdrm requires atomic operations, please define them for your CPU/compiler. +#endif + +static inline int atomic_add_unless(atomic_t *v, int add, int unless) +{ + int c, old; + c = atomic_read(v); + while (c != unless && (old = atomic_cmpxchg(v, c, c + add)) != c) + c = old; + return c == unless; +} + +#endif |