summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/r600
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r--src/gallium/drivers/r600/Makefile.am18
-rw-r--r--src/gallium/drivers/r600/Makefile.sources6
-rw-r--r--src/gallium/drivers/r600/compute_memory_pool.c397
-rw-r--r--src/gallium/drivers/r600/compute_memory_pool.h98
-rw-r--r--src/gallium/drivers/r600/compute_resource.def38
-rw-r--r--src/gallium/drivers/r600/evergreen_compute.c814
-rw-r--r--src/gallium/drivers/r600/evergreen_compute.h69
-rw-r--r--src/gallium/drivers/r600/evergreen_compute_internal.c830
-rw-r--r--src/gallium/drivers/r600/evergreen_compute_internal.h119
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c2
-rw-r--r--src/gallium/drivers/r600/evergreend.h74
-rw-r--r--src/gallium/drivers/r600/llvm_wrapper.cpp19
-rw-r--r--src/gallium/drivers/r600/llvm_wrapper.h16
-rw-r--r--src/gallium/drivers/r600/r600_llvm.h4
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c94
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h23
-rw-r--r--src/gallium/drivers/r600/r600_resource.c18
-rw-r--r--src/gallium/drivers/r600/r600_resource.h8
-rw-r--r--src/gallium/drivers/r600/r600_shader.c31
-rw-r--r--src/gallium/drivers/r600/r600_texture.c8
20 files changed, 2674 insertions, 12 deletions
diff --git a/src/gallium/drivers/r600/Makefile.am b/src/gallium/drivers/r600/Makefile.am
index 77d2674d262..31d885a3416 100644
--- a/src/gallium/drivers/r600/Makefile.am
+++ b/src/gallium/drivers/r600/Makefile.am
@@ -18,7 +18,7 @@ AM_CFLAGS = \
libr600_a_SOURCES = \
$(C_SOURCES)
-if USE_R600_LLVM_COMPILER
+if NEED_RADEON_GALLIUM
# This is a hack until we can move the backend into the LLVM project.
# We need to use mklib, because it splits up libradeon.a into object files
@@ -26,18 +26,28 @@ if USE_R600_LLVM_COMPILER
libr600_a_AR = $(top_srcdir)/bin/mklib -o r600 -static
libr600_a_SOURCES += \
- $(LLVM_C_SOURCES)
+ $(LLVM_C_SOURCES) \
+ $(LLVM_CXX_SOURCES)
libr600_a_LIBADD = \
$(top_builddir)/src/gallium/drivers/radeon/libradeon.a
AM_CFLAGS += \
$(LLVM_CFLAGS) \
- -I$(top_srcdir)/src/gallium/drivers/radeon/ \
- -DR600_USE_LLVM
+ -I$(top_srcdir)/src/gallium/drivers/radeon/
AM_CXXFLAGS= \
$(LLVM_CXXFLAGS)
else
libr600_a_AR = $(AR) $(ARFLAGS)
endif
+
+if USE_R600_LLVM_COMPILER
+AM_CFLAGS += \
+ -DR600_USE_LLVM
+endif
+
+if HAVE_GALLIUM_COMPUTE
+AM_CFLAGS += \
+ -DHAVE_OPENCL
+endif
diff --git a/src/gallium/drivers/r600/Makefile.sources b/src/gallium/drivers/r600/Makefile.sources
index b7b0d50b637..50546e6fb2f 100644
--- a/src/gallium/drivers/r600/Makefile.sources
+++ b/src/gallium/drivers/r600/Makefile.sources
@@ -14,6 +14,10 @@ C_SOURCES = \
evergreen_state.c \
eg_asm.c \
r600_translate.c \
- r600_state_common.c
+ r600_state_common.c \
+ evergreen_compute.c \
+ evergreen_compute_internal.c \
+ compute_memory_pool.c
LLVM_C_SOURCES = r600_llvm.c
+LLVM_CXX_SOURCES = llvm_wrapper.cpp
diff --git a/src/gallium/drivers/r600/compute_memory_pool.c b/src/gallium/drivers/r600/compute_memory_pool.c
new file mode 100644
index 00000000000..01bf0c33dfd
--- /dev/null
+++ b/src/gallium/drivers/r600/compute_memory_pool.c
@@ -0,0 +1,397 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Adam Rak <[email protected]>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "util/u_blitter.h"
+#include "util/u_double_list.h"
+#include "util/u_transfer.h"
+#include "util/u_surface.h"
+#include "util/u_pack_color.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_framebuffer.h"
+#include "r600.h"
+#include "r600_resource.h"
+#include "r600_shader.h"
+#include "r600_pipe.h"
+#include "r600_formats.h"
+#include "compute_memory_pool.h"
+#include "evergreen_compute_internal.h"
+
+/**
+ * Creates a new pool of initial_size_in_dw dwords for rscreen, allocating its buffer object and a host shadow buffer of the same size.
+ */
+struct compute_memory_pool* compute_memory_pool_new(
+ int64_t initial_size_in_dw,
+ struct r600_screen * rscreen)
+{
+ struct compute_memory_pool* pool = (struct compute_memory_pool*)
+ CALLOC(sizeof(struct compute_memory_pool), 1); /* NOTE(review): the result is dereferenced below without a NULL check */
+
+ pool->next_id = 1; /* first id that compute_memory_alloc() will hand out */
+ pool->size_in_dw = initial_size_in_dw;
+ pool->screen = rscreen;
+ pool->bo = (struct r600_resource*)r600_compute_buffer_alloc_vram(
+ pool->screen, pool->size_in_dw*4); /* dwords converted to bytes (4 per dword) */
+ pool->shadow = (uint32_t*)CALLOC(4, pool->size_in_dw); /* one 4-byte entry per dword of the pool */
+
+ return pool;
+}
+
+/**
+ * Frees the shadow buffer, destroys the pool buffer object and frees the pool struct; items still linked in item_list are not freed here.
+ */
+void compute_memory_pool_delete(struct compute_memory_pool* pool)
+{
+ free(pool->shadow);
+ pool->screen->screen.resource_destroy((struct pipe_screen *)
+ pool->screen, (struct pipe_resource *)pool->bo);
+ free(pool);
+}
+
+/**
+ * Walks the already placed items in list order looking for the first gap that is larger than size_in_dw dwords.
+ * Returns the start offset of that gap in dwords, or -1 when neither a gap nor the tail of the pool can hold the request.
+ * Candidate offsets are the end of the previous placed item moved up to a 1024-dword boundary.
+ */
+int64_t compute_memory_prealloc_chunk(
+	struct compute_memory_pool* pool,
+	int64_t size_in_dw)
+{
+	assert(size_in_dw <= pool->size_in_dw);
+
+	struct compute_memory_item *item;
+
+	int64_t last_end = 0; /* 64-bit like start_in_dw/size_in_dw; a plain int would truncate large offsets */
+
+	for (item = pool->item_list; item; item = item->next) {
+		if (item->start_in_dw > -1) { /* pending items (start_in_dw == -1) occupy no space yet */
+			if (item->start_in_dw-last_end > size_in_dw) {
+				return last_end;
+			}
+
+			last_end = item->start_in_dw + item->size_in_dw;
+			last_end += (1024 - last_end % 1024); /* always advances: an already aligned end moves a full 1024 dwords */
+		}
+	}
+
+	if (pool->size_in_dw - last_end < size_in_dw) {
+		return -1;
+	}
+
+	return last_end;
+}
+
+/**
+ * Returns the item after which a chunk starting at start_in_dw should be linked: the first item that starts below start_in_dw while its successor starts above it, otherwise the last item of the list.
+ */
+struct compute_memory_item* compute_memory_postalloc_chunk(
+ struct compute_memory_pool* pool,
+ int64_t start_in_dw)
+{
+ struct compute_memory_item* item;
+
+ for (item = pool->item_list; item; item = item->next) {
+ if (item->next) {
+ if (item->start_in_dw < start_in_dw
+ && item->next->start_in_dw > start_in_dw) {
+ return item;
+ }
+ }
+ else {
+ /* end of chain */
+ assert(item->start_in_dw < start_in_dw); /* NOTE(review): a start_in_dw below every listed item (insertion at the head) is not handled and ends up here */
+ return item;
+ }
+ }
+
+ assert(0 && "unreachable"); /* only reached when item_list is empty */
+ return NULL;
+}
+
+/**
+ * Grows the pool to at least new_size_in_dw dwords: saves the contents into the host shadow, replaces the buffer object with a larger one and uploads the shadow again.
+ */
+void compute_memory_grow_pool(struct compute_memory_pool* pool,
+ struct pipe_context * pipe, int new_size_in_dw) /* NOTE(review): int here, while pool sizes are int64_t */
+{
+ assert(new_size_in_dw >= pool->size_in_dw);
+
+ new_size_in_dw += 1024 - (new_size_in_dw % 1024); /* rounds up; adds a full 1024 when already a multiple of 1024 */
+
+ compute_memory_shadow(pool, pipe, 1); /* device -> host, using the old size */
+ pool->shadow = (uint32_t*)realloc(pool->shadow, new_size_in_dw*4); /* NOTE(review): a failed realloc would overwrite the only pointer to the old shadow */
+ pool->size_in_dw = new_size_in_dw;
+ pool->screen->screen.resource_destroy(
+ (struct pipe_screen *)pool->screen,
+ (struct pipe_resource *)pool->bo);
+ pool->bo = r600_compute_buffer_alloc_vram(pool->screen,
+ pool->size_in_dw*4);
+ compute_memory_shadow(pool, pipe, 0); /* host -> device, using the new size */
+}
+
+/**
+ * Copies the whole pool between the device buffer and pool->shadow; a non-zero device_to_host copies towards the shadow.
+ */
+void compute_memory_shadow(struct compute_memory_pool* pool,
+	struct pipe_context * pipe, int device_to_host)
+{
+	struct compute_memory_item whole_pool; /* temporary item that spans the entire pool */
+
+	whole_pool.prev = NULL;
+	whole_pool.next = NULL;
+	whole_pool.id = 0;
+	whole_pool.start_in_dw = 0;
+	whole_pool.size_in_dw = pool->size_in_dw;
+	compute_memory_transfer(pool, pipe, device_to_host, &whole_pool, pool->shadow, 0, pool->size_in_dw*4);
+}
+
+/**
+ * Places all pending items: unlinks every item with start_in_dw == -1 into a temporary list, grows the pool when the total demand exceeds its size, then gives each pending item an offset and links it back into item_list.
+ */
+void compute_memory_finalize_pending(struct compute_memory_pool* pool,
+	struct pipe_context * pipe)
+{
+	struct compute_memory_item *pending_list = NULL, *end_p = NULL;
+	struct compute_memory_item *item, *next;
+
+	int64_t allocated = 0;
+	int64_t unallocated = 0;
+
+	for (item = pool->item_list; item; item = item->next) {
+		COMPUTE_DBG("list: %lld %p\n", (long long)item->start_in_dw, (void *)item->next); /* assumes COMPUTE_DBG is printf-style: int64_t needs %lld, %p needs void* */
+	}
+
+	for (item = pool->item_list; item; item = next) {
+		next = item->next; /* saved first: item->next is rewritten when the item is moved */
+
+
+		if (item->start_in_dw == -1) {
+			if (end_p) {
+				end_p->next = item;
+			}
+			else {
+				pending_list = item;
+			}
+
+			if (item->prev) {
+				item->prev->next = next;
+			}
+			else {
+				pool->item_list = next;
+			}
+
+			if (next) {
+				next->prev = item->prev;
+			}
+
+			item->prev = end_p;
+			item->next = NULL;
+			end_p = item;
+
+			unallocated += item->size_in_dw+1024; /* 1024 extra dwords per item for the padding added by the placement search */
+		}
+		else {
+			allocated += item->size_in_dw;
+		}
+	}
+
+	if (pool->size_in_dw < allocated+unallocated) {
+		compute_memory_grow_pool(pool, pipe, allocated+unallocated);
+	}
+
+	for (item = pending_list; item; item = next) {
+		next = item->next;
+
+		int64_t start_in_dw;
+
+		while ((start_in_dw=compute_memory_prealloc_chunk(pool,
+					item->size_in_dw)) == -1) { /* keep growing until the item fits */
+			int64_t need = item->size_in_dw+2048 -
+				(pool->size_in_dw - allocated);
+
+			need += 1024 - (need % 1024);
+
+			if (need > 0) {
+				compute_memory_grow_pool(pool,
+					pipe,
+					pool->size_in_dw + need);
+			}
+			else {
+				need = pool->size_in_dw / 10; /* enough total space but no usable gap: grow by about 10% */
+				need += 1024 - (need % 1024);
+				compute_memory_grow_pool(pool,
+					pipe,
+					pool->size_in_dw + need);
+			}
+		}
+
+		item->start_in_dw = start_in_dw;
+		item->next = NULL;
+		item->prev = NULL;
+
+		if (pool->item_list) {
+			struct compute_memory_item *pos;
+
+			pos = compute_memory_postalloc_chunk(pool, start_in_dw);
+			item->prev = pos;
+			item->next = pos->next;
+			pos->next = item;
+
+			if (item->next) {
+				item->next->prev = item;
+			}
+		}
+		else {
+			pool->item_list = item;
+		}
+
+		allocated += item->size_in_dw;
+	}
+}
+
+/** Unlinks the item with the given id from pool->item_list and frees it; an unknown id is reported on stderr and then asserts. */
+void compute_memory_free(struct compute_memory_pool* pool, int64_t id)
+{
+	struct compute_memory_item *item, *next;
+
+	for (item = pool->item_list; item; item = next) {
+		next = item->next;
+
+		if (item->id == id) {
+			if (item->prev) {
+				item->prev->next = item->next;
+			}
+			else {
+				pool->item_list = item->next; /* the item was the list head */
+			}
+
+			if (item->next) {
+				item->next->prev = item->prev;
+			}
+
+			free(item);
+
+			return;
+		}
+	}
+
+	fprintf(stderr, "Internal error, invalid id %lld "
+		"for compute_memory_free\n", (long long)id); /* int64_t is not long on every ABI, so %ld was not portable */
+
+	assert(0 && "error");
+}
+
+/**
+ * Creates a pending allocation of size_in_dw dwords: the new item gets the next pool id, start_in_dw == -1, and is appended to the end of pool->item_list. Returns the new item.
+ */
+struct compute_memory_item* compute_memory_alloc(
+	struct compute_memory_pool* pool,
+	int64_t size_in_dw)
+{
+	struct compute_memory_item *new_item;
+
+	COMPUTE_DBG("Alloc: %lld\n", (long long)size_in_dw); /* assumes COMPUTE_DBG is printf-style: an int64_t must not be passed to %i */
+
+	new_item = (struct compute_memory_item *)
+		CALLOC(sizeof(struct compute_memory_item), 1); /* NOTE(review): result is used without a NULL check */
+	new_item->size_in_dw = size_in_dw;
+	new_item->start_in_dw = -1; /* mark pending */
+	new_item->id = pool->next_id++;
+	new_item->pool = pool;
+
+	struct compute_memory_item *last_item;
+
+	if (pool->item_list) {
+		for (last_item = pool->item_list; last_item->next;
+				last_item = last_item->next); /* empty body: just walk to the tail */
+
+		last_item->next = new_item;
+		new_item->prev = last_item;
+	}
+	else {
+		pool->item_list = new_item;
+	}
+
+	return new_item;
+}
+
+/**
+ * Copies size bytes between host memory at data and the pool buffer, at byte offset offset_in_chunk inside chunk; a non-zero device_to_host reads from the pool, zero writes to it.
+ */
+void compute_memory_transfer(
+	struct compute_memory_pool* pool,
+	struct pipe_context * pipe,
+	int device_to_host,
+	struct compute_memory_item* chunk,
+	void* data,
+	int offset_in_chunk,
+	int size)
+{
+	int64_t aligned_size = pool->size_in_dw * 4; /* map the whole pool; width is given in bytes, matching the byte-sized buffer allocation */
+	struct pipe_resource* gart = (struct pipe_resource*)pool->bo;
+	int64_t internal_offset = chunk->start_in_dw*4 + offset_in_chunk; /* byte offset of the target inside the pool */
+
+	struct pipe_transfer *xfer;
+	char *map; /* byte pointer: internal_offset is in bytes, so it must not be scaled by a dword element size */
+
+	if (device_to_host)
+	{
+		xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_READ,
+			&(struct pipe_box) { .width = aligned_size,
+			.height = 1, .depth = 1 });
+		assert(xfer);
+		map = pipe->transfer_map(pipe, xfer);
+		assert(map);
+		memcpy(data, map + internal_offset, size);
+		pipe->transfer_unmap(pipe, xfer);
+		pipe->transfer_destroy(pipe, xfer);
+	} else {
+		xfer = pipe->get_transfer(pipe, gart, 0, PIPE_TRANSFER_WRITE,
+			&(struct pipe_box) { .width = aligned_size,
+			.height = 1, .depth = 1 });
+		assert(xfer);
+		map = pipe->transfer_map(pipe, xfer);
+		assert(map);
+		memcpy(map + internal_offset, data, size);
+		pipe->transfer_unmap(pipe, xfer);
+		pipe->transfer_destroy(pipe, xfer);
+	}
+}
+
+/**
+ * Intended to transfer data between a chunk and another resource (VRAM<->GART); not implemented yet, the body is empty.
+ */
+void compute_memory_transfer_direct(
+ struct compute_memory_pool* pool,
+ int chunk_to_data,
+ struct compute_memory_item* chunk,
+ struct r600_resource* data,
+ int offset_in_chunk,
+ int offset_in_data,
+ int size)
+{
+ /* TODO: implement using DMA; currently a no-op */
+}
diff --git a/src/gallium/drivers/r600/compute_memory_pool.h b/src/gallium/drivers/r600/compute_memory_pool.h
new file mode 100644
index 00000000000..a14eba1df7e
--- /dev/null
+++ b/src/gallium/drivers/r600/compute_memory_pool.h
@@ -0,0 +1,98 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Adam Rak <[email protected]>
+ */
+
+#ifndef COMPUTE_MEMORY_POOL
+#define COMPUTE_MEMORY_POOL
+
+#include <stdlib.h>
+
+struct compute_memory_pool;
+
+struct compute_memory_item
+{
+ int64_t id; ///< ID of the memory chunk
+
+ int untouched; ///< True if the memory contains only junk, no need to save it for defrag
+
+ int64_t start_in_dw; ///< Start offset in dwords relative to the pool bo; -1 while the allocation is still pending
+ int64_t size_in_dw; ///< Size of the chunk in dwords
+
+ struct compute_memory_pool* pool; ///< Pool this item was allocated from
+
+ struct compute_memory_item* prev; ///< Previous item in the pool's item list, NULL for the first item
+ struct compute_memory_item* next; ///< Next item in the pool's item list, NULL for the last item
+};
+
+struct compute_memory_pool
+{
+ int64_t next_id; ///< For generating unique IDs for memory chunks
+ int64_t size_in_dw; ///< Size of the pool in dwords
+
+ struct r600_resource *bo; ///< The pool buffer object resource
+ struct compute_memory_item* item_list; ///< Allocated memory chunks in the buffer, they must be ordered by "start_in_dw"
+ struct r600_screen *screen; ///< Screen used to allocate and destroy the pool buffer object
+
+ uint32_t *shadow; ///< Host copy of the pool, used for defragmentation
+};
+
+
+struct compute_memory_pool* compute_memory_pool_new(int64_t initial_size_in_dw, struct r600_screen *rscreen); ///< Creates a new pool
+void compute_memory_pool_delete(struct compute_memory_pool* pool); ///< Frees the pool's shadow, its buffer object and the pool struct itself
+
+int64_t compute_memory_prealloc_chunk(struct compute_memory_pool* pool, int64_t size_in_dw); ///< Searches for an empty space in the pool; returns the start offset (in dwords) of the allocatable space, or -1 on failure
+
+struct compute_memory_item* compute_memory_postalloc_chunk(struct compute_memory_pool* pool, int64_t start_in_dw); ///< Searches for the chunk after which a new chunk starting at start_in_dw can be linked
+
+/**
+ * reallocates pool, conserves data
+ */
+void compute_memory_grow_pool(struct compute_memory_pool* pool, struct pipe_context * pipe,
+ int new_size_in_dw);
+
+/**
+ * Copy pool from device to host, or host to device
+ */
+void compute_memory_shadow(struct compute_memory_pool* pool,
+ struct pipe_context * pipe, int device_to_host);
+
+/**
+ * Allocates pending allocations in the pool
+ */
+void compute_memory_finalize_pending(struct compute_memory_pool* pool,
+ struct pipe_context * pipe);
+void compute_memory_defrag(struct compute_memory_pool* pool); ///< Defragment the memory pool, always heavy memory usage
+void compute_memory_free(struct compute_memory_pool* pool, int64_t id); ///< Removes the item with the given id from the pool and frees it
+struct compute_memory_item* compute_memory_alloc(struct compute_memory_pool* pool, int64_t size_in_dw); ///< Creates a pending allocation
+
+/**
+ * Transfer data host<->device, offset and size is in bytes
+ */
+void compute_memory_transfer(struct compute_memory_pool* pool,
+ struct pipe_context * pipe, int device_to_host,
+ struct compute_memory_item* chunk, void* data,
+ int offset_in_chunk, int size);
+
+void compute_memory_transfer_direct(struct compute_memory_pool* pool, int chunk_to_data, struct compute_memory_item* chunk, struct r600_resource* data, int offset_in_chunk, int offset_in_data, int size); ///< Transfer data between chunk<->data, it is for VRAM<->GART transfers
+
+#endif
diff --git a/src/gallium/drivers/r600/compute_resource.def b/src/gallium/drivers/r600/compute_resource.def
new file mode 100644
index 00000000000..161f5062ff7
--- /dev/null
+++ b/src/gallium/drivers/r600/compute_resource.def
@@ -0,0 +1,38 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Adam Rak <[email protected]>
+ */
+
+
+DECL_COMPUTE_RESOURCE(CONFIG, 1)
+DECL_COMPUTE_RESOURCE(CONST_MEM, 16)
+DECL_COMPUTE_RESOURCE(RAT, 12)
+DECL_COMPUTE_RESOURCE(VERT, 16)
+DECL_COMPUTE_RESOURCE(TEX, 16)
+DECL_COMPUTE_RESOURCE(SAMPLER, 18)
+DECL_COMPUTE_RESOURCE(LOOP, 32)
+DECL_COMPUTE_RESOURCE(LDS, 1)
+DECL_COMPUTE_RESOURCE(GDS, 1)
+DECL_COMPUTE_RESOURCE(EXPORT, 1)
+DECL_COMPUTE_RESOURCE(SHADER, 1)
+DECL_COMPUTE_RESOURCE(TMPRING, 4)
+DECL_COMPUTE_RESOURCE(DISPATCH, 1)
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
new file mode 100644
index 00000000000..7aeb4038ae1
--- /dev/null
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -0,0 +1,814 @@
+/*
+ * Copyright 2011 Adam Rak <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Adam Rak <[email protected]>
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "util/u_blitter.h"
+#include "util/u_double_list.h"
+#include "util/u_transfer.h"
+#include "util/u_surface.h"
+#include "util/u_pack_color.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_framebuffer.h"
+#include "pipebuffer/pb_buffer.h"
+#include "r600.h"
+#include "evergreend.h"
+#include "r600_resource.h"
+#include "r600_shader.h"
+#include "r600_pipe.h"
+#include "r600_formats.h"
+#include "evergreen_compute.h"
+#include "r600_hw_context_priv.h"
+#include "evergreen_compute_internal.h"
+#include "compute_memory_pool.h"
+#ifdef HAVE_OPENCL
+#include "llvm_wrapper.h"
+#endif
+
+/**
+RAT0 is for global binding write
+VTX1 is for global binding read
+
+for writing images RAT1...
+for reading images TEX2...
+ TEX2-RAT1 is paired
+
+TEX2... consumes the same fetch resources, that VTX2... would consume
+
+CONST0 and VTX0 is for parameters
+ CONST0 is binding smaller input parameter buffer, and for constant indexing,
+ also constant cached
+ VTX0 is for indirect/non-constant indexing, or if the input is bigger than
+ the constant cache can handle
+
+RAT-s are limited to 12, so we can only bind at most 11 texture for writing
+because we reserve RAT0 for global bindings. With byteaddressing enabled,
+we should reserve another one too.=> 10 image binding for writing max.
+
+from Nvidia OpenCL:
+ CL_DEVICE_MAX_READ_IMAGE_ARGS: 128
+ CL_DEVICE_MAX_WRITE_IMAGE_ARGS: 8
+
+so 10 for writing is enough. 176 is the max for reading according to the docs
+
+writable images should be listed first < 10, so their id corresponds to RAT(id+1)
+writable images will consume TEX slots, VTX slots too because of linear indexing
+
+*/
+/* Resource function table wired to the r600_compute_global_* callbacks (presumably used for compute global buffers - confirm at the point of use); only get_handle uses the generic u_default helper. */
+const struct u_resource_vtbl r600_global_buffer_vtbl =
+{
+ u_default_resource_get_handle, /* get_handle */
+ r600_compute_global_buffer_destroy, /* resource_destroy */
+ r600_compute_global_get_transfer, /* get_transfer */
+ r600_compute_global_transfer_destroy, /* transfer_destroy */
+ r600_compute_global_transfer_map, /* transfer_map */
+ r600_compute_global_transfer_flush_region,/* transfer_flush_region */
+ r600_compute_global_transfer_unmap, /* transfer_unmap */
+ r600_compute_global_transfer_inline_write /* transfer_inline_write */
+};
+
+/** Builds a r600_pipe_compute from cso (memory sizes and, with HAVE_OPENCL, the parsed LLVM program); returns NULL when the screen does not report PIPE_CAP_COMPUTE. */
+void *evergreen_create_compute_state(
+	struct pipe_context *ctx_,
+	const struct pipe_compute_state *cso)
+{
+	struct r600_context *ctx = (struct r600_context *)ctx_;
+
+#ifdef HAVE_OPENCL
+	const struct pipe_llvm_program_header * header;
+	const unsigned char * code;
+
+	header = cso->prog; /* the program starts with a pipe_llvm_program_header */
+	code = (const unsigned char *)cso->prog + sizeof(struct pipe_llvm_program_header); /* explicit byte arithmetic; cso->prog appears to be a void pointer, on which arithmetic is only a compiler extension */
+#endif
+
+	if (!ctx->screen->screen.get_param(&ctx->screen->screen,
+			PIPE_CAP_COMPUTE)) {
+		fprintf(stderr, "Compute is not supported\n");
+		return NULL;
+	}
+	struct r600_pipe_compute *shader = CALLOC_STRUCT(r600_pipe_compute); /* NOTE(review): not checked for NULL */
+
+	shader->ctx = (struct r600_context*)ctx;
+	shader->resources = (struct evergreen_compute_resource*)
+		CALLOC(sizeof(struct evergreen_compute_resource),
+		get_compute_resource_num());
+	shader->local_size = cso->req_local_mem; ///TODO: assert it
+	shader->private_size = cso->req_private_mem;
+	shader->input_size = cso->req_input_mem;
+
+#ifdef HAVE_OPENCL
+	shader->mod = llvm_parse_bitcode(code, header->num_bytes);
+
+	r600_compute_shader_create(ctx_, shader->mod, &shader->bc);
+#endif
+	return shader;
+}
+/* Frees the resources array and the shader struct; NOTE(review): shader_code_bo and kernel_param, allocated when the state is bound/launched, are not released here. */
+void evergreen_delete_compute_state(struct pipe_context *ctx, void* state)
+{
+ struct r600_pipe_compute *shader = (struct r600_pipe_compute *)state;
+
+ free(shader->resources);
+ free(shader);
+}
+/* Makes state the current compute shader, copies its bytecode into a newly allocated buffer and records the shader registers on the COMPUTE_RESOURCE_SHADER resource. */
+static void evergreen_bind_compute_state(struct pipe_context *ctx_, void *state)
+{
+ struct r600_context *ctx = (struct r600_context *)ctx_;
+
+ ctx->cs_shader = (struct r600_pipe_compute *)state;
+
+ assert(!ctx->cs_shader->shader_code_bo); /* NOTE(review): binding a state that already has a code buffer trips this; a NULL state would be dereferenced */
+
+ ctx->cs_shader->shader_code_bo =
+ r600_compute_buffer_alloc_vram(ctx->screen,
+ ctx->cs_shader->bc.ndw * 4); /* bytecode length: ndw dwords, 4 bytes each */
+
+ void *p = ctx->ws->buffer_map(ctx->cs_shader->shader_code_bo->cs_buf,
+ ctx->cs, PIPE_TRANSFER_WRITE);
+
+ memcpy(p, ctx->cs_shader->bc.bytecode, ctx->cs_shader->bc.ndw * 4);
+
+ ctx->ws->buffer_unmap(ctx->cs_shader->shader_code_bo->cs_buf);
+
+ evergreen_compute_init_config(ctx);
+
+ struct evergreen_compute_resource* res = get_empty_res(ctx->cs_shader,
+ COMPUTE_RESOURCE_SHADER, 0);
+
+ evergreen_reg_set(res, R_008C0C_SQ_GPR_RESOURCE_MGMT_3,
+ S_008C0C_NUM_LS_GPRS(ctx->cs_shader->bc.ngpr));
+
+ /* maybe we can use it later */
+ evergreen_reg_set(res, R_0286C8_SPI_THREAD_GROUPING, 0);
+ /* maybe we can use it later */
+ evergreen_reg_set(res, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0);
+
+ evergreen_reg_set(res, R_0288D4_SQ_PGM_RESOURCES_LS,
+ S_0288D4_NUM_GPRS(ctx->cs_shader->bc.ngpr)
+ | S_0288D4_STACK_SIZE(ctx->cs_shader->bc.nstack));
+ evergreen_reg_set(res, R_0288D8_SQ_PGM_RESOURCES_LS_2, 0);
+
+ evergreen_reg_set(res, R_0288D0_SQ_PGM_START_LS, 0);
+ res->bo = ctx->cs_shader->shader_code_bo; /* the resource refers to the bytecode buffer */
+ res->usage = RADEON_USAGE_READ;
+ res->coher_bo_size = ctx->cs_shader->bc.ndw*4;
+ res->flags = COMPUTE_RES_SH_FLUSH;
+
+ /* We can't always determine the
+ * number of iterations in a loop before it's executed,
+ * so we just need to set up the loop counter to give us the maximum
+ * number of iterations possible. Currently, loops in shader code
+ * ignore the loop counter and use a break instruction to exit the
+ * loop at the correct time.
+ */
+ evergreen_set_loop_const(ctx->cs_shader,
+ 0, /* index */
+ 0xFFF, /* Maximum value of the loop counter (i.e. when the loop
+ * counter reaches this value, the program will break
+ * out of the loop. */
+ 0x0, /* Starting value of the loop counter. */
+ 0x1); /* Amount to increment the loop counter each iteration. */
+}
+
+/* The kernel parameters are stored in a vtx buffer (ID=0). Besides the explicit
+ * kernel parameters there are implicit parameters that need to be stored
+ * in the vertex buffer as well. Here is how these parameters are organized in
+ * the buffer:
+ *
+ * DWORDS 0-2: Number of work groups in each dimension (x,y,z)
+ * DWORDS 3-5: Number of global work items in each dimension (x,y,z)
+ * DWORDS 6-8: Number of work items within each work group in each dimension
+ * (x,y,z)
+ * DWORDS 9+ : Kernel parameters
+ */
+void evergreen_compute_upload_input(
+ struct pipe_context *ctx_,
+ const uint *block_layout,
+ const uint *grid_layout,
+ const void *input)
+{
+ struct r600_context *ctx = (struct r600_context *)ctx_;
+ int i;
+ unsigned kernel_parameters_offset_bytes = 36; /* 9 implicit dwords precede the kernel parameters */
+ uint32_t * num_work_groups_start;
+ uint32_t * global_size_start;
+ uint32_t * local_size_start;
+ uint32_t * kernel_parameters_start;
+
+ if (ctx->cs_shader->input_size == 0) { /* nothing is uploaded in this case, not even the implicit dimensions */
+ return;
+ }
+
+ if (!ctx->cs_shader->kernel_param) { /* the parameter buffer is allocated once, on first use */
+ unsigned buffer_size = ctx->cs_shader->input_size;
+
+ /* Add space for the grid dimensions */
+ buffer_size += kernel_parameters_offset_bytes * sizeof(uint); /* NOTE(review): the offset is already in bytes, so this reserves more than the 36 bytes required */
+ ctx->cs_shader->kernel_param =
+ r600_compute_buffer_alloc_vram(ctx->screen,
+ buffer_size);
+ }
+
+ num_work_groups_start = ctx->ws->buffer_map(
+ ctx->cs_shader->kernel_param->cs_buf,
+ ctx->cs, PIPE_TRANSFER_WRITE);
+ global_size_start = num_work_groups_start + (3 * (sizeof(uint) /4)); /* each section is 3 dwords long */
+ local_size_start = global_size_start + (3 * (sizeof(uint)) / 4);
+ kernel_parameters_start = local_size_start + (3 * (sizeof(uint)) / 4);
+
+ /* Copy the number of work groups */
+ memcpy(num_work_groups_start, grid_layout, 3 * sizeof(uint));
+
+ /* Copy the global size */
+ for (i = 0; i < 3; i++) {
+ global_size_start[i] = grid_layout[i] * block_layout[i];
+ }
+
+ /* Copy the local dimensions */
+ memcpy(local_size_start, block_layout, 3 * sizeof(uint));
+
+ /* Copy the kernel inputs */
+ memcpy(kernel_parameters_start, input, ctx->cs_shader->input_size);
+
+ for (i = 0; i < (kernel_parameters_offset_bytes / 4) +
+ (ctx->cs_shader->input_size / 4); i++) {
+ COMPUTE_DBG("input %i : %i\n", i,
+ ((unsigned*)num_work_groups_start)[i]);
+ }
+
+ ctx->ws->buffer_unmap(ctx->cs_shader->kernel_param->cs_buf);
+
+ /* ID=0 is reserved for the parameters */
+ evergreen_set_vtx_resource(ctx->cs_shader,
+ ctx->cs_shader->kernel_param, 0, 0, 0);
+ /* ID=0 is reserved for parameters */
+ evergreen_set_const_cache(ctx->cs_shader, 0,
+ ctx->cs_shader->kernel_param, ctx->cs_shader->input_size, 0);
+}
+
+/**
+ * Fills the COMPUTE_RESOURCE_DISPATCH resource of the bound compute shader:
+ * start offsets of 0, the thread-group dimensions from block_layout, the
+ * thread-group size (product of block_layout) and a DISPATCH_DIRECT packet
+ * carrying the three grid_layout dimensions.
+ */
+void evergreen_direct_dispatch(
+	struct pipe_context *ctx_,
+	const uint *block_layout, const uint *grid_layout)
+{
+	struct r600_context *ctx = (struct r600_context *)ctx_;
+
+	int i;
+
+	struct evergreen_compute_resource* res = get_empty_res(ctx->cs_shader,
+		COMPUTE_RESOURCE_DISPATCH, 0);
+
+	evergreen_reg_set(res, R_008958_VGT_PRIMITIVE_TYPE, V_008958_DI_PT_POINTLIST);
+
+	evergreen_reg_set(res, R_00899C_VGT_COMPUTE_START_X, 0);
+	evergreen_reg_set(res, R_0089A0_VGT_COMPUTE_START_Y, 0);
+	evergreen_reg_set(res, R_0089A4_VGT_COMPUTE_START_Z, 0);
+
+	evergreen_reg_set(res, R_0286EC_SPI_COMPUTE_NUM_THREAD_X, block_layout[0]);
+	evergreen_reg_set(res, R_0286F0_SPI_COMPUTE_NUM_THREAD_Y, block_layout[1]);
+	evergreen_reg_set(res, R_0286F4_SPI_COMPUTE_NUM_THREAD_Z, block_layout[2]);
+
+	int group_size = 1; /* threads per group; the unused grid_size product was dropped */
+
+	for (i = 0; i < 3; i++) {
+		group_size *= block_layout[i];
+	}
+
+	evergreen_reg_set(res, R_008970_VGT_NUM_INDICES, group_size);
+	evergreen_reg_set(res, R_0089AC_VGT_COMPUTE_THREAD_GROUP_SIZE, group_size);
+
+	evergreen_emit_raw_value(res, PKT3C(PKT3_DISPATCH_DIRECT, 3, 0));
+	evergreen_emit_raw_value(res, grid_layout[0]);
+	evergreen_emit_raw_value(res, grid_layout[1]);
+	evergreen_emit_raw_value(res, grid_layout[2]);
+	///VGT_DISPATCH_INITIATOR = COMPUTE_SHADER_EN
+	evergreen_emit_raw_value(res, 1);
+}
+/* Copies the recorded command dwords of every enabled compute resource into the command stream (emitting relocations and buffer syncs for resources with a bo), flushes it, waits on one of the buffers and re-emits the start-of-CS atom. */
+static void compute_emit_cs(struct r600_context *ctx)
+{
+	struct radeon_winsys_cs *cs = ctx->cs;
+	int i;
+
+	r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
+
+	struct r600_resource *onebo = NULL; /* last bo seen; used after the flush to wait for completion */
+
+	for (i = 0; i < get_compute_resource_num(); i++) {
+		if (ctx->cs_shader->resources[i].enabled) {
+			int j;
+			COMPUTE_DBG("resnum: %i, cdw: %i\n", i, cs->cdw);
+
+			for (j = 0; j < ctx->cs_shader->resources[i].cs_end; j++) {
+				if (ctx->cs_shader->resources[i].do_reloc[j]) {
+					assert(ctx->cs_shader->resources[i].bo);
+					evergreen_emit_ctx_reloc(ctx,
+						ctx->cs_shader->resources[i].bo,
+						ctx->cs_shader->resources[i].usage);
+				}
+
+				cs->buf[cs->cdw++] = ctx->cs_shader->resources[i].cs[j];
+			}
+
+			if (ctx->cs_shader->resources[i].bo) {
+				onebo = ctx->cs_shader->resources[i].bo;
+				evergreen_emit_ctx_reloc(ctx,
+					ctx->cs_shader->resources[i].bo,
+					ctx->cs_shader->resources[i].usage);
+
+				///special case for textures
+				if (ctx->cs_shader->resources[i].do_reloc
+					[ctx->cs_shader->resources[i].cs_end] == 2) {
+					evergreen_emit_ctx_reloc(ctx,
+						ctx->cs_shader->resources[i].bo,
+						ctx->cs_shader->resources[i].usage);
+				}
+
+				evergreen_set_buffer_sync(ctx, ctx->cs_shader->resources[i].bo,
+					ctx->cs_shader->resources[i].coher_bo_size,
+					ctx->cs_shader->resources[i].flags,
+					ctx->cs_shader->resources[i].usage);
+			}
+		}
+	}
+
+#if 0
+	COMPUTE_DBG("cdw: %i\n", cs->cdw);
+	for (i = 0; i < cs->cdw; i++) {
+		COMPUTE_DBG("%4i : 0x%08X\n", i, ctx->cs->buf[i]);
+	}
+#endif
+
+	ctx->ws->cs_flush(ctx->cs, RADEON_FLUSH_ASYNC);
+
+	ctx->pm4_dirty_cdwords = 0;
+	ctx->flags = 0;
+
+	COMPUTE_DBG("shader started\n");
+	if (onebo) { /* onebo stays NULL when no enabled resource carries a bo; nothing to wait on then */
+		ctx->ws->buffer_wait(onebo->buf, 0);
+	}
+	COMPUTE_DBG("...\n");
+
+	r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
+
+	ctx->streamout_start = TRUE;
+	ctx->streamout_append_bitmask = ~0;
+
+}
+
+/**
+ * launch_grid hook: size the LDS allocation for the thread block, upload
+ * the kernel input, queue the direct dispatch and submit the command stream.
+ *
+ * \param block_layout  three thread-block dimensions
+ * \param grid_layout   three grid dimensions, forwarded to the dispatch
+ * \param pc            only printed in the debug output here
+ * \param input         kernel input, forwarded to the upload helper
+ */
+static void evergreen_launch_grid(
+		struct pipe_context *ctx_,
+		const uint *block_layout, const uint *grid_layout,
+		uint32_t pc, const void *input)
+{
+	struct r600_context *ctx;
+	unsigned num_pipes;
+	unsigned wave_divisor;
+	unsigned block_threads;
+	unsigned num_waves;
+
+	COMPUTE_DBG("PC: %i\n", pc);
+
+	ctx = (struct r600_context *)ctx_;
+	num_pipes = ctx->screen->info.r600_max_pipes;
+	wave_divisor = 16 * num_pipes;
+
+	/* num_waves = ceil((tg_size.x * tg_size.y * tg_size.z) / (16 * num_pipes)) */
+	block_threads = block_layout[0] * block_layout[1] * block_layout[2];
+	num_waves = (block_threads + wave_divisor - 1) / wave_divisor;
+
+	COMPUTE_DBG("Using %u pipes, there are %u wavefronts per thread block\n",
+							num_pipes, num_waves);
+
+	evergreen_set_lds(ctx->cs_shader, 0, 0, num_waves);
+	evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input);
+	evergreen_direct_dispatch(ctx_, block_layout, grid_layout);
+	compute_emit_cs(ctx);
+}
+
+/**
+ * set_compute_resources hook: bind each non-NULL surface as a vertex
+ * (fetch) resource at slot i+2 and, when the surface is writable, also as
+ * RAT i+1.
+ *
+ * The surfaces' textures are treated as r600_resource_global buffers and
+ * bound at their chunk's byte offset (start_in_dw * 4).
+ *
+ * NOTE(review): `start` is not used; slots are always numbered from the
+ * beginning of the array -- confirm whether callers ever pass start != 0.
+ */
+static void evergreen_set_compute_resources(struct pipe_context * ctx_,
+		unsigned start, unsigned count,
+		struct pipe_surface ** surfaces)
+{
+	struct r600_context *ctx = (struct r600_context *)ctx_;
+	struct r600_surface **resources = (struct r600_surface **)surfaces;
+
+	for (unsigned i = 0; i < count; i++) {
+		if (!resources[i]) {
+			continue;
+		}
+
+		struct r600_resource_global *buffer =
+			(struct r600_resource_global*)resources[i]->base.texture;
+
+		if (resources[i]->base.writable) {
+			/* RAT ids must stay below 12 (see evergreen_set_rat). */
+			assert(i+1 < 12);
+
+			evergreen_set_rat(ctx->cs_shader, i+1,
+			(struct r600_resource *)resources[i]->base.texture,
+			buffer->chunk->start_in_dw*4,
+			resources[i]->base.texture->width0);
+		}
+
+		evergreen_set_vtx_resource(ctx->cs_shader,
+			(struct r600_resource *)resources[i]->base.texture, i+2,
+			buffer->chunk->start_in_dw*4, resources[i]->base.writable);
+	}
+
+}
+
+/**
+ * set_compute_sampler_views hook: bind every non-NULL sampler view as a
+ * texture resource.  View i lands in fetch slot i+2 because slots 0 and 1
+ * hold the parameter buffer and the global buffer pool.
+ */
+static void evergreen_set_cs_sampler_view(struct pipe_context *ctx_,
+		unsigned start_slot, unsigned count,
+		struct pipe_sampler_view **views)
+{
+	struct r600_context *ctx = (struct r600_context *)ctx_;
+	struct r600_pipe_sampler_view **resource =
+		(struct r600_pipe_sampler_view **)views;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		if (!resource[i]) {
+			continue;
+		}
+
+		assert(i+1 < 12);
+		///FETCH0 = VTX0 (param buffer),
+		//FETCH1 = VTX1 (global buffer pool), FETCH2... = TEX
+		evergreen_set_tex_resource(ctx->cs_shader, resource[i], i+2);
+	}
+}
+
+/**
+ * bind_compute_sampler_states hook: hand every non-NULL sampler state to
+ * evergreen_set_sampler_resource() using its array index as the sampler id.
+ */
+static void evergreen_bind_compute_sampler_states(
+	struct pipe_context *ctx_,
+	unsigned start_slot,
+	unsigned num_samplers,
+	void **samplers_)
+{
+	struct r600_context *ctx = (struct r600_context *)ctx_;
+	struct compute_sampler_state ** samplers =
+		(struct compute_sampler_state **)samplers_;
+	int idx;
+
+	for (idx = 0; idx < num_samplers; idx++) {
+		if (!samplers[idx]) {
+			continue;
+		}
+
+		evergreen_set_sampler_resource(ctx->cs_shader, samplers[idx], idx);
+	}
+}
+
+/*
+ * set_global_binding hook.
+ *
+ * Finalizes pending allocations in the screen's global memory pool, writes
+ * the byte offset of each buffer's chunk (start_in_dw * 4) through the
+ * matching handles[] pointer, and binds the whole pool as RAT 0 and as
+ * vertex resource 1.
+ *
+ * A NULL `resources` array currently returns without unbinding anything.
+ * NOTE(review): `first` is not used by this implementation.
+ */
+static void evergreen_set_global_binding(
+ struct pipe_context *ctx_, unsigned first, unsigned n,
+ struct pipe_resource **resources,
+ uint32_t **handles)
+{
+ struct r600_context *ctx = (struct r600_context *)ctx_;
+ struct compute_memory_pool *pool = ctx->screen->global_pool;
+ struct r600_resource_global **buffers =
+ (struct r600_resource_global **)resources;
+
+ if (!resources) {
+ /* XXX: Unset */
+ return;
+ }
+
+ compute_memory_finalize_pending(pool, ctx_);
+
+ for (int i = 0; i < n; i++)
+ {
+ assert(resources[i]->target == PIPE_BUFFER);
+ assert(resources[i]->bind & PIPE_BIND_GLOBAL);
+
+ /* Report where the buffer lives inside the pool, in bytes. */
+ *(handles[i]) = buffers[i]->chunk->start_in_dw * 4;
+ }
+
+ evergreen_set_rat(ctx->cs_shader, 0, pool->bo, 0, pool->size_in_dw * 4);
+ evergreen_set_vtx_resource(ctx->cs_shader, pool->bo, 1, 0, 1);
+}
+
+
+/*
+ * Fill the COMPUTE_RESOURCE_CONFIG slot of the bound compute shader with
+ * the register writes that configure the chip for compute: SQ config,
+ * GPR/thread/stack resource management, SPI input control, VGT compute
+ * mode and DB disable.
+ *
+ * The per-family switch only varies num_stack_entries (256 or 512);
+ * num_temp_gprs and num_threads are 4 and 128 for every family listed.
+ */
+void evergreen_compute_init_config(struct r600_context *ctx)
+{
+ struct evergreen_compute_resource* res =
+ get_empty_res(ctx->cs_shader, COMPUTE_RESOURCE_CONFIG, 0);
+
+ int num_threads;
+ int num_stack_entries;
+ int num_temp_gprs;
+
+ enum radeon_family family;
+ unsigned tmp;
+
+ family = ctx->family;
+
+ switch (family) {
+ case CHIP_CEDAR:
+ default:
+ num_temp_gprs = 4;
+ num_threads = 128;
+ num_stack_entries = 256;
+ break;
+ case CHIP_REDWOOD:
+ num_temp_gprs = 4;
+ num_threads = 128;
+ num_stack_entries = 256;
+ break;
+ case CHIP_JUNIPER:
+ num_temp_gprs = 4;
+ num_threads = 128;
+ num_stack_entries = 512;
+ break;
+ case CHIP_CYPRESS:
+ case CHIP_HEMLOCK:
+ num_temp_gprs = 4;
+ num_threads = 128;
+ num_stack_entries = 512;
+ break;
+ case CHIP_PALM:
+ num_temp_gprs = 4;
+ num_threads = 128;
+ num_stack_entries = 256;
+ break;
+ case CHIP_SUMO:
+ num_temp_gprs = 4;
+ num_threads = 128;
+ num_stack_entries = 256;
+ break;
+ case CHIP_SUMO2:
+ num_temp_gprs = 4;
+ num_threads = 128;
+ num_stack_entries = 512;
+ break;
+ case CHIP_BARTS:
+ num_temp_gprs = 4;
+ num_threads = 128;
+ num_stack_entries = 512;
+ break;
+ case CHIP_TURKS:
+ num_temp_gprs = 4;
+ num_threads = 128;
+ num_stack_entries = 256;
+ break;
+ case CHIP_CAICOS:
+ num_temp_gprs = 4;
+ num_threads = 128;
+ num_stack_entries = 256;
+ break;
+ }
+
+ /* SQ_CONFIG: VC_ENABLE is set for every family except the ones
+ * listed in the switch below. */
+ tmp = 0x00000000;
+ switch (family) {
+ case CHIP_CEDAR:
+ case CHIP_PALM:
+ case CHIP_SUMO:
+ case CHIP_SUMO2:
+ case CHIP_CAICOS:
+ break;
+ default:
+ tmp |= S_008C00_VC_ENABLE(1);
+ break;
+ }
+ tmp |= S_008C00_EXPORT_SRC_C(1);
+ tmp |= S_008C00_CS_PRIO(0);
+ tmp |= S_008C00_LS_PRIO(0);
+ tmp |= S_008C00_HS_PRIO(0);
+ tmp |= S_008C00_PS_PRIO(0);
+ tmp |= S_008C00_VS_PRIO(0);
+ tmp |= S_008C00_GS_PRIO(0);
+ tmp |= S_008C00_ES_PRIO(0);
+
+ evergreen_reg_set(res, R_008C00_SQ_CONFIG, tmp);
+
+ evergreen_reg_set(res, R_008C04_SQ_GPR_RESOURCE_MGMT_1,
+ S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
+ evergreen_reg_set(res, R_008C08_SQ_GPR_RESOURCE_MGMT_2, 0);
+ evergreen_reg_set(res, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0);
+ evergreen_reg_set(res, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0);
+ evergreen_reg_set(res, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8));
+ /* workaround for hw issues with dyn gpr - must set all limits to 240
+ * instead of 0, 0x1e == 240/8 */
+ evergreen_reg_set(res, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1,
+ S_028838_PS_GPRS(0x1e) |
+ S_028838_VS_GPRS(0x1e) |
+ S_028838_GS_GPRS(0x1e) |
+ S_028838_ES_GPRS(0x1e) |
+ S_028838_HS_GPRS(0x1e) |
+ S_028838_LS_GPRS(0x1e));
+
+
+ evergreen_reg_set(res, R_008E20_SQ_STATIC_THREAD_MGMT1, 0xFFFFFFFF);
+ evergreen_reg_set(res, R_008E24_SQ_STATIC_THREAD_MGMT2, 0xFFFFFFFF);
+ evergreen_reg_set(res, R_008E28_SQ_STATIC_THREAD_MGMT3, 0xFFFFFFFF);
+ evergreen_reg_set(res, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 0);
+ /* Only the LS thread and stack fields are written with non-zero
+ * values; the other thread/stack management registers are zeroed. */
+ tmp = S_008C1C_NUM_LS_THREADS(num_threads);
+ evergreen_reg_set(res, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, tmp);
+ evergreen_reg_set(res, R_008C20_SQ_STACK_RESOURCE_MGMT_1, 0);
+ evergreen_reg_set(res, R_008C24_SQ_STACK_RESOURCE_MGMT_2, 0);
+ tmp = S_008C28_NUM_LS_STACK_ENTRIES(num_stack_entries);
+ evergreen_reg_set(res, R_008C28_SQ_STACK_RESOURCE_MGMT_3, tmp);
+ evergreen_reg_set(res, R_0286CC_SPI_PS_IN_CONTROL_0, S_0286CC_LINEAR_GRADIENT_ENA(1));
+ evergreen_reg_set(res, R_0286D0_SPI_PS_IN_CONTROL_1, 0);
+ evergreen_reg_set(res, R_0286E4_SPI_PS_IN_CONTROL_2, 0);
+ evergreen_reg_set(res, R_0286D8_SPI_INPUT_Z, 0);
+ evergreen_reg_set(res, R_0286E0_SPI_BARYC_CNTL, 1 << 20);
+ tmp = S_0286E8_TID_IN_GROUP_ENA | S_0286E8_TGID_ENA | S_0286E8_DISABLE_INDEX_PACK;
+ evergreen_reg_set(res, R_0286E8_SPI_COMPUTE_INPUT_CNTL, tmp);
+ tmp = S_028A40_COMPUTE_MODE(1) | S_028A40_PARTIAL_THD_AT_EOI(1);
+ evergreen_reg_set(res, R_028A40_VGT_GS_MODE, tmp);
+ evergreen_reg_set(res, R_028B54_VGT_SHADER_STAGES_EN, 2/*CS_ON*/);
+ evergreen_reg_set(res, R_028800_DB_DEPTH_CONTROL, 0);
+ evergreen_reg_set(res, R_02880C_DB_SHADER_CONTROL, 0);
+ evergreen_reg_set(res, R_028000_DB_RENDER_CONTROL, S_028000_COLOR_DISABLE(1));
+ evergreen_reg_set(res, R_02800C_DB_RENDER_OVERRIDE, 0);
+ /* NOTE(review): SPI_COMPUTE_INPUT_CNTL was already written above with
+ * the same value; this second write looks redundant -- confirm before
+ * removing. */
+ evergreen_reg_set(res, R_0286E8_SPI_COMPUTE_INPUT_CNTL,
+ S_0286E8_TID_IN_GROUP_ENA
+ | S_0286E8_TGID_ENA
+ | S_0286E8_DISABLE_INDEX_PACK)
+ ;
+}
+
+/**
+ * Install the compute entry points defined in this file into the context's
+ * function table.
+ */
+void evergreen_init_compute_state_functions(struct r600_context *ctx)
+{
+	/* Compute state object lifetime. */
+	ctx->context.create_compute_state = evergreen_create_compute_state;
+	ctx->context.bind_compute_state = evergreen_bind_compute_state;
+	ctx->context.delete_compute_state = evergreen_delete_compute_state;
+
+	/* Resource, sampler and global-memory binding. */
+//	ctx->context.create_sampler_view = evergreen_compute_create_sampler_view;
+	ctx->context.set_compute_resources = evergreen_set_compute_resources;
+	ctx->context.set_compute_sampler_views = evergreen_set_cs_sampler_view;
+	ctx->context.bind_compute_sampler_states = evergreen_bind_compute_sampler_states;
+	ctx->context.set_global_binding = evergreen_set_global_binding;
+
+	/* Kernel launch. */
+	ctx->context.launch_grid = evergreen_launch_grid;
+}
+
+
+/**
+ * Create a PIPE_BIND_GLOBAL buffer backed by a chunk of the screen's
+ * global compute memory pool.
+ *
+ * \param screen  screen that owns the global pool
+ * \param templ   resource template; must describe a 1D PIPE_BUFFER
+ * \return the new resource with a reference count of 1, or NULL when the
+ *         wrapper or the pool chunk cannot be allocated
+ */
+struct pipe_resource *r600_compute_global_buffer_create(
+	struct pipe_screen *screen,
+	const struct pipe_resource *templ)
+{
+	assert(templ->target == PIPE_BUFFER);
+	assert(templ->bind & PIPE_BIND_GLOBAL);
+	assert(templ->array_size == 1 || templ->array_size == 0);
+	assert(templ->depth0 == 1 || templ->depth0 == 0);
+	assert(templ->height0 == 1 || templ->height0 == 0);
+
+	struct r600_resource_global* result = (struct r600_resource_global*)
+		CALLOC(1, sizeof(struct r600_resource_global));
+	struct r600_screen* rscreen = (struct r600_screen*)screen;
+
+	if (result == NULL) {
+		return NULL;
+	}
+
+	result->base.b.vtbl = &r600_global_buffer_vtbl;
+	/* Copy the template first: the struct assignment overwrites every
+	 * field, so screen and the reference count must be set afterwards. */
+	result->base.b.b = *templ;
+	result->base.b.b.screen = screen;
+	pipe_reference_init(&result->base.b.b.reference, 1);
+
+	/* Pool chunks are sized in dwords; round the byte width up. */
+	int size_in_dw = (templ->width0+3) / 4;
+
+	result->chunk = compute_memory_alloc(rscreen->global_pool, size_in_dw);
+
+	if (result->chunk == NULL)
+	{
+		/* NOTE(review): allocated with CALLOC but released with free();
+		 * confirm the two are interchangeable in every build config. */
+		free(result);
+		return NULL;
+	}
+
+	return &result->base.b.b;
+}
+
+/**
+ * Destroy a global buffer: return its chunk to the screen's global memory
+ * pool (looked up by chunk id) and release the resource wrapper.
+ */
+void r600_compute_global_buffer_destroy(
+	struct pipe_screen *screen,
+	struct pipe_resource *res)
+{
+	struct r600_screen* rscreen;
+	struct r600_resource_global* global_buf;
+
+	assert(res->target == PIPE_BUFFER);
+	assert(res->bind & PIPE_BIND_GLOBAL);
+
+	global_buf = (struct r600_resource_global*)res;
+	rscreen = (struct r600_screen*)screen;
+
+	compute_memory_free(rscreen->global_pool, global_buf->chunk->id);
+	global_buf->chunk = NULL;
+
+	free(res);
+}
+
+/**
+ * Map a global buffer for CPU access.
+ *
+ * The whole pool buffer object is mapped; the returned pointer is advanced
+ * to the start of this buffer's chunk plus transfer->box.x bytes.
+ *
+ * \return CPU pointer into the mapping, or NULL when the winsys map fails
+ */
+void* r600_compute_global_transfer_map(
+	struct pipe_context *ctx_,
+	struct pipe_transfer* transfer)
+{
+	assert(transfer->resource->target == PIPE_BUFFER);
+	assert(transfer->resource->bind & PIPE_BIND_GLOBAL);
+	assert(transfer->box.x >= 0);
+	assert(transfer->box.y == 0);
+	assert(transfer->box.z == 0);
+
+	struct r600_context *ctx = (struct r600_context *)ctx_;
+	struct r600_resource_global* global_buf =
+		(struct r600_resource_global*)transfer->resource;
+	uint32_t* pool_map;
+
+	///TODO: do it better, mapping is not possible if the pool is too big
+	pool_map = ctx->ws->buffer_map(global_buf->chunk->pool->bo->cs_buf,
+						ctx->cs, transfer->usage);
+	if (!pool_map) {
+		return NULL;
+	}
+
+	COMPUTE_DBG("buffer start: %lli\n", global_buf->chunk->start_in_dw);
+
+	/* Dword arithmetic to reach the chunk, then byte arithmetic for box.x. */
+	return ((char*)(pool_map + global_buf->chunk->start_in_dw)) + transfer->box.x;
+}
+
+/**
+ * Undo r600_compute_global_transfer_map(): unmap the pool buffer object
+ * that backs the transfer's resource.
+ */
+void r600_compute_global_transfer_unmap(
+	struct pipe_context *ctx_,
+	struct pipe_transfer* transfer)
+{
+	struct r600_context *ctx;
+	struct r600_resource_global* global_buf;
+
+	assert(transfer->resource->target == PIPE_BUFFER);
+	assert(transfer->resource->bind & PIPE_BIND_GLOBAL);
+
+	ctx = (struct r600_context *)ctx_;
+	global_buf = (struct r600_resource_global*)transfer->resource;
+
+	ctx->ws->buffer_unmap(global_buf->chunk->pool->bo->cs_buf);
+}
+
+/**
+ * Allocate and fill a pipe_transfer describing an access to a global
+ * buffer.  Pending pool allocations are finalized first.
+ *
+ * \return the transfer object, or NULL when the slab allocation fails
+ */
+struct pipe_transfer * r600_compute_global_get_transfer(
+	struct pipe_context *ctx_,
+	struct pipe_resource *resource,
+	unsigned level,
+	unsigned usage,
+	const struct pipe_box *box)
+{
+	struct r600_context *ctx = (struct r600_context *)ctx_;
+	struct compute_memory_pool *pool = ctx->screen->global_pool;
+
+	compute_memory_finalize_pending(pool, ctx_);
+
+	assert(resource->target == PIPE_BUFFER);
+	struct pipe_transfer *transfer = util_slab_alloc(&ctx->pool_transfers);
+
+	/* Do not write through a failed allocation. */
+	if (transfer == NULL) {
+		return NULL;
+	}
+
+	transfer->resource = resource;
+	transfer->level = level;
+	transfer->usage = usage;
+	transfer->box = *box;
+	transfer->stride = 0;
+	transfer->layer_stride = 0;
+	transfer->data = NULL;
+
+	/* Note strides are zero, this is ok for buffers, but not for
+	 * textures 2d & higher at least.
+	 */
+	return transfer;
+}
+
+/**
+ * Return a transfer object to the context's transfer slab.
+ */
+void r600_compute_global_transfer_destroy(
+	struct pipe_context *ctx_,
+	struct pipe_transfer *transfer)
+{
+	struct r600_context *owner = (struct r600_context*)ctx_;
+
+	util_slab_free(&owner->pool_transfers, transfer);
+}
+
+/*
+ * Not implemented: asserts unconditionally.  When assertions are compiled
+ * out the call does nothing.
+ */
+void r600_compute_global_transfer_flush_region(
+ struct pipe_context *ctx_,
+ struct pipe_transfer *transfer,
+ const struct pipe_box *box)
+{
+ assert(0 && "TODO");
+}
+
+/*
+ * Not implemented: asserts unconditionally.  When assertions are compiled
+ * out the call does nothing and `data` is not written anywhere.
+ */
+void r600_compute_global_transfer_inline_write(
+ struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ const void *data,
+ unsigned stride,
+ unsigned layer_stride)
+{
+ assert(0 && "TODO");
+}
diff --git a/src/gallium/drivers/r600/evergreen_compute.h b/src/gallium/drivers/r600/evergreen_compute.h
new file mode 100644
index 00000000000..a0881cde7db
--- /dev/null
+++ b/src/gallium/drivers/r600/evergreen_compute.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2011 Adam Rak <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Adam Rak <[email protected]>
+ */
+
+#ifndef EVERGREEN_COMPUTE_H
+#define EVERGREEN_COMPUTE_H
+#include "r600.h"
+#include "r600_pipe.h"
+
+struct evergreen_compute_resource;
+
+/* Compute state objects and dispatch. */
+void *evergreen_create_compute_state(struct pipe_context *ctx, const struct pipe_compute_state *cso);
+void evergreen_delete_compute_state(struct pipe_context *ctx, void *state);
+void evergreen_direct_dispatch(struct pipe_context *context, const uint *block_layout, const uint *grid_layout);
+void evergreen_compute_upload_input(struct pipe_context *context, const uint *block_layout, const uint *grid_layout, const void *input);
+void evergreen_compute_init_config(struct r600_context *rctx);
+void evergreen_init_compute_state_functions(struct r600_context *rctx);
+
+/* Global (PIPE_BIND_GLOBAL) buffers, backed by the screen's compute memory pool. */
+struct pipe_resource *r600_compute_global_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ);
+void r600_compute_global_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *res);
+void* r600_compute_global_transfer_map(struct pipe_context *ctx, struct pipe_transfer* transfer);
+void r600_compute_global_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* transfer);
+struct pipe_transfer * r600_compute_global_get_transfer(struct pipe_context *, struct pipe_resource *, unsigned level,
+		unsigned usage, const struct pipe_box *);
+void r600_compute_global_transfer_destroy(struct pipe_context *, struct pipe_transfer *);
+void r600_compute_global_transfer_flush_region(struct pipe_context *, struct pipe_transfer *, const struct pipe_box *);
+void r600_compute_global_transfer_inline_write(struct pipe_context *, struct pipe_resource *, unsigned level,
+		unsigned usage, const struct pipe_box *, const void *data, unsigned stride, unsigned layer_stride);
+
+
+/**
+ * printf-style debug output, enabled by the R600_COMPUTE_DEBUG option.
+ *
+ * The option is looked up on the first call only and the result is cached
+ * in function-local statics.
+ */
+static inline void COMPUTE_DBG(const char *fmt, ...)
+{
+	static bool check_debug = false, debug = false;
+
+	if (!check_debug) {
+		debug = debug_get_bool_option("R600_COMPUTE_DEBUG", FALSE);
+		/* Remember that the lookup happened so it is not repeated. */
+		check_debug = true;
+	}
+
+	if (debug) {
+		va_list ap;
+		va_start(ap, fmt);
+		_debug_vprintf(fmt, ap);
+		va_end(ap);
+	}
+}
+
+#endif
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c
new file mode 100644
index 00000000000..209f064d1de
--- /dev/null
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
@@ -0,0 +1,830 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Adam Rak <[email protected]>
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "util/u_blitter.h"
+#include "util/u_double_list.h"
+#include "util/u_transfer.h"
+#include "util/u_surface.h"
+#include "util/u_pack_color.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_framebuffer.h"
+#include "r600.h"
+#include "r600_resource.h"
+#include "r600_shader.h"
+#include "r600_pipe.h"
+#include "r600_formats.h"
+#include "evergreend.h"
+#include "evergreen_compute_internal.h"
+#include "r600_hw_context_priv.h"
+
+/*
+ * Return the total number of compute resource slots: the sum of the `n`
+ * argument of every DECL_COMPUTE_RESOURCE(name, n) entry in
+ * compute_resource.def.
+ */
+int get_compute_resource_num(void)
+{
+ int num = 0;
+ /* Each entry of the .def file expands to "num += n;". */
+#define DECL_COMPUTE_RESOURCE(name, n) num += n;
+#include "compute_resource.def"
+#undef DECL_COMPUTE_RESOURCE
+ return num;
+}
+
+/**
+ * Append one dword to a resource's private command buffer.
+ */
+void evergreen_emit_raw_value(
+	struct evergreen_compute_resource* res,
+	unsigned value)
+{
+	res->cs[res->cs_end] = value;
+	res->cs_end++;
+}
+
+/**
+ * Append one dword directly to the context's command stream.
+ */
+void evergreen_emit_ctx_value(struct r600_context *ctx, unsigned value)
+{
+	ctx->cs->buf[ctx->cs->cdw] = value;
+	ctx->cs->cdw++;
+}
+
+/*
+ * Append a register-write header for size/4 registers starting at `index`,
+ * followed by the values taken from `array`.
+ *
+ * `size` is stepped in units of 4 while `array` is indexed per element, so
+ * it presumably is the array size in bytes -- TODO confirm against callers.
+ * NOTE(review): if size is not a multiple of 4 the header announces size/4
+ * registers (rounded down) but the loop emits one more value.
+ */
+void evergreen_mult_reg_set_(
+ struct evergreen_compute_resource* res,
+ int index,
+ u32* array,
+ int size)
+{
+ int i = 0;
+
+ evergreen_emit_raw_reg_set(res, index, size / 4);
+
+ for (i = 0; i < size; i+=4) {
+ res->cs[res->cs_end++] = array[i / 4];
+ }
+}
+
+/**
+ * Queue a single register write in a resource's command buffer: a packet
+ * header for one register at `index`, followed by `value`.
+ */
+void evergreen_reg_set(
+	struct evergreen_compute_resource* res,
+	unsigned index,
+	unsigned value)
+{
+	evergreen_emit_raw_reg_set(res, index, 1);
+	evergreen_emit_raw_value(res, value);
+}
+
+/*
+ * Return the resource slot for (res_code, offset_index), reset to an empty
+ * but enabled state.
+ *
+ * The slot index is the running sum of the sizes of all resource kinds
+ * declared before res_code in compute_resource.def, plus offset_index.
+ * The slot's bo is cleared, its command buffer is emptied (cs_end = 0) and
+ * its do_reloc table is zeroed.
+ */
+struct evergreen_compute_resource* get_empty_res(
+ struct r600_pipe_compute* pipe,
+ enum evergreen_compute_resources res_code,
+ int offset_index)
+{
+ int code_index = -1;
+ int code_size = -1;
+
+ {
+ int i = 0;
+ /* For each declared kind: remember its first slot and slot count when
+ * it matches res_code, then advance i past its slots. */
+ #define DECL_COMPUTE_RESOURCE(name, n) if (COMPUTE_RESOURCE_ ## name == res_code) {code_index = i; code_size = n;} i += n;
+ #include "compute_resource.def"
+ #undef DECL_COMPUTE_RESOURCE
+ }
+
+ assert(code_index != -1 && "internal error: resouce index not found");
+ assert(offset_index < code_size && "internal error: overindexing resource");
+
+ int index = code_index + offset_index;
+
+ struct evergreen_compute_resource* res = &pipe->resources[index];
+
+ res->enabled = true;
+ res->bo = NULL;
+ res->cs_end = 0;
+ bzero(&res->do_reloc, sizeof(res->do_reloc));
+
+ return res;
+}
+
+/*
+ * Append the packet header for writing `num` registers starting at register
+ * offset `index` to a resource's command buffer, and mark the resource
+ * enabled.  The register values themselves are appended by the caller.
+ *
+ * The packet type is chosen from the range `index` falls into (config,
+ * context, resource, sampler, ctl/loop/bool const).  Those ranges get a
+ * two-dword PKT3 header (opcode + dword offset within the range); anything
+ * else gets a one-dword PKT0 header.
+ */
+void evergreen_emit_raw_reg_set(
+ struct evergreen_compute_resource* res,
+ unsigned index,
+ int num)
+{
+ res->enabled = 1;
+ int cs_end = res->cs_end;
+
+ if (index >= EVERGREEN_CONFIG_REG_OFFSET
+ && index < EVERGREEN_CONFIG_REG_END) {
+ res->cs[cs_end] = PKT3C(PKT3_SET_CONFIG_REG, num, 0);
+ res->cs[cs_end+1] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_CONTEXT_REG_OFFSET
+ && index < EVERGREEN_CONTEXT_REG_END) {
+ res->cs[cs_end] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0);
+ res->cs[cs_end+1] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_RESOURCE_OFFSET
+ && index < EVERGREEN_RESOURCE_END) {
+ res->cs[cs_end] = PKT3C(PKT3_SET_RESOURCE, num, 0);
+ res->cs[cs_end+1] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_SAMPLER_OFFSET
+ && index < EVERGREEN_SAMPLER_END) {
+ res->cs[cs_end] = PKT3C(PKT3_SET_SAMPLER, num, 0);
+ res->cs[cs_end+1] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_CTL_CONST_OFFSET
+ && index < EVERGREEN_CTL_CONST_END) {
+ res->cs[cs_end] = PKT3C(PKT3_SET_CTL_CONST, num, 0);
+ res->cs[cs_end+1] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_LOOP_CONST_OFFSET
+ && index < EVERGREEN_LOOP_CONST_END) {
+ res->cs[cs_end] = PKT3C(PKT3_SET_LOOP_CONST, num, 0);
+ res->cs[cs_end+1] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_BOOL_CONST_OFFSET
+ && index < EVERGREEN_BOOL_CONST_END) {
+ res->cs[cs_end] = PKT3C(PKT3_SET_BOOL_CONST, num, 0);
+ res->cs[cs_end+1] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2;
+ } else {
+ res->cs[cs_end] = PKT0(index, num-1);
+ /* PKT0 is one dword only: pre-decrement so that the common
+ * "+= 2" below advances cs_end by exactly one. */
+ res->cs_end--;
+ }
+
+ res->cs_end += 2;
+}
+
+/**
+ * Bump the relocation count recorded for the current end position of a
+ * resource's command buffer.
+ */
+void evergreen_emit_force_reloc(struct evergreen_compute_resource* res)
+{
+	int pos = res->cs_end;
+
+	res->do_reloc[pos]++;
+}
+
+/*
+ * Same header selection as evergreen_emit_raw_reg_set(), but the header is
+ * written straight into the context's command stream instead of a
+ * resource's private buffer.  The register values must be appended by the
+ * caller.
+ */
+void evergreen_emit_ctx_reg_set(
+ struct r600_context *ctx,
+ unsigned index,
+ int num)
+{
+
+ if (index >= EVERGREEN_CONFIG_REG_OFFSET
+ && index < EVERGREEN_CONFIG_REG_END) {
+ ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONFIG_REG, num, 0);
+ ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_CONTEXT_REG_OFFSET
+ && index < EVERGREEN_CONTEXT_REG_END) {
+ ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0);
+ ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_RESOURCE_OFFSET
+ && index < EVERGREEN_RESOURCE_END) {
+ ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_RESOURCE, num, 0);
+ ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_SAMPLER_OFFSET
+ && index < EVERGREEN_SAMPLER_END) {
+ ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_SAMPLER, num, 0);
+ ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_CTL_CONST_OFFSET
+ && index < EVERGREEN_CTL_CONST_END) {
+ ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CTL_CONST, num, 0);
+ ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_LOOP_CONST_OFFSET
+ && index < EVERGREEN_LOOP_CONST_END) {
+ ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_LOOP_CONST, num, 0);
+ ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2;
+ } else if (index >= EVERGREEN_BOOL_CONST_OFFSET
+ && index < EVERGREEN_BOOL_CONST_END) {
+ ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_BOOL_CONST, num, 0);
+ ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2;
+ } else {
+ /* Outside every known range: single-dword PKT0 header. */
+ ctx->cs->buf[ctx->cs->cdw++] = PKT0(index, num-1);
+ }
+}
+
+/**
+ * Emit a relocation for `bo` into the context's command stream: a NOP
+ * packet header followed by the value returned by r600_context_bo_reloc().
+ */
+void evergreen_emit_ctx_reloc(
+	struct r600_context *ctx,
+	struct r600_resource *bo,
+	enum radeon_bo_usage usage)
+{
+	u32 reloc;
+
+	assert(bo);
+
+	/* The header is written before the relocation is registered. */
+	ctx->cs->buf[ctx->cs->cdw] = PKT3(PKT3_NOP, 0, 0);
+	ctx->cs->cdw++;
+
+	reloc = r600_context_bo_reloc(ctx, bo, usage);
+
+	ctx->cs->buf[ctx->cs->cdw] = reloc;
+	ctx->cs->cdw++;
+}
+
+/*
+ * Emit a SURFACE_SYNC packet for `bo` into the context's command stream.
+ *
+ * size   coherency range; 0 or 0xffffffff selects the full range
+ *        (cp_coher_size = 0xffffffff), anything else is rounded up to
+ *        256-byte units.
+ * flags  COMPUTE_RES_* bits selecting which caches to act on; for a CB
+ *        flush, bits 8..11 of flags select the colour buffer index.
+ *
+ * A relocation for `bo` follows the packet only when the range is not the
+ * full range.
+ */
+void evergreen_set_buffer_sync(
+ struct r600_context *ctx,
+ struct r600_resource* bo,
+ int size,
+ int flags,
+ enum radeon_bo_usage usage)
+{
+ assert(bo);
+ int32_t cp_coher_size = 0;
+
+ /* NOTE(review): `size` and cp_coher_size are signed while the
+ * 0xffffffff constants are unsigned; the comparisons and the assignment
+ * rely on implicit signed/unsigned conversion. */
+ if (size == 0xffffffff || size == 0) {
+ cp_coher_size = 0xffffffff;
+ }
+ else {
+ cp_coher_size = ((size + 255) >> 8);
+ }
+
+ uint32_t sync_flags = 0;
+
+ if ((flags & COMPUTE_RES_TC_FLUSH) == COMPUTE_RES_TC_FLUSH) {
+ sync_flags |= S_0085F0_TC_ACTION_ENA(1);
+ }
+
+ if ((flags & COMPUTE_RES_VC_FLUSH) == COMPUTE_RES_VC_FLUSH) {
+ sync_flags |= S_0085F0_VC_ACTION_ENA(1);
+ }
+
+ if ((flags & COMPUTE_RES_SH_FLUSH) == COMPUTE_RES_SH_FLUSH) {
+ sync_flags |= S_0085F0_SH_ACTION_ENA(1);
+ }
+
+ if ((flags & COMPUTE_RES_CB_FLUSH(0)) == COMPUTE_RES_CB_FLUSH(0)) {
+ sync_flags |= S_0085F0_CB_ACTION_ENA(1);
+
+ /* Pick the DEST_BASE enable bit of the selected colour buffer. */
+ switch((flags >> 8) & 0xF) {
+ case 0:
+ sync_flags |= S_0085F0_CB0_DEST_BASE_ENA(1);
+ break;
+ case 1:
+ sync_flags |= S_0085F0_CB1_DEST_BASE_ENA(1);
+ break;
+ case 2:
+ sync_flags |= S_0085F0_CB2_DEST_BASE_ENA(1);
+ break;
+ case 3:
+ sync_flags |= S_0085F0_CB3_DEST_BASE_ENA(1);
+ break;
+ case 4:
+ sync_flags |= S_0085F0_CB4_DEST_BASE_ENA(1);
+ break;
+ case 5:
+ sync_flags |= S_0085F0_CB5_DEST_BASE_ENA(1);
+ break;
+ case 6:
+ sync_flags |= S_0085F0_CB6_DEST_BASE_ENA(1);
+ break;
+ case 7:
+ sync_flags |= S_0085F0_CB7_DEST_BASE_ENA(1);
+ break;
+ case 8:
+ sync_flags |= S_0085F0_CB8_DEST_BASE_ENA(1);
+ break;
+ case 9:
+ sync_flags |= S_0085F0_CB9_DEST_BASE_ENA(1);
+ break;
+ case 10:
+ sync_flags |= S_0085F0_CB10_DEST_BASE_ENA(1);
+ break;
+ case 11:
+ sync_flags |= S_0085F0_CB11_DEST_BASE_ENA(1);
+ break;
+ default:
+ assert(0);
+ }
+ }
+
+ int32_t poll_interval = 10;
+
+ /* Packet body: flags, size, a zero dword, poll interval. */
+ ctx->cs->buf[ctx->cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
+ ctx->cs->buf[ctx->cs->cdw++] = sync_flags;
+ ctx->cs->buf[ctx->cs->cdw++] = cp_coher_size;
+ ctx->cs->buf[ctx->cs->cdw++] = 0;
+ ctx->cs->buf[ctx->cs->cdw++] = poll_interval;
+
+ if (cp_coher_size != 0xffffffff) {
+ evergreen_emit_ctx_reloc(ctx, bo, usage);
+ }
+}
+
+/*
+ * Translate the pipe format of `bo` into the colour-buffer format and
+ * number type stored in `fmt`.
+ *
+ * Returns 1 on success.  Returns 0 for any format not listed below, in
+ * which case `fmt` is left untouched.
+ */
+int evergreen_compute_get_gpu_format(
+ struct number_type_and_format* fmt,
+ struct r600_resource *bo)
+{
+ switch (bo->b.b.format)
+ {
+ /* NOTE(review): R8_UNORM shares the 32-bit colour format with
+ * R32_UNORM here -- confirm this is intended. */
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R32_UNORM:
+ fmt->format = V_028C70_COLOR_32;
+ fmt->number_type = V_028C70_NUMBER_UNORM;
+ fmt->num_format_all = 0;
+ break;
+ case PIPE_FORMAT_R32_FLOAT:
+ fmt->format = V_028C70_COLOR_32_FLOAT;
+ fmt->number_type = V_028C70_NUMBER_FLOAT;
+ fmt->num_format_all = 0;
+ break;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ fmt->format = V_028C70_COLOR_32_32_32_32_FLOAT;
+ fmt->number_type = V_028C70_NUMBER_FLOAT;
+ fmt->num_format_all = 0;
+ break;
+
+ ///TODO: other formats...
+
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+/**
+ * Bind `bo` as RAT `id` of the compute shader by queueing the colour-buffer
+ * register writes in the matching COMPUTE_RESOURCE_RAT slot.
+ *
+ * \param id     RAT index, must be below 12
+ * \param bo     buffer to bind; only linear PIPE_BUFFER resources are
+ *               implemented
+ * \param start  byte offset of the data inside bo, 256-byte aligned
+ * \param size   size in bytes, a multiple of 4; also used as the sync range
+ *
+ * NOTE(review): only CB_COLOR0_BASE is offset by the per-RAT register
+ * stride; every other register is written at its COLOR0 address -- confirm
+ * this is intended for id != 0.
+ */
+void evergreen_set_rat(
+	struct r600_pipe_compute *pipe,
+	int id,
+	struct r600_resource* bo,
+	int start,
+	int size)
+{
+	assert(id < 12);
+	assert((size & 3) == 0);
+	assert((start & 0xFF) == 0);
+
+	int offset;
+	COMPUTE_DBG("bind rat: %i \n", id);
+
+	/* Register stride is 0x3c for the first eight colour buffers and
+	 * 0x1c for the remaining ones. */
+	if (id < 8) {
+		offset = id*0x3c;
+	}
+	else {
+		offset = 8*0x3c + (id-8)*0x1c;
+	}
+
+	int linear = 0;
+
+	if (bo->b.b.height0 <= 1 && bo->b.b.depth0 <= 1
+			&& bo->b.b.target == PIPE_BUFFER) {
+		linear = 1;
+	}
+
+	struct evergreen_compute_resource* res =
+		get_empty_res(pipe, COMPUTE_RESOURCE_RAT, id);
+
+	evergreen_emit_force_reloc(res);
+
+	evergreen_reg_set(res, R_028C64_CB_COLOR0_PITCH, 0); ///TODO: for 2D?
+	evergreen_reg_set(res, R_028C68_CB_COLOR0_SLICE, 0);
+
+	struct number_type_and_format fmt;
+
+	/* Start from the default config.  It is used for PIPE_FORMAT_NONE and
+	 * stays in place when the format lookup fails, so fmt is never read
+	 * uninitialised. */
+	fmt.format = V_028C70_COLOR_32;
+	fmt.number_type = V_028C70_NUMBER_FLOAT;
+	fmt.num_format_all = 0;
+
+	if (bo->b.b.format != PIPE_FORMAT_NONE
+			&& !evergreen_compute_get_gpu_format(&fmt, bo)) {
+		COMPUTE_DBG("bind rat: unsupported buffer format, using default\n");
+	}
+
+	if (linear) {
+		evergreen_reg_set(res,
+			R_028C70_CB_COLOR0_INFO, S_028C70_RAT(1)
+			| S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_ALIGNED)
+			| S_028C70_FORMAT(fmt.format)
+			| S_028C70_NUMBER_TYPE(fmt.number_type)
+		);
+		evergreen_emit_force_reloc(res);
+	} else {
+		assert(0 && "TODO");
+		///TODO
+//		evergreen_reg_set(res, R_028C70_CB_COLOR0_INFO, S_028C70_RAT(1) | S_028C70_ARRAY_MODE(????));
+//		evergreen_emit_force_reloc(res);
+	}
+
+	evergreen_reg_set(res, R_028C74_CB_COLOR0_ATTRIB, S_028C74_NON_DISP_TILING_ORDER(1));
+	evergreen_emit_force_reloc(res);
+
+	if (linear) {
+		/* XXX: Why are we using size instead of bo->b.b.width0 ? */
+		evergreen_reg_set(res, R_028C78_CB_COLOR0_DIM, size);
+	} else {
+		evergreen_reg_set(res, R_028C78_CB_COLOR0_DIM,
+			S_028C78_WIDTH_MAX(bo->b.b.width0)
+			| S_028C78_HEIGHT_MAX(bo->b.b.height0));
+	}
+
+	if (id < 8) {
+		evergreen_reg_set(res, R_028C7C_CB_COLOR0_CMASK, 0);
+		evergreen_emit_force_reloc(res);
+		evergreen_reg_set(res, R_028C84_CB_COLOR0_FMASK, 0);
+		evergreen_emit_force_reloc(res);
+	}
+
+	/* The base address register takes the offset in 256-byte units. */
+	evergreen_reg_set(res, R_028C60_CB_COLOR0_BASE + offset, start >> 8);
+
+	res->bo = bo;
+	res->usage = RADEON_USAGE_READWRITE;
+	res->coher_bo_size = size;
+	res->flags = COMPUTE_RES_CB_FLUSH(id);
+}
+
+/**
+ * Queue the LDS configuration in the COMPUTE_RESOURCE_LDS slot.
+ *
+ * SQ_LDS_ALLOC packs `size` in the low bits and `num_waves` from bit 14 up.
+ */
+void evergreen_set_lds(
+	struct r600_pipe_compute *pipe,
+	int num_lds,
+	int size,
+	int num_waves)
+{
+	struct evergreen_compute_resource* lds =
+		get_empty_res(pipe, COMPUTE_RESOURCE_LDS, 0);
+	unsigned lds_mgmt = S_008E2C_NUM_LS_LDS(num_lds);
+	unsigned lds_alloc = size | num_waves << 14;
+
+	evergreen_reg_set(lds, R_008E2C_SQ_LDS_RESOURCE_MGMT, lds_mgmt);
+	evergreen_reg_set(lds, CM_R_0288E8_SQ_LDS_ALLOC, lds_alloc);
+}
+
+/**
+ * Queue the GDS configuration (ordered-wave setting, base address and size)
+ * in the COMPUTE_RESOURCE_GDS slot.
+ */
+void evergreen_set_gds(
+	struct r600_pipe_compute *pipe,
+	uint32_t addr,
+	uint32_t size)
+{
+	struct evergreen_compute_resource* gds;
+
+	gds = get_empty_res(pipe, COMPUTE_RESOURCE_GDS, 0);
+
+	evergreen_reg_set(gds, R_028728_GDS_ORDERED_WAVE_PER_SE, 1);
+	evergreen_reg_set(gds, R_028720_GDS_ADDR_BASE, addr);
+	evergreen_reg_set(gds, R_028724_GDS_ADDR_SIZE, size);
+}
+
+/*
+ * Queue the memory-export configuration in the COMPUTE_RESOURCE_EXPORT
+ * slot.  The size register is always written; the base register and the
+ * buffer object are only set when size is non-zero, otherwise the slot
+ * keeps the NULL bo it got from get_empty_res().
+ */
+void evergreen_set_export(
+ struct r600_pipe_compute *pipe,
+ struct r600_resource* bo,
+ int offset, int size)
+{
+ /* Register offsets defined locally; as preprocessor macros they stay
+ * visible for the rest of the file. */
+ #define SX_MEMORY_EXPORT_BASE 0x9010
+ #define SX_MEMORY_EXPORT_SIZE 0x9014
+
+ struct evergreen_compute_resource* res =
+ get_empty_res(pipe, COMPUTE_RESOURCE_EXPORT, 0);
+
+ evergreen_reg_set(res, SX_MEMORY_EXPORT_SIZE, size);
+
+ if (size) {
+ evergreen_reg_set(res, SX_MEMORY_EXPORT_BASE, offset);
+ res->bo = bo;
+ res->usage = RADEON_USAGE_WRITE;
+ res->coher_bo_size = size;
+ res->flags = 0;
+ }
+}
+
+/*
+ * Queue one loop constant in slot `id` of COMPUTE_RESOURCE_LOOP.
+ *
+ * The register value packs count in the low bits, init from bit 12 and inc
+ * from bit 24.  The asserts bound id to 0..31, count to 0xFFF and init/inc
+ * to 0xFF.
+ * NOTE(review): the slot is fetched before `id` is range-checked here.
+ */
+void evergreen_set_loop_const(
+ struct r600_pipe_compute *pipe,
+ int id, int count, int init, int inc) {
+
+ struct evergreen_compute_resource* res =
+ get_empty_res(pipe, COMPUTE_RESOURCE_LOOP, id);
+
+ assert(id < 32);
+ assert(count <= 0xFFF);
+ assert(init <= 0xFF);
+ assert(inc <= 0xFF);
+
+ /* Compute shaders use LOOP_CONST registers SQ_LOOP_CONST_160 to
+ * SQ_LOOP_CONST_191 */
+ evergreen_reg_set(res, R_03A200_SQ_LOOP_CONST_0 + (160 * 4) + (id * 4),
+ count | init << 12 | inc << 24);
+}
+
+/**
+ * Queue the LS temp-ring configuration for shader engine `se` in the
+ * matching COMPUTE_RESOURCE_TMPRING slot.
+ *
+ * GRBM_GFX_INDEX is first pointed at the selected shader engine, the ring
+ * size (and, when non-zero, its base and buffer object) is written, and
+ * GRBM_GFX_INDEX is then restored to broadcast to all engines.
+ */
+void evergreen_set_tmp_ring(
+	struct r600_pipe_compute *pipe,
+	struct r600_resource* bo,
+	int offset, int size, int se)
+{
+	#define SQ_LSTMP_RING_BASE 0x00008e10
+	#define SQ_LSTMP_RING_SIZE 0x00008e14
+	#define GRBM_GFX_INDEX 0x802C
+	#define INSTANCE_INDEX(x) ((x) << 0)
+	#define SE_INDEX(x) ((x) << 16)
+	#define INSTANCE_BROADCAST_WRITES (1 << 30)
+	/* Unsigned constant: shifting a signed 1 into bit 31 is undefined. */
+	#define SE_BROADCAST_WRITES (1u << 31)
+
+	struct evergreen_compute_resource* res =
+		get_empty_res(pipe, COMPUTE_RESOURCE_TMPRING, se);
+
+	evergreen_reg_set(res,
+		GRBM_GFX_INDEX,INSTANCE_INDEX(0)
+		| SE_INDEX(se)
+		| INSTANCE_BROADCAST_WRITES);
+	evergreen_reg_set(res, SQ_LSTMP_RING_SIZE, size);
+
+	if (size) {
+		assert(bo);
+
+		evergreen_reg_set(res, SQ_LSTMP_RING_BASE, offset);
+		res->bo = bo;
+		res->usage = RADEON_USAGE_WRITE;
+		res->coher_bo_size = 0;
+		res->flags = 0;
+
+		/* Request a relocation at the current end of the buffer. */
+		evergreen_emit_force_reloc(res);
+	}
+
+	evergreen_reg_set(res,
+		GRBM_GFX_INDEX,INSTANCE_INDEX(0)
+		| SE_INDEX(0)
+		| INSTANCE_BROADCAST_WRITES
+		| SE_BROADCAST_WRITES);
+}
+
+/**
+ * Map a V_028C70_COLOR_* format to the ENDIAN_* swap mode to use for it.
+ *
+ * When R600_BIG_ENDIAN is false the result is always ENDIAN_NONE.
+ * Formats not listed below also map to ENDIAN_NONE.
+ */
+static uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
+{
+	if (!R600_BIG_ENDIAN) {
+		return ENDIAN_NONE;
+	}
+
+	switch (colorformat) {
+	/* 16-bit buffers and 64-bit buffers built from 16-bit channels. */
+	case V_028C70_COLOR_5_6_5:
+	case V_028C70_COLOR_1_5_5_5:
+	case V_028C70_COLOR_4_4_4_4:
+	case V_028C70_COLOR_16:
+	case V_028C70_COLOR_8_8:
+	case V_028C70_COLOR_16_16_16_16:
+	case V_028C70_COLOR_16_16_16_16_FLOAT:
+		return ENDIAN_8IN16;
+
+	/* 32-bit buffers. */
+	case V_028C70_COLOR_8_8_8_8:
+	case V_028C70_COLOR_2_10_10_10:
+	case V_028C70_COLOR_8_24:
+	case V_028C70_COLOR_24_8:
+	case V_028C70_COLOR_32_FLOAT:
+	case V_028C70_COLOR_16_16_FLOAT:
+	case V_028C70_COLOR_16_16:
+	/* 64-bit buffers built from 32-bit channels. */
+	case V_028C70_COLOR_32_32_FLOAT:
+	case V_028C70_COLOR_32_32:
+	case V_028C70_COLOR_X24_8_32_FLOAT:
+	/* 96-bit buffers. */
+	case V_028C70_COLOR_32_32_32_FLOAT:
+	/* 128-bit buffers. */
+	case V_028C70_COLOR_32_32_32_32_FLOAT:
+	case V_028C70_COLOR_32_32_32_32:
+		return ENDIAN_8IN32;
+
+	/* 8-bit buffers need no swap; anything else is unsupported. */
+	case V_028C70_COLOR_4_4:
+	case V_028C70_COLOR_8:
+	default:
+		return ENDIAN_NONE;
+	}
+}
+
+/**
+ * Translate a PIPE_TEXTURE_* target into the V_030000_SQ_TEX_DIM_* value.
+ * RECT is treated as 2D; any target not listed falls back to 1D.
+ */
+static unsigned r600_tex_dim(unsigned dim)
+{
+	unsigned hw_dim;
+
+	switch (dim) {
+	case PIPE_TEXTURE_1D_ARRAY:
+		hw_dim = V_030000_SQ_TEX_DIM_1D_ARRAY;
+		break;
+	case PIPE_TEXTURE_2D:
+	case PIPE_TEXTURE_RECT:
+		hw_dim = V_030000_SQ_TEX_DIM_2D;
+		break;
+	case PIPE_TEXTURE_2D_ARRAY:
+		hw_dim = V_030000_SQ_TEX_DIM_2D_ARRAY;
+		break;
+	case PIPE_TEXTURE_3D:
+		hw_dim = V_030000_SQ_TEX_DIM_3D;
+		break;
+	case PIPE_TEXTURE_CUBE:
+		hw_dim = V_030000_SQ_TEX_DIM_CUBEMAP;
+		break;
+	case PIPE_TEXTURE_1D:
+	default:
+		hw_dim = V_030000_SQ_TEX_DIM_1D;
+		break;
+	}
+
+	return hw_dim;
+}
+
+/**
+ * Bind a one-dimensional buffer as vertex-fetch resource number id.
+ *
+ * Emits a PKT3 SET_RESOURCE packet with eight payload dwords into the
+ * VERT compute resource and records the buffer, its usage and the
+ * cache flush flags on that resource.
+ *
+ * @param bo       buffer to bind; height0 and depth0 must be at most 1
+ * @param id       resource index, must be below 16
+ * @param offset   64-bit address; the low 32 bits go into the second
+ *                 payload dword, the high bits into BASE_ADDRESS_HI
+ * @param writable non-zero selects RADEON_USAGE_READWRITE, zero selects
+ *                 RADEON_USAGE_READ
+ */
+void evergreen_set_vtx_resource(
+	struct r600_pipe_compute *pipe,
+	struct r600_resource* bo,
+	int id, uint64_t offset, int writable)
+{
+	assert(id < 16);
+	uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
+	struct number_type_and_format fmt;
+
+	/* Start from a fully defined format in case the lookup below
+	 * leaves a field untouched. */
+	fmt.format = 0;
+	fmt.number_type = 0;
+	fmt.num_format_all = 0;
+
+	assert(bo->b.b.height0 <= 1);
+	assert(bo->b.b.depth0 <= 1);
+
+	int e = evergreen_compute_get_gpu_format(&fmt, bo);
+
+	assert(e && "unknown format");
+	(void)e; /* only consumed by the assert above */
+
+	struct evergreen_compute_resource* res =
+		get_empty_res(pipe, COMPUTE_RESOURCE_VERT, id);
+
+	/* The size is taken from width0 as-is; no per-format scaling is
+	 * applied. */
+	unsigned size = bo->b.b.width0;
+	unsigned stride = 1;
+
+	COMPUTE_DBG("id: %i vtx size: %i byte, width0: %i elem\n",
+		id, size, bo->b.b.width0);
+
+	sq_vtx_constant_word2 =
+		S_030008_BASE_ADDRESS_HI(offset >> 32) |
+		S_030008_STRIDE(stride) |
+		S_030008_DATA_FORMAT(fmt.format) |
+		S_030008_NUM_FORMAT_ALL(fmt.num_format_all) |
+		S_030008_ENDIAN_SWAP(0);
+
+	/* offset is 64 bits wide, so it needs a matching conversion;
+	 * passing it to %i would misread the remaining arguments. */
+	COMPUTE_DBG("%08X %llu %i %i %i\n", sq_vtx_constant_word2,
+		(unsigned long long)offset,
+		stride, fmt.format, fmt.num_format_all);
+
+	sq_vtx_constant_word3 =
+		S_03000C_DST_SEL_X(0) |
+		S_03000C_DST_SEL_Y(1) |
+		S_03000C_DST_SEL_Z(2) |
+		S_03000C_DST_SEL_W(3);
+
+	sq_vtx_constant_word4 = 0;
+
+	evergreen_emit_raw_value(res, PKT3C(PKT3_SET_RESOURCE, 8, 0));
+	evergreen_emit_raw_value(res, (id+816)*32 >> 2);
+	evergreen_emit_raw_value(res, (unsigned)((offset) & 0xffffffff));
+	evergreen_emit_raw_value(res, size - 1);
+	evergreen_emit_raw_value(res, sq_vtx_constant_word2);
+	evergreen_emit_raw_value(res, sq_vtx_constant_word3);
+	evergreen_emit_raw_value(res, sq_vtx_constant_word4);
+	evergreen_emit_raw_value(res, 0);
+	evergreen_emit_raw_value(res, 0);
+	evergreen_emit_raw_value(res, S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER));
+
+	res->bo = bo;
+
+	if (writable) {
+		res->usage = RADEON_USAGE_READWRITE;
+	}
+	else {
+		res->usage = RADEON_USAGE_READ;
+	}
+
+	res->coher_bo_size = size;
+	res->flags = COMPUTE_RES_TC_FLUSH | COMPUTE_RES_VC_FLUSH;
+}
+
+/**
+ * Bind the texture behind a sampler view as texture resource number id.
+ *
+ * Emits a PKT3 SET_RESOURCE packet with eight payload dwords into the TEX
+ * compute resource, records the texture as a read-only buffer with the
+ * texture-cache flush flag, and then forces two relocations.
+ *
+ * Array targets (1D_ARRAY, 2D_ARRAY) are rejected by assertion, and only
+ * level 0 of the texture is described (pitch, array mode and offset are
+ * all read from index 0).
+ */
+void evergreen_set_tex_resource(
+ struct r600_pipe_compute *pipe,
+ struct r600_pipe_sampler_view* view,
+ int id)
+{
+ struct evergreen_compute_resource* res =
+ get_empty_res(pipe, COMPUTE_RESOURCE_TEX, id);
+ struct r600_resource_texture *tmp =
+ (struct r600_resource_texture*)view->base.texture;
+
+ unsigned format, endian;
+ uint32_t word4 = 0, yuv_format = 0, pitch = 0;
+ unsigned char swizzle[4], array_mode = 0, tile_type = 0;
+ unsigned height, depth;
+
+ /* Identity swizzle: each channel maps to itself. */
+ swizzle[0] = 0;
+ swizzle[1] = 1;
+ swizzle[2] = 2;
+ swizzle[3] = 3;
+
+ format = r600_translate_texformat((struct pipe_screen *)pipe->ctx->screen,
+ view->base.format, swizzle, &word4, &yuv_format);
+
+ /* A result of ~0 is replaced by format 0.
+ * NOTE(review): presumably ~0 means "unsupported format" - confirm
+ * against r600_translate_texformat(). */
+ if (format == ~0) {
+ format = 0;
+ }
+
+ endian = r600_colorformat_endian_swap(format);
+
+ height = view->base.texture->height0;
+ depth = view->base.texture->depth0;
+
+ /* Pitch is rounded up to a multiple of 8 and later encoded as
+ * (pitch / 8) - 1. */
+ pitch = align(tmp->pitch_in_blocks[0] *
+ util_format_get_blockwidth(tmp->real_format), 8);
+ array_mode = tmp->array_mode[0];
+ tile_type = tmp->tile_type;
+
+ assert(view->base.texture->target != PIPE_TEXTURE_1D_ARRAY);
+ assert(view->base.texture->target != PIPE_TEXTURE_2D_ARRAY);
+
+ evergreen_emit_raw_value(res, PKT3C(PKT3_SET_RESOURCE, 8, 0));
+ evergreen_emit_raw_value(res, (id+816)*32 >> 2); ///TODO: check this line
+ evergreen_emit_raw_value(res,
+ (S_030000_DIM(r600_tex_dim(view->base.texture->target)) |
+ S_030000_PITCH((pitch / 8) - 1) |
+ S_030000_NON_DISP_TILING_ORDER(tile_type) |
+ S_030000_TEX_WIDTH(view->base.texture->width0 - 1)));
+ evergreen_emit_raw_value(res, (S_030004_TEX_HEIGHT(height - 1) |
+ S_030004_TEX_DEPTH(depth - 1) |
+ S_030004_ARRAY_MODE(array_mode)));
+ /* The same level-0 offset (in 256-byte units) is emitted twice.
+ * NOTE(review): presumably these are the base and mip base address
+ * words - confirm against the register documentation. */
+ evergreen_emit_raw_value(res, tmp->offset[0] >> 8);
+ evergreen_emit_raw_value(res, tmp->offset[0] >> 8);
+ evergreen_emit_raw_value(res, (word4 |
+ S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
+ S_030010_ENDIAN_SWAP(endian) |
+ S_030010_BASE_LEVEL(0)));
+ evergreen_emit_raw_value(res, (S_030014_LAST_LEVEL(0) |
+ S_030014_BASE_ARRAY(0) |
+ S_030014_LAST_ARRAY(0)));
+ evergreen_emit_raw_value(res, (S_030018_MAX_ANISO(4 /* max 16 samples */)));
+ evergreen_emit_raw_value(res,
+ S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE)
+ | S_03001C_DATA_FORMAT(format));
+
+ res->bo = (struct r600_resource*)view->base.texture;
+
+ res->usage = RADEON_USAGE_READ;
+
+ res->coher_bo_size = tmp->offset[0] + util_format_get_blockwidth(tmp->real_format)*view->base.texture->width0*height*depth;
+ res->flags = COMPUTE_RES_TC_FLUSH;
+
+ /* NOTE(review): two relocations, presumably one for each of the two
+ * address words emitted above - confirm before changing. */
+ evergreen_emit_force_reloc(res);
+ evergreen_emit_force_reloc(res);
+}
+
+/**
+ * Emit sampler state number id into the SAMPLER compute resource.
+ *
+ * Writes a PKT3 SET_SAMPLER packet with three payload dwords built from
+ * sampler->state: wrap modes and filters, the LOD range, and the LOD
+ * bias together with the cube-wrap setting. The border colour type is
+ * always opaque black.
+ */
+void evergreen_set_sampler_resource(
+ struct r600_pipe_compute *pipe,
+ struct compute_sampler_state *sampler,
+ int id)
+{
+ struct evergreen_compute_resource* res =
+ get_empty_res(pipe, COMPUTE_RESOURCE_SAMPLER, id);
+
+ /* OR-ed into both filter values when max_anisotropy is above 1. */
+ unsigned aniso_flag_offset = sampler->state.max_anisotropy > 1 ? 2 : 0;
+
+ evergreen_emit_raw_value(res, PKT3C(PKT3_SET_SAMPLER, 3, 0));
+ evergreen_emit_raw_value(res, (id + 90)*3);
+ evergreen_emit_raw_value(res,
+ S_03C000_CLAMP_X(r600_tex_wrap(sampler->state.wrap_s)) |
+ S_03C000_CLAMP_Y(r600_tex_wrap(sampler->state.wrap_t)) |
+ S_03C000_CLAMP_Z(r600_tex_wrap(sampler->state.wrap_r)) |
+ S_03C000_XY_MAG_FILTER(r600_tex_filter(sampler->state.mag_img_filter) | aniso_flag_offset) |
+ S_03C000_XY_MIN_FILTER(r600_tex_filter(sampler->state.min_img_filter) | aniso_flag_offset) |
+ S_03C000_BORDER_COLOR_TYPE(V_03C000_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK)
+ );
+ /* LOD limits are clamped to [0, 15] before fixed-point conversion. */
+ evergreen_emit_raw_value(res,
+ S_03C004_MIN_LOD(S_FIXED(CLAMP(sampler->state.min_lod, 0, 15), 8)) |
+ S_03C004_MAX_LOD(S_FIXED(CLAMP(sampler->state.max_lod, 0, 15), 8))
+ );
+ /* Cube wrapping is disabled unless seamless_cube_map is set. */
+ evergreen_emit_raw_value(res,
+ S_03C008_LOD_BIAS(S_FIXED(CLAMP(sampler->state.lod_bias, -16, 16), 8)) |
+ (sampler->state.seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) |
+ S_03C008_TYPE(1)
+ );
+}
+
+/**
+ * Point LS constant cache number cache_id at a buffer.
+ *
+ * @param cache_id cache index, must be below 16
+ * @param cbo      buffer recorded on the resource (read usage)
+ * @param size     value for the buffer-size register, must be below 0x200
+ * @param offset   byte offset, must be a multiple of 256; it is written
+ *                 to the cache register shifted right by 8
+ */
+void evergreen_set_const_cache(
+	struct r600_pipe_compute *pipe,
+	int cache_id,
+	struct r600_resource* cbo,
+	int size, int offset)
+{
+	#define SQ_ALU_CONST_BUFFER_SIZE_LS_0 0x00028fc0
+	#define SQ_ALU_CONST_CACHE_LS_0 0x00028f40
+
+	struct evergreen_compute_resource *cache_res =
+		get_empty_res(pipe, COMPUTE_RESOURCE_CONST_MEM, cache_id);
+	int reg_delta;
+
+	assert(size < 0x200);
+	assert((offset & 0xFF) == 0);
+	assert(cache_id < 16);
+
+	/* Each cache owns one dword in both register arrays. */
+	reg_delta = cache_id*4;
+
+	evergreen_reg_set(cache_res, SQ_ALU_CONST_BUFFER_SIZE_LS_0 + reg_delta, size);
+	evergreen_reg_set(cache_res, SQ_ALU_CONST_CACHE_LS_0 + reg_delta, offset >> 8);
+
+	cache_res->bo = cbo;
+	cache_res->usage = RADEON_USAGE_READ;
+	cache_res->coher_bo_size = size;
+	cache_res->flags = COMPUTE_RES_SH_FLUSH;
+}
+
+/**
+ * Create a buffer of the given non-zero size through pipe_buffer_create()
+ * with PIPE_BIND_CUSTOM / PIPE_USAGE_IMMUTABLE and return it as an
+ * r600_resource.
+ */
+struct r600_resource* r600_compute_buffer_alloc_vram(
+	struct r600_screen *screen,
+	unsigned size)
+{
+	assert(size);
+
+	return (struct r600_resource *)pipe_buffer_create(
+		(struct pipe_screen*) screen,
+		PIPE_BIND_CUSTOM,
+		PIPE_USAGE_IMMUTABLE,
+		size);
+}
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.h b/src/gallium/drivers/r600/evergreen_compute_internal.h
new file mode 100644
index 00000000000..340ff4b557e
--- /dev/null
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.h
@@ -0,0 +1,119 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Adam Rak <[email protected]>
+ */
+
+#ifndef EVERGREEN_COMPUTE_INTERNAL_H
+#define EVERGREEN_COMPUTE_INTERNAL_H
+
+#include "compute_memory_pool.h"
+
+/* One enumerator (COMPUTE_RESOURCE_<name>) is generated for every
+ * DECL_COMPUTE_RESOURCE entry in compute_resource.def;
+ * __COMPUTE_RESOURCE_END__ is the trailing enumerator after all of them. */
+enum evergreen_compute_resources
+{
+#define DECL_COMPUTE_RESOURCE(name, n) COMPUTE_RESOURCE_ ## name ,
+#include "compute_resource.def"
+#undef DECL_COMPUTE_RESOURCE
+__COMPUTE_RESOURCE_END__
+};
+
+typedef unsigned u32;
+
+/* Values for evergreen_compute_resource.flags. */
+#define COMPUTE_RES_TC_FLUSH 0xF0001
+#define COMPUTE_RES_VC_FLUSH 0xF0002
+#define COMPUTE_RES_SH_FLUSH 0xF0004
+/* The argument is parenthesised so that an expression such as (a | b)
+ * is shifted as a whole instead of only its last operand. */
+#define COMPUTE_RES_CB_FLUSH(x) (0xF0008 | ((x) << 8))
+#define COMPUTE_RES_FULL_FLUSH 0xF0010
+
+/* State of one compute resource, as handed out by get_empty_res(). */
+struct evergreen_compute_resource {
+ int enabled;
+
+ /* NOTE(review): presumably a small private command stream filled by
+ * the evergreen_emit_raw_*()/evergreen_reg_set() helpers, with
+ * do_reloc[] flagging entries that need a relocation and cs_end
+ * counting the used entries - confirm in
+ * evergreen_compute_internal.c. */
+ int do_reloc[256];
+ u32 cs[256];
+ int cs_end;
+
+ struct r600_resource *bo; /* buffer recorded by the evergreen_set_*() helpers */
+ int coher_bo_size;
+ enum radeon_bo_usage usage;
+ int flags; ///flags for COMPUTE_RES_*_FLUSH
+};
+
+/* Sampler state consumed by evergreen_set_sampler_resource(), which reads
+ * the wrap, filter, LOD and cube-map settings from the state member. */
+struct compute_sampler_state {
+ struct r600_pipe_state base;
+ struct pipe_sampler_state state;
+};
+
+/* Hardware format description filled in by
+ * evergreen_compute_get_gpu_format(). */
+struct number_type_and_format {
+ unsigned format; /* used as the DATA_FORMAT field of a vertex resource */
+ unsigned number_type;
+ unsigned num_format_all; /* used as the NUM_FORMAT_ALL field of a vertex resource */
+};
+
+/* Compute shader state object; the evergreen_set_*() helpers take it to
+ * locate the resource they update and the owning context. */
+struct r600_pipe_compute {
+ struct r600_context *ctx;
+ struct r600_bytecode bc;
+ struct tgsi_token *tokens;
+
+ /* NOTE(review): presumably the array that get_empty_res() indexes -
+ * confirm in evergreen_compute_internal.c. */
+ struct evergreen_compute_resource *resources;
+
+ unsigned local_size;
+ unsigned private_size;
+ unsigned input_size;
+#ifdef HAVE_OPENCL
+ /* Only present in OpenCL builds. */
+ LLVMModuleRef mod;
+#endif
+ struct r600_resource *kernel_param;
+ struct r600_resource *shader_code_bo;
+};
+
+int evergreen_compute_get_gpu_format(struct number_type_and_format* fmt, struct r600_resource *bo); ///get hw format from resource, return 0 on failure, nonzero on success
+
+
+void evergreen_emit_raw_reg_set(struct evergreen_compute_resource* res, unsigned index, int num);
+void evergreen_emit_ctx_reg_set(struct r600_context *ctx, unsigned index, int num);
+void evergreen_emit_raw_value(struct evergreen_compute_resource* res, unsigned value);
+void evergreen_emit_ctx_value(struct r600_context *ctx, unsigned value);
+void evergreen_mult_reg_set_(struct evergreen_compute_resource* res, int index, u32* array, int size);
+void evergreen_emit_ctx_reloc(struct r600_context *ctx, struct r600_resource *bo, enum radeon_bo_usage usage);
+void evergreen_reg_set(struct evergreen_compute_resource* res, unsigned index, unsigned value);
+void evergreen_emit_force_reloc(struct evergreen_compute_resource* res);
+
+void evergreen_set_buffer_sync(struct r600_context *ctx, struct r600_resource* bo, int size, int flags, enum radeon_bo_usage usage);
+
+struct evergreen_compute_resource* get_empty_res(struct r600_pipe_compute*, enum evergreen_compute_resources res_code, int index);
+int get_compute_resource_num(void);
+
+/* Convenience wrapper that passes sizeof(array) (a size in bytes) as the
+ * last argument. The array argument must be a real array, not a pointer,
+ * otherwise sizeof yields the pointer size. */
+#define evergreen_mult_reg_set(res, index, array) evergreen_mult_reg_set_(res, index, array, sizeof(array))
+
+void evergreen_set_rat(struct r600_pipe_compute *pipe, int id, struct r600_resource* bo, int start, int size);
+void evergreen_set_lds(struct r600_pipe_compute *pipe, int num_lds, int size, int num_waves);
+void evergreen_set_gds(struct r600_pipe_compute *pipe, uint32_t addr, uint32_t size);
+void evergreen_set_export(struct r600_pipe_compute *pipe, struct r600_resource* bo, int offset, int size);
+void evergreen_set_loop_const(struct r600_pipe_compute *pipe, int id, int count, int init, int inc);
+void evergreen_set_tmp_ring(struct r600_pipe_compute *pipe, struct r600_resource* bo, int offset, int size, int se);
+void evergreen_set_vtx_resource(struct r600_pipe_compute *pipe, struct r600_resource* bo, int id, uint64_t offset, int writable);
+void evergreen_set_tex_resource(struct r600_pipe_compute *pipe, struct r600_pipe_sampler_view* view, int id);
+void evergreen_set_sampler_resource(struct r600_pipe_compute *pipe, struct compute_sampler_state *sampler, int id);
+void evergreen_set_const_cache(struct r600_pipe_compute *pipe, int cache_id, struct r600_resource* cbo, int size, int offset);
+
+struct r600_resource* r600_compute_buffer_alloc_vram(struct r600_screen *screen, unsigned size);
+
+#endif
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index ec0afe52adf..b618ca881ba 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -28,6 +28,7 @@
#include "util/u_memory.h"
#include "util/u_framebuffer.h"
#include "util/u_dual_blend.h"
+#include "evergreen_compute.h"
static uint32_t eg_num_banks(uint32_t nbanks)
{
@@ -1881,6 +1882,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
rctx->context.create_stream_output_target = r600_create_so_target;
rctx->context.stream_output_target_destroy = r600_so_target_destroy;
rctx->context.set_stream_output_targets = r600_set_so_targets;
+ evergreen_init_compute_state_functions(rctx);
}
static void cayman_init_atom_start_cs(struct r600_context *rctx)
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index 105d80f061d..3b6d7304551 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -61,6 +61,8 @@
#define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7
#define PKT3_NOP 0x10
+#define PKT3_DISPATCH_DIRECT 0x15
+#define PKT3_DISPATCH_INDIRECT 0x16
#define PKT3_INDIRECT_BUFFER_END 0x17
#define PKT3_SET_PREDICATION 0x20
#define PKT3_REG_RMW 0x21
@@ -114,6 +116,11 @@
#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1)
#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
+#define RADEON_CP_PACKET3_COMPUTE_MODE 0x00000002
+
+/*Evergreen Compute packet3*/
+#define PKT3C(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate) | RADEON_CP_PACKET3_COMPUTE_MODE)
+
/* Registers */
#define R_0084FC_CP_STRMOUT_CNTL 0x000084FC
#define S_0084FC_OFFSET_UPDATE_DONE(x) (((x) & 0x1) << 0)
@@ -241,6 +248,15 @@
#define G_008CF0_ALU_UPDATE_FIFO_HIWATER(x) (((x) >> 24) & 0x1F)
#define C_008CF0_ALU_UPDATE_FIFO_HIWATER(x) 0xE0FFFFFF
+#define R_008E20_SQ_STATIC_THREAD_MGMT1 0x8E20
+#define R_008E24_SQ_STATIC_THREAD_MGMT2 0x8E24
+#define R_008E28_SQ_STATIC_THREAD_MGMT3 0x8E28
+
+#define R_00899C_VGT_COMPUTE_START_X 0x0000899C
+#define R_0089A0_VGT_COMPUTE_START_Y 0x000089A0
+#define R_0089A4_VGT_COMPUTE_START_Z 0x000089A4
+#define R_0089AC_VGT_COMPUTE_THREAD_GROUP_SIZE 0x000089AC
+
#define R_009100_SPI_CONFIG_CNTL 0x00009100
#define R_00913C_SPI_CONFIG_CNTL_1 0x0000913C
#define S_00913C_VTX_DONE_DELAY(x) (((x) & 0xF) << 0)
@@ -397,6 +413,11 @@
#define G_028410_ALPHA_TEST_BYPASS(x) (((x) >> 8) & 0x1)
#define C_028410_ALPHA_TEST_BYPASS 0xFFFFFEFF
+#define R_0286EC_SPI_COMPUTE_NUM_THREAD_X 0x0286EC
+#define R_0286F0_SPI_COMPUTE_NUM_THREAD_Y 0x0286F0
+#define R_0286F4_SPI_COMPUTE_NUM_THREAD_Z 0x0286F4
+#define R_028B74_VGT_DISPATCH_INITIATOR 0x028B74
+
#define R_028800_DB_DEPTH_CONTROL 0x028800
#define S_028800_STENCIL_ENABLE(x) (((x) & 0x1) << 0)
#define G_028800_STENCIL_ENABLE(x) (((x) >> 0) & 0x1)
@@ -747,6 +768,8 @@
#define S_028A40_CUT_MODE(x) (((x) & 0x3) << 3)
#define G_028A40_CUT_MODE(x) (((x) >> 3) & 0x3)
#define C_028A40_CUT_MODE 0xFFFFFFE7
+/* Arguments are parenthesised so that compound expressions are shifted
+ * as a whole. */
+#define S_028A40_COMPUTE_MODE(x) ((x) << 14)
+#define S_028A40_PARTIAL_THD_AT_EOI(x) ((x) << 17)
#define R_028A6C_VGT_GS_OUT_PRIM_TYPE 0x028A6C
#define S_028A6C_OUTPRIM_TYPE(x) (((x) & 0x3F) << 0)
#define V_028A6C_OUTPRIM_TYPE_POINTLIST 0
@@ -1434,6 +1457,50 @@
#define G_028848_ALLOW_DOUBLE_DENORM_OUT(x) (((x) >> 7) & 0x1)
#define C_028848_ALLOW_DOUBLE_DENORM_OUT 0xFFFFFF7F
+/* LS program resource registers and their field accessors. */
+#define R_0288D4_SQ_PGM_RESOURCES_LS 0x0288d4
+#define S_0288D4_NUM_GPRS(x) (((x) & 0xFF) << 0)
+#define G_0288D4_NUM_GPRS(x) (((x) >> 0) & 0xFF)
+#define C_0288D4_NUM_GPRS 0xFFFFFF00
+#define S_0288D4_STACK_SIZE(x) (((x) & 0xFF) << 8)
+#define G_0288D4_STACK_SIZE(x) (((x) >> 8) & 0xFF)
+#define C_0288D4_STACK_SIZE 0xFFFF00FF
+#define S_0288D4_DX10_CLAMP(x) (((x) & 0x1) << 21)
+#define G_0288D4_DX10_CLAMP(x) (((x) >> 21) & 0x1)
+#define C_0288D4_DX10_CLAMP 0xFFDFFFFF
+#define S_0288D4_PRIME_CACHE_ON_DRAW(x) (((x) & 0x1) << 23)
+#define G_0288D4_PRIME_CACHE_ON_DRAW(x) (((x) >> 23) & 0x1)
+#define S_0288D4_UNCACHED_FIRST_INST(x) (((x) & 0x1) << 28)
+#define G_0288D4_UNCACHED_FIRST_INST(x) (((x) >> 28) & 0x1)
+#define C_0288D4_UNCACHED_FIRST_INST 0xEFFFFFFF
+#define S_0288D4_CLAMP_CONSTS(x) (((x) & 0x1) << 31)
+#define G_0288D4_CLAMP_CONSTS(x) (((x) >> 31) & 0x1)
+#define C_0288D4_CLAMP_CONSTS 0x7FFFFFFF
+
+#define R_0288D8_SQ_PGM_RESOURCES_LS_2 0x0288d8
+
+
#define R_028644_SPI_PS_INPUT_CNTL_0 0x028644
#define S_028644_SEMANTIC(x) (((x) & 0xFF) << 0)
#define G_028644_SEMANTIC(x) (((x) >> 0) & 0xFF)
@@ -1710,6 +1777,12 @@
#define R_0286DC_SPI_FOG_CNTL 0x000286DC
#define R_0286E4_SPI_PS_IN_CONTROL_2 0x000286E4
#define R_0286E8_SPI_COMPUTE_INPUT_CNTL 0x000286E8
+#define S_0286E8_TID_IN_GROUP_ENA 1
+#define S_0286E8_TGID_ENA 2
+#define S_0286E8_DISABLE_INDEX_PACK 4
+#define R_028720_GDS_ADDR_BASE 0x00028720
+#define R_028724_GDS_ADDR_SIZE 0x00028724
+#define R_028728_GDS_ORDERED_WAVE_PER_SE 0x00028728
#define R_028784_CB_BLEND1_CONTROL 0x00028784
#define R_028788_CB_BLEND2_CONTROL 0x00028788
#define R_02878C_CB_BLEND3_CONTROL 0x0002878C
@@ -1736,6 +1809,7 @@
#define C_02884C_EXPORT_Z 0xFFFFFFFE
#define R_02885C_SQ_PGM_START_VS 0x0002885C
#define R_0288A4_SQ_PGM_START_FS 0x000288A4
+#define R_0288D0_SQ_PGM_START_LS 0x000288d0
#define R_0288A8_SQ_PGM_RESOURCES_FS 0x000288A8
#define R_0288EC_SQ_LDS_ALLOC_PS 0x000288EC
#define R_028900_SQ_ESGS_RING_ITEMSIZE 0x00028900
diff --git a/src/gallium/drivers/r600/llvm_wrapper.cpp b/src/gallium/drivers/r600/llvm_wrapper.cpp
new file mode 100644
index 00000000000..174fb013c83
--- /dev/null
+++ b/src/gallium/drivers/r600/llvm_wrapper.cpp
@@ -0,0 +1,19 @@
+#include <llvm/ADT/OwningPtr.h>
+#include <llvm/ADT/StringRef.h>
+#include <llvm/LLVMContext.h>
+#include <llvm/Support/IRReader.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/Support/SourceMgr.h>
+
+#include "llvm_wrapper.h"
+
+
+/*
+ * Copy bitcode_len bytes of bitcode into an LLVM memory buffer, parse
+ * them with llvm::ParseIR in the global context and return the resulting
+ * module as a C API handle. The parse diagnostic is not inspected.
+ */
+extern "C" LLVMModuleRef llvm_parse_bitcode(const unsigned char * bitcode, unsigned bitcode_len)
+{
+	llvm::StringRef raw((const char*)bitcode, bitcode_len);
+	llvm::MemoryBuffer *copy = llvm::MemoryBuffer::getMemBufferCopy(raw);
+	llvm::SMDiagnostic parse_diag;
+
+	/* The module pointer is handed straight to wrap(); nothing else
+	 * holds on to it. */
+	return wrap(llvm::ParseIR(copy, parse_diag, llvm::getGlobalContext()));
+}
diff --git a/src/gallium/drivers/r600/llvm_wrapper.h b/src/gallium/drivers/r600/llvm_wrapper.h
new file mode 100644
index 00000000000..3a696455cdf
--- /dev/null
+++ b/src/gallium/drivers/r600/llvm_wrapper.h
@@ -0,0 +1,16 @@
+#ifndef LLVM_WRAPPER_H
+#define LLVM_WRAPPER_H
+
+#include <llvm-c/Core.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+LLVMModuleRef llvm_parse_bitcode(const unsigned char * bitcode, unsigned bitcode_len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/drivers/r600/r600_llvm.h b/src/gallium/drivers/r600/r600_llvm.h
index 0f6a1f88341..090d909a475 100644
--- a/src/gallium/drivers/r600/r600_llvm.h
+++ b/src/gallium/drivers/r600/r600_llvm.h
@@ -2,7 +2,7 @@
#ifndef R600_LLVM_H
#define R600_LLVM_H
-#ifdef R600_USE_LLVM
+#if defined R600_USE_LLVM || defined HAVE_OPENCL
#include "radeon_llvm.h"
#include <llvm-c/Core.h>
@@ -24,6 +24,6 @@ unsigned r600_llvm_compile(
enum radeon_family family,
unsigned dump);
-#endif /* R600_USE_LLVM */
+#endif /* defined R600_USE_LLVM || defined HAVE_OPENCL */
#endif /* R600_LLVM_H */
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index cb13ca767c9..e0ee823ce39 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -382,6 +382,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_USER_INDEX_BUFFERS:
case PIPE_CAP_USER_CONSTANT_BUFFERS:
+ case PIPE_CAP_COMPUTE:
return 1;
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
@@ -409,7 +410,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
- case PIPE_CAP_COMPUTE:
return 0;
/* Stream output. */
@@ -491,6 +491,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
{
case PIPE_SHADER_FRAGMENT:
case PIPE_SHADER_VERTEX:
+ case PIPE_SHADER_COMPUTE:
break;
case PIPE_SHADER_GEOMETRY:
/* XXX: support and enable geometry programs */
@@ -538,8 +539,12 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
return rscreen->glsl_feature_level >= 130;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
return 16;
- case PIPE_SHADER_CAP_PREFERRED_IR:
- return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_PREFERRED_IR:
+ if (shader == PIPE_SHADER_COMPUTE) {
+ return PIPE_SHADER_IR_LLVM;
+ } else {
+ return PIPE_SHADER_IR_TGSI;
+ }
}
return 0;
}
@@ -569,6 +574,81 @@ static int r600_get_video_param(struct pipe_screen *screen,
}
}
+/**
+ * Report a compute capability.
+ *
+ * When ret is non-NULL the value is stored there: a NUL-terminated
+ * string for PIPE_COMPUTE_CAP_IR_TARGET, otherwise one or three
+ * uint64_t values. The return value is the size in bytes of that value,
+ * or 0 for a capability that is not known (which is also logged to
+ * stderr).
+ */
+static int r600_get_compute_param(struct pipe_screen *screen,
+	enum pipe_compute_cap param,
+	void *ret)
+{
+	//TODO: select these params by asic
+	switch (param) {
+	case PIPE_COMPUTE_CAP_IR_TARGET: {
+		static const char ir_target[] = "r600--";
+
+		if (ret) {
+			strcpy(ret, ir_target);
+		}
+		/* Size includes the terminating NUL. */
+		return sizeof(ir_target);
+	}
+
+	case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+		if (ret) {
+			*(uint64_t *)ret = 3;
+		}
+		return 1 * sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+		if (ret) {
+			uint64_t *grid = ret;
+
+			grid[0] = 65535;
+			grid[1] = 65535;
+			grid[2] = 1;
+		}
+		return 3 * sizeof(uint64_t) ;
+
+	case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+		if (ret) {
+			uint64_t *block = ret;
+
+			block[0] = 256;
+			block[1] = 256;
+			block[2] = 256;
+		}
+		return 3 * sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+		if (ret) {
+			*(uint64_t *)ret = 256;
+		}
+		return sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
+		if (ret) {
+			/* XXX: This is what the proprietary driver reports, we
+			 * may want to use a different value. */
+			*(uint64_t *)ret = 201326592;
+		}
+		return sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
+		if (ret) {
+			*(uint64_t *)ret = 1024;
+		}
+		return sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
+		if (ret) {
+			/* XXX: This is what the proprietary driver reports, we
+			 * may want to use a different value. */
+			*(uint64_t *)ret = 32768;
+		}
+		return sizeof(uint64_t);
+
+	default:
+		fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
+		return 0;
+	}
+}
+
static void r600_destroy_screen(struct pipe_screen* pscreen)
{
struct r600_screen *rscreen = (struct r600_screen *)pscreen;
@@ -576,6 +656,10 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
if (rscreen == NULL)
return;
+ if (rscreen->global_pool) {
+ compute_memory_pool_delete(rscreen->global_pool);
+ }
+
if (rscreen->fences.bo) {
struct r600_fence_block *entry, *tmp;
@@ -833,6 +917,8 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
rscreen->screen.get_shader_param = r600_get_shader_param;
rscreen->screen.get_paramf = r600_get_paramf;
rscreen->screen.get_video_param = r600_get_video_param;
+ rscreen->screen.get_compute_param = r600_get_compute_param;
+
if (rscreen->chip_class >= EVERGREEN) {
rscreen->screen.is_format_supported = evergreen_is_format_supported;
} else {
@@ -857,5 +943,7 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
rscreen->use_surface_alloc = debug_get_bool_option("R600_SURF", TRUE);
rscreen->glsl_feature_level = debug_get_bool_option("R600_GLSL130", TRUE) ? 130 : 120;
+ rscreen->global_pool = compute_memory_pool_new(1024*16, rscreen);
+
return &rscreen->screen;
}
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index e5ba49c5ac5..f2865d2a22e 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -28,8 +28,11 @@
#include "util/u_slab.h"
#include "r600.h"
+#include "r600_llvm.h"
+#include "r600_public.h"
#include "r600_shader.h"
#include "r600_resource.h"
+#include "evergreen_compute.h"
#define R600_MAX_CONST_BUFFERS 2
#define R600_MAX_CONST_BUFFER_SIZE 4096
@@ -98,9 +101,16 @@ enum r600_pipe_state_id {
R600_PIPE_STATE_RESOURCE,
R600_PIPE_STATE_POLYGON_OFFSET,
R600_PIPE_STATE_FETCH_SHADER,
+ R600_PIPE_STATE_SPI,
R600_PIPE_NSTATES
};
+struct compute_memory_pool;
+void compute_memory_pool_delete(struct compute_memory_pool* pool);
+struct compute_memory_pool* compute_memory_pool_new(
+ int64_t initial_size_in_dw,
+ struct r600_screen *rscreen);
+
struct r600_pipe_fences {
struct r600_resource *bo;
unsigned *data;
@@ -123,6 +133,12 @@ struct r600_screen {
bool use_surface_alloc;
int glsl_feature_level;
+
+ /*for compute global memory binding, we allocate stuff here, instead of
+ * buffers.
+ * XXX: Not sure if this is the best place for global_pool. Also,
+ * it's not thread safe, so it won't work with multiple contexts. */
+ struct compute_memory_pool *global_pool;
};
struct r600_pipe_sampler_view {
@@ -257,6 +273,7 @@ struct r600_context {
struct pipe_clip_state clip;
struct r600_pipe_shader *ps_shader;
struct r600_pipe_shader *vs_shader;
+ struct r600_pipe_compute *cs_shader;
struct r600_pipe_rasterizer *rasterizer;
struct r600_pipe_state vgt;
struct r600_pipe_state spi;
@@ -266,7 +283,9 @@ struct r600_context {
unsigned saved_render_cond_mode;
/* shader information */
boolean two_side;
+ boolean spi_dirty;
unsigned sprite_coord_enable;
+ boolean flatshade;
boolean export_16bpc;
unsigned alpha_ref;
boolean alpha_ref_dirty;
@@ -412,6 +431,10 @@ void r600_init_context_resource_functions(struct r600_context *r600);
/* r600_shader.c */
int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+#ifdef HAVE_OPENCL
+int r600_compute_shader_create(struct pipe_context * ctx,
+ LLVMModuleRef mod, struct r600_bytecode * bytecode);
+#endif
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader);
int r600_find_vs_semantic_index(struct r600_shader *vs,
struct r600_shader *ps, int id);
diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c
index ef0b4ceffd0..0c14a2dc6bc 100644
--- a/src/gallium/drivers/r600/r600_resource.c
+++ b/src/gallium/drivers/r600/r600_resource.c
@@ -27,7 +27,12 @@ static struct pipe_resource *r600_resource_create(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
if (templ->target == PIPE_BUFFER) {
- return r600_buffer_create(screen, templ);
+ if (templ->bind & PIPE_BIND_GLOBAL) {
+ return r600_compute_global_buffer_create(screen, templ);
+ }
+ else {
+ return r600_buffer_create(screen, templ);
+ }
} else {
return r600_texture_create(screen, templ);
}
@@ -44,12 +49,21 @@ static struct pipe_resource *r600_resource_from_handle(struct pipe_screen * scre
}
}
+/**
+ * Destroy a resource: buffers created with PIPE_BIND_GLOBAL go to the
+ * compute global-buffer destructor, everything else to the generic
+ * vtbl-based destructor.
+ */
+void r600_resource_destroy(struct pipe_screen *screen, struct pipe_resource *res)
+{
+	if (res->target != PIPE_BUFFER || !(res->bind & PIPE_BIND_GLOBAL)) {
+		u_resource_destroy_vtbl(screen, res);
+		return;
+	}
+
+	r600_compute_global_buffer_destroy(screen, res);
+}
+
void r600_init_screen_resource_functions(struct pipe_screen *screen)
{
screen->resource_create = r600_resource_create;
screen->resource_from_handle = r600_resource_from_handle;
screen->resource_get_handle = u_resource_get_handle_vtbl;
- screen->resource_destroy = u_resource_destroy_vtbl;
+ screen->resource_destroy = r600_resource_destroy;
}
void r600_init_context_resource_functions(struct r600_context *r600)
diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h
index 87bef730654..d401e40c5ba 100644
--- a/src/gallium/drivers/r600/r600_resource.h
+++ b/src/gallium/drivers/r600/r600_resource.h
@@ -34,6 +34,13 @@ struct r600_transfer {
unsigned offset;
};
+struct compute_memory_item;
+
+/* Resource wrapper carrying a pointer to a compute memory item.
+ * NOTE(review): presumably used for PIPE_BIND_GLOBAL buffers, with chunk
+ * being the allocation inside the compute memory pool - confirm against
+ * r600_compute_global_buffer_create(). */
+struct r600_resource_global {
+ struct r600_resource base;
+ struct compute_memory_item *chunk;
+};
+
struct r600_resource_texture {
struct r600_resource resource;
@@ -65,6 +72,7 @@ struct r600_surface {
unsigned aligned_height;
};
+void r600_resource_destroy(struct pipe_screen *screen, struct pipe_resource *res);
void r600_init_screen_resource_functions(struct pipe_screen *screen);
/* r600_texture */
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index dc208b923cb..5f3c76eafbb 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -225,6 +225,37 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
* struct r600_bytecode.
*/
+static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
+ unsigned char * bytes, unsigned num_bytes); /* forward declaration so r600_compute_shader_create, defined next, can call it */
+
+#ifdef HAVE_OPENCL
+int r600_compute_shader_create(struct pipe_context * ctx, /* Compile LLVM module `mod` and load the result into `bytecode` as a compute shader. */
+ LLVMModuleRef mod, struct r600_bytecode * bytecode) /* `bytecode` is caller-provided and is (re)initialised below; the function always returns 1. */
+{
+ struct r600_context *r600_ctx = (struct r600_context *)ctx;
+ unsigned char * bytes; /* filled in by r600_llvm_compile; NOTE(review): never released in this function - confirm who owns the buffer */
+ unsigned byte_count;
+ struct r600_shader_ctx shader_ctx; /* only the .bc member is assigned here; all other members stay uninitialised */
+ unsigned dump = 0;
+
+ if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) { /* debug option, defaults to FALSE */
+ dump = 1;
+ }
+
+ r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family , dump); /* NOTE(review): any result or failure indication is ignored */
+ shader_ctx.bc = bytecode;
+ r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family);
+ shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE; /* set after r600_bytecode_init and before the byte stream is parsed */
+ r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count);
+ r600_bytecode_build(shader_ctx.bc);
+ if (dump) { /* same flag that was passed to r600_llvm_compile above */
+ r600_bytecode_dump(shader_ctx.bc);
+ }
+ return 1; /* unconditional: no failure path is reported to the caller */
+}
+
+#endif /* HAVE_OPENCL */
+
static unsigned r600_src_from_byte_stream(unsigned char * bytes,
unsigned bytes_read, struct r600_bytecode_alu * alu, unsigned src_idx)
{
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index d6f85c38c32..5b159908adb 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -916,6 +916,10 @@ void* r600_texture_transfer_map(struct pipe_context *ctx,
unsigned offset = 0;
char *map;
+ if ((transfer->resource->bind & PIPE_BIND_GLOBAL) && transfer->resource->target == PIPE_BUFFER) {
+ return r600_compute_global_transfer_map(ctx, transfer);
+ }
+
if (rtransfer->staging) {
buf = ((struct r600_resource *)rtransfer->staging)->cs_buf;
} else {
@@ -945,6 +949,10 @@ void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct r600_context *rctx = (struct r600_context*)ctx;
struct radeon_winsys_cs_handle *buf;
+ if ((transfer->resource->bind & PIPE_BIND_GLOBAL) && transfer->resource->target == PIPE_BUFFER) {
+ return r600_compute_global_transfer_unmap(ctx, transfer);
+ }
+
if (rtransfer->staging) {
buf = ((struct r600_resource *)rtransfer->staging)->cs_buf;
} else {