summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/drivers/r600/eg_asm.c35
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c97
-rw-r--r--src/gallium/drivers/r600/r600_asm.c317
-rw-r--r--src/gallium/drivers/r600/r600_asm.h8
-rw-r--r--src/gallium/drivers/r600/r600_buffer.c5
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c2
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h20
-rw-r--r--src/gallium/drivers/r600/r600_shader.c18
-rw-r--r--src/gallium/drivers/r600/r600_state.c83
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c35
-rw-r--r--src/gallium/drivers/r600/r600_translate.c43
11 files changed, 619 insertions, 44 deletions
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 21d66fa9564..b79875c7c75 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -27,6 +27,7 @@
#include "r600_asm.h"
#include "eg_sq.h"
#include "r600_opcodes.h"
+#include "evergreend.h"
int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
{
@@ -89,3 +90,37 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
}
return 0;
}
+
+void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
+{
+ struct r600_pipe_state *rstate;
+ unsigned i = 0;
+
+ if (count > 8) {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(8 - 1);
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count - 8 - 1);
+ } else {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count - 1);
+ }
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
+ S_SQ_CF_WORD1_BARRIER(1);
+
+ rstate = &ve->rstate;
+ rstate->id = R600_PIPE_STATE_FETCH_SHADER;
+ rstate->nregs = 0;
+ r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS,
+ (r600_bo_offset(ve->fetch_shader)) >> 8,
+ 0xFFFFFFFF, ve->fetch_shader);
+}
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index ebd541d5f8d..b313d525012 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1259,6 +1259,90 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx)
}
}
+void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx)
+{
+ struct r600_pipe_state *rstate;
+ struct r600_resource *rbuffer;
+ struct pipe_vertex_buffer *vertex_buffer;
+ unsigned i, offset;
+
+ /* we don't update until we know vertex elements */
+ if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer)
+ return;
+
+ /* delete previous translated vertex elements */
+ if (rctx->tran.new_velems) {
+ r600_end_vertex_translate(rctx);
+ }
+
+ if (rctx->vertex_elements->incompatible_layout) {
+ /* translate rebind new vertex elements so
+ * return once translated
+ */
+ r600_begin_vertex_translate(rctx);
+ return;
+ }
+
+ if (rctx->any_user_vbs) {
+ r600_upload_user_buffers(rctx);
+ rctx->any_user_vbs = FALSE;
+ }
+
+ if (rctx->vertex_elements->vbuffer_need_offset) {
+ /* one resource per vertex elements */
+ rctx->nvs_resource = rctx->vertex_elements->count;
+ } else {
+ /* bind vertex buffer once */
+ rctx->nvs_resource = rctx->nvertex_buffer;
+ }
+
+ for (i = 0 ; i < rctx->nvs_resource; i++) {
+ rstate = &rctx->vs_resource[i];
+ rstate->id = R600_PIPE_STATE_RESOURCE;
+ rstate->nregs = 0;
+
+ if (rctx->vertex_elements->vbuffer_need_offset) {
+ /* one resource per vertex elements */
+ unsigned vbuffer_index;
+ vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index;
+ vertex_buffer = &rctx->vertex_buffer[vbuffer_index];
+ rbuffer = (struct r600_resource*)vertex_buffer->buffer;
+ offset = rctx->vertex_elements->vbuffer_offset[i] +
+ vertex_buffer->buffer_offset +
+ r600_bo_offset(rbuffer->bo);
+ } else {
+ /* bind vertex buffer once */
+ vertex_buffer = &rctx->vertex_buffer[i];
+ rbuffer = (struct r600_resource*)vertex_buffer->buffer;
+ offset = vertex_buffer->buffer_offset +
+ r600_bo_offset(rbuffer->bo);
+ }
+
+ r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0,
+ offset, 0xFFFFFFFF, rbuffer->bo);
+ r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1,
+ rbuffer->size - offset - 1, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2,
+ S_030008_STRIDE(vertex_buffer->stride),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3,
+ S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
+ S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
+ S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
+ S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7,
+ 0xC0000000, 0xFFFFFFFF, NULL);
+ evergreen_fs_resource_set(&rctx->ctx, rstate, i);
+ }
+}
+
int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
{
@@ -1273,6 +1357,7 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
struct r600_drawl draw;
boolean translate = FALSE;
+#if 0
if (rctx->vertex_elements->incompatible_layout) {
r600_begin_vertex_translate(rctx);
translate = TRUE;
@@ -1282,6 +1367,7 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
r600_upload_user_buffers(rctx);
rctx->any_user_vbs = FALSE;
}
+#endif
memset(&draw, 0, sizeof(struct r600_drawl));
draw.ctx = ctx;
@@ -1338,6 +1424,7 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader))
return;
+#if 0
for (i = 0 ; i < rctx->vertex_elements->count; i++) {
uint32_t word3, word2;
uint32_t format;
@@ -1372,6 +1459,7 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, 0xC0000000, 0xFFFFFFFF, NULL);
evergreen_fs_resource_set(&rctx->ctx, rstate, i);
}
+#endif
mask = 0;
for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
@@ -1588,14 +1676,17 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
R_028864_SQ_PGM_RESOURCES_2_VS,
0x0, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
- R_0288A8_SQ_PGM_RESOURCES_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
R_02885C_SQ_PGM_START_VS,
(r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo);
+
+#if 0
+ r600_pipe_state_add_reg(rstate,
+ R_0288A8_SQ_PGM_RESOURCES_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
R_0288A4_SQ_PGM_START_FS,
(r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo_fetch);
+#endif
r600_pipe_state_add_reg(rstate,
R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 73daa000809..e13c606434f 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -22,12 +22,15 @@
*/
#include <stdio.h>
#include <errno.h>
+#include "util/u_format.h"
#include "util/u_memory.h"
#include "pipe/p_shader_tokens.h"
#include "r600_pipe.h"
#include "r600_sq.h"
#include "r600_opcodes.h"
#include "r600_asm.h"
+#include "r600_formats.h"
+#include "r600d.h"
static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu)
{
@@ -972,3 +975,317 @@ void r600_bc_dump(struct r600_bc *bc)
}
fprintf(stderr, "--------------------------------------\n");
}
+
+void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
+{
+ struct r600_pipe_state *rstate;
+ unsigned i = 0;
+
+ if (count > 8) {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(8 - 1);
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count - 8 - 1);
+ } else {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count - 1);
+ }
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
+ S_SQ_CF_WORD1_BARRIER(1);
+
+ rstate = &ve->rstate;
+ rstate->id = R600_PIPE_STATE_FETCH_SHADER;
+ rstate->nregs = 0;
+ r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
+ r600_bo_offset(ve->fetch_shader) >> 8,
+ 0xFFFFFFFF, ve->fetch_shader);
+}
+
+void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
+{
+ struct r600_pipe_state *rstate;
+ unsigned i = 0;
+
+ if (count > 8) {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(8 - 1);
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT((count - 8) - 1);
+ } else {
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
+ S_SQ_CF_WORD1_BARRIER(1) |
+ S_SQ_CF_WORD1_COUNT(count - 1);
+ }
+ bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
+ bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
+ S_SQ_CF_WORD1_BARRIER(1);
+
+ rstate = &ve->rstate;
+ rstate->id = R600_PIPE_STATE_FETCH_SHADER;
+ rstate->nregs = 0;
+ r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
+ r600_bo_offset(ve->fetch_shader) >> 8,
+ 0xFFFFFFFF, ve->fetch_shader);
+}
+
+static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
+ unsigned *num_format, unsigned *format_comp)
+{
+ const struct util_format_description *desc;
+ unsigned i;
+
+ *format = 0;
+ *num_format = 0;
+ *format_comp = 0;
+
+ desc = util_format_description(pformat);
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
+ goto out_unknown;
+ }
+
+ /* Find the first non-VOID channel. */
+ for (i = 0; i < 4; i++) {
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+ break;
+ }
+ }
+
+ switch (desc->channel[i].type) {
+ /* Half-floats, floats, doubles */
+ case UTIL_FORMAT_TYPE_FLOAT:
+ switch (desc->channel[i].size) {
+ case 16:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_16_FLOAT;
+ break;
+ case 2:
+ *format = FMT_16_16_FLOAT;
+ break;
+ case 3:
+ *format = FMT_16_16_16_FLOAT;
+ break;
+ case 4:
+ *format = FMT_16_16_16_16_FLOAT;
+ break;
+ }
+ break;
+ case 32:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_32_FLOAT;
+ break;
+ case 2:
+ *format = FMT_32_32_FLOAT;
+ break;
+ case 3:
+ *format = FMT_32_32_32_FLOAT;
+ break;
+ case 4:
+ *format = FMT_32_32_32_32_FLOAT;
+ break;
+ }
+ break;
+ default:
+ goto out_unknown;
+ }
+ break;
+ /* Unsigned ints */
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ /* Signed ints */
+ case UTIL_FORMAT_TYPE_SIGNED:
+ switch (desc->channel[i].size) {
+ case 8:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_8;
+ break;
+ case 2:
+ *format = FMT_8_8;
+ break;
+ case 3:
+ // *format = FMT_8_8_8; /* fails piglit draw-vertices test */
+ // break;
+ case 4:
+ *format = FMT_8_8_8_8;
+ break;
+ }
+ break;
+ case 16:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_16;
+ break;
+ case 2:
+ *format = FMT_16_16;
+ break;
+ case 3:
+ // *format = FMT_16_16_16; /* fails piglit draw-vertices test */
+ // break;
+ case 4:
+ *format = FMT_16_16_16_16;
+ break;
+ }
+ break;
+ case 32:
+ switch (desc->nr_channels) {
+ case 1:
+ *format = FMT_32;
+ break;
+ case 2:
+ *format = FMT_32_32;
+ break;
+ case 3:
+ *format = FMT_32_32_32;
+ break;
+ case 4:
+ *format = FMT_32_32_32_32;
+ break;
+ }
+ break;
+ default:
+ goto out_unknown;
+ }
+ break;
+ default:
+ goto out_unknown;
+ }
+
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
+ *format_comp = 1;
+ }
+ if (desc->channel[i].normalized) {
+ *num_format = 0;
+ } else {
+ *num_format = 2;
+ }
+ return;
+out_unknown:
+ R600_ERR("unsupported vertex format %s\n", util_format_name(pformat));
+}
+
+void r600_bc(unsigned ndw, unsigned chiprev, u32 *bytecode)
+{
+ unsigned i;
+ char chip = '6';
+
+ switch (chiprev) {
+ case 1:
+ chip = '7';
+ break;
+ case 2:
+ chip = 'E';
+ break;
+ case 0:
+ default:
+ chip = '6';
+ break;
+ }
+ fprintf(stderr, "bytecode %d dw -----------------------\n", ndw);
+ fprintf(stderr, " %c\n", chip);
+ for (i = 0; i < ndw; i++) {
+ fprintf(stderr, "0x%08X\n", bytecode[i]);
+ }
+ fprintf(stderr, "--------------------------------------\n");
+}
+
+int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve)
+{
+ unsigned ndw, i;
+ u32 *bytecode;
+ unsigned fetch_resource_start = 0, format, num_format, format_comp;
+ struct pipe_vertex_element *elements = ve->elements;
+ const struct util_format_description *desc;
+
+ /* 2 dwords for cf aligned to 4 + 4 dwords per input */
+ ndw = 8 + ve->count * 4;
+ ve->fs_size = ndw * 4;
+
+ /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
+ ve->fetch_shader = r600_bo(rctx->radeon, ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0);
+ if (ve->fetch_shader == NULL) {
+ return -ENOMEM;
+ }
+
+ bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL);
+ if (bytecode == NULL) {
+ r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
+ return -ENOMEM;
+ }
+
+ if (rctx->family >= CHIP_CEDAR) {
+ eg_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
+ } else {
+ r600_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
+ fetch_resource_start = 160;
+ }
+
+ /* vertex elements offset need special handling, if offset is bigger
+ * than what we can put in fetch instruction then we need to alterate
+ * the vertex resource offset. In such case in order to simplify code
+ * we will bound one resource per elements. It's a worst case scenario.
+ */
+ for (i = 0; i < ve->count; i++) {
+ ve->vbuffer_offset[i] = C_SQ_VTX_WORD2_OFFSET & elements[i].src_offset;
+ if (ve->vbuffer_offset[i]) {
+ ve->vbuffer_need_offset = 1;
+ }
+ }
+
+ for (i = 0; i < ve->count; i++) {
+ unsigned vbuffer_index;
+ r600_vertex_data_type(ve->hw_format[i], &format, &num_format, &format_comp);
+ desc = util_format_description(ve->hw_format[i]);
+ if (desc == NULL) {
+ R600_ERR("unknown format %d\n", ve->hw_format[i]);
+ r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
+ return -EINVAL;
+ }
+
+ /* see above for vbuffer_need_offset explanation */
+ vbuffer_index = elements[i].vertex_buffer_index;
+ if (ve->vbuffer_need_offset) {
+ bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(i + fetch_resource_start);
+ } else {
+ bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(vbuffer_index + fetch_resource_start);
+ }
+ bytecode[8 + i * 4 + 0] |= S_SQ_VTX_WORD0_SRC_GPR(0) |
+ S_SQ_VTX_WORD0_SRC_SEL_X(0) |
+ S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F);
+ bytecode[8 + i * 4 + 1] = S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]) |
+ S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]) |
+ S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]) |
+ S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]) |
+ S_SQ_VTX_WORD1_USE_CONST_FIELDS(0) |
+ S_SQ_VTX_WORD1_DATA_FORMAT(format) |
+ S_SQ_VTX_WORD1_NUM_FORMAT_ALL(num_format) |
+ S_SQ_VTX_WORD1_FORMAT_COMP_ALL(format_comp) |
+ S_SQ_VTX_WORD1_SRF_MODE_ALL(1) |
+ S_SQ_VTX_WORD1_GPR_DST_GPR(i + 1);
+ bytecode[8 + i * 4 + 2] = S_SQ_VTX_WORD2_OFFSET(elements[i].src_offset) |
+ S_SQ_VTX_WORD2_MEGA_FETCH(1);
+ bytecode[8 + i * 4 + 3] = 0;
+ }
+ r600_bo_unmap(rctx->radeon, ve->fetch_shader);
+ return 0;
+}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 1be5e4a396a..b147f0f5c88 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -28,6 +28,9 @@
#define NUM_OF_CYCLES 3
#define NUM_OF_COMPONENTS 4
+struct r600_vertex_element;
+struct r600_pipe_context;
+
struct r600_bc_alu_src {
unsigned sel;
unsigned chan;
@@ -188,6 +191,7 @@ struct r600_bc {
/* eg_asm.c */
int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf);
+void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
/* r600_asm.c */
int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
@@ -201,6 +205,10 @@ int r600_bc_build(struct r600_bc *bc);
int r600_bc_add_cfinst(struct r600_bc *bc, int inst);
int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type);
void r600_bc_dump(struct r600_bc *bc);
+void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
+void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
+
+int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve);
/* r700_asm.c */
int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id);
diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c
index 51b8abaaa12..03a61a3213c 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -267,10 +267,11 @@ int r600_upload_user_buffers(struct r600_pipe_context *rctx)
int i, nr;
nr = rctx->vertex_elements->count;
+ nr = rctx->nvertex_buffer;
for (i = 0; i < nr; i++) {
- struct pipe_vertex_buffer *vb =
- &rctx->vertex_buffer[rctx->vertex_elements->elements[i].vertex_buffer_index];
+// struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[rctx->vertex_elements->elements[i].vertex_buffer_index];
+ struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i];
if (r600_buffer_is_user_buffer(vb->buffer)) {
struct pipe_resource *upload_buffer = NULL;
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index fa0b635636b..ea57fba8e47 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -90,6 +90,8 @@ static void r600_destroy_context(struct pipe_context *context)
u_upload_destroy(rctx->upload_vb);
u_upload_destroy(rctx->upload_ib);
+ r600_end_vertex_translate(rctx);
+
if (rctx->tran.translate_cache)
translate_cache_destroy(rctx->tran.translate_cache);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index deec946e5d4..ce9f99a7667 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -54,6 +54,7 @@ enum r600_pipe_state_id {
R600_PIPE_STATE_SAMPLER,
R600_PIPE_STATE_RESOURCE,
R600_PIPE_STATE_POLYGON_OFFSET,
+ R600_PIPE_STATE_FETCH_SHADER,
R600_PIPE_NSTATES
};
@@ -87,7 +88,15 @@ struct r600_vertex_element
struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS];
enum pipe_format hw_format[PIPE_MAX_ATTRIBS];
unsigned hw_format_size[PIPE_MAX_ATTRIBS];
- boolean incompatible_layout;
+ boolean incompatible_layout;
+ struct r600_bo *fetch_shader;
+ unsigned fs_size;
+ struct r600_pipe_state rstate;
+ /* if offset is to big for fetch instructio we need to alterate
+ * offset of vertex buffer, record here the offset need to add
+ */
+ unsigned vbuffer_need_offset;
+ unsigned vbuffer_offset[PIPE_MAX_ATTRIBS];
};
struct r600_pipe_shader {
@@ -108,14 +117,14 @@ struct r600_textures_info {
unsigned n_samplers;
};
+/* vertex buffer translation context, used to translate vertex input that
+ * hw doesn't natively support, so far only FLOAT64 is unsupported.
+ */
struct r600_translate_context {
/* Translate cache for incompatible vertex offset/stride/format fallback. */
struct translate_cache *translate_cache;
-
/* The vertex buffer slot containing the translated buffer. */
unsigned vb_slot;
- /* Saved and new vertex element state. */
- void *saved_velems;
void *new_velems;
};
@@ -142,6 +151,7 @@ struct r600_pipe_context {
struct pipe_stencil_ref stencil_ref;
struct pipe_viewport_state viewport;
struct pipe_clip_state clip;
+ unsigned nvs_resource;
struct r600_pipe_state *vs_resource;
struct r600_pipe_state *ps_resource;
struct r600_pipe_state config;
@@ -182,6 +192,7 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader);
void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx);
void evergreen_polygon_offset_update(struct r600_pipe_context *rctx);
+void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx);
/* r600_blit.c */
void r600_init_blit_functions(struct r600_pipe_context *rctx);
@@ -220,6 +231,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info);
void r600_init_config(struct r600_pipe_context *rctx);
void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx);
void r600_polygon_offset_update(struct r600_pipe_context *rctx);
+void r600_vertex_buffer_update(struct r600_pipe_context *rctx);
/* r600_helper.h */
int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index f53124d1009..e40cd1dbcf1 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -67,21 +67,23 @@ static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shade
S_028868_STACK_SIZE(rshader->bc.nstack),
0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
- R_0288A4_SQ_PGM_RESOURCES_FS,
+ R_0288D0_SQ_PGM_CF_OFFSET_VS,
0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
- R_0288D0_SQ_PGM_CF_OFFSET_VS,
+ R_028858_SQ_PGM_START_VS,
+ r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
+
+#if 0
+ r600_pipe_state_add_reg(rstate,
+ R_0288A4_SQ_PGM_RESOURCES_FS,
0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
R_0288DC_SQ_PGM_CF_OFFSET_FS,
0x00000000, 0xFFFFFFFF, NULL);
r600_pipe_state_add_reg(rstate,
- R_028858_SQ_PGM_START_VS,
- r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
- r600_pipe_state_add_reg(rstate,
R_028894_SQ_PGM_START_FS,
r600_bo_offset(shader->bo_fetch) >> 8, 0xFFFFFFFF, shader->bo_fetch);
-
+#endif
r600_pipe_state_add_reg(rstate,
R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
0xFFFFFFFF, NULL);
@@ -261,6 +263,7 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s
static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader)
{
+#if 0
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_shader *shader = &rshader->shader;
const struct util_format_description *desc;
@@ -304,6 +307,9 @@ static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader
}
}
return r600_bc_build(&shader->bc_fetch);
+#else
+ return 0;
+#endif
}
int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader)
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index c592ef2bd05..9b70942eebf 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -94,6 +94,84 @@ void r600_polygon_offset_update(struct r600_pipe_context *rctx)
}
}
+void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
+{
+ struct r600_pipe_state *rstate;
+ struct r600_resource *rbuffer;
+ struct pipe_vertex_buffer *vertex_buffer;
+ unsigned i, offset;
+
+ /* we don't update until we know vertex elements */
+ if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer)
+ return;
+
+ /* delete previous translated vertex elements */
+ if (rctx->tran.new_velems) {
+ r600_end_vertex_translate(rctx);
+ }
+
+ if (rctx->vertex_elements->incompatible_layout) {
+ /* translate rebind new vertex elements so
+ * return once translated
+ */
+ r600_begin_vertex_translate(rctx);
+ return;
+ }
+
+ if (rctx->any_user_vbs) {
+ r600_upload_user_buffers(rctx);
+ rctx->any_user_vbs = FALSE;
+ }
+
+ if (rctx->vertex_elements->vbuffer_need_offset) {
+ /* one resource per vertex elements */
+ rctx->nvs_resource = rctx->vertex_elements->count;
+ } else {
+ /* bind vertex buffer once */
+ rctx->nvs_resource = rctx->nvertex_buffer;
+ }
+
+ for (i = 0 ; i < rctx->nvs_resource; i++) {
+ rstate = &rctx->vs_resource[i];
+ rstate->id = R600_PIPE_STATE_RESOURCE;
+ rstate->nregs = 0;
+
+ if (rctx->vertex_elements->vbuffer_need_offset) {
+ /* one resource per vertex elements */
+ unsigned vbuffer_index;
+ vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index;
+ vertex_buffer = &rctx->vertex_buffer[vbuffer_index];
+ rbuffer = (struct r600_resource*)vertex_buffer->buffer;
+ offset = rctx->vertex_elements->vbuffer_offset[i] +
+ vertex_buffer->buffer_offset +
+ r600_bo_offset(rbuffer->bo);
+ } else {
+ /* bind vertex buffer once */
+ vertex_buffer = &rctx->vertex_buffer[i];
+ rbuffer = (struct r600_resource*)vertex_buffer->buffer;
+ offset = vertex_buffer->buffer_offset +
+ r600_bo_offset(rbuffer->bo);
+ }
+
+ r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0,
+ offset, 0xFFFFFFFF, rbuffer->bo);
+ r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
+ rbuffer->size - offset - 1, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2,
+ S_038008_STRIDE(vertex_buffer->stride),
+ 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
+ 0x00000000, 0xFFFFFFFF, NULL);
+ r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
+ 0xC0000000, 0xFFFFFFFF, NULL);
+ r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
+ }
+}
+
static void r600_draw_common(struct r600_drawl *draw)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx;
@@ -132,6 +210,7 @@ static void r600_draw_common(struct r600_drawl *draw)
if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader))
return;
+#if 0
for (i = 0 ; i < rctx->vertex_elements->count; i++) {
uint32_t word2, format;
@@ -159,6 +238,7 @@ static void r600_draw_common(struct r600_drawl *draw)
r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, 0xC0000000, 0xFFFFFFFF, NULL);
r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
}
+#endif
mask = 0;
for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
@@ -195,6 +275,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
struct r600_drawl draw;
boolean translate = FALSE;
+#if 0
if (rctx->vertex_elements->incompatible_layout) {
r600_begin_vertex_translate(rctx);
translate = TRUE;
@@ -204,6 +285,8 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
r600_upload_user_buffers(rctx);
rctx->any_user_vbs = FALSE;
}
+#endif
+
memset(&draw, 0, sizeof(struct r600_drawl));
draw.ctx = ctx;
draw.mode = info->mode;
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 856f79158c0..889432732cf 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -121,6 +121,16 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
rctx->vertex_elements = v;
if (v) {
+ rctx->states[v->rstate.id] = &v->rstate;
+ r600_context_pipe_state_set(&rctx->ctx, &v->rstate);
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_vertex_buffer_update(rctx);
+ } else {
+ r600_vertex_buffer_update(rctx);
+ }
+ }
+
+ if (v) {
// rctx->vs_rebuild = TRUE;
}
}
@@ -128,11 +138,16 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
void r600_delete_vertex_element(struct pipe_context *ctx, void *state)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+ struct r600_vertex_element *v = (struct r600_vertex_element*)state;
- FREE(state);
-
+ if (rctx->states[v->rstate.id] == &v->rstate) {
+ rctx->states[v->rstate.id] = NULL;
+ }
if (rctx->vertex_elements == state)
rctx->vertex_elements = NULL;
+
+ r600_bo_reference(rctx->radeon, &v->fetch_shader, NULL);
+ FREE(state);
}
@@ -182,6 +197,11 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
}
rctx->nvertex_buffer = count;
rctx->vb_max_index = max_index;
+ if (rctx->family >= CHIP_CEDAR) {
+ evergreen_vertex_buffer_update(rctx);
+ } else {
+ r600_vertex_buffer_update(rctx);
+ }
}
@@ -192,9 +212,10 @@ void *r600_create_vertex_elements(struct pipe_context *ctx,
unsigned count,
const struct pipe_vertex_element *elements)
{
+ struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element);
- int i;
enum pipe_format *format;
+ int i;
assert(count < 32);
if (!v)
@@ -216,12 +237,16 @@ void *r600_create_vertex_elements(struct pipe_context *ctx,
}
v->incompatible_layout =
v->incompatible_layout ||
- v->elements[i].src_format != v->hw_format[i] ||
- v->elements[i].src_offset % 4 != 0;
+ v->elements[i].src_format != v->hw_format[i];
v->hw_format_size[i] = align(util_format_get_blocksize(v->hw_format[i]), 4);
}
+ if (r600_vertex_elements_build_fetch_shader(rctx, v)) {
+ FREE(v);
+ return NULL;
+ }
+
return v;
}
diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c
index 2e082f1cff0..d927f53398d 100644
--- a/src/gallium/drivers/r600/r600_translate.c
+++ b/src/gallium/drivers/r600/r600_translate.c
@@ -41,6 +41,7 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx)
struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer;
struct pipe_resource *out_buffer;
unsigned i, num_verts;
+ struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS];
/* Initialize the translate key, i.e. the recipe how vertices should be
* translated. */
@@ -51,9 +52,7 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx)
unsigned output_format_size = ve->hw_format_size[i];
/* Check for support. */
- if (ve->elements[i].src_format == ve->hw_format[i] &&
- (vb->buffer_offset + ve->elements[i].src_offset) % 4 == 0 &&
- vb->stride % 4 == 0) {
+ if (ve->elements[i].src_format == ve->hw_format[i]) {
continue;
}
@@ -147,29 +146,22 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx)
}
/* Save and replace vertex elements. */
- {
- struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS];
-
- rctx->tran.saved_velems = rctx->vertex_elements;
-
- for (i = 0; i < ve->count; i++) {
- if (vb_translated[ve->elements[i].vertex_buffer_index]) {
- te = &key.element[tr_elem_index[i]];
- new_velems[i].instance_divisor = ve->elements[i].instance_divisor;
- new_velems[i].src_format = te->output_format;
- new_velems[i].src_offset = te->output_offset;
- new_velems[i].vertex_buffer_index = rctx->tran.vb_slot;
- } else {
- memcpy(&new_velems[i], &ve->elements[i],
- sizeof(struct pipe_vertex_element));
- }
+ for (i = 0; i < ve->count; i++) {
+ if (vb_translated[ve->elements[i].vertex_buffer_index]) {
+ te = &key.element[tr_elem_index[i]];
+ new_velems[i].instance_divisor = ve->elements[i].instance_divisor;
+ new_velems[i].src_format = te->output_format;
+ new_velems[i].src_offset = te->output_offset;
+ new_velems[i].vertex_buffer_index = rctx->tran.vb_slot;
+ } else {
+ memcpy(&new_velems[i], &ve->elements[i],
+ sizeof(struct pipe_vertex_element));
}
-
- rctx->tran.new_velems =
- pipe->create_vertex_elements_state(pipe, ve->count, new_velems);
- pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems);
}
+ rctx->tran.new_velems = pipe->create_vertex_elements_state(pipe, ve->count, new_velems);
+ pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems);
+
pipe_resource_reference(&out_buffer, NULL);
}
@@ -178,8 +170,11 @@ void r600_end_vertex_translate(struct r600_pipe_context *rctx)
struct pipe_context *pipe = &rctx->context;
/* Restore vertex elements. */
- pipe->bind_vertex_elements_state(pipe, rctx->tran.saved_velems);
+ if (rctx->vertex_elements == rctx->tran.new_velems) {
+ pipe->bind_vertex_elements_state(pipe, NULL);
+ }
pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems);
+ rctx->tran.new_velems = NULL;
/* Delete the now-unused VBO. */
pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer,