diff options
author | Christian König <[email protected]> | 2011-04-23 14:27:40 +0200 |
---|---|---|
committer | Christian König <[email protected]> | 2011-04-23 14:27:40 +0200 |
commit | fa31b1095eeea97695125ad5770239805bed37da (patch) | |
tree | 6c421666719a9a1afc419de33d06f4e66584a8a1 /src/gallium/drivers/r600 | |
parent | 24d76d2966a5c666c9627034e6751621b17024c8 (diff) | |
parent | 15eaf8297ecb39337109b95480e61f37a6b20f0a (diff) |
Merge remote branch 'origin/master' into pipe-video
Conflicts:
configs/linux-dri
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_state.c
src/gallium/include/pipe/p_format.h
src/gallium/tests/graw/fragment-shader/frag-abs.sh
src/gallium/tests/graw/fragment-shader/frag-add.sh
src/gallium/tests/graw/fragment-shader/frag-cb-1d.sh
src/gallium/tests/graw/fragment-shader/frag-cb-2d.sh
src/gallium/tests/graw/fragment-shader/frag-dp3.sh
src/gallium/tests/graw/fragment-shader/frag-dp4.sh
src/gallium/tests/graw/fragment-shader/frag-dst.sh
src/gallium/tests/graw/fragment-shader/frag-ex2.sh
src/gallium/tests/graw/fragment-shader/frag-face.sh
src/gallium/tests/graw/fragment-shader/frag-flr.sh
src/gallium/tests/graw/fragment-shader/frag-frc.sh
src/gallium/tests/graw/fragment-shader/frag-kil.sh
src/gallium/tests/graw/fragment-shader/frag-lg2.sh
src/gallium/tests/graw/fragment-shader/frag-lit.sh
src/gallium/tests/graw/fragment-shader/frag-lrp.sh
src/gallium/tests/graw/fragment-shader/frag-mad-immx.sh
src/gallium/tests/graw/fragment-shader/frag-mad.sh
src/gallium/tests/graw/fragment-shader/frag-max.sh
src/gallium/tests/graw/fragment-shader/frag-min.sh
src/gallium/tests/graw/fragment-shader/frag-mov.sh
src/gallium/tests/graw/fragment-shader/frag-mul.sh
src/gallium/tests/graw/fragment-shader/frag-rcp.sh
src/gallium/tests/graw/fragment-shader/frag-rsq.sh
src/gallium/tests/graw/fragment-shader/frag-sge.sh
src/gallium/tests/graw/fragment-shader/frag-slt.sh
src/gallium/tests/graw/fragment-shader/frag-srcmod-abs.sh
src/gallium/tests/graw/fragment-shader/frag-srcmod-absneg.sh
src/gallium/tests/graw/fragment-shader/frag-srcmod-neg.sh
src/gallium/tests/graw/fragment-shader/frag-srcmod-swz.sh
src/gallium/tests/graw/fragment-shader/frag-sub.sh
src/gallium/tests/graw/fragment-shader/frag-tempx.sh
src/gallium/tests/graw/fragment-shader/frag-xpd.sh
src/gallium/tests/graw/vertex-shader/vert-abs.sh
src/gallium/tests/graw/vertex-shader/vert-add.sh
src/gallium/tests/graw/vertex-shader/vert-arl.sh
src/gallium/tests/graw/vertex-shader/vert-arr.sh
src/gallium/tests/graw/vertex-shader/vert-cb-1d.sh
src/gallium/tests/graw/vertex-shader/vert-cb-2d.sh
src/gallium/tests/graw/vertex-shader/vert-dp3.sh
src/gallium/tests/graw/vertex-shader/vert-dp4.sh
src/gallium/tests/graw/vertex-shader/vert-dst.sh
src/gallium/tests/graw/vertex-shader/vert-ex2.sh
src/gallium/tests/graw/vertex-shader/vert-flr.sh
src/gallium/tests/graw/vertex-shader/vert-frc.sh
src/gallium/tests/graw/vertex-shader/vert-lg2.sh
src/gallium/tests/graw/vertex-shader/vert-lit.sh
src/gallium/tests/graw/vertex-shader/vert-lrp.sh
src/gallium/tests/graw/vertex-shader/vert-mad.sh
src/gallium/tests/graw/vertex-shader/vert-max.sh
src/gallium/tests/graw/vertex-shader/vert-min.sh
src/gallium/tests/graw/vertex-shader/vert-mov.sh
src/gallium/tests/graw/vertex-shader/vert-mul.sh
src/gallium/tests/graw/vertex-shader/vert-rcp.sh
src/gallium/tests/graw/vertex-shader/vert-rsq.sh
src/gallium/tests/graw/vertex-shader/vert-sge.sh
src/gallium/tests/graw/vertex-shader/vert-slt.sh
src/gallium/tests/graw/vertex-shader/vert-srcmod-abs.sh
src/gallium/tests/graw/vertex-shader/vert-srcmod-absneg.sh
src/gallium/tests/graw/vertex-shader/vert-srcmod-neg.sh
src/gallium/tests/graw/vertex-shader/vert-srcmod-swz.sh
src/gallium/tests/graw/vertex-shader/vert-sub.sh
src/gallium/tests/graw/vertex-shader/vert-xpd.sh
src/gallium/tools/trace/dump.py
src/gallium/tools/trace/format.py
src/gallium/tools/trace/model.py
src/gallium/tools/trace/parse.py
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r-- | src/gallium/drivers/r600/eg_state_inlines.h | 54 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 74 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreend.h | 7 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600.h | 13 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 42 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_blit.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_buffer.c | 22 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.c | 156 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 27 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 58 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state.c | 74 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 11 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_inlines.h | 54 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600d.h | 6 |
15 files changed, 535 insertions, 71 deletions
diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index 487adddcb72..f20d45f48de 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -367,10 +367,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_B10G10R10A2_UNORM: return V_028C70_SWAP_ALT; + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R16G16_FLOAT: case PIPE_FORMAT_R16G16_UNORM: return V_028C70_SWAP_STD; /* 64-bit buffers. */ + case PIPE_FORMAT_R32G32_FLOAT: case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_SSCALED: @@ -504,6 +507,57 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) } } +static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) +{ +#ifdef PIPE_ARCH_BIG_ENDIAN + switch(colorformat) { + case V_0280A0_COLOR_4_4: + return(ENDIAN_NONE); + + /* 8-bit buffers. */ + case V_0280A0_COLOR_8: + return(ENDIAN_NONE); + + /* 16-bit buffers. */ + case V_0280A0_COLOR_5_6_5: + case V_0280A0_COLOR_1_5_5_5: + case V_0280A0_COLOR_4_4_4_4: + case V_0280A0_COLOR_16: + case V_0280A0_COLOR_8_8: + return(ENDIAN_8IN16); + + /* 32-bit buffers. */ + case V_0280A0_COLOR_8_8_8_8: + case V_0280A0_COLOR_2_10_10_10: + case V_0280A0_COLOR_8_24: + case V_0280A0_COLOR_24_8: + case V_0280A0_COLOR_32_FLOAT: + case V_0280A0_COLOR_16_16_FLOAT: + case V_0280A0_COLOR_16_16: + return(ENDIAN_8IN32); + + /* 64-bit buffers. */ + case V_0280A0_COLOR_16_16_16_16: + case V_0280A0_COLOR_16_16_16_16_FLOAT: + return(ENDIAN_8IN16); + + case V_0280A0_COLOR_32_32_FLOAT: + case V_0280A0_COLOR_32_32: + return(ENDIAN_8IN32); + + /* 128-bit buffers. */ + case V_0280A0_COLOR_32_32_32_FLOAT: + case V_0280A0_COLOR_32_32_32_32_FLOAT: + case V_0280A0_COLOR_32_32_32_32: + return(ENDIAN_8IN32); + default: + return ENDIAN_NONE; /* Unsupported. */ + } +#else + return ENDIAN_NONE; +#endif +} + static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) { return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0; diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 77432661b64..a972f82fb1d 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -107,15 +107,18 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); for (int i = 0; i < 8; i++) { - unsigned eqRGB = state->rt[i].rgb_func; - unsigned srcRGB = state->rt[i].rgb_src_factor; - unsigned dstRGB = state->rt[i].rgb_dst_factor; - unsigned eqA = state->rt[i].alpha_func; - unsigned srcA = state->rt[i].alpha_src_factor; - unsigned dstA = state->rt[i].alpha_dst_factor; + /* state->rt entries > 0 only written if independent blending */ + const int j = state->independent_blend_enable ? i : 0; + + unsigned eqRGB = state->rt[j].rgb_func; + unsigned srcRGB = state->rt[j].rgb_src_factor; + unsigned dstRGB = state->rt[j].rgb_dst_factor; + unsigned eqA = state->rt[j].alpha_func; + unsigned srcA = state->rt[j].alpha_src_factor; + unsigned dstA = state->rt[j].alpha_dst_factor; blend_cntl[i] = 0; - if (!state->rt[i].blend_enable) + if (!state->rt[j].blend_enable) continue; blend_cntl[i] |= S_028780_BLEND_CONTROL_ENABLE(1); @@ -354,7 +357,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte const struct util_format_description *desc; struct r600_resource_texture *tmp; struct r600_resource *rbuffer; - unsigned format; + unsigned format, endian; uint32_t word4 = 0, yuv_format = 0, pitch = 0; unsigned char swizzle[4], array_mode = 0, tile_type = 0; struct r600_bo *bo[2]; @@ -391,6 +394,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte tmp = tmp->flushed_depth_texture; } + endian = r600_colorformat_endian_swap(format); + if (tmp->force_int_type) { word4 &= C_030010_NUM_FORMAT_ALL; word4 |= S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_INT); @@ -422,6 +427,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, word4 | S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_NO_ZERO) | + S_030010_ENDIAN_SWAP(endian) | S_030010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, S_030014_LAST_LEVEL(state->u.tex.last_level) | @@ -652,7 +658,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state unsigned level = state->cbufs[cb]->u.tex.level; unsigned pitch, slice; unsigned color_info; - unsigned format, swap, ntype; + unsigned format, swap, ntype, endian; unsigned offset; unsigned tile_type; const struct util_format_description *desc; @@ -677,35 +683,43 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state level, state->cbufs[cb]->u.tex.first_layer); pitch = rtex->pitch_in_blocks[level] / 8 - 1; slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; - ntype = 0; desc = util_format_description(surf->base.format); + for (i = 0; i < 4; i++) { + if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { + break; + } + } + ntype = V_028C70_NUMBER_UNORM; if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) ntype = V_028C70_NUMBER_SRGB; + else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) + ntype = V_028C70_NUMBER_SNORM; format = r600_translate_colorformat(surf->base.format); swap = r600_translate_colorswap(surf->base.format); + if (rbuffer->b.b.b.usage == PIPE_USAGE_STAGING) { + endian = ENDIAN_NONE; + } else { + endian = r600_colorformat_endian_swap(format); + } /* disable when gallium grows int textures */ if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type) - ntype = 4; + ntype = V_028C70_NUMBER_UINT; color_info = S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | S_028C70_ARRAY_MODE(rtex->array_mode[level]) | S_028C70_BLEND_CLAMP(1) | - S_028C70_NUMBER_TYPE(ntype); + S_028C70_NUMBER_TYPE(ntype) | + S_028C70_ENDIAN(endian); - for (i = 0; i < 4; i++) { - if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { - break; - } - } /* we can only set the export size if any thing is snorm/unorm component is > 11 bits, if we aren't a float, sint or uint */ if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS && desc->channel[i].size < 12 && desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT && - ntype != 4 && ntype != 5) + ntype != V_028C70_NUMBER_UINT && ntype != V_028C70_NUMBER_SINT) color_info |= S_028C70_SOURCE_FORMAT(V_028C70_EXPORT_4C_16BPC); if (rtex->array_mode[level] > V_028C70_ARRAY_LINEAR_ALIGNED) { @@ -808,6 +822,9 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, if (rstate == NULL) return; + evergreen_context_flush_dest_caches(&rctx->ctx); + rctx->ctx.num_dest_buffers = state->nr_cbufs; + /* unreference old buffer and reference new one */ rstate->id = R600_PIPE_STATE_FRAMEBUFFER; @@ -819,6 +836,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, } if (state->zsbuf) { evergreen_db(rctx, rstate, state); + rctx->ctx.num_dest_buffers++; } target_mask = 0x00000000; @@ -880,6 +898,19 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, } } +static void evergreen_texture_barrier(struct pipe_context *ctx) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + r600_context_flush_all(&rctx->ctx, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_CB_ACTION_ENA(1) | + S_0085F0_CB0_DEST_BASE_ENA(1) | S_0085F0_CB1_DEST_BASE_ENA(1) | + S_0085F0_CB2_DEST_BASE_ENA(1) | S_0085F0_CB3_DEST_BASE_ENA(1) | + S_0085F0_CB4_DEST_BASE_ENA(1) | S_0085F0_CB5_DEST_BASE_ENA(1) | + S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1) | + S_0085F0_CB8_DEST_BASE_ENA(1) | S_0085F0_CB9_DEST_BASE_ENA(1) | + S_0085F0_CB10_DEST_BASE_ENA(1) | S_0085F0_CB11_DEST_BASE_ENA(1)); +} + void evergreen_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = evergreen_create_blend_state; @@ -920,6 +951,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx) rctx->context.set_viewport_state = evergreen_set_viewport_state; rctx->context.sampler_view_destroy = r600_sampler_view_destroy; rctx->context.redefine_user_buffer = u_default_redefine_user_buffer; + rctx->context.texture_barrier = evergreen_texture_barrier; } void evergreen_init_config(struct r600_pipe_context *rctx) @@ -1542,8 +1574,10 @@ void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx, r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, - S_030008_STRIDE(stride), - 0xFFFFFFFF, NULL); +#ifdef PIPE_ARCH_BIG_ENDIAN + S_030008_ENDIAN_SWAP(ENDIAN_8IN32) | +#endif + S_030008_STRIDE(stride), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index c51a163bd06..de445b879a1 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -43,6 +43,7 @@ #define EVERGREEN_CTL_CONST_OFFSET 0x0003CFF0 #define EVERGREEN_CTL_CONST_END 0x0003E200 +#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10 #define EVENT_TYPE_ZPASS_DONE 0x15 #define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 #define EVENT_TYPE(x) ((x) << 0) @@ -1885,4 +1886,10 @@ #define R_03CFF4_SQ_VTX_START_INST_LOC 0x03CFF4 #define R_03A200_SQ_LOOP_CONST_0 0x3A200 + +#define ENDIAN_NONE 0 +#define ENDIAN_8IN16 1 +#define ENDIAN_8IN32 2 +#define ENDIAN_8IN64 3 + #endif diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 0b7d6f70968..0b0df9d019b 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -179,11 +179,13 @@ struct r600_block_reloc { struct r600_block { struct list_head list; unsigned status; + unsigned flags; unsigned start_offset; unsigned pm4_ndwords; unsigned pm4_flush_ndwords; unsigned nbo; - unsigned nreg; + u16 nreg; + u16 nreg_dirty; u32 *reg; u32 pm4[R600_BLOCK_MAX_REG]; unsigned pm4_bo_index[R600_BLOCK_MAX_REG]; @@ -231,6 +233,8 @@ struct r600_query { #define R600_QUERY_STATE_ENDED (1 << 1) #define R600_QUERY_STATE_SUSPENDED (1 << 2) +#define R600_CONTEXT_DRAW_PENDING (1 << 0) +#define R600_CONTEXT_DST_CACHES_DIRTY (1 << 1) struct r600_context { struct radeon *radeon; @@ -253,6 +257,8 @@ struct r600_context { unsigned num_query_running; struct list_head fenced_bo; unsigned max_db; /* for OQ */ + unsigned num_dest_buffers; + unsigned flags; boolean predicate_drawing; }; @@ -288,9 +294,14 @@ void r600_context_queries_suspend(struct r600_context *ctx); void r600_context_queries_resume(struct r600_context *ctx); void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, int flag_wait); +void r600_context_emit_fence(struct r600_context *ctx, struct r600_bo *fence, + unsigned offset, unsigned value); +void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags); +void r600_context_flush_dest_caches(struct r600_context *ctx); int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon); void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw); +void evergreen_context_flush_dest_caches(struct r600_context *ctx); void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 240093f9b9d..7e854b1b81d 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -22,6 +22,7 @@ */ #include <stdio.h> #include <errno.h> +#include <byteswap.h> #include "util/u_format.h" #include "util/u_memory.h" #include "pipe/p_shader_tokens.h" @@ -32,6 +33,12 @@ #include "r600_formats.h" #include "r600d.h" +#ifdef PIPE_ARCH_BIG_ENDIAN +#define CPU_TO_LE32(x) bswap_32(x) +#else +#define CPU_TO_LE32(x) (x) +#endif + #define NUM_OF_CYCLES 3 #define NUM_OF_COMPONENTS 4 @@ -953,10 +960,17 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], } else result[i] = slots[i]; - // let's check source gprs alu = slots[i]; num_once_inst += is_alu_once_inst(bc, alu); + // let's check dst gpr + if (alu->dst.rel) { + if (have_mova) + return 0; + have_rel = 1; + } + + // let's check source gprs num_src = r600_bc_get_num_operands(bc, alu); for (src = 0; src < num_src; ++src) { if (alu->src[src].rel) { @@ -1376,6 +1390,7 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) | S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); bc->bytecode[id++] = S_SQ_VTX_WORD2_OFFSET(vtx->offset) | + S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian) | S_SQ_VTX_WORD2_MEGA_FETCH(1); bc->bytecode[id++] = 0; return 0; @@ -1910,6 +1925,7 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all); id++; fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); + fprintf(stderr, "ENDIAN:%d ", vtx->endian); fprintf(stderr, "OFFSET:%d\n", vtx->offset); //TODO id++; @@ -1922,7 +1938,7 @@ void r600_bc_dump(struct r600_bc *bc) } static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, - unsigned *num_format, unsigned *format_comp) + unsigned *num_format, unsigned *format_comp, unsigned *endian) { const struct util_format_description *desc; unsigned i; @@ -1930,6 +1946,7 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = 0; *num_format = 0; *format_comp = 0; + *endian = ENDIAN_NONE; desc = util_format_description(pformat); if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { @@ -1960,6 +1977,9 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_16_16_16_16_FLOAT; break; } +#ifdef PIPE_ARCH_BIG_ENDIAN + *endian = ENDIAN_8IN16; +#endif break; case 32: switch (desc->nr_channels) { @@ -1976,6 +1996,9 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_32_32_32_32_FLOAT; break; } +#ifdef PIPE_ARCH_BIG_ENDIAN + *endian = ENDIAN_8IN32; +#endif break; default: goto out_unknown; @@ -2013,6 +2036,9 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_16_16_16_16; break; } +#ifdef PIPE_ARCH_BIG_ENDIAN + *endian = ENDIAN_8IN16; +#endif break; case 32: switch (desc->nr_channels) { @@ -2029,6 +2055,9 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, *format = FMT_32_32_32_32; break; } +#ifdef PIPE_ARCH_BIG_ENDIAN + *endian = ENDIAN_8IN32; +#endif break; default: goto out_unknown; @@ -2060,7 +2089,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru struct pipe_vertex_element *elements = ve->elements; const struct util_format_description *desc; unsigned fetch_resource_start = rctx->family >= CHIP_CEDAR ? 0 : 160; - unsigned format, num_format, format_comp; + unsigned format, num_format, format_comp, endian; u32 *bytecode; int i, r; @@ -2107,7 +2136,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru for (i = 0; i < ve->count; i++) { unsigned vbuffer_index; - r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp); + r600_vertex_data_type(ve->elements[i].src_format, &format, &num_format, &format_comp, &endian); desc = util_format_description(ve->elements[i].src_format); if (desc == NULL) { r600_bc_clear(&bc); @@ -2133,6 +2162,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru vtx.format_comp_all = format_comp; vtx.srf_mode_all = 1; vtx.offset = elements[i].src_offset; + vtx.endian = endian; if ((r = r600_bc_add_vtx(&bc, &vtx))) { r600_bc_clear(&bc); @@ -2172,7 +2202,9 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru return -ENOMEM; } - memcpy(bytecode, bc.bytecode, ve->fs_size); + for(i = 0; i < ve->fs_size / 4; i++) { + *(bytecode + i) = CPU_TO_LE32(*(bc.bytecode + i)); + } r600_bo_unmap(rctx->radeon, ve->fetch_shader); r600_bc_clear(&bc); diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 27ea293ebe5..26d337fe125 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -104,6 +104,7 @@ struct r600_bc_vtx { unsigned format_comp_all; unsigned srf_mode_all; unsigned offset; + unsigned endian; }; struct r600_bc_output { diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 04408a5cc8e..151f48a8bf8 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -277,6 +277,13 @@ static void r600_resource_copy_region(struct pipe_context *ctx, struct texture_orig_info orig_info[2]; boolean restore_orig[2]; + /* Fallback for buffers. */ + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { + util_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); + return; + } + if (rsrc->depth && !rsrc->is_flushing_texture) r600_texture_depth_flush(ctx, src, FALSE); diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 6ced719c8f0..71b47e1b056 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -24,6 +24,8 @@ * Jerome Glisse * Corbin Simpson <[email protected]> */ +#include <byteswap.h> + #include <pipe/p_screen.h> #include <util/u_format.h> #include <util/u_math.h> @@ -266,11 +268,31 @@ void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resour uint8_t *ptr = (*rbuffer)->r.b.user_ptr; unsigned size = (*rbuffer)->r.b.b.b.width0; boolean flushed; +#ifdef PIPE_ARCH_BIG_ENDIAN + int i; + uint32_t *tmpPtr; + + *rbuffer = NULL; + + tmpPtr = (uint32_t *)malloc(size); + /* big endian swap */ + if(tmpPtr == NULL) { + return; + } + for(i = 0; i < size / 4; i++) { + tmpPtr[i] = bswap_32(*((uint32_t *)ptr + i)); + } + + u_upload_data(rctx->vbuf_mgr->uploader, 0, size, tmpPtr, const_offset, + (struct pipe_resource**)rbuffer, &flushed); + free(tmpPtr); +#else *rbuffer = NULL; u_upload_data(rctx->vbuf_mgr->uploader, 0, size, ptr, const_offset, (struct pipe_resource**)rbuffer, &flushed); +#endif } else { *const_offset = 0; } diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 4a30eddb621..89b46f5ad7e 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -30,6 +30,7 @@ #include <tgsi/tgsi_util.h> #include <util/u_blitter.h> #include <util/u_double_list.h> +#include "util/u_format.h" #include <util/u_format_s3tc.h> #include <util/u_transfer.h> #include <util/u_surface.h> @@ -37,6 +38,7 @@ #include <util/u_memory.h> #include <util/u_inlines.h> #include "util/u_upload_mgr.h" +#include "os/os_time.h" #include <pipebuffer/pb_buffer.h> #include "r600.h" #include "r600d.h" @@ -49,15 +51,82 @@ /* * pipe_context */ +static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx) +{ + struct r600_fence *fence = NULL; + + if (!ctx->fences.bo) { + /* Create the shared buffer object */ + ctx->fences.bo = r600_bo(ctx->radeon, 4096, 0, 0, 0); + if (!ctx->fences.bo) { + R600_ERR("r600: failed to create bo for fence objects\n"); + return NULL; + } + ctx->fences.data = r600_bo_map(ctx->radeon, ctx->fences.bo, PB_USAGE_UNSYNCHRONIZED, NULL); + } + + if (!LIST_IS_EMPTY(&ctx->fences.pool)) { + struct r600_fence *entry; + + /* Try to find a freed fence that has been signalled */ + LIST_FOR_EACH_ENTRY(entry, &ctx->fences.pool, head) { + if (ctx->fences.data[entry->index] != 0) { + LIST_DELINIT(&entry->head); + fence = entry; + break; + } + } + } + + if (!fence) { + /* Allocate a new fence */ + struct r600_fence_block *block; + unsigned index; + + if ((ctx->fences.next_index + 1) >= 1024) { + R600_ERR("r600: too many concurrent fences\n"); + return NULL; + } + + index = ctx->fences.next_index++; + + if (!(index % FENCE_BLOCK_SIZE)) { + /* Allocate a new block */ + block = CALLOC_STRUCT(r600_fence_block); + if (block == NULL) + return NULL; + + LIST_ADD(&block->head, &ctx->fences.blocks); + } else { + block = LIST_ENTRY(struct r600_fence_block, ctx->fences.blocks.next, head); + } + + fence = &block->fences[index % FENCE_BLOCK_SIZE]; + fence->ctx = ctx; + fence->index = index; + } + + pipe_reference_init(&fence->reference, 1); + + ctx->fences.data[fence->index] = 0; + r600_context_emit_fence(&ctx->ctx, ctx->fences.bo, fence->index, 1); + return fence; +} + static void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_fence **rfence = (struct r600_fence**)fence; + #if 0 static int dc = 0; char dname[256]; #endif + if (rfence) + *rfence = r600_create_fence(rctx); + if (!rctx->ctx.pm4_cdwords) return; @@ -101,6 +170,7 @@ static void r600_destroy_context(struct pipe_context *context) struct r600_pipe_context *rctx = (struct r600_pipe_context *)context; rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush); + util_unreference_framebuffer_state(&rctx->framebuffer); r600_context_fini(&rctx->ctx); @@ -113,6 +183,18 @@ static void r600_destroy_context(struct pipe_context *context) u_vbuf_mgr_destroy(rctx->vbuf_mgr); util_slab_destroy(&rctx->pool_transfers); + if (rctx->fences.bo) { + struct r600_fence_block *entry, *tmp; + + LIST_FOR_EACH_ENTRY_SAFE(entry, tmp, &rctx->fences.blocks, head) { + LIST_DEL(&entry->head); + FREE(entry); + } + + r600_bo_unmap(rctx->radeon, rctx->fences.bo); + r600_bo_reference(rctx->radeon, &rctx->fences.bo, NULL); + } + r600_update_num_contexts(rctx->screen, -1); FREE(rctx); @@ -140,6 +222,12 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->radeon = rscreen->radeon; rctx->family = r600_get_family(rctx->radeon); + rctx->fences.bo = NULL; + rctx->fences.data = NULL; + rctx->fences.next_index = 0; + LIST_INITHEAD(&rctx->fences.pool); + LIST_INITHEAD(&rctx->fences.blocks); + r600_init_blit_functions(rctx); r600_init_query_functions(rctx); r600_init_context_resource_functions(rctx); @@ -284,8 +372,8 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_DEPTH_CLAMP: case PIPE_CAP_SHADER_STENCIL_EXPORT: - case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: return 1; case PIPE_CAP_INDEP_BLEND_ENABLE: /* R600 doesn't support per-MRT blends */ @@ -294,10 +382,14 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) else return 1; + case PIPE_CAP_TGSI_INSTANCEID: + return 0; + /* Unsupported features (boolean caps). */ case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */ + case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL: /* R600 doesn't support per-MRT blends */ if (family == CHIP_R600) return 0; @@ -435,6 +527,9 @@ static boolean r600_is_format_supported(struct pipe_screen* screen, return FALSE; } + if (!util_format_is_supported(format, usage)) + return FALSE; + /* Multisample */ if (sample_count > 1) return FALSE; @@ -492,6 +587,62 @@ static void r600_destroy_screen(struct pipe_screen* pscreen) FREE(rscreen); } +static void r600_fence_reference(struct pipe_screen *pscreen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + struct r600_fence **oldf = (struct r600_fence**)ptr; + struct r600_fence *newf = (struct r600_fence*)fence; + + if (pipe_reference(&(*oldf)->reference, &newf->reference)) { + struct r600_pipe_context *ctx = (*oldf)->ctx; + LIST_ADDTAIL(&(*oldf)->head, &ctx->fences.pool); + } + + *ptr = fence; +} + +static boolean r600_fence_signalled(struct pipe_screen *pscreen, + struct pipe_fence_handle *fence) +{ + struct r600_fence *rfence = (struct r600_fence*)fence; + struct r600_pipe_context *ctx = rfence->ctx; + + return ctx->fences.data[rfence->index]; +} + +static boolean r600_fence_finish(struct pipe_screen *pscreen, + struct pipe_fence_handle *fence, + uint64_t timeout) +{ + struct r600_fence *rfence = (struct r600_fence*)fence; + struct r600_pipe_context *ctx = rfence->ctx; + int64_t start_time = 0; + unsigned spins = 0; + + if (timeout != PIPE_TIMEOUT_INFINITE) { + start_time = os_time_get(); + + /* Convert to microseconds. */ + timeout /= 1000; + } + + while (ctx->fences.data[rfence->index] == 0) { + if (++spins % 256) + continue; +#ifdef PIPE_OS_UNIX + sched_yield(); +#else + os_time_sleep(10); +#endif + if (timeout != PIPE_TIMEOUT_INFINITE && + os_time_get() - start_time >= timeout) { + return FALSE; + } + } + + return TRUE; +} struct pipe_screen *r600_screen_create(struct radeon *radeon) { @@ -513,6 +664,9 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon) rscreen->screen.is_format_supported = r600_is_format_supported; rscreen->screen.context_create = r600_create_context; rscreen->screen.video_context_create = r600_video_create; + rscreen->screen.fence_reference = r600_fence_reference; + rscreen->screen.fence_signalled = r600_fence_signalled; + rscreen->screen.fence_finish = r600_fence_finish; r600_init_screen_resource_functions(&rscreen->screen); rscreen->tiling_info = r600_get_tiling_info(radeon); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 396801e4a41..88aff0e81bb 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -124,6 +124,30 @@ struct r600_textures_info { unsigned n_samplers; }; +struct r600_fence { + struct pipe_reference reference; + struct r600_pipe_context *ctx; + unsigned index; /* in the shared bo */ + struct list_head head; +}; + +#define FENCE_BLOCK_SIZE 16 + +struct r600_fence_block { + struct r600_fence fences[FENCE_BLOCK_SIZE]; + struct list_head head; +}; + +struct r600_pipe_fences { + struct r600_bo *bo; + unsigned *data; + unsigned next_index; + /* linked list of preallocated blocks */ + struct list_head blocks; + /* linked list of freed fences */ + struct list_head pool; +}; + #define R600_CONSTANT_ARRAY_SIZE 256 #define R600_RESOURCE_ARRAY_SIZE 160 @@ -158,9 +182,12 @@ struct r600_pipe_context { bool flatshade; struct r600_textures_info ps_samplers; + struct r600_pipe_fences fences; + struct u_vbuf_mgr *vbuf_mgr; struct util_slab_mempool pool_transfers; bool blit; + }; struct r600_drawl { diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index e7285d624e3..188cea0ff88 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -33,6 +33,13 @@ #include "r600d.h" #include <stdio.h> #include <errno.h> +#include <byteswap.h> + +#ifdef PIPE_ARCH_BIG_ENDIAN +#define CPU_TO_LE32(x) bswap_32(x) +#else +#define CPU_TO_LE32(x) (x) +#endif int r600_find_vs_semantic_index(struct r600_shader *vs, struct r600_shader *ps, int id) @@ -52,7 +59,8 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_shader *rshader = &shader->shader; - void *ptr; + uint32_t *ptr; + int i; /* copy new shader */ if (shader->bo == NULL) { @@ -60,8 +68,10 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s if (shader->bo == NULL) { return -ENOMEM; } - ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL); - memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4); + ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL); + for(i = 0; i < rshader->bc.ndw; i++) { + *(ptr + i) = CPU_TO_LE32(*(rshader->bc.bytecode + i)); + } r600_bo_unmap(rctx->radeon, shader->bo); } /* build state */ @@ -467,6 +477,11 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ +#ifdef PIPE_ARCH_BIG_ENDIAN + vtx.endian = ENDIAN_8IN32; +#else + vtx.endian = ENDIAN_NONE; +#endif if ((r = r600_bc_add_vtx(ctx->bc, &vtx))) return r; @@ -1526,9 +1541,13 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) unsigned src_gpr; int r, i; int opcode; - boolean src_not_temp = - inst->Src[0].Register.File != TGSI_FILE_TEMPORARY && - inst->Src[0].Register.File != TGSI_FILE_INPUT; + /* Texture fetch instructions can only use gprs as source. + * Also they cannot negate the source or take the absolute value */ + const boolean src_requires_loading = + (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY && + inst->Src[0].Register.File != TGSI_FILE_INPUT) || + ctx->src[0].neg || ctx->src[0].abs; + boolean src_loaded = FALSE; src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; @@ -1570,7 +1589,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - src_not_temp = FALSE; + src_loaded = TRUE; src_gpr = ctx->temp_reg; } @@ -1655,11 +1674,11 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (r) return r; - src_not_temp = FALSE; + src_loaded = TRUE; src_gpr = ctx->temp_reg; } - if (src_not_temp) { + if (src_requires_loading && !src_loaded) { for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); @@ -1673,6 +1692,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (r) return r; } + src_loaded = TRUE; src_gpr = ctx->temp_reg; } @@ -1691,10 +1711,18 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; - tex.src_sel_x = 0; - tex.src_sel_y = 1; - tex.src_sel_z = 2; - tex.src_sel_w = 3; + if (src_loaded) { + tex.src_sel_x = 0; + tex.src_sel_y = 1; + tex.src_sel_z = 2; + tex.src_sel_w = 3; + } else { + tex.src_sel_x = ctx->src[0].swizzle[0]; + tex.src_sel_y = ctx->src[0].swizzle[1]; + tex.src_sel_z = ctx->src[0].swizzle[2]; + tex.src_sel_w = ctx->src[0].swizzle[3]; + tex.src_rel = ctx->src[0].rel; + } if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { tex.src_sel_x = 1; @@ -1712,12 +1740,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) { tex.coord_type_z = 0; - tex.src_sel_z = 1; + tex.src_sel_z = tex.src_sel_y; } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY) tex.coord_type_z = 0; if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) - tex.src_sel_w = 2; + tex.src_sel_w = tex.src_sel_z; r = r600_bc_add_tex(ctx->bc, &tex); if (r) diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index efb68cbd139..ac2e8986b97 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -161,16 +161,19 @@ static void *r600_create_blend_state(struct pipe_context *ctx, color_control, 0xFFFFFFFD, NULL); for (int i = 0; i < 8; i++) { - unsigned eqRGB = state->rt[i].rgb_func; - unsigned srcRGB = state->rt[i].rgb_src_factor; - unsigned dstRGB = state->rt[i].rgb_dst_factor; + /* state->rt entries > 0 only written if independent blending */ + const int j = state->independent_blend_enable ? i : 0; - unsigned eqA = state->rt[i].alpha_func; - unsigned srcA = state->rt[i].alpha_src_factor; - unsigned dstA = state->rt[i].alpha_dst_factor; + unsigned eqRGB = state->rt[j].rgb_func; + unsigned srcRGB = state->rt[j].rgb_src_factor; + unsigned dstRGB = state->rt[j].rgb_dst_factor; + + unsigned eqA = state->rt[j].alpha_func; + unsigned srcA = state->rt[j].alpha_src_factor; + unsigned dstA = state->rt[j].alpha_dst_factor; uint32_t bc = 0; - if (!state->rt[i].blend_enable) + if (!state->rt[j].blend_enable) continue; bc |= S_028804_COLOR_COMB_FCN(r600_translate_blend_function(eqRGB)); @@ -410,7 +413,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c const struct util_format_description *desc; struct r600_resource_texture *tmp; struct r600_resource *rbuffer; - unsigned format; + unsigned format, endian; uint32_t word4 = 0, yuv_format = 0, pitch = 0; unsigned char swizzle[4], array_mode = 0, tile_type = 0; struct r600_bo *bo[2]; @@ -447,6 +450,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c r600_texture_depth_flush(ctx, texture, TRUE); tmp = tmp->flushed_depth_texture; } + endian = r600_colorformat_endian_swap(format); if (tmp->force_int_type) { word4 &= C_038010_NUM_FORMAT_ALL; @@ -487,6 +491,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c word4 | S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_NO_ZERO) | S_038010_REQUEST_SIZE(1) | + S_038010_ENDIAN_SWAP(endian) | S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, S_038014_LAST_LEVEL(state->u.tex.last_level) | @@ -715,7 +720,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta unsigned level = state->cbufs[cb]->u.tex.level; unsigned pitch, slice; unsigned color_info; - unsigned format, swap, ntype; + unsigned format, swap, ntype, endian; unsigned offset; const struct util_format_description *desc; struct r600_bo *bo[3]; @@ -739,40 +744,37 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta level, state->cbufs[cb]->u.tex.first_layer); pitch = rtex->pitch_in_blocks[level] / 8 - 1; slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; - ntype = 0; desc = util_format_description(surf->base.format); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - ntype = V_0280A0_NUMBER_SRGB; - else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) { - switch(desc->channel[0].type) { - case UTIL_FORMAT_TYPE_UNSIGNED: - ntype = V_0280A0_NUMBER_UNORM; - break; - - case UTIL_FORMAT_TYPE_SIGNED: - ntype = V_0280A0_NUMBER_SNORM; - break; - } - } for (i = 0; i < 4; i++) { if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) { break; } } + ntype = V_0280A0_NUMBER_UNORM; + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + ntype = V_0280A0_NUMBER_SRGB; + else if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) + ntype = V_0280A0_NUMBER_SNORM; format = r600_translate_colorformat(surf->base.format); swap = r600_translate_colorswap(surf->base.format); + if(rbuffer->b.b.b.usage == PIPE_USAGE_STAGING) { + endian = ENDIAN_NONE; + } else { + endian = r600_colorformat_endian_swap(format); + } /* disable when gallium grows int textures */ if ((format == FMT_32_32_32_32 || format == FMT_16_16_16_16) && rtex->force_int_type) - ntype = 4; + ntype = V_0280A0_NUMBER_UINT; color_info = S_0280A0_FORMAT(format) | S_0280A0_COMP_SWAP(swap) | S_0280A0_ARRAY_MODE(rtex->array_mode[level]) | S_0280A0_BLEND_CLAMP(1) | - S_0280A0_NUMBER_TYPE(ntype); + S_0280A0_NUMBER_TYPE(ntype) | + S_0280A0_ENDIAN(endian); /* on R600 this can't be set if BLEND_CLAMP isn't set, if BLEND_FLOAT32 is set of > 11 bits in a UNORM or SNORM */ @@ -855,6 +857,9 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, if (rstate == NULL) return; + r600_context_flush_dest_caches(&rctx->ctx); + rctx->ctx.num_dest_buffers = state->nr_cbufs; + /* unreference old buffer and reference new one */ rstate->id = R600_PIPE_STATE_FRAMEBUFFER; @@ -866,6 +871,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, } if (state->zsbuf) { r600_db(rctx, rstate, state); + rctx->ctx.num_dest_buffers++; } target_mask = 0x00000000; @@ -945,6 +951,17 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, } } +static void r600_texture_barrier(struct pipe_context *ctx) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + r600_context_flush_all(&rctx->ctx, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_CB_ACTION_ENA(1) | + S_0085F0_CB0_DEST_BASE_ENA(1) | S_0085F0_CB1_DEST_BASE_ENA(1) | + S_0085F0_CB2_DEST_BASE_ENA(1) | S_0085F0_CB3_DEST_BASE_ENA(1) | + S_0085F0_CB4_DEST_BASE_ENA(1) | S_0085F0_CB5_DEST_BASE_ENA(1) | + S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1)); +} + void r600_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = r600_create_blend_state; @@ -985,6 +1002,7 @@ void r600_init_state_functions(struct r600_pipe_context *rctx) rctx->context.set_viewport_state = r600_set_viewport_state; rctx->context.sampler_view_destroy = r600_sampler_view_destroy; rctx->context.redefine_user_buffer = u_default_redefine_user_buffer; + rctx->context.texture_barrier = r600_texture_barrier; } void r600_init_config(struct r600_pipe_context *rctx) @@ -1443,8 +1461,10 @@ void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx, r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, - S_038008_STRIDE(stride), - 0xFFFFFFFF, NULL); +#ifdef PIPE_ARCH_BIG_ENDIAN + S_038008_ENDIAN_SWAP(ENDIAN_8IN32) | +#endif + S_038008_STRIDE(stride), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 43dad0c8023..997c9a597ee 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -435,7 +435,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_resource *rbuffer; - u32 vgt_dma_index_type, vgt_draw_initiator, mask; + u32 vgt_dma_index_type, vgt_dma_swap_mode, vgt_draw_initiator, mask; struct r600_draw rdraw; struct r600_pipe_state vgt; struct r600_drawl draw = {}; @@ -467,14 +467,21 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) draw.info.index_bias = info->start; } + vgt_dma_swap_mode = 0; switch (draw.index_size) { case 2: vgt_draw_initiator = 0; vgt_dma_index_type = 0; +#ifdef PIPE_ARCH_BIG_ENDIAN + vgt_dma_swap_mode = ENDIAN_8IN16; +#endif break; case 4: vgt_draw_initiator = 0; vgt_dma_index_type = 1; +#ifdef PIPE_ARCH_BIG_ENDIAN + vgt_dma_swap_mode = ENDIAN_8IN32; +#endif break; case 0: vgt_draw_initiator = 2; @@ -521,7 +528,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) rdraw.vgt_num_indices = draw.info.count; rdraw.vgt_num_instances = draw.info.instance_count; - rdraw.vgt_index_type = vgt_dma_index_type; + rdraw.vgt_index_type = vgt_dma_index_type | (vgt_dma_swap_mode << 2); rdraw.vgt_draw_initiator = vgt_draw_initiator; rdraw.indices = NULL; if (draw.index_buffer) { diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 2a40f41bbf5..5eabfdc2bc6 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -363,9 +363,12 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_0280A0_SWAP_ALT; case PIPE_FORMAT_R16G16_UNORM: + case PIPE_FORMAT_R16G16_FLOAT: + case PIPE_FORMAT_R32_FLOAT: return V_0280A0_SWAP_STD; /* 64-bit buffers. */ + case PIPE_FORMAT_R32G32_FLOAT: case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_SSCALED: @@ -501,6 +504,57 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) } } +static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) +{ +#ifdef PIPE_ARCH_BIG_ENDIAN + switch(colorformat) { + case V_0280A0_COLOR_4_4: + return(ENDIAN_NONE); + + /* 8-bit buffers. */ + case V_0280A0_COLOR_8: + return(ENDIAN_NONE); + + /* 16-bit buffers. */ + case V_0280A0_COLOR_5_6_5: + case V_0280A0_COLOR_1_5_5_5: + case V_0280A0_COLOR_4_4_4_4: + case V_0280A0_COLOR_16: + case V_0280A0_COLOR_8_8: + return(ENDIAN_8IN16); + + /* 32-bit buffers. */ + case V_0280A0_COLOR_8_8_8_8: + case V_0280A0_COLOR_2_10_10_10: + case V_0280A0_COLOR_8_24: + case V_0280A0_COLOR_24_8: + case V_0280A0_COLOR_32_FLOAT: + case V_0280A0_COLOR_16_16_FLOAT: + case V_0280A0_COLOR_16_16: + return(ENDIAN_8IN32); + + /* 64-bit buffers. */ + case V_0280A0_COLOR_16_16_16_16: + case V_0280A0_COLOR_16_16_16_16_FLOAT: + return(ENDIAN_8IN16); + + case V_0280A0_COLOR_32_32_FLOAT: + case V_0280A0_COLOR_32_32: + return(ENDIAN_8IN32); + + /* 128-bit buffers. */ + case V_0280A0_COLOR_32_32_32_FLOAT: + case V_0280A0_COLOR_32_32_32_32_FLOAT: + case V_0280A0_COLOR_32_32_32_32: + return(ENDIAN_8IN32); + default: + return ENDIAN_NONE; /* Unsupported. */ + } +#else + return ENDIAN_NONE; +#endif +} + static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) { return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0; diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index df70e2889e2..2bff52bec8c 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -3460,4 +3460,10 @@ #define SQ_TEX_INST_SAMPLE 0x10 #define SQ_TEX_INST_SAMPLE_L 0x11 #define SQ_TEX_INST_SAMPLE_C 0x18 + +#define ENDIAN_NONE 0 +#define ENDIAN_8IN16 1 +#define ENDIAN_8IN32 2 +#define ENDIAN_8IN64 3 + #endif |