diff options
author | Christian König <[email protected]> | 2011-07-04 15:04:41 +0200 |
---|---|---|
committer | Christian König <[email protected]> | 2011-07-04 15:04:41 +0200 |
commit | c3b2230b71cb3a00a7f4c0987197d397bada650b (patch) | |
tree | 018f5df0f8b5976ddb56ef4f13e9466587838998 /src/gallium/drivers/r600 | |
parent | 003401f95c9b59471c22368b7da16fe7a951e490 (diff) | |
parent | 424b1210d951c206e7c2fb8f2778acbd384eb247 (diff) |
Merge remote-tracking branch 'origin/master' into pipe-video
Conflicts:
configure.ac
src/gallium/drivers/r600/r600_state_inlines.h
src/gallium/tests/trivial/Makefile
src/gallium/winsys/g3dvl/dri/XF86dri.c
src/gallium/winsys/g3dvl/dri/driclient.c
src/gallium/winsys/g3dvl/dri/driclient.h
src/gallium/winsys/g3dvl/dri/xf86dri.h
src/gallium/winsys/g3dvl/dri/xf86dristr.h
src/gallium/winsys/r600/drm/r600_bo.c
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r-- | src/gallium/drivers/r600/SConscript | 6 | ||||
-rw-r--r-- | src/gallium/drivers/r600/eg_state_inlines.h | 45 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 25 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_blit.c | 49 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.c | 18 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 22 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_query.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 323 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state.c | 150 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 127 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_inlines.h | 41 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_texture.c | 14 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_translate.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600d.h | 8 |
17 files changed, 597 insertions, 246 deletions
diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript index 0135808f10a..19f07b2bef8 100644 --- a/src/gallium/drivers/r600/SConscript +++ b/src/gallium/drivers/r600/SConscript @@ -2,11 +2,7 @@ Import('*') env = env.Clone() -try: - env.ParseConfig('pkg-config --cflags libdrm_radeon') -except OSError: - print 'warning: not building r600' - Return() +env.PkgUseModules('DRM_RADEON') env.Append(CPPPATH = [ '#/include', diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index b780dba3e33..b5590116e8f 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -292,7 +292,7 @@ static inline uint32_t r600_translate_stencilformat(enum pipe_format format) static inline uint32_t r600_translate_colorswap(enum pipe_format format) { switch (format) { - /* 8-bit buffers. */ + /* 8-bit buffers. */ case PIPE_FORMAT_L4A4_UNORM: return V_028C70_SWAP_ALT; @@ -305,7 +305,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R8_SNORM: return V_028C70_SWAP_STD; - /* 16-bit buffers. */ + /* 16-bit buffers. */ case PIPE_FORMAT_B5G6R5_UNORM: return V_028C70_SWAP_STD_REV; @@ -327,9 +327,10 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) return V_028C70_SWAP_STD; case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16_FLOAT: return V_028C70_SWAP_STD; - /* 32-bit buffers. */ + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: return V_028C70_SWAP_STD_REV; case PIPE_FORMAT_B8G8R8A8_SRGB: @@ -343,6 +344,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_X8R8G8B8_UNORM: return V_028C70_SWAP_ALT_REV; case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8X8_UNORM: return V_028C70_SWAP_STD; @@ -373,13 +375,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R16G16_UNORM: return V_028C70_SWAP_STD; - /* 64-bit buffers. */ + /* 64-bit buffers. */ case PIPE_FORMAT_R32G32_FLOAT: case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_FLOAT: - /* 128-bit buffers. */ + /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: case PIPE_FORMAT_R32G32B32A32_SNORM: case PIPE_FORMAT_R32G32B32A32_UNORM: @@ -394,7 +396,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) { switch (format) { - /* 8-bit buffers. */ + /* 8-bit buffers. */ case PIPE_FORMAT_L4A4_UNORM: return V_028C70_COLOR_4_4; @@ -406,7 +408,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R8_SNORM: return V_028C70_COLOR_8; - /* 16-bit buffers. */ + /* 16-bit buffers. */ case PIPE_FORMAT_B5G6R5_UNORM: return V_028C70_COLOR_5_6_5; @@ -429,7 +431,10 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R16_UNORM: return V_028C70_COLOR_16; - /* 32-bit buffers. */ + case PIPE_FORMAT_R16_FLOAT: + return V_028C70_COLOR_16_FLOAT; + + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: case PIPE_FORMAT_A8B8G8R8_UNORM: case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -472,7 +477,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R11G11B10_FLOAT: return V_028C70_COLOR_10_11_11_FLOAT; - /* 64-bit buffers. */ + /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16_USCALED: case PIPE_FORMAT_R16G16B16A16_USCALED: case PIPE_FORMAT_R16G16B16_SSCALED: @@ -492,20 +497,21 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R32G32_SSCALED: return V_028C70_COLOR_32_32; - /* 128-bit buffers. */ + /* 96-bit buffers. */ + case PIPE_FORMAT_R32G32B32_FLOAT: + return V_028C70_COLOR_32_32_32_FLOAT; + + /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_SNORM: case PIPE_FORMAT_R32G32B32A32_UNORM: return V_028C70_COLOR_32_32_32_32; - case PIPE_FORMAT_R32G32B32_FLOAT: - return V_028C70_COLOR_32_32_32_FLOAT; case PIPE_FORMAT_R32G32B32A32_FLOAT: return V_028C70_COLOR_32_32_32_32_FLOAT; - /* YUV buffers. */ + /* YUV buffers. */ case PIPE_FORMAT_UYVY: case PIPE_FORMAT_YUYV: default: - /* R600_ERR("unsupported color format %d\n", format); */ return ~0; /* Unsupported. */ } } @@ -517,11 +523,11 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_4_4: return(ENDIAN_NONE); - /* 8-bit buffers. */ + /* 8-bit buffers. */ case V_028C70_COLOR_8: return(ENDIAN_NONE); - /* 16-bit buffers. */ + /* 16-bit buffers. */ case V_028C70_COLOR_5_6_5: case V_028C70_COLOR_1_5_5_5: case V_028C70_COLOR_4_4_4_4: @@ -529,7 +535,7 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_8_8: return(ENDIAN_8IN16); - /* 32-bit buffers. */ + /* 32-bit buffers. */ case V_028C70_COLOR_8_8_8_8: case V_028C70_COLOR_2_10_10_10: case V_028C70_COLOR_8_24: @@ -539,7 +545,7 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_16_16: return(ENDIAN_8IN32); - /* 64-bit buffers. */ + /* 64-bit buffers. */ case V_028C70_COLOR_16_16_16_16: case V_028C70_COLOR_16_16_16_16_FLOAT: return(ENDIAN_8IN16); @@ -548,8 +554,9 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_028C70_COLOR_32_32: return(ENDIAN_8IN32); - /* 128-bit buffers. */ + /* 96-bit buffers. */ case V_028C70_COLOR_32_32_32_FLOAT: + /* 128-bit buffers. */ case V_028C70_COLOR_32_32_32_32_FLOAT: case V_028C70_COLOR_32_32_32_32: return(ENDIAN_8IN32); diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 9ebfe54c76d..dc182611482 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -256,6 +256,8 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, } rstate = &rs->rstate; + rs->clamp_vertex_color = state->clamp_vertex_color; + rs->clamp_fragment_color = state->clamp_fragment_color; rs->flatshade = state->flatshade; rs->sprite_coord_enable = state->sprite_coord_enable; @@ -482,19 +484,27 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; int i; + int has_depth = 0; for (i = 0; i < count; i++) { if (&rctx->ps_samplers.views[i]->base != views[i]) { - if (resource[i]) + if (resource[i]) { + if (((struct r600_resource_texture *)resource[i]->base.texture)->depth) + has_depth = 1; evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i + R600_MAX_CONST_BUFFERS); - else + } else evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i + R600_MAX_CONST_BUFFERS); pipe_sampler_view_reference( (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], views[i]); + } else { + if (resource[i]) { + if (((struct r600_resource_texture *)resource[i]->base.texture)->depth) + has_depth = 1; + } } } for (i = count; i < NUM_TEX_UNITS; i++) { @@ -504,6 +514,7 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); } } + rctx->have_depth_texture = has_depth; rctx->ps_samplers.n_views = count; } @@ -689,6 +700,9 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state surf = (struct r600_surface *)state->cbufs[cb]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; + if (rtex->depth) + rctx->have_depth_fb = TRUE; + if (rtex->depth && !rtex->is_flushing_texture) { r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE); rtex = rtex->flushed_depth_texture; @@ -870,6 +884,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, util_copy_framebuffer_state(&rctx->framebuffer, state); /* build states */ + rctx->have_depth_fb = 0; + rctx->nr_cbufs = state->nr_cbufs; for (int i = 0; i < state->nr_cbufs; i++) { evergreen_cb(rctx, rstate, state, i); } @@ -1616,7 +1632,10 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader rshader->output[i].name == TGSI_SEMANTIC_STENCIL) exports_ps |= 1; else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { - num_cout++; + if (rshader->fs_write_all) + num_cout = rshader->nr_cbufs; + else + num_cout++; } } exports_ps |= S_02884C_EXPORT_COLORS(num_cout); diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index bf7138d9e4e..151e831e5c6 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -245,6 +245,7 @@ struct r600_context { unsigned pm4_cdwords; unsigned pm4_dirty_cdwords; unsigned ctx_pm4_ndwords; + unsigned init_dwords; unsigned nreloc; unsigned creloc; struct r600_reloc *reloc; @@ -261,6 +262,7 @@ struct r600_context { struct r600_range vs_resources; struct r600_range fs_resources; int num_ps_resources, num_vs_resources, num_fs_resources; + boolean have_depth_texture, have_depth_fb; }; struct r600_draw { diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 3196d97dbbb..065f955ebcb 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1383,6 +1383,9 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) break; } } + /* slight hack to make gradients always go into same cf */ + if (ntex->inst == SQ_TEX_INST_SET_GRADIENTS_H) + bc->force_add_cf = 1; } /* cf can contains only alu or only vtx or only tex */ @@ -1860,6 +1863,8 @@ void r600_bc_dump(struct r600_bc *bc) break; case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: + case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]); fprintf(stderr, "GPR:%X ", cf->output.gpr); fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size); @@ -2258,7 +2263,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru ve->fs_size = bc.ndw*4; /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ - ve->fetch_shader = r600_bo(rctx->radeon, ve->fs_size, 256, PIPE_BIND_VERTEX_BUFFER, 0); + ve->fetch_shader = r600_bo(rctx->radeon, ve->fs_size, 256, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE); if (ve->fetch_shader == NULL) { r600_bc_clear(&bc); return -ENOMEM; diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 151f48a8bf8..6171d285bb9 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -27,9 +27,18 @@ enum r600_blitter_op /* bitmask */ { - R600_CLEAR = 1, - R600_CLEAR_SURFACE = 2, - R600_COPY = 4 + R600_SAVE_TEXTURES = 1, + R600_SAVE_FRAMEBUFFER = 2, + R600_DISABLE_RENDER_COND = 4, + + R600_CLEAR = 0, + + R600_CLEAR_SURFACE = R600_SAVE_FRAMEBUFFER, + + R600_COPY = R600_SAVE_FRAMEBUFFER | R600_SAVE_TEXTURES | + R600_DISABLE_RENDER_COND, + + R600_DECOMPRESS = R600_SAVE_FRAMEBUFFER | R600_DISABLE_RENDER_COND, }; static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op) @@ -58,10 +67,10 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op rctx->vbuf_mgr->nr_vertex_buffers, rctx->vbuf_mgr->vertex_buffer); - if (op & (R600_CLEAR_SURFACE | R600_COPY)) + if (op & R600_SAVE_FRAMEBUFFER) util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer); - if (op & R600_COPY) { + if (op & R600_SAVE_TEXTURES) { util_blitter_save_fragment_sampler_states( rctx->blitter, rctx->ps_samplers.n_samplers, (void**)rctx->ps_samplers.samplers); @@ -71,11 +80,23 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op (struct pipe_sampler_view**)rctx->ps_samplers.views); } + if ((op & R600_DISABLE_RENDER_COND) && rctx->current_render_cond) { + rctx->saved_render_cond = rctx->current_render_cond; + rctx->saved_render_cond_mode = rctx->current_render_cond_mode; + rctx->context.render_condition(&rctx->context, NULL, 0); + } + } static void r600_blitter_end(struct pipe_context *ctx) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + if (rctx->saved_render_cond) { + rctx->context.render_condition(&rctx->context, + rctx->saved_render_cond, + rctx->saved_render_cond_mode); + rctx->saved_render_cond = NULL; + } r600_context_queries_resume(&rctx->ctx); rctx->blit = false; } @@ -107,7 +128,7 @@ void r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_t rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635) depth = 0.0f; - r600_blitter_begin(ctx, R600_CLEAR_SURFACE); + r600_blitter_begin(ctx, R600_DECOMPRESS); util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, rctx->custom_dsa_flush, depth); r600_blitter_end(ctx); @@ -121,8 +142,6 @@ void r600_flush_depth_textures(struct r600_pipe_context *rctx) { unsigned int i; - if (rctx->blit) return; - /* FIXME: This handles fragment shader textures only. */ for (i = 0; i < rctx->ps_samplers.n_views; ++i) { @@ -275,6 +294,8 @@ static void r600_resource_copy_region(struct pipe_context *ctx, { struct r600_resource_texture *rsrc = (struct r600_resource_texture*)src; struct texture_orig_info orig_info[2]; + struct pipe_box sbox; + const struct pipe_box *psbox; boolean restore_orig[2]; /* Fallback for buffers. */ @@ -292,7 +313,15 @@ static void r600_resource_copy_region(struct pipe_context *ctx, if (util_format_is_compressed(src->format)) { r600_compressed_to_blittable(src, src_level, &orig_info[0]); restore_orig[0] = TRUE; - } + sbox.x = util_format_get_nblocksx(orig_info[0].format, src_box->x); + sbox.y = util_format_get_nblocksy(orig_info[0].format, src_box->y); + sbox.z = src_box->z; + sbox.width = util_format_get_nblocksx(orig_info[0].format, src_box->width); + sbox.height = util_format_get_nblocksy(orig_info[0].format, src_box->height); + sbox.depth = src_box->depth; + psbox=&sbox; + } else + psbox=src_box; if (util_format_is_compressed(dst->format)) { r600_compressed_to_blittable(dst, dst_level, &orig_info[1]); @@ -303,7 +332,7 @@ static void r600_resource_copy_region(struct pipe_context *ctx, } r600_hw_copy_region(ctx, dst, dst_level, dstx, dsty, dstz, - src, src_level, src_box); + src, src_level, psbox); if (restore_orig[0]) r600_reset_blittable_to_compressed(src, src_level, &orig_info[0]); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 70e3619de4b..049a4daae66 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -127,9 +127,6 @@ static void r600_flush(struct pipe_context *ctx, if (rfence) *rfence = r600_create_fence(rctx); - if (!rctx->ctx.pm4_cdwords) - return; - #if 0 sprintf(dname, "gallium-%08d.bof", dc); if (dc < 20) { @@ -139,11 +136,6 @@ static void r600_flush(struct pipe_context *ctx, dc++; #endif r600_context_flush(&rctx->ctx); - - /* XXX This shouldn't be really necessary, but removing it breaks some tests. - * Needless buffer reallocations may significantly increase memory consumption, - * so getting rid of this call is important. */ - u_upload_flush(rctx->vbuf_mgr->uploader); } static void r600_update_num_contexts(struct r600_screen *rscreen, int diff) @@ -373,7 +365,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_MIRROR_REPEAT: case PIPE_CAP_BLEND_EQUATION_SEPARATE: - case PIPE_CAP_SM3: case PIPE_CAP_TEXTURE_SWIZZLE: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_DEPTH_CLAMP: @@ -382,6 +373,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + case PIPE_CAP_SM3: + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL: return 1; /* Supported except the original R600. */ @@ -391,14 +385,12 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return family == CHIP_R600 ? 0 : 1; /* Supported on Evergreen. */ - case PIPE_CAP_SEAMLESS_CUBE_MAP: case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return family >= CHIP_CEDAR ? 1 : 0; /* Unsupported features. */ case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_PRIMITIVE_RESTART: - case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL: case PIPE_CAP_TGSI_INSTANCEID: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: @@ -487,9 +479,9 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e return 8; /* FIXME */ case PIPE_SHADER_CAP_MAX_INPUTS: if(shader == PIPE_SHADER_FRAGMENT) - return 10; + return 34; else - return 16; + return 32; case PIPE_SHADER_CAP_MAX_TEMPS: return 256; /* Max native temporaries. */ case PIPE_SHADER_CAP_MAX_ADDRS: diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index d92b74ebc4e..2667c80bcef 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -50,6 +50,7 @@ enum r600_pipe_state_id { R600_PIPE_STATE_BLEND = 0, R600_PIPE_STATE_BLEND_COLOR, R600_PIPE_STATE_CONFIG, + R600_PIPE_STATE_SEAMLESS_CUBEMAP, R600_PIPE_STATE_CLIP, R600_PIPE_STATE_SCISSOR, R600_PIPE_STATE_VIEWPORT, @@ -87,6 +88,8 @@ struct r600_pipe_sampler_view { struct r600_pipe_rasterizer { struct r600_pipe_state rstate; + boolean clamp_vertex_color; + boolean clamp_fragment_color; boolean flatshade; unsigned sprite_coord_enable; float offset_units; @@ -124,6 +127,12 @@ struct r600_pipe_shader { struct r600_bo *bo; struct r600_bo *bo_fetch; struct r600_vertex_element vertex_elements; + struct tgsi_token *tokens; +}; + +struct r600_pipe_sampler_state { + struct r600_pipe_state rstate; + boolean seamless_cube_map; }; /* needed for blitter save */ @@ -191,12 +200,20 @@ struct r600_pipe_context { struct r600_pipe_rasterizer *rasterizer; struct r600_pipe_state vgt; struct r600_pipe_state spi; + struct pipe_query *current_render_cond; + unsigned current_render_cond_mode; + struct pipe_query *saved_render_cond; + unsigned saved_render_cond_mode; /* shader information */ + boolean clamp_vertex_color; + boolean clamp_fragment_color; + boolean spi_dirty; unsigned sprite_coord_enable; boolean flatshade; boolean export_16bpc; unsigned alpha_ref; boolean alpha_ref_dirty; + unsigned nr_cbufs; struct r600_textures_info ps_samplers; struct r600_pipe_fences fences; @@ -204,7 +221,9 @@ struct r600_pipe_context { struct u_vbuf_mgr *vbuf_mgr; struct util_slab_mempool pool_transfers; boolean blit; + boolean have_depth_texture, have_depth_fb; + unsigned default_ps_gprs, default_vs_gprs; }; struct r600_drawl { @@ -252,7 +271,7 @@ void r600_init_query_functions(struct r600_pipe_context *rctx); void r600_init_context_resource_functions(struct r600_pipe_context *r600); /* r600_shader.c */ -int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens); +int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader); void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader); int r600_find_vs_semantic_index(struct r600_shader *vs, struct r600_shader *ps, int id); @@ -270,6 +289,7 @@ void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx, void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, unsigned offset, unsigned stride); +void r600_adjust_gprs(struct r600_pipe_context *rctx); /* r600_texture.c */ void r600_init_screen_texture_functions(struct pipe_screen *screen); diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 181ea3f9e49..bedb48b6031 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -75,6 +75,9 @@ static void r600_render_condition(struct pipe_context *ctx, struct r600_query *rquery = (struct r600_query *)query; int wait_flag = 0; + rctx->current_render_cond = query; + rctx->current_render_cond_mode = mode; + if (!query) { rctx->ctx.predicate_drawing = false; r600_query_predication(&rctx->ctx, NULL, PREDICATION_OP_CLEAR, 1); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 39e6d85d7b4..f83d7079b29 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -63,10 +63,6 @@ int r600_find_vs_semantic_index(struct r600_shader *vs, { struct r600_shader_io *input = &ps->input[id]; - /* position/face doesn't get/need a semantic index */ - if (input->name == TGSI_SEMANTIC_POSITION || input->name == TGSI_SEMANTIC_FACE) - return 0; - for (int i = 0; i < vs->noutput; i++) { if (input->name == vs->output[i].name && input->sid == vs->output[i].sid) { @@ -85,7 +81,8 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s /* copy new shader */ if (shader->bo == NULL) { - shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0); + /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ + shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE); if (shader->bo == NULL) { return -ENOMEM; } @@ -121,9 +118,9 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s return 0; } -static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); +static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader); -int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) +int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader) { static int dump_shaders = -1; struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; @@ -136,10 +133,10 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s if (dump_shaders) { fprintf(stderr, "--------------------------------------------------------------\n"); - tgsi_dump(tokens, 0); + tgsi_dump(shader->tokens, 0); } shader->shader.family = r600_get_family(rctx->radeon); - r = r600_shader_from_tgsi(tokens, &shader->shader); + r = r600_shader_from_tgsi(rctx, shader); if (r) { R600_ERR("translation from TGSI failed !\n"); return r; @@ -162,6 +159,8 @@ void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader r600_bo_reference(rctx->radeon, &shader->bo, NULL); r600_bc_clear(&shader->shader.bc); + + memset(&shader->shader,0,sizeof(struct r600_shader)); } /* @@ -189,7 +188,7 @@ struct r600_shader_ctx { struct r600_shader_tgsi_instruction *inst_info; struct r600_bc *bc; struct r600_shader *shader; - struct r600_shader_src src[3]; + struct r600_shader_src src[4]; u32 *literals; u32 nliterals; u32 max_driver_temp_used; @@ -597,15 +596,17 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) return 0; } -static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) +static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader) { + struct r600_shader *shader = &pipeshader->shader; + struct tgsi_token *tokens = pipeshader->tokens; struct tgsi_full_immediate *immediate; struct tgsi_full_property *property; struct r600_shader_ctx ctx; struct r600_bc_output output[32]; unsigned output_done, noutput; unsigned opcode; - int i, r = 0, pos0; + int i, j, r = 0, pos0; ctx.bc = &shader->bc; ctx.shader = shader; @@ -619,6 +620,11 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh shader->processor_type = ctx.type; ctx.bc->type = shader->processor_type; + shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) || + ((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color)); + + shader->nr_cbufs = rctx->nr_cbufs; + /* register allocations */ /* Values [0,127] correspond to GPR[0..127]. * Values [128,159] correspond to constant buffer bank 0 @@ -728,52 +734,103 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh goto out_err; } } - /* export output */ + noutput = shader->noutput; + + /* clamp color outputs */ + if (shader->clamp_color) { + for (i = 0; i < noutput; i++) { + if (shader->output[i].name == TGSI_SEMANTIC_COLOR || + shader->output[i].name == TGSI_SEMANTIC_BCOLOR) { + + int j; + for (j = 0; j < 4; j++) { + struct r600_bc_alu alu; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + /* MOV_SAT R, R */ + alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.dst.sel = shader->output[i].gpr; + alu.dst.chan = j; + alu.dst.write = 1; + alu.dst.clamp = 1; + alu.src[0].sel = alu.dst.sel; + alu.src[0].chan = j; + + if (j == 3) { + alu.last = 1; + } + r = r600_bc_add_alu(ctx.bc, &alu); + if (r) + return r; + } + } + } + } + + /* export output */ + j = 0; for (i = 0, pos0 = 0; i < noutput; i++) { memset(&output[i], 0, sizeof(struct r600_bc_output)); - output[i].gpr = shader->output[i].gpr; - output[i].elem_size = 3; - output[i].swizzle_x = 0; - output[i].swizzle_y = 1; - output[i].swizzle_z = 2; - output[i].swizzle_w = 3; - output[i].burst_count = 1; - output[i].barrier = 1; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; - output[i].array_base = i - pos0; - output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); + output[i + j].gpr = shader->output[i].gpr; + output[i + j].elem_size = 3; + output[i + j].swizzle_x = 0; + output[i + j].swizzle_y = 1; + output[i + j].swizzle_z = 2; + output[i + j].swizzle_w = 3; + output[i + j].burst_count = 1; + output[i + j].barrier = 1; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + output[i + j].array_base = i - pos0; + output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); switch (ctx.type) { case TGSI_PROCESSOR_VERTEX: if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { - output[i].array_base = 60; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + output[i + j].array_base = 60; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; /* position doesn't count in array_base */ pos0++; } if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { - output[i].array_base = 61; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + output[i + j].array_base = 61; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; /* position doesn't count in array_base */ pos0++; } break; case TGSI_PROCESSOR_FRAGMENT: if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { - output[i].array_base = shader->output[i].sid; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + output[i + j].array_base = shader->output[i].sid; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + if (shader->fs_write_all && (shader->family >= CHIP_CEDAR)) { + for (j = 1; j < shader->nr_cbufs; j++) { + memset(&output[i + j], 0, sizeof(struct r600_bc_output)); + output[i + j].gpr = shader->output[i].gpr; + output[i + j].elem_size = 3; + output[i + j].swizzle_x = 0; + output[i + j].swizzle_y = 1; + output[i + j].swizzle_z = 2; + output[i + j].swizzle_w = 3; + output[i + j].burst_count = 1; + output[i + j].barrier = 1; + output[i + j].array_base = shader->output[i].sid + j; + output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + } + j--; + } } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { - output[i].array_base = 61; - output[i].swizzle_x = 2; - output[i].swizzle_y = 7; - output[i].swizzle_z = output[i].swizzle_w = 7; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + output[i + j].array_base = 61; + output[i + j].swizzle_x = 2; + output[i + j].swizzle_y = 7; + output[i + j].swizzle_z = output[i + j].swizzle_w = 7; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { - output[i].array_base = 61; - output[i].swizzle_x = 7; - output[i].swizzle_y = 1; - output[i].swizzle_z = output[i].swizzle_w = 7; - output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + output[i + j].array_base = 61; + output[i + j].swizzle_x = 7; + output[i + j].swizzle_y = 1; + output[i + j].swizzle_z = output[i + j].swizzle_w = 7; + output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else { R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); r = -EINVAL; @@ -786,6 +843,7 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh goto out_err; } } + noutput += j; /* add fake param output for vertex shader if no param is exported */ if (ctx.type == TGSI_PROCESSOR_VERTEX) { for (i = 0, pos0 = 0; i < noutput; i++) { @@ -1306,41 +1364,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int r; - /* dst.x, <- 1.0 */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ - alu.src[0].chan = 0; - tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - - /* dst.y = max(src.x, 0.0) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ - alu.src[1].chan = 0; - tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - - /* dst.w, <- 1.0 */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = V_SQ_ALU_SRC_1; - alu.src[0].chan = 0; - tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); - alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - if (inst->Dst[0].Register.WriteMask & (1 << 2)) { int chan; @@ -1369,7 +1392,9 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); r600_bc_src(&alu.src[0], &ctx->src[0], 1); - tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 2; + alu.dst.write = 1; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1426,6 +1451,42 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) return r; } } + + /* dst.x, <- 1.0 */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ + alu.src[0].chan = 0; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* dst.y = max(src.x, 0.0) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ + alu.src[1].chan = 0; + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* dst.w, <- 1.0 */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.src[0].sel = V_SQ_ALU_SRC_1; + alu.src[0].chan = 0; + tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); + alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + return 0; } @@ -1748,6 +1809,22 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) return 0; } +static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx, + unsigned index) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY && + inst->Src[index].Register.File != TGSI_FILE_INPUT) || + ctx->src[index].neg || ctx->src[index].abs; +} + +static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, + unsigned index) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index; +} + static int tgsi_tex(struct r600_shader_ctx *ctx) { static float one_point_five = 1.5f; @@ -1755,19 +1832,70 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) struct r600_bc_tex tex; struct r600_bc_alu alu; unsigned src_gpr; - int r, i; + int r, i, j; int opcode; /* Texture fetch instructions can only use gprs as source. * Also they cannot negate the source or take the absolute value */ - const boolean src_requires_loading = - (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY && - inst->Src[0].Register.File != TGSI_FILE_INPUT) || - ctx->src[0].neg || ctx->src[0].abs; + const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0); boolean src_loaded = FALSE; + unsigned sampler_src_reg = 1; + + src_gpr = tgsi_tex_get_src_gpr(ctx, 0); + + if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { + /* TGSI moves the sampler to src reg 3 for TXD */ + sampler_src_reg = 3; + + for (i = 1; i < 3; i++) { + /* set gradients h/v */ + memset(&tex, 0, sizeof(struct r600_bc_tex)); + tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H : + SQ_TEX_INST_SET_GRADIENTS_V; + tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); + tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; + + if (tgsi_tex_src_requires_loading(ctx, i)) { + tex.src_gpr = r600_get_temp(ctx); + tex.src_sel_x = 0; + tex.src_sel_y = 1; + tex.src_sel_z = 2; + tex.src_sel_w = 3; + + for (j = 0; j < 4; j++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + r600_bc_src(&alu.src[0], &ctx->src[i], j); + alu.dst.sel = tex.src_gpr; + alu.dst.chan = j; + if (j == 3) + alu.last = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } - src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; - - if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { + } else { + tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i); + tex.src_sel_x = ctx->src[i].swizzle[0]; + tex.src_sel_y = ctx->src[i].swizzle[1]; + tex.src_sel_z = ctx->src[i].swizzle[2]; + tex.src_sel_w = ctx->src[i].swizzle[3]; + tex.src_rel = ctx->src[i].rel; + } + tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */ + tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; + if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { + tex.coord_type_x = 1; + tex.coord_type_y = 1; + tex.coord_type_z = 1; + tex.coord_type_w = 1; + } + r = r600_bc_add_tex(ctx->bc, &tex); + if (r) + return r; + } + } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { int out_chan; /* Add perspective divide */ if (ctx->bc->chiprev == CHIPREV_CAYMAN) { @@ -1954,13 +2082,24 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } opcode = ctx->inst_info->r600_opcode; - if (opcode == SQ_TEX_INST_SAMPLE && - (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) - opcode = SQ_TEX_INST_SAMPLE_C; + if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) { + switch (opcode) { + case SQ_TEX_INST_SAMPLE: + opcode = SQ_TEX_INST_SAMPLE_C; + break; + case SQ_TEX_INST_SAMPLE_L: + opcode = SQ_TEX_INST_SAMPLE_C_L; + break; + case SQ_TEX_INST_SAMPLE_G: + opcode = SQ_TEX_INST_SAMPLE_C_G; + break; + } + } memset(&tex, 0, sizeof(struct r600_bc_tex)); tex.inst = opcode; - tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; + + tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; tex.src_gpr = src_gpr; tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; @@ -3085,7 +3224,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, @@ -3191,7 +3330,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, - {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq}, {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, @@ -3243,7 +3382,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, @@ -3401,7 +3540,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 8f96ce5085c..76aebf2b1ea 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -46,6 +46,8 @@ struct r600_shader { enum radeon_family family; boolean uses_kill; boolean fs_write_all; + boolean clamp_color; + unsigned nr_cbufs; }; #endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index a6cfa704ca5..7c1976f12e0 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -299,6 +299,8 @@ static void *r600_create_rs_state(struct pipe_context *ctx, } rstate = &rs->rstate; + rs->clamp_vertex_color = state->clamp_vertex_color; + rs->clamp_fragment_color = state->clamp_fragment_color; rs->flatshade = state->flatshade; rs->sprite_coord_enable = state->sprite_coord_enable; @@ -374,14 +376,17 @@ static void *r600_create_rs_state(struct pipe_context *ctx, static void *r600_create_sampler_state(struct pipe_context *ctx, const struct pipe_sampler_state *state) { - struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + struct r600_pipe_sampler_state *ss = CALLOC_STRUCT(r600_pipe_sampler_state); + struct r600_pipe_state *rstate; union util_color uc; unsigned aniso_flag_offset = state->max_anisotropy > 1 ? 4 : 0; - if (rstate == NULL) { + if (ss == NULL) { return NULL; } + ss->seamless_cube_map = state->seamless_cube_map; + rstate = &ss->rstate; rstate->id = R600_PIPE_STATE_SAMPLER; util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); r600_pipe_state_add_reg_noblock(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, @@ -412,7 +417,6 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c struct pipe_resource *texture, const struct pipe_sampler_view *state) { - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); struct r600_pipe_resource_state *rstate; const struct util_format_description *desc; @@ -422,7 +426,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c uint32_t word4 = 0, yuv_format = 0, pitch = 0; unsigned char swizzle[4], array_mode = 0, tile_type = 0; struct r600_bo *bo[2]; - unsigned height, depth; + unsigned width, height, depth, offset_level, last_level; if (resource == NULL) return NULL; @@ -448,7 +452,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c } desc = util_format_description(state->format); if (desc == NULL) { - R600_ERR("unknow format %d\n", state->format); + R600_ERR("unknown format %d\n", state->format); } tmp = (struct r600_resource_texture *)texture; if (tmp->depth && !tmp->is_flushing_texture) { @@ -464,12 +468,18 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c rbuffer = &tmp->resource; bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; - pitch = align(tmp->pitch_in_blocks[0] * util_format_get_blockwidth(state->format), 8); - array_mode = tmp->array_mode[0]; + + offset_level = state->u.tex.first_level; + last_level = state->u.tex.last_level - offset_level; + width = u_minify(texture->width0, offset_level); + height = u_minify(texture->height0, offset_level); + depth = u_minify(texture->depth0, offset_level); + + pitch = align(tmp->pitch_in_blocks[offset_level] * + util_format_get_blockwidth(state->format), 8); + array_mode = tmp->array_mode[offset_level]; tile_type = tmp->tile_type; - height = texture->height0; - depth = texture->depth0; if (texture->target == PIPE_TEXTURE_1D_ARRAY) { height = 1; depth = texture->array_size; @@ -484,18 +494,18 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c S_038000_TILE_MODE(array_mode) | S_038000_TILE_TYPE(tile_type) | S_038000_PITCH((pitch / 8) - 1) | - S_038000_TEX_WIDTH(texture->width0 - 1)); + S_038000_TEX_WIDTH(width - 1)); rstate->val[1] = (S_038004_TEX_HEIGHT(height - 1) | S_038004_TEX_DEPTH(depth - 1) | S_038004_DATA_FORMAT(format)); - rstate->val[2] = (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8; - rstate->val[3] = (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8; + rstate->val[2] = (tmp->offset[offset_level] + r600_bo_offset(bo[0])) >> 8; + rstate->val[3] = (tmp->offset[offset_level+1] + r600_bo_offset(bo[1])) >> 8; rstate->val[4] = (word4 | S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | S_038010_REQUEST_SIZE(1) | S_038010_ENDIAN_SWAP(endian) | - S_038010_BASE_LEVEL(state->u.tex.first_level)); - rstate->val[5] = (S_038014_LAST_LEVEL(state->u.tex.last_level) | + S_038010_BASE_LEVEL(0)); + rstate->val[5] = (S_038014_LAST_LEVEL(last_level) | S_038014_BASE_ARRAY(state->u.tex.first_layer) | S_038014_LAST_ARRAY(state->u.tex.last_layer)); rstate->val[6] = (S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE) | @@ -524,13 +534,16 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; int i; + int has_depth = 0; for (i = 0; i < count; i++) { if (&rctx->ps_samplers.views[i]->base != views[i]) { - if (resource[i]) + if (resource[i]) { + if (((struct r600_resource_texture *)resource[i]->base.texture)->depth) + has_depth = 1; r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i + R600_MAX_CONST_BUFFERS); - else + } else r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i + R600_MAX_CONST_BUFFERS); @@ -538,6 +551,11 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], views[i]); + } else { + if (resource[i]) { + if (((struct r600_resource_texture *)resource[i]->base.texture)->depth) + has_depth = 1; + } } } for (i = count; i < NUM_TEX_UNITS; i++) { @@ -547,30 +565,61 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); } } + rctx->have_depth_texture = has_depth; rctx->ps_samplers.n_views = count; } +static void r600_set_seamless_cubemap(struct r600_pipe_context *rctx, boolean enable) +{ + struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); + if (rstate == NULL) + return; + + rstate->id = R600_PIPE_STATE_SEAMLESS_CUBEMAP; + r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, + (enable ? 0 : S_009508_DISABLE_CUBE_WRAP(1)), + 1, NULL); + + free(rctx->states[R600_PIPE_STATE_SEAMLESS_CUBEMAP]); + rctx->states[R600_PIPE_STATE_SEAMLESS_CUBEMAP] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + static void r600_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; + struct r600_pipe_sampler_state **sstates = (struct r600_pipe_sampler_state **)states; + int seamless = -1; memcpy(rctx->ps_samplers.samplers, states, sizeof(void*) * count); rctx->ps_samplers.n_samplers = count; for (int i = 0; i < count; i++) { - r600_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i); + r600_context_pipe_state_set_ps_sampler(&rctx->ctx, &sstates[i]->rstate, i); + + if (sstates[i]) + seamless = sstates[i]->seamless_cube_map; } + + if (seamless != -1) + r600_set_seamless_cubemap(rctx, seamless); } static void r600_bind_vs_sampler(struct pipe_context *ctx, unsigned count, void **states) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; + struct r600_pipe_sampler_state **sstates = (struct r600_pipe_sampler_state **)states; + int seamless = -1; for (int i = 0; i < count; i++) { - r600_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i); + r600_context_pipe_state_set_vs_sampler(&rctx->ctx, &sstates[i]->rstate, i); + + if (sstates[i]) + seamless = sstates[i]->seamless_cube_map; } + + if (seamless != -1) + r600_set_seamless_cubemap(rctx, seamless); } static void r600_set_clip_state(struct pipe_context *ctx, @@ -730,6 +779,9 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta surf = (struct r600_surface *)state->cbufs[cb]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; + if (rtex->depth) + rctx->have_depth_fb = TRUE; + if (rtex->depth && !rtex->is_flushing_texture) { r600_texture_depth_flush(&rctx->context, state->cbufs[cb]->texture, TRUE); rtex = rtex->flushed_depth_texture; @@ -892,6 +944,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, util_copy_framebuffer_state(&rctx->framebuffer, state); /* build states */ + rctx->have_depth_fb = 0; for (int i = 0; i < state->nr_cbufs; i++) { r600_cb(rctx, rstate, state, i); } @@ -1031,6 +1084,46 @@ void r600_init_state_functions(struct r600_pipe_context *rctx) rctx->context.texture_barrier = r600_texture_barrier; } +void r600_adjust_gprs(struct r600_pipe_context *rctx) +{ + enum radeon_family family; + struct r600_pipe_state rstate; + unsigned num_ps_gprs = rctx->default_ps_gprs; + unsigned num_vs_gprs = rctx->default_vs_gprs; + unsigned tmp; + int diff; + + family = r600_get_family(rctx->radeon); + + if (family >= CHIP_CEDAR) + return; + + if (!rctx->ps_shader && !rctx->vs_shader) + return; + + if (rctx->ps_shader->shader.bc.ngpr > rctx->default_ps_gprs) + { + diff = rctx->ps_shader->shader.bc.ngpr - rctx->default_ps_gprs; + num_vs_gprs -= diff; + num_ps_gprs += diff; + } + + if (rctx->vs_shader->shader.bc.ngpr > rctx->default_vs_gprs) + { + diff = rctx->vs_shader->shader.bc.ngpr - rctx->default_vs_gprs; + num_ps_gprs -= diff; + num_vs_gprs += diff; + } + + tmp = 0; + tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); + tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); + rstate.nregs = 0; + r600_pipe_state_add_reg(&rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0x0FFFFFFF, NULL); + + r600_context_pipe_state_set(&rctx->ctx, &rstate); +} + void r600_init_config(struct r600_pipe_context *rctx) { int ps_prio; @@ -1173,6 +1266,9 @@ void r600_init_config(struct r600_pipe_context *rctx) break; } + rctx->default_ps_gprs = num_ps_gprs; + rctx->default_vs_gprs = num_vs_gprs; + rstate->id = R600_PIPE_STATE_CONFIG; /* SQ_CONFIG */ @@ -1206,7 +1302,7 @@ void r600_init_config(struct r600_pipe_context *rctx) /* SQ_GPR_RESOURCE_MGMT_2 */ tmp = 0; tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - tmp |= S_008C08_NUM_GS_GPRS(num_es_gprs); + tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs); r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); /* SQ_THREAD_RESOURCE_MGMT */ @@ -1234,14 +1330,22 @@ void r600_init_config(struct r600_pipe_context *rctx) if (family >= CHIP_RV770) { r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00004000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, 0x07000002, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, + S_009508_DISABLE_CUBE_ANISO(1) | + S_009508_SYNC_GRADIENT(1) | + S_009508_SYNC_WALKER(1) | + S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL); } else { r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, 0x07000003, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, + S_009508_DISABLE_CUBE_ANISO(1) | + S_009508_SYNC_GRADIENT(1) | + S_009508_SYNC_WALKER(1) | + S_009508_SYNC_ALIGNER(1), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index a670ac02be2..d9140403e5a 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -28,6 +28,7 @@ #include <util/u_format.h> #include <pipebuffer/pb_buffer.h> #include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" #include "r600_formats.h" #include "r600_pipe.h" #include "r600d.h" @@ -99,6 +100,8 @@ void r600_bind_rs_state(struct pipe_context *ctx, void *state) if (state == NULL) return; + rctx->clamp_vertex_color = rs->clamp_vertex_color; + rctx->clamp_fragment_color = rs->clamp_fragment_color; rctx->flatshade = rs->flatshade; rctx->sprite_coord_enable = rs->sprite_coord_enable; rctx->rasterizer = rs; @@ -112,7 +115,7 @@ void r600_bind_rs_state(struct pipe_context *ctx, void *state) r600_polygon_offset_update(rctx); } if (rctx->ps_shader && rctx->vs_shader) - r600_spi_update(rctx); + rctx->spi_dirty = true; } void r600_delete_rs_state(struct pipe_context *ctx, void *state) @@ -257,7 +260,9 @@ void *r600_create_shader_state(struct pipe_context *ctx, struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader); int r; - r = r600_pipe_shader_create(ctx, shader, state->tokens); + shader->tokens = tgsi_dup_tokens(state->tokens); + + r = r600_pipe_shader_create(ctx, shader); if (r) { return NULL; } @@ -273,8 +278,10 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state) if (state) { r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_shader->rstate); } - if (rctx->ps_shader && rctx->vs_shader) - r600_spi_update(rctx); + if (rctx->ps_shader && rctx->vs_shader) { + rctx->spi_dirty = true; + r600_adjust_gprs(rctx); + } } void r600_bind_vs_shader(struct pipe_context *ctx, void *state) @@ -286,8 +293,10 @@ void r600_bind_vs_shader(struct pipe_context *ctx, void *state) if (state) { r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_shader->rstate); } - if (rctx->ps_shader && rctx->vs_shader) - r600_spi_update(rctx); + if (rctx->ps_shader && rctx->vs_shader) { + rctx->spi_dirty = true; + r600_adjust_gprs(rctx); + } } void r600_delete_ps_shader(struct pipe_context *ctx, void *state) @@ -299,6 +308,7 @@ void r600_delete_ps_shader(struct pipe_context *ctx, void *state) rctx->ps_shader = NULL; } + free(shader->tokens); r600_pipe_shader_destroy(ctx, shader); free(shader); } @@ -312,6 +322,7 @@ void r600_delete_vs_shader(struct pipe_context *ctx, void *state) rctx->vs_shader = NULL; } + free(shader->tokens); r600_pipe_shader_destroy(ctx, shader); free(shader); } @@ -347,14 +358,23 @@ static void r600_spi_update(struct r600_pipe_context *rctx) struct r600_pipe_shader *shader = rctx->ps_shader; struct r600_pipe_state *rstate = &rctx->spi; struct r600_shader *rshader = &shader->shader; - unsigned i, tmp; + unsigned i, tmp, sid; if (rctx->spi.id == 0) r600_spi_block_init(rctx, &rctx->spi); rstate->nregs = 0; for (i = 0; i < rshader->ninput; i++) { - tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); + if (rshader->input[i].name == TGSI_SEMANTIC_POSITION || + rshader->input[i].name == TGSI_SEMANTIC_FACE) + if (rctx->family >= CHIP_CEDAR) + continue; + else + sid=0; + else + sid=r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i); + + tmp = S_028644_SEMANTIC(sid); if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || @@ -378,6 +398,7 @@ static void r600_spi_update(struct r600_pipe_context *rctx) r600_pipe_state_mod_reg(rstate, tmp); } + rctx->spi_dirty = false; r600_context_pipe_state_set(&rctx->ctx, rstate); } @@ -517,21 +538,39 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) } } +static int r600_shader_rebuild(struct pipe_context * ctx, struct r600_pipe_shader * shader) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + int r; + + r600_pipe_shader_destroy(ctx, shader); + r = r600_pipe_shader_create(ctx, shader); + if (r) { + return r; + } + r600_context_pipe_state_set(&rctx->ctx, &shader->rstate); + + return 0; +} + void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_resource *rbuffer; - u32 vgt_dma_index_type, vgt_dma_swap_mode, vgt_draw_initiator, mask; struct r600_draw rdraw; - struct r600_drawl draw = {}; - unsigned prim; + struct r600_drawl draw; + unsigned prim, mask; - r600_flush_depth_textures(rctx); - u_vbuf_mgr_draw_begin(rctx->vbuf_mgr, info, NULL, NULL); + if (!rctx->blit) { + if (rctx->have_depth_fb || rctx->have_depth_texture) + r600_flush_depth_textures(rctx); + } + u_vbuf_mgr_draw_begin(rctx->vbuf_mgr, info); r600_vertex_buffer_update(rctx); draw.info = *info; draw.ctx = ctx; + draw.index_buffer = NULL; if (info->indexed && rctx->index_buffer.buffer) { draw.info.start += rctx->index_buffer.offset / rctx->index_buffer.index_size; pipe_resource_reference(&draw.index_buffer, rctx->index_buffer.buffer); @@ -549,57 +588,29 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) r600_upload_index_buffer(rctx, &draw); } } else { + draw.index_size = 0; + draw.index_buffer_offset = 0; draw.info.index_bias = info->start; } - vgt_dma_swap_mode = 0; - switch (draw.index_size) { - case 2: - vgt_draw_initiator = 0; - vgt_dma_index_type = 0; - if (R600_BIG_ENDIAN) { - vgt_dma_swap_mode = ENDIAN_8IN16; - } - break; - case 4: - vgt_draw_initiator = 0; - vgt_dma_index_type = 1; - if (R600_BIG_ENDIAN) { - vgt_dma_swap_mode = ENDIAN_8IN32; - } - break; - case 0: - vgt_draw_initiator = 2; - vgt_dma_index_type = 0; - break; - default: - R600_ERR("unsupported index size %d\n", draw.index_size); - return; - } if (r600_conv_pipe_prim(draw.info.mode, &prim)) return; - if (unlikely(rctx->ps_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - if (unlikely(rctx->vs_shader == NULL)) { - R600_ERR("missing vertex shader\n"); - return; - } - /* there should be enough input */ - if (rctx->vertex_elements->count < rctx->vs_shader->shader.bc.nresource) { - R600_ERR("%d resources provided, expecting %d\n", - rctx->vertex_elements->count, rctx->vs_shader->shader.bc.nresource); - return; - } + + if (rctx->vs_shader->shader.clamp_color != rctx->clamp_vertex_color) + r600_shader_rebuild(ctx, rctx->vs_shader); + + if ((rctx->ps_shader->shader.clamp_color != rctx->clamp_fragment_color) || + ((rctx->family >= CHIP_CEDAR) && rctx->ps_shader->shader.fs_write_all && + (rctx->ps_shader->shader.nr_cbufs != rctx->nr_cbufs))) + r600_shader_rebuild(ctx, rctx->ps_shader); + + if (rctx->spi_dirty) + r600_spi_update(rctx); if (rctx->alpha_ref_dirty) r600_update_alpha_ref(rctx); - mask = 0; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - mask |= (0xF << (i * 4)); - } + mask = (1ULL << ((unsigned)rctx->framebuffer.nr_cbufs * 4)) - 1; if (rctx->vgt.id != R600_PIPE_STATE_VGT) { rctx->vgt.id = R600_PIPE_STATE_VGT; @@ -633,8 +644,10 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) rdraw.vgt_num_indices = draw.info.count; rdraw.vgt_num_instances = draw.info.instance_count; - rdraw.vgt_index_type = vgt_dma_index_type | (vgt_dma_swap_mode << 2); - rdraw.vgt_draw_initiator = vgt_draw_initiator; + rdraw.vgt_index_type = ((draw.index_size == 4) ? 1 : 0); + if (R600_BIG_ENDIAN) + rdraw.vgt_index_type |= (draw.index_size >> 1) << 2; + rdraw.vgt_draw_initiator = draw.index_size ? 0 : 2; rdraw.indices = NULL; if (draw.index_buffer) { rbuffer = (struct r600_resource*)draw.index_buffer; diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index acd41a21214..8711dbf1720 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -284,7 +284,7 @@ static inline uint32_t r600_translate_dbformat(enum pipe_format format) static inline uint32_t r600_translate_colorswap(enum pipe_format format) { switch (format) { - /* 8-bit buffers. */ + /* 8-bit buffers. */ case PIPE_FORMAT_A8_UNORM: return V_0280A0_SWAP_ALT_REV; case PIPE_FORMAT_I8_UNORM: @@ -297,7 +297,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_L4A4_UNORM: return V_0280A0_SWAP_ALT; - /* 16-bit buffers. */ + /* 16-bit buffers. */ case PIPE_FORMAT_B5G6R5_UNORM: return V_0280A0_SWAP_STD_REV; @@ -320,9 +320,10 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R16_UNORM: case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R16_FLOAT: return V_0280A0_SWAP_STD; - /* 32-bit buffers. */ + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: return V_0280A0_SWAP_STD_REV; @@ -368,13 +369,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) case PIPE_FORMAT_R32_FLOAT: return V_0280A0_SWAP_STD; - /* 64-bit buffers. */ + /* 64-bit buffers. */ case PIPE_FORMAT_R32G32_FLOAT: case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_FLOAT: - /* 128-bit buffers. */ + /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: case PIPE_FORMAT_R32G32B32A32_SNORM: case PIPE_FORMAT_R32G32B32A32_UNORM: @@ -392,7 +393,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_L4A4_UNORM: return V_0280A0_COLOR_4_4; - /* 8-bit buffers. */ + /* 8-bit buffers. */ case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: @@ -401,7 +402,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R8_SNORM: return V_0280A0_COLOR_8; - /* 16-bit buffers. */ + /* 16-bit buffers. */ case PIPE_FORMAT_B5G6R5_UNORM: return V_0280A0_COLOR_5_6_5; @@ -425,7 +426,10 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R16_SNORM: return V_0280A0_COLOR_16; - /* 32-bit buffers. */ + case PIPE_FORMAT_R16_FLOAT: + return V_0280A0_COLOR_16_FLOAT; + + /* 32-bit buffers. */ case PIPE_FORMAT_A8B8G8R8_SRGB: case PIPE_FORMAT_A8B8G8R8_UNORM: case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -468,7 +472,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R11G11B10_FLOAT: return V_0280A0_COLOR_10_11_11_FLOAT; - /* 64-bit buffers. */ + /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16_USCALED: case PIPE_FORMAT_R16G16B16A16_USCALED: case PIPE_FORMAT_R16G16B16_SSCALED: @@ -488,20 +492,21 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R32G32_SSCALED: return V_0280A0_COLOR_32_32; - /* 128-bit buffers. */ + /* 96-bit buffers. */ case PIPE_FORMAT_R32G32B32_FLOAT: return V_0280A0_COLOR_32_32_32_FLOAT; + + /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: return V_0280A0_COLOR_32_32_32_32_FLOAT; case PIPE_FORMAT_R32G32B32A32_SNORM: case PIPE_FORMAT_R32G32B32A32_UNORM: return V_0280A0_COLOR_32_32_32_32; - /* YUV buffers. */ + /* YUV buffers. */ case PIPE_FORMAT_UYVY: case PIPE_FORMAT_YUYV: default: - /* R600_ERR("unsupported color format %d %s\n", format, util_format_name(format)); */ return ~0; /* Unsupported. */ } } @@ -513,11 +518,11 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_0280A0_COLOR_4_4: return(ENDIAN_NONE); - /* 8-bit buffers. */ + /* 8-bit buffers. */ case V_0280A0_COLOR_8: return(ENDIAN_NONE); - /* 16-bit buffers. */ + /* 16-bit buffers. */ case V_0280A0_COLOR_5_6_5: case V_0280A0_COLOR_1_5_5_5: case V_0280A0_COLOR_4_4_4_4: @@ -525,7 +530,7 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_0280A0_COLOR_8_8: return(ENDIAN_8IN16); - /* 32-bit buffers. */ + /* 32-bit buffers. */ case V_0280A0_COLOR_8_8_8_8: case V_0280A0_COLOR_2_10_10_10: case V_0280A0_COLOR_8_24: @@ -535,7 +540,7 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_0280A0_COLOR_16_16: return(ENDIAN_8IN32); - /* 64-bit buffers. */ + /* 64-bit buffers. */ case V_0280A0_COLOR_16_16_16_16: case V_0280A0_COLOR_16_16_16_16_FLOAT: return(ENDIAN_8IN16); @@ -544,7 +549,7 @@ static INLINE uint32_t r600_colorformat_endian_swap(uint32_t colorformat) case V_0280A0_COLOR_32_32: return(ENDIAN_8IN32); - /* 128-bit buffers. */ + /* 128-bit buffers. */ case V_0280A0_COLOR_32_32_32_FLOAT: case V_0280A0_COLOR_32_32_32_32_FLOAT: case V_0280A0_COLOR_32_32_32_32: @@ -565,7 +570,7 @@ static INLINE boolean r600_is_sampler_format_supported(struct pipe_screen *scree static INLINE boolean r600_is_colorbuffer_format_supported(enum pipe_format format) { return r600_translate_colorformat(format) != ~0 && - r600_translate_colorswap(format) != ~0; + r600_translate_colorswap(format) != ~0; } static INLINE boolean r600_is_zs_format_supported(enum pipe_format format) diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 77cdd8dc33d..854761d17cb 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -243,10 +243,11 @@ static void r600_setup_miptree(struct pipe_screen *screen, struct radeon *radeon = (struct radeon *)screen->winsys; enum chip_class chipc = r600_get_family_class(radeon); unsigned size, layer_size, i, offset; - unsigned nblocksx, nblocksy; + unsigned nblocksx, nblocksy, extra_size = 0; for (i = 0, offset = 0; i <= ptex->last_level; i++) { unsigned blocksize = util_format_get_blocksize(ptex->format); + unsigned base_align = r600_get_base_alignment(screen, ptex->format, array_mode); r600_texture_set_array_mode(screen, rtex, i, array_mode); @@ -265,9 +266,13 @@ static void r600_setup_miptree(struct pipe_screen *screen, else size = layer_size * ptex->array_size; + /* evergreen stores depth and stencil separately */ + if ((chipc >= EVERGREEN) && util_format_is_depth_or_stencil(ptex->format)) + extra_size = align(extra_size + (nblocksx * nblocksy * 1), base_align); + /* align base image and start of miptree */ if ((i == 0) || (i == 1)) - offset = align(offset, r600_get_base_alignment(screen, ptex->format, array_mode)); + offset = align(offset, base_align); rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; rtex->pitch_in_blocks[i] = nblocksx; /* CB talks in elements */ @@ -275,7 +280,7 @@ static void r600_setup_miptree(struct pipe_screen *screen, offset += size; } - rtex->size = offset; + rtex->size = offset + extra_size; } /* Figure out whether u_blitter will fallback to a transfer operation. @@ -1091,8 +1096,9 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen, goto out_word4; } } - + goto out_unknown; } + out_word4: if (word4_p) *word4_p = word4; diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c index 7482d15e12f..307fd57e21a 100644 --- a/src/gallium/drivers/r600/r600_translate.c +++ b/src/gallium/drivers/r600/r600_translate.c @@ -48,6 +48,7 @@ void r600_translate_index_buffer(struct r600_pipe_context *r600, &r600->context, *index_buffer, 0, *start, count, ptr); pipe_resource_reference(index_buffer, out_buffer); + pipe_resource_reference(&out_buffer, NULL); *index_size = 2; *start = out_offset / 2; break; diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 9281b08bd82..f6eec24cc05 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -2556,6 +2556,9 @@ #define S_009508_DISABLE_CUBE_WRAP(x) (((x) & 0x1) << 0) #define G_009508_DISABLE_CUBE_WRAP(x) (((x) >> 0) & 0x1) #define C_009508_DISABLE_CUBE_WRAP 0xFFFFFFFE +#define S_009508_DISABLE_CUBE_ANISO(x) (((x) & 0x1) << 1) +#define G_009508_DISABLE_CUBE_ANISO(x) (((x) >> 1) & 0x1) +#define C_009508_DISABLE_CUBE_ANISO (~(1 << 1)) #define S_009508_SYNC_GRADIENT(x) (((x) & 0x1) << 24) #define G_009508_SYNC_GRADIENT(x) (((x) >> 24) & 0x1) #define C_009508_SYNC_GRADIENT 0xFEFFFFFF @@ -3465,9 +3468,14 @@ #define SQ_TEX_INST_LD 0x03 #define SQ_TEX_INST_GET_GRADIENTS_H 0x7 #define SQ_TEX_INST_GET_GRADIENTS_V 0x8 +#define SQ_TEX_INST_SET_GRADIENTS_H 0xB +#define SQ_TEX_INST_SET_GRADIENTS_V 0xC #define SQ_TEX_INST_SAMPLE 0x10 #define SQ_TEX_INST_SAMPLE_L 0x11 +#define SQ_TEX_INST_SAMPLE_G 0x14 #define SQ_TEX_INST_SAMPLE_C 0x18 +#define SQ_TEX_INST_SAMPLE_C_L 0x19 +#define SQ_TEX_INST_SAMPLE_C_G 0x1C #endif |