diff options
author | Andre Maasikas <[email protected]> | 2010-01-15 21:23:31 -0500 |
---|---|---|
committer | Alex Deucher <[email protected]> | 2010-01-15 23:06:05 -0500 |
commit | 7283a246ce659e69ea9931098a8076848be4b9da (patch) | |
tree | 3ac0ad368e9faee63ae46f60450b43c91dd00fee | |
parent | ec8ce40083364a97af1dc8ff27868140db83ee48 (diff) |
r600: add initial blit support
-rw-r--r-- | src/mesa/drivers/dri/r600/Makefile | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r600_blit.c | 797 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r600_blit.h | 21 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r600_blit_shaders.h | 28 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r600_context.c | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r600_context.h | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r600_texcopy.c | 169 |
7 files changed, 701 insertions, 323 deletions
diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile index 26f47b72687..b90efce17a1 100644 --- a/src/mesa/drivers/dri/r600/Makefile +++ b/src/mesa/drivers/dri/r600/Makefile @@ -59,6 +59,8 @@ DRIVER_SOURCES = \ r700_render.c \ r600_tex.c \ r600_texstate.c \ + r600_blit.c \ + r600_texcopy.c \ r700_debug.c \ $(RADEON_COMMON_SOURCES) \ $(EGL_SOURCES) \ diff --git a/src/mesa/drivers/dri/r600/r600_blit.c b/src/mesa/drivers/dri/r600/r600_blit.c index fbca0889173..6ea1a073701 100644 --- a/src/mesa/drivers/dri/r600/r600_blit.c +++ b/src/mesa/drivers/dri/r600/r600_blit.c @@ -29,22 +29,85 @@ #include "r600_context.h" #include "r600_blit.h" +#include "r600_blit_shaders.h" #include "r600_cmdbuf.h" +static uint32_t mesa_format_to_cb_format(gl_format mesa_format) +{ + uint32_t comp_swap, format, color_info = 0; + //fprintf(stderr,"format for copy %s\n",_mesa_get_format_name(mesa_format)); + switch(mesa_format) { + /* XXX check and add these */ + case MESA_FORMAT_RGBA8888: + comp_swap = SWAP_STD_REV; + format = FMT_8_8_8_8; + break; + case MESA_FORMAT_RGBA8888_REV: + comp_swap = SWAP_STD; + format = FMT_8_8_8_8; + break; + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + comp_swap = SWAP_ALT; + format = FMT_8_8_8_8; + break; + case MESA_FORMAT_ARGB8888_REV: + comp_swap = SWAP_ALT_REV; + format = FMT_8_8_8_8; + break; + case MESA_FORMAT_RGB565: + comp_swap = SWAP_ALT_REV; + format = FMT_5_6_5; + break; + case MESA_FORMAT_ARGB1555: + comp_swap = SWAP_ALT_REV; + format = FMT_1_5_5_5; + break; + case MESA_FORMAT_ARGB4444: + comp_swap = SWAP_ALT; + format = FMT_4_4_4_4; + break; + case MESA_FORMAT_S8_Z24: + format = FMT_8_24; + comp_swap = SWAP_STD; + break; + default: + fprintf(stderr,"Invalid format for copy %s\n",_mesa_get_format_name(mesa_format)); + assert("Invalid format for US output\n"); + return 0; + } + + SETfield(color_info, format, CB_COLOR0_INFO__FORMAT_shift, + CB_COLOR0_INFO__FORMAT_mask); + SETfield(color_info, comp_swap, COMP_SWAP_shift, COMP_SWAP_mask); + + return color_info; + +} + static inline void -set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr) +set_render_target(context_t *context, struct radeon_bo *bo, gl_format format, + int pitch, int bpp, int w, int h, intptr_t dst_offset) { - int id = 0; + uint32_t cb_color0_base, cb_color0_size = 0, cb_color0_info = 0, cb_color0_view; + int nPitchInPixel, id = 0; + BATCH_LOCALS(&context->radeon); + + cb_color0_base = dst_offset / 256; + cb_color0_view = 0; - nPitchInPixel = rrb->pitch/rrb->cpp; + cb_color0_info = mesa_format_to_cb_format(format); + + nPitchInPixel = pitch/bpp; SETfield(cb_color0_size, (nPitchInPixel / 8) - 1, PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask); - SETfield(cb_color0_size, ((nPitchInPixel * height) / 64) - 1, + SETfield(cb_color0_size, ((nPitchInPixel * h) / 64) - 1, SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask); SETfield(cb_color0_info, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask); SETfield(cb_color0_info, ARRAY_LINEAR_GENERAL, - cb_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask); - if(4 == rrb->cpp) + CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask); +/* + if(4 == bpp) { SETfield(cb_color0_info, COLOR_8_8_8_8, CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask); @@ -57,13 +120,15 @@ set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 SETfield(cb_color0_info, SWAP_ALT_REV, COMP_SWAP_shift, COMP_SWAP_mask); } +*/ SETbit(cb_color0_info, SOURCE_FORMAT_bit); - SETbit(cb_color0_info, BLEND_CLAMP_bit); + //SETbit(cb_color0_info, BLEND_CLAMP_bit); + SETbit(cb_color0_info, BLEND_BYPASS_bit); SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); BEGIN_BATCH_NO_AUTOSTATE(3 + 2); R600_OUT_BATCH_REGSEQ(CB_COLOR0_BASE + (4 * id), 1); - R600_OUT_BATCH(0); + R600_OUT_BATCH(cb_color0_base); R600_OUT_BATCH_RELOC(0, bo, 0, @@ -91,68 +156,116 @@ set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 } +static inline void load_shaders(GLcontext * ctx) +{ + + radeonContextPtr radeonctx = RADEON_CONTEXT(ctx); + context_t *context = R700_CONTEXT(ctx); + int i, size; + uint32_t *shader; + + if (context->blit_bo_loaded == 1) + return; + + size = 4096; + context->blit_bo = radeon_bo_open(radeonctx->radeonScreen->bom, 0, + size, 256, RADEON_GEM_DOMAIN_GTT, 0); + radeon_bo_map(context->blit_bo, 1); + shader = context->blit_bo->ptr; + + for(i=0; i<sizeof(r6xx_vs)/4; i++) { + shader[128+i] = r6xx_vs[i]; + } + for(i=0; i<sizeof(r6xx_ps)/4; i++) { + shader[256+i] = r6xx_ps[i]; + } + + radeon_bo_unmap(context->blit_bo); + context->blit_bo_loaded = 1; + +} + static inline void -set_shaders(struct drm_device *dev) +set_shaders(context_t *context) { - /* FS */ - pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context)); - r700->fs.SQ_PGM_START_FS.u32All = r700->vs.SQ_PGM_START_VS.u32All; - r700->fs.SQ_PGM_RESOURCES_FS.u32All = 0; - r700->fs.SQ_PGM_CF_OFFSET_FS.u32All = 0; - /* XXX */ + struct radeon_bo * pbo = context->blit_bo; + BATCH_LOCALS(&context->radeon); + + uint32_t sq_pgm_start_fs = (512 >> 8); + uint32_t sq_pgm_resources_fs = 0; + uint32_t sq_pgm_cf_offset_fs = 0; + + uint32_t sq_pgm_start_vs = (512 >> 8); + uint32_t sq_pgm_resources_vs = (1 << NUM_GPRS_shift); + uint32_t sq_pgm_cf_offset_vs = 0; + + uint32_t sq_pgm_start_ps = (1024 >> 8); + uint32_t sq_pgm_resources_ps = (1 << NUM_GPRS_shift); + uint32_t sq_pgm_cf_offset_ps = 0; + uint32_t sq_pgm_exports_ps = (1 << 1); - if (!pbo) - return; + radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs, + pbo, + RADEON_GEM_DOMAIN_GTT, 0); r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); + /* FS */ + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); R600_OUT_BATCH_REGSEQ(SQ_PGM_START_FS, 1); - R600_OUT_BATCH(r700->fs.SQ_PGM_START_FS.u32All); - R600_OUT_BATCH_RELOC(r700->fs.SQ_PGM_START_FS.u32All, + R600_OUT_BATCH(sq_pgm_start_fs); + R600_OUT_BATCH_RELOC(sq_pgm_start_fs, pbo, - r700->fs.SQ_PGM_START_FS.u32All, + sq_pgm_start_fs, RADEON_GEM_DOMAIN_GTT, 0, 0); END_BATCH(); BEGIN_BATCH_NO_AUTOSTATE(6); - R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_FS, r700->fs.SQ_PGM_RESOURCES_FS.u32All); - R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_FS, r700->fs.SQ_PGM_CF_OFFSET_FS.u32All); + R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_FS, sq_pgm_resources_fs); + R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_FS, sq_pgm_cf_offset_fs); END_BATCH(); /* VS */ - r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); - + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1); - R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All); - R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All, + R600_OUT_BATCH(sq_pgm_start_vs); + R600_OUT_BATCH_RELOC(sq_pgm_start_vs, pbo, - r700->vs.SQ_PGM_START_VS.u32All, + sq_pgm_start_vs, RADEON_GEM_DOMAIN_GTT, 0, 0); END_BATCH(); BEGIN_BATCH_NO_AUTOSTATE(6); - R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, r700->vs.SQ_PGM_RESOURCES_VS.u32All); - R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, r700->vs.SQ_PGM_CF_OFFSET_VS.u32All); + R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, sq_pgm_resources_vs); + R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, sq_pgm_cf_offset_vs); END_BATCH(); /* PS */ - r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); BEGIN_BATCH_NO_AUTOSTATE(3 + 2); R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1); - R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All); - R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All, + R600_OUT_BATCH(sq_pgm_start_ps); + R600_OUT_BATCH_RELOC(sq_pgm_start_ps, pbo, - r700->ps.SQ_PGM_START_PS.u32All, + sq_pgm_start_ps, RADEON_GEM_DOMAIN_GTT, 0, 0); END_BATCH(); BEGIN_BATCH_NO_AUTOSTATE(9); - R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, r700->ps.SQ_PGM_RESOURCES_PS.u32All); - R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, r700->ps.SQ_PGM_EXPORTS_PS.u32All); - R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, r700->ps.SQ_PGM_CF_OFFSET_PS.u32All); + R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, sq_pgm_resources_ps); + R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, sq_pgm_exports_ps); + R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, sq_pgm_cf_offset_ps); + END_BATCH(); + + BEGIN_BATCH_NO_AUTOSTATE(18); + R600_OUT_BATCH_REGVAL(SPI_VS_OUT_CONFIG, 0); //EXPORT_COUNT is - 1 + R600_OUT_BATCH_REGVAL(SPI_VS_OUT_ID_0, 0); + R600_OUT_BATCH_REGVAL(SPI_PS_INPUT_CNTL_0, SEL_CENTROID_bit); + R600_OUT_BATCH_REGVAL(SPI_PS_IN_CONTROL_0, (1 << NUM_INTERP_shift)); + R600_OUT_BATCH_REGVAL(SPI_PS_IN_CONTROL_1, 0); + R600_OUT_BATCH_REGVAL(SPI_INTERP_CONTROL_0, 0); END_BATCH(); COMMIT_BATCH(); @@ -160,8 +273,10 @@ set_shaders(struct drm_device *dev) } static inline void -set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr) +set_vtx_resource(context_t *context) { + struct radeon_bo *bo = context->blit_bo; + BATCH_LOCALS(&context->radeon); BEGIN_BATCH_NO_AUTOSTATE(6); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1)); @@ -194,49 +309,109 @@ set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr) R600_OUT_BATCH(0); R600_OUT_BATCH(0); R600_OUT_BATCH(SQ_TEX_VTX_VALID_BUFFER << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift); - R600_OUT_BATCH_RELOC(uSQ_VTX_CONSTANT_WORD0_0, - paos->bo, - uSQ_VTX_CONSTANT_WORD0_0, + R600_OUT_BATCH_RELOC(SQ_VTX_CONSTANT_WORD0_0, + bo, + SQ_VTX_CONSTANT_WORD0_0, RADEON_GEM_DOMAIN_GTT, 0, 0); END_BATCH(); COMMIT_BATCH(); + radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs, + bo, + RADEON_GEM_DOMAIN_GTT, 0); + } -static inline void -set_tex_resource(drm_radeon_private_t *dev_priv, - int format, int w, int h, int pitch, u64 gpu_addr) +static void +set_tex_resource(context_t * context, + gl_format format, struct radeon_bo *bo, int w, int h, + int pitch, intptr_t src_offset) { - uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; - RING_LOCALS; - DRM_DEBUG("\n"); - - r700SyncSurf(context, bo, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, - 0, TC_ACTION_ENA_bit); - - BEGIN_BATCH_NO_AUTOSTATE(9 + 4); - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); - R600_OUT_BATCH(i * 7); - - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE3); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6); - R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2, - bo, - offset, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3, - bo, - r700->textures[i]->SQ_TEX_RESOURCE3, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - END_BATCH(); - COMMIT_BATCH(); + uint32_t sq_tex_resource0, sq_tex_resource1, sq_tex_resource2, sq_tex_resource4, sq_tex_resource6; + int bpp = _mesa_get_format_bytes(format); + int TexelPitch = pitch/bpp; + // int tex_format = mesa_format_to_us_format(format); + + sq_tex_resource0 = sq_tex_resource1 = sq_tex_resource2 = sq_tex_resource4 = sq_tex_resource6 = 0; + BATCH_LOCALS(&context->radeon); + + SETfield(sq_tex_resource0, SQ_TEX_DIM_2D, DIM_shift, DIM_mask); + SETfield(sq_tex_resource0, ARRAY_LINEAR_GENERAL, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, + SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask); + + if (bpp == 4) { + SETfield(sq_tex_resource1, FMT_8_8_8_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + } else { + SETfield(sq_tex_resource1, FMT_5_6_5, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + SETfield(sq_tex_resource4, SQ_SEL_Z, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); + SETfield(sq_tex_resource4, SQ_SEL_Y, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_1, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + + } + + SETfield(sq_tex_resource0, (TexelPitch/8)-1, PITCH_shift, PITCH_mask); + SETfield(sq_tex_resource0, w - 1, TEX_WIDTH_shift, TEX_WIDTH_mask); + SETfield(sq_tex_resource1, h - 1, TEX_HEIGHT_shift, TEX_HEIGHT_mask); + + sq_tex_resource2 = src_offset / 256; + + SETfield(sq_tex_resource6, SQ_TEX_VTX_VALID_TEXTURE, + SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, + SQ_TEX_RESOURCE_WORD6_0__TYPE_mask); + r700SyncSurf(context, bo, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, + 0, TC_ACTION_ENA_bit); + + BEGIN_BATCH_NO_AUTOSTATE(9 + 4); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); + R600_OUT_BATCH(0 * 7); + + R600_OUT_BATCH(sq_tex_resource0); + R600_OUT_BATCH(sq_tex_resource1); + R600_OUT_BATCH(sq_tex_resource2); + R600_OUT_BATCH(0); //SQ_TEX_RESOURCE3 + R600_OUT_BATCH(sq_tex_resource4); + R600_OUT_BATCH(0); //SQ_TEX_RESOURCE5 + R600_OUT_BATCH(sq_tex_resource6); + R600_OUT_BATCH_RELOC(0, + bo, + 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + R600_OUT_BATCH_RELOC(0, + bo, + 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + COMMIT_BATCH(); +/* if (h < 1) h = 1; @@ -253,251 +428,203 @@ set_tex_resource(drm_radeon_private_t *dev_priv, (2 << 22) | (3 << 25)); - BEGIN_RING(9); - OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); - OUT_RING(0); - OUT_RING(sq_tex_resource_word0); - OUT_RING(sq_tex_resource_word1); - OUT_RING(gpu_addr >> 8); - OUT_RING(gpu_addr >> 8); - OUT_RING(sq_tex_resource_word4); - OUT_RING(0); - OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30); - ADVANCE_RING(); +*/ +} + +static void +set_tex_sampler(context_t * context) +{ + uint32_t sq_tex_sampler_word0 = 0, sq_tex_sampler_word1 = 0, sq_tex_sampler_word2 = 0; + int i = 0; + + SETbit(sq_tex_sampler_word2, SQ_TEX_SAMPLER_WORD2_0__TYPE_bit); + + BATCH_LOCALS(&context->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(5); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3)); + R600_OUT_BATCH(i * 3); + R600_OUT_BATCH(sq_tex_sampler_word0); + R600_OUT_BATCH(sq_tex_sampler_word1); + R600_OUT_BATCH(sq_tex_sampler_word2); + END_BATCH(); } static inline void -set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2) +set_scissors(context_t *context, int x1, int y1, int x2, int y2) { - RING_LOCALS; - DRM_DEBUG("\n"); - - BEGIN_BATCH_NO_AUTOSTATE(22); - R600_OUT_BATCH_REGSEQ(PA_SC_SCREEN_SCISSOR_TL, 2); - R600_OUT_BATCH(r700->PA_SC_SCREEN_SCISSOR_TL.u32All); - R600_OUT_BATCH(r700->PA_SC_SCREEN_SCISSOR_BR.u32All); - - R600_OUT_BATCH_REGSEQ(PA_SC_WINDOW_OFFSET, 12); - R600_OUT_BATCH(r700->PA_SC_WINDOW_OFFSET.u32All); - R600_OUT_BATCH(r700->PA_SC_WINDOW_SCISSOR_TL.u32All); - R600_OUT_BATCH(r700->PA_SC_WINDOW_SCISSOR_BR.u32All); - R600_OUT_BATCH(r700->PA_SC_CLIPRECT_RULE.u32All); - R600_OUT_BATCH(r700->PA_SC_CLIPRECT_0_TL.u32All); - R600_OUT_BATCH(r700->PA_SC_CLIPRECT_0_BR.u32All); - R600_OUT_BATCH(r700->PA_SC_CLIPRECT_1_TL.u32All); - R600_OUT_BATCH(r700->PA_SC_CLIPRECT_1_BR.u32All); - R600_OUT_BATCH(r700->PA_SC_CLIPRECT_2_TL.u32All); - R600_OUT_BATCH(r700->PA_SC_CLIPRECT_2_BR.u32All); - R600_OUT_BATCH(r700->PA_SC_CLIPRECT_3_TL.u32All); - R600_OUT_BATCH(r700->PA_SC_CLIPRECT_3_BR.u32All); - - R600_OUT_BATCH_REGSEQ(PA_SC_GENERIC_SCISSOR_TL, 2); - R600_OUT_BATCH(r700->PA_SC_GENERIC_SCISSOR_TL.u32All); - R600_OUT_BATCH(r700->PA_SC_GENERIC_SCISSOR_BR.u32All); - END_BATCH(); - COMMIT_BATCH(); - - - BEGIN_RING(12); - OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); - OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); - OUT_RING((x1 << 0) | (y1 << 16)); - OUT_RING((x2 << 0) | (y2 << 16)); - - OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); - OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); - OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31)); - OUT_RING((x2 << 0) | (y2 << 16)); - - OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2)); - OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2); - OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31)); - OUT_RING((x2 << 0) | (y2 << 16)); - ADVANCE_RING(); + + int i; + uint32_t clip_tl = 0, clip_br = 0; + + SETfield(clip_tl, 0, PA_SC_CLIPRECT_0_TL__TL_X_shift, + PA_SC_CLIPRECT_0_TL__TL_X_mask); + SETfield(clip_tl, 0, PA_SC_CLIPRECT_0_TL__TL_Y_shift, + PA_SC_CLIPRECT_0_TL__TL_Y_mask); + SETfield(clip_br, 8191, PA_SC_CLIPRECT_0_BR__BR_X_shift, + PA_SC_CLIPRECT_0_BR__BR_X_mask); + SETfield(clip_br, 8191, PA_SC_CLIPRECT_0_BR__BR_Y_shift, + PA_SC_CLIPRECT_0_BR__BR_Y_mask); + + BATCH_LOCALS(&context->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(13); + R600_OUT_BATCH_REGSEQ(PA_SC_SCREEN_SCISSOR_TL, 2); + R600_OUT_BATCH((x1 << 0) | (y1 << 16)); + R600_OUT_BATCH((x2 << 0) | (y2 << 16)); + + R600_OUT_BATCH_REGSEQ(PA_SC_WINDOW_OFFSET, 3); + R600_OUT_BATCH(0); //PA_SC_WINDOW_OFFSET + R600_OUT_BATCH((x1 << 0) | (y1 << 16) | (WINDOW_OFFSET_DISABLE_bit)); //PA_SC_WINDOW_SCISSOR_TL + R600_OUT_BATCH((x2 << 0) | (y2 << 16)); + +/* clip disabled ? + R600_OUT_BATCH(CLIP_RULE_mask); //PA_SC_CLIPRECT_RULE); + R600_OUT_BATCH(clip_tl); //PA_SC_CLIPRECT_0_TL + R600_OUT_BATCH(clip_br); //PA_SC_CLIPRECT_0_BR + R600_OUT_BATCH(clip_tl); // 1 + R600_OUT_BATCH(clip_br); + R600_OUT_BATCH(clip_tl); // 2 + R600_OUT_BATCH(clip_br); + R600_OUT_BATCH(clip_tl); // 3 + R600_OUT_BATCH(clip_br); +*/ + + R600_OUT_BATCH_REGSEQ(PA_SC_GENERIC_SCISSOR_TL, 2); + R600_OUT_BATCH((x1 << 0) | (y1 << 16) | (WINDOW_OFFSET_DISABLE_bit)); + R600_OUT_BATCH((x2 << 0) | (y2 << 16)); + END_BATCH(); + + /* XXX 16 of these PA_SC_VPORT_SCISSOR_0_TL_num ... */ + BEGIN_BATCH_NO_AUTOSTATE(4); + R600_OUT_BATCH_REGSEQ(PA_SC_VPORT_SCISSOR_0_TL, 2 ); + R600_OUT_BATCH((x1 << 0) | (y1 << 16) | (WINDOW_OFFSET_DISABLE_bit)); + R600_OUT_BATCH((x2 << 0) | (y2 << 16)); + END_BATCH(); + +/* + BEGIN_BATCH_NO_AUTOSTATE(2 + 2 * PA_SC_VPORT_ZMIN_0_num); + R600_OUT_BATCH_REGSEQ(PA_SC_VPORT_ZMIN_0, 2 * PA_SC_VPORT_ZMIN_0_num); + for(i = 0 i < PA_SC_VPORT_ZMIN_0_num; i++) + R600_OUT_BATCH(radeonPackFloat32(0.0F)); + R600_OUT_BATCH(radeonPackFloat32(1.0F)); + } + END_BATCH(); +*/ + COMMIT_BATCH(); + } -static inline void -draw_auto(drm_radeon_private_t *dev_priv) +static void +set_vb_data(context_t * context, int src_x, int src_y, int dst_x, int dst_y, + int w, int h, int src_h, unsigned flip_y) { - RING_LOCALS; - DRM_DEBUG("\n"); + float *vb; + radeon_bo_map(context->blit_bo, 1); + vb = context->blit_bo->ptr; - BEGIN_RING(10); - OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); - OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2); - OUT_RING(DI_PT_RECTLIST); + vb[0] = (float)(dst_x); + vb[1] = (float)(dst_y); + vb[2] = (float)(src_x); + vb[3] = (flip_y) ? (float)(src_h - src_y) : (float)src_y; - OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); - OUT_RING(DI_INDEX_SIZE_16_BIT); + vb[4] = (float)(dst_x); + vb[5] = (float)(dst_y + h); + vb[6] = (float)(src_x); + vb[7] = (flip_y) ? (float)(src_h - (src_y + h)) : (float)(src_y + h); - OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); - OUT_RING(1); + vb[8] = (float)(dst_x + w); + vb[9] = (float)(dst_y + h); + vb[10] = (float)(src_x + w); + vb[11] = (flip_y) ? (float)(src_h - (src_y + h)) : (float)(src_y + h); - OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); - OUT_RING(3); - OUT_RING(DI_SRC_SEL_AUTO_INDEX); + radeon_bo_unmap(context->blit_bo); - ADVANCE_RING(); - COMMIT_RING(); } static inline void -set_default_state(drm_radeon_private_t *dev_priv) +draw_auto(context_t *context) { - int i; - u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2; - u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2; - int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs; - int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads; - int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; - RING_LOCALS; - - switch ((dev_priv->flags & RADEON_FAMILY_MASK)) { - case CHIP_R600: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV630: - case CHIP_RV635: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 144; - num_vs_threads = 40; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV610: - case CHIP_RV620: - case CHIP_RS780: - case CHIP_RS880: - default: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV670: - num_ps_gprs = 144; - num_vs_gprs = 40; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 136; - num_vs_threads = 48; - num_gs_threads = 4; - num_es_threads = 4; - num_ps_stack_entries = 40; - num_vs_stack_entries = 40; - num_gs_stack_entries = 32; - num_es_stack_entries = 16; - break; - case CHIP_RV770: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 188; - num_vs_threads = 60; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 256; - num_vs_stack_entries = 256; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV730: - case CHIP_RV740: - num_ps_gprs = 84; - num_vs_gprs = 36; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 188; - num_vs_threads = 60; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - case CHIP_RV710: - num_ps_gprs = 192; - num_vs_gprs = 56; - num_temp_gprs = 4; - num_gs_gprs = 0; - num_es_gprs = 0; - num_ps_threads = 144; - num_vs_threads = 48; - num_gs_threads = 0; - num_es_threads = 0; - num_ps_stack_entries = 128; - num_vs_stack_entries = 128; - num_gs_stack_entries = 0; - num_es_stack_entries = 0; - break; - } + BATCH_LOCALS(&context->radeon); + uint32_t vgt_primitive_type = 0, vgt_index_type = 0, vgt_draw_initiator = 0, vgt_num_indices; + + SETfield(vgt_primitive_type, DI_PT_RECTLIST, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); + SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, + INDEX_TYPE_mask); + SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, + MAJOR_MODE_mask); + SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, + SOURCE_SELECT_mask); + + vgt_num_indices = 3; + + BEGIN_BATCH_NO_AUTOSTATE(10); + // prim + R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1); + R600_OUT_BATCH(vgt_primitive_type); + // index type + R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + R600_OUT_BATCH(vgt_index_type); + // num instances + R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); + R600_OUT_BATCH(1); + // + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + + END_BATCH(); + COMMIT_BATCH(); +} + +static inline void +set_default_state(context_t *context) +{ + int i; + BATCH_LOCALS(&context->radeon); +/* + BEGIN_BATCH_NO_AUTOSTATE(sizeof(r7xx_default_state)/4); + for(i=0; i< sizeof(r7xx_default_state)/4; i++) + R600_OUT_BATCH(r7xx_default_state[i]); + END_BATCH(); +*/ + BEGIN_BATCH_NO_AUTOSTATE(45); + R600_OUT_BATCH_REGVAL(CB_CLRCMP_CONTROL, + (CLRCMP_SEL_SRC << CLRCMP_FCN_SEL_shift)); + R600_OUT_BATCH_REGVAL(SQ_VTX_BASE_VTX_LOC, 0); + R600_OUT_BATCH_REGVAL(SQ_VTX_START_INST_LOC, 0); + R600_OUT_BATCH_REGVAL(DB_DEPTH_INFO, 0); + R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, 0); + R600_OUT_BATCH_REGVAL(CB_SHADER_MASK, (OUTPUT0_ENABLE_mask)); + R600_OUT_BATCH_REGVAL(CB_TARGET_MASK, (TARGET0_ENABLE_mask)); + R600_OUT_BATCH_REGVAL(R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); + R600_OUT_BATCH_REGVAL(CB_COLOR_CONTROL, (0xcc << ROP3_shift)); - if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || - ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || - ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || - ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) || - ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710)) - sq_config = 0; - else - sq_config = R600_VC_ENABLE; - - sq_config |= (R600_DX9_CONSTS | - R600_ALU_INST_PREFER_VECTOR | - R600_PS_PRIO(0) | - R600_VS_PRIO(1) | - R600_GS_PRIO(2) | - R600_ES_PRIO(3)); - - sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) | - R600_NUM_VS_GPRS(num_vs_gprs) | - R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); - sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) | - R600_NUM_ES_GPRS(num_es_gprs)); - sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) | - R600_NUM_VS_THREADS(num_vs_threads) | - R600_NUM_GS_THREADS(num_gs_threads) | - R600_NUM_ES_THREADS(num_es_threads)); - sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | - R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); - sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | - R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries)); - - if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) { +/* + R600_OUT_BATCH_REGSEQ(PA_CL_VPORT_XSCALE_0, 6); + R600_OUT_BATCH(0.0f); // PA_CL_VPORT_XSCALE + R600_OUT_BATCH(0.0f); // PA_CL_VPORT_XOFFSET + R600_OUT_BATCH(0.0f); // PA_CL_VPORT_YSCALE + R600_OUT_BATCH(0.0f); // PA_CL_VPORT_YOFFSET + R600_OUT_BATCH(0.0f); // PA_CL_VPORT_ZSCALE + R600_OUT_BATCH(0.0f); // PA_CL_VPORT_ZOFFSET +*/ + R600_OUT_BATCH_REGVAL(PA_CL_VTE_CNTL, VTX_XY_FMT_bit); + R600_OUT_BATCH_REGVAL(PA_CL_VS_OUT_CNTL, 0 ); + R600_OUT_BATCH_REGVAL(PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); + R600_OUT_BATCH_REGVAL(PA_SU_SC_MODE_CNTL, (FACE_bit) | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift)); + R600_OUT_BATCH_REGVAL(PA_SU_VTX_CNTL, (PIX_CENTER_bit) | + (X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) | + (X_1_256TH << QUANT_MODE_shift)); + R600_OUT_BATCH_REGVAL(VGT_MAX_VTX_INDX, 2048); + END_BATCH(); +/* + if (dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) { BEGIN_RING(r7xx_default_size + 10); for (i = 0; i < r7xx_default_size; i++) OUT_RING(r7xx_default_state[i]); @@ -508,7 +635,15 @@ set_default_state(drm_radeon_private_t *dev_priv) } OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0)); OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT); + + //OR : + r700WaitForIdleClean(context_t *context); +*/ /* SQ config */ + /*XXX*/ +/* + r700SendSQConfig(GLcontext *ctx, struct radeon_state_atom *atom); + // OR: OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6)); OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2); OUT_RING(sq_config); @@ -518,21 +653,22 @@ set_default_state(drm_radeon_private_t *dev_priv) OUT_RING(sq_stack_resource_mgmt_1); OUT_RING(sq_stack_resource_mgmt_2); ADVANCE_RING(); +*/ } -static GLboolean validate_buffers(struct r300_context *r300, +static GLboolean validate_buffers(context_t *rmesa, struct radeon_bo *src_bo, struct radeon_bo *dst_bo) { int ret; - radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, src_bo, RADEON_GEM_DOMAIN_VRAM, 0); - radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, + radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, dst_bo, 0, RADEON_GEM_DOMAIN_VRAM); - ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, - first_elem(&r300->radeon.dma.reserved)->bo, + ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, + first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0); if (ret) return GL_FALSE; @@ -540,21 +676,29 @@ static GLboolean validate_buffers(struct r300_context *r300, return GL_TRUE; } -GLboolean r600_blit(struct r300_context *r300, +GLboolean r600_blit(context_t *context, struct radeon_bo *src_bo, intptr_t src_offset, gl_format src_mesaformat, unsigned src_pitch, unsigned src_width, unsigned src_height, + unsigned src_x, + unsigned src_y, struct radeon_bo *dst_bo, intptr_t dst_offset, gl_format dst_mesaformat, unsigned dst_pitch, unsigned dst_width, - unsigned dst_height) + unsigned dst_height, + unsigned dst_x, + unsigned dst_y, + unsigned w, + unsigned h, + unsigned flip_y) { int id = 0; + uint32_t cb_bpp; if (src_bo == dst_bo) { return GL_FALSE; @@ -571,39 +715,46 @@ GLboolean r600_blit(struct r300_context *r300, _mesa_get_format_name(dst_mesaformat)); } - if (!validate_buffers(r300, src_bo, dst_bo)) + /* Flush is needed to make sure that source buffer has correct data */ + radeonFlush(context->radeon.glCtx); + + if (!validate_buffers(context, src_bo, dst_bo)) return GL_FALSE; - rcommonEnsureCmdBufSpace(&r300->radeon, 200, __FUNCTION__); + rcommonEnsureCmdBufSpace(&context->radeon, 200, __FUNCTION__); + /* set clear state */ + set_default_state(context); /* src */ - set_tex_resource(dev_priv, tex_format, - src_pitch / cpp, - sy2, src_pitch / cpp, - src_gpu_addr); + set_tex_resource(context, src_mesaformat, src_bo, + src_width, src_height, src_pitch, src_offset); - r700SyncSurf(context, src_bo, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, - 0, TC_ACTION_ENA_bit); + set_tex_sampler(context); /* dst */ - set_render_target(dev_priv, cb_format, - dst_pitch / cpp, dy2, - dst_gpu_addr); + cb_bpp = _mesa_get_format_bytes(dst_mesaformat); + set_render_target(context, dst_bo, dst_mesaformat, + dst_pitch, cb_bpp, dst_width, dst_height, dst_offset); + /* shaders */ + load_shaders(context->radeon.glCtx); + + set_shaders(context); /* scissors */ - set_scissors(dev_priv, 0, 0, width, height); + set_scissors(context, 0, 0, 8191, 8191); + set_vb_data(context, src_x, src_y, dst_x, dst_y, w, h, src_height, flip_y); /* Vertex buffer setup */ - set_vtx_resource(dev_priv, vb_addr); + set_vtx_resource(context); /* draw */ - draw_auto(dev_priv); + draw_auto(context); - r700SyncSurf(context, dst_bo, 0, RADEON_GEM_DOMAIN_VRAM, + r700SyncSurf(context, dst_bo, 0, + RADEON_GEM_DOMAIN_VRAM|RADEON_GEM_DOMAIN_GTT, CB_ACTION_ENA_bit | (1 << (id + 6))); - radeonFlush(r300->radeon.glCtx); + radeonFlush(context->radeon.glCtx); return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r600_blit.h b/src/mesa/drivers/dri/r600/r600_blit.h new file mode 100644 index 00000000000..f30c13c288d --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_blit.h @@ -0,0 +1,21 @@ +GLboolean r600_blit(context_t *context, + struct radeon_bo *src_bo, + intptr_t src_offset, + gl_format src_mesaformat, + unsigned src_pitch, + unsigned src_width, + unsigned src_height, + unsigned src_x_offset, + unsigned src_y_offset, + struct radeon_bo *dst_bo, + intptr_t dst_offset, + gl_format dst_mesaformat, + unsigned dst_pitch, + unsigned dst_width, + unsigned dst_height, + unsigned dst_x_offset, + unsigned dst_y_offset, + unsigned w, + unsigned h, + unsigned flip_y); + diff --git a/src/mesa/drivers/dri/r600/r600_blit_shaders.h b/src/mesa/drivers/dri/r600/r600_blit_shaders.h new file mode 100644 index 00000000000..492dde96368 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_blit_shaders.h @@ -0,0 +1,28 @@ +const uint32_t r6xx_vs[] = +{ + 0x00000004, // CF_DWORD0(ADDR(4)) + 0x81000000, // SQ_CF_INST_VTX COUNT(1) + 0x0000203c, // CF_EXP_IMP CF_POS0 SQ_EXPORT_POS RW_GPR(0) ELEM_SIZE(0) + 0x94000b08, // SQ_CF_INST_EXPORT_DONE SWZ XY01 BARRIER(1) + 0x00004000, // CF_EXP_IMP 0 SQ_EXPORT_PARAM RW_GPR(0) ELEM_SIZE(0) + 0x14200b1a, // SQ_CF_INST_EXPORT_DONE SWZ ZW01 EOP(1) BARRIER(0) + 0x00000000, + 0x00000000, + 0x3c000000, // SQ_VTX_INST_FETCH BUFFER_ID(0) MEGA_FETCH_COUNT(16) + 0x68cd1000, // DST_GPR(0) DST_SWZ: XYZW DATA_FORMAT(35) SQ_NUM_FORMAT_SCALED SQ_FORMAT_COMP_SIGNED + 0x00080000, // ENDIAN_SWAP(SQ_ENDIAN_NONE) MEGA_FETCH(1) + 0x00000000, // VTX_DWORD_PAD +}; + +const uint32_t r6xx_ps[] = +{ + 0x00000002, // CF_DWORD0 AADR(2) + 0x80800000, // SQ_CF_INST_TEX COUNT(1) + 0x00000000, // CF_ALLOC_IMP_EXP0 SQ_EXPORT_PIXEL RW_GPR(0) ELEM_SIZE(0) + 0x94200688, // SQ_CF_INST_EXPORT_DONE EOP(1) BARRIER(1) SWZ: XYZW + 0x00000010, // SQ_TEX_INST_SAMPLE SRC_GPR(0) RESOURCE_ID(0) + 0x000d1000, // DST_GPR(0) SWZ: XYZW TEX_UNNORMALIZED + 0xb0800000, // SAMPLER_ID(0) SRC_SWZ XYZW + 0x00000000, // TEX_DWORD_PAD +}; + diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index cb549497f54..55680fad32d 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -384,6 +384,10 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, r700InitIoctlFuncs(&functions); radeonInitBufferObjectFuncs(&functions); + if (r600->radeon.radeonScreen->kernel_mm) { + r600_init_texcopy_functions(&functions); + } + if (!radeonInitContext(&r600->radeon, &functions, glVisual, driContextPriv, sharedContextPrivate)) { diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index a1b4af715e2..774b30eb8d1 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -148,6 +148,8 @@ struct r600_context { GLint nNumActiveAos; StreamDesc stream_desc[VERT_ATTRIB_MAX]; struct r700_index_buffer ind_buf; + struct radeon_bo *blit_bo; + GLboolean blit_bo_loaded; }; #define R700_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx)) @@ -181,6 +183,7 @@ extern GLboolean r700SyncSurf(context_t *context, extern void r700Start3D(context_t *context); extern void r600InitAtoms(context_t *context); extern void r700InitDraw(GLcontext *ctx); +extern void r600_init_texcopy_functions(struct dd_function_table *table); #define RADEON_D_CAPTURE 0 #define RADEON_D_PLAYBACK 1 diff --git a/src/mesa/drivers/dri/r600/r600_texcopy.c b/src/mesa/drivers/dri/r600/r600_texcopy.c new file mode 100644 index 00000000000..287a82ea8f3 --- /dev/null +++ b/src/mesa/drivers/dri/r600/r600_texcopy.c @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2009 Maciej Cencora <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_common.h" +#include "r600_context.h" + +#include "main/image.h" +#include "main/teximage.h" +#include "main/texstate.h" +#include "drivers/common/meta.h" + +#include "radeon_mipmap_tree.h" +#include "r600_blit.h" +#include <main/debug.h> + +// TODO: +// need to pass correct pitch for small dst textures! +static GLboolean +do_copy_texsubimage(GLcontext *ctx, + GLenum target, GLint level, + struct radeon_tex_obj *tobj, + radeon_texture_image *timg, + GLint dstx, GLint dsty, + GLint x, GLint y, + GLsizei width, GLsizei height) +{ + context_t *context = R700_CONTEXT(ctx); + struct radeon_renderbuffer *rrb; + + if (_mesa_get_format_bits(timg->base.TexFormat, GL_DEPTH_BITS) > 0) { + rrb = radeon_get_depthbuffer(&context->radeon); + } else { + rrb = radeon_get_colorbuffer(&context->radeon); + } + + if (!timg->mt) { + radeon_validate_texture_miptree(ctx, &tobj->base); + } + + assert(rrb && rrb->bo); + assert(timg->mt->bo); + assert(timg->base.Width >= dstx + width); + assert(timg->base.Height >= dsty + height); + + intptr_t src_offset = rrb->draw_offset; + intptr_t dst_offset = radeon_miptree_image_offset(timg->mt, _mesa_tex_target_to_face(target), level); + + if (src_offset % 256 || dst_offset % 256) { + return GL_FALSE; + } + + if (0) { + fprintf(stderr, "%s: copying to face %d, level %d\n", + __FUNCTION__, _mesa_tex_target_to_face(target), level); + fprintf(stderr, "to: x %d, y %d, offset %d\n", dstx, dsty, (uint32_t) dst_offset); + fprintf(stderr, "from (%dx%d) width %d, height %d, offset %d, pitch %d\n", + x, y, rrb->base.Width, rrb->base.Height, (uint32_t) src_offset, rrb->pitch/rrb->cpp); + fprintf(stderr, "src size %d, dst size %d\n", rrb->bo->size, timg->mt->bo->size); + + } + + + /* blit from src buffer to texture */ + return r600_blit(context, rrb->bo, src_offset, rrb->base.Format, rrb->pitch, + rrb->base.Width, rrb->base.Height, x, y, + timg->mt->bo, dst_offset, timg->base.TexFormat, + timg->mt->levels[level].rowstride, timg->base.Width, timg->base.Height, + dstx, dsty, width, height, 1); +} + +static void +r600CopyTexImage2D(GLcontext *ctx, GLenum target, GLint level, + GLenum internalFormat, + GLint x, GLint y, GLsizei width, GLsizei height, + GLint border) +{ + struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); + struct gl_texture_object *texObj = + _mesa_select_tex_object(ctx, texUnit, target); + struct gl_texture_image *texImage = + _mesa_select_tex_image(ctx, texObj, target, level); + int srcx, srcy, dstx, dsty; + + if (border) + goto fail; + + /* Setup or redefine the texture object, mipmap tree and texture + * image. Don't populate yet. + */ + ctx->Driver.TexImage2D(ctx, target, level, internalFormat, + width, height, border, + GL_RGBA, GL_UNSIGNED_BYTE, NULL, + &ctx->DefaultPacking, texObj, texImage); + + srcx = x; + srcy = y; + dstx = 0; + dsty = 0; + if (!_mesa_clip_copytexsubimage(ctx, + &dstx, &dsty, + &srcx, &srcy, + &width, &height)) { + return; + } + + if (!do_copy_texsubimage(ctx, target, level, + radeon_tex_obj(texObj), (radeon_texture_image *)texImage, + 0, 0, x, y, width, height)) { + goto fail; + } + + return; + +fail: + _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y, + width, height, border); +} + +static void +r600CopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLint x, GLint y, + GLsizei width, GLsizei height) +{ + struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); + struct gl_texture_object *texObj = _mesa_select_tex_object(ctx, texUnit, target); + struct gl_texture_image *texImage = _mesa_select_tex_image(ctx, texObj, target, level); + + if (!do_copy_texsubimage(ctx, target, level, + radeon_tex_obj(texObj), (radeon_texture_image *)texImage, + xoffset, yoffset, x, y, width, height)) { + + //DEBUG_FALLBACKS + + _mesa_meta_CopyTexSubImage2D(ctx, target, level, + xoffset, yoffset, x, y, width, height); + } +} + + +void r600_init_texcopy_functions(struct dd_function_table *table) +{ + table->CopyTexImage2D = r600CopyTexImage2D; + table->CopyTexSubImage2D = r600CopyTexSubImage2D; +} |