From 81283b0bf0a8f7b31517adc224c20531e27fab42 Mon Sep 17 00:00:00 2001 From: Richard Li Date: Tue, 22 Sep 2009 16:39:11 -0400 Subject: r600 : add draw_prim support. --- src/mesa/drivers/dri/r600/Makefile | 1 + src/mesa/drivers/dri/r600/r600_context.c | 3 + src/mesa/drivers/dri/r600/r600_context.h | 32 ++ src/mesa/drivers/dri/r600/r700_assembler.c | 127 ++++ src/mesa/drivers/dri/r600/r700_assembler.h | 8 + src/mesa/drivers/dri/r600/r700_chip.c | 100 +++- src/mesa/drivers/dri/r600/r700_render.c | 667 +++++++++++++++++++++- src/mesa/drivers/dri/r600/r700_shader.c | 90 +++ src/mesa/drivers/dri/r600/r700_shader.h | 1 + src/mesa/drivers/dri/r600/r700_state.c | 20 +- src/mesa/drivers/dri/r600/r700_state.h | 1 + src/mesa/drivers/dri/r600/r700_vertprog.c | 195 ++++++- src/mesa/drivers/dri/r600/r700_vertprog.h | 17 +- src/mesa/drivers/dri/r600/radeon_buffer_objects.c | 1 + src/mesa/drivers/dri/r600/radeon_buffer_objects.h | 1 + 15 files changed, 1217 insertions(+), 47 deletions(-) create mode 120000 src/mesa/drivers/dri/r600/radeon_buffer_objects.c create mode 120000 src/mesa/drivers/dri/r600/radeon_buffer_objects.h diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile index 36bf773c054..7d5a7b1ab6f 100644 --- a/src/mesa/drivers/dri/r600/Makefile +++ b/src/mesa/drivers/dri/r600/Makefile @@ -29,6 +29,7 @@ COMMON_SOURCES = \ RADEON_COMMON_SOURCES = \ radeon_bo_legacy.c \ radeon_common_context.c \ + radeon_buffer_objects.c \ radeon_common.c \ radeon_cs_legacy.c \ radeon_dma.c \ diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 354b263f5cb..6fc6d9d7bfa 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -257,6 +257,7 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, r600InitTextureFuncs(&functions); r700InitShaderFuncs(&functions); r700InitIoctlFuncs(&functions); + radeonInitBufferObjectFuncs(&functions); if (!radeonInitContext(&r600->radeon, &functions, glVisual, driContextPriv, @@ -375,6 +376,8 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); } + r700InitDraw(ctx); + radeon_fbo_init(&r600->radeon); radeonInitSpanFuncs( ctx ); diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index 9397ecde81b..a296ea23fa4 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -126,6 +126,34 @@ struct r600_hw_state { struct radeon_state_atom tx_brdr_clr; }; +typedef struct StreamDesc +{ + GLint size; //number of data element + GLenum type; //data element type + GLsizei stride; + + struct radeon_bo *bo; + GLint bo_offset; + + GLuint dwords; + GLuint dst_loc; + GLuint _signed; + GLboolean normalize; + GLboolean is_named_bo; + GLubyte element; +} StreamDesc; + +typedef struct r700_index_buffer +{ + struct radeon_bo *bo; + int bo_offset; + + GLboolean is_32bit; + GLuint count; + + GLboolean bHostIb; +} r700_index_buffer; + /** * \brief R600 context structure. */ @@ -144,6 +172,9 @@ struct r600_context { GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; + GLint nNumActiveAos; + StreamDesc stream_desc[VERT_ATTRIB_MAX]; + struct r700_index_buffer ind_buf; }; #define R700_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx)) @@ -177,6 +208,7 @@ extern GLboolean r700SyncSurf(context_t *context, extern void r700SetupStreams(GLcontext * ctx); extern void r700Start3D(context_t *context); extern void r600InitAtoms(context_t *context); +extern void r700InitDraw(GLcontext *ctx); #define RADEON_D_CAPTURE 0 #define RADEON_D_PLAYBACK 1 diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index f46bc322011..81269350e43 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -786,6 +786,133 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, return GL_TRUE; } +GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, + GLuint destination_register, + GLenum type, + GLint size, + GLubyte element, + GLuint _signed, + GLboolean normalize, + VTX_FETCH_METHOD * pFetchMethod) +{ + GLuint client_size_inbyte; + GLuint data_format; + GLuint mega_fetch_count; + GLuint is_mega_fetch_flag; + + R700VertexGenericFetch* vfetch_instruction_ptr; + R700VertexGenericFetch* assembled_vfetch_instruction_ptr + = pAsm->vfetch_instruction_ptr_array[element]; + + if (assembled_vfetch_instruction_ptr == NULL) + { + vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch); + if (vfetch_instruction_ptr == NULL) + { + return GL_FALSE; + } + Init_R700VertexGenericFetch(vfetch_instruction_ptr); + } + else + { + vfetch_instruction_ptr = assembled_vfetch_instruction_ptr; + } + + data_format = GetSurfaceFormat(type, size, &client_size_inbyte); + + if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here + { + //TODO : mini fetch + } + else + { + mega_fetch_count = MEGA_FETCH_BYTES - 1; + is_mega_fetch_flag = 0x1; + pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte; + } + + vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH; + vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA; + vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; + + vfetch_instruction_ptr->m_Word0.f.buffer_id = element; + vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0; + vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE; + vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X; + vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count; + + vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X; + vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y; + vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z; + vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W; + + vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1; + vfetch_instruction_ptr->m_Word1.f.data_format = data_format; + vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_NONE; + + if(1 == _signed) + { + vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED; + } + else + { + vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED; + } + + if(GL_TRUE == normalize) + { + vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM; + } + else + { + vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT; + } + + // Destination register + vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register; + vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE; + + vfetch_instruction_ptr->m_Word2.f.offset = 0; + vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0; + + vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag; + + if (assembled_vfetch_instruction_ptr == NULL) + { + if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) ) + { + return GL_FALSE; + } + + if (pAsm->vfetch_instruction_ptr_array[element] != NULL) + { + return GL_FALSE; + } + else + { + pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr; + } + } + + return GL_TRUE; +} + +GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm) +{ + GLint i; + pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; + pAsm->cf_current_vtx_clause_ptr = NULL; + + for (i=0; ivfetch_instruction_ptr_array[ i ] = NULL; + } + + cleanup_vfetch_shaderinst(pAsm->pR700Shader); + + return GL_TRUE; +} + GLuint gethelpr(r700_AssemblerBase* pAsm) { GLuint r = pAsm->uHelpReg; diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index f9c4d849c65..4e6e20011ad 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -411,6 +411,14 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, GLuint number_of_elements, GLenum dataElementType, VTX_FETCH_METHOD* pFetchMethod); +GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, + GLuint destination_register, + GLenum type, + GLint size, + GLubyte element, + GLuint _signed, + GLboolean normalize, + VTX_FETCH_METHOD * pFetchMethod); GLuint gethelpr(r700_AssemblerBase* pAsm); void resethelpr(r700_AssemblerBase* pAsm); void checkop_init(r700_AssemblerBase* pAsm); diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 06d7e9c9ab1..783427a94c9 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -208,6 +208,80 @@ static void r700SetupVTXConstants(GLcontext * ctx, } +extern int getTypeSize(GLenum type); +static void r700SetupVTXConstants2(GLcontext * ctx, + void * pAos, + StreamDesc * pStreamDesc) +{ + context_t *context = R700_CONTEXT(ctx); + struct radeon_aos * paos = (struct radeon_aos *)pAos; + unsigned int nVBsize; + BATCH_LOCALS(&context->radeon); + + unsigned int uSQ_VTX_CONSTANT_WORD0_0; + unsigned int uSQ_VTX_CONSTANT_WORD1_0; + unsigned int uSQ_VTX_CONSTANT_WORD2_0 = 0; + unsigned int uSQ_VTX_CONSTANT_WORD3_0 = 0; + unsigned int uSQ_VTX_CONSTANT_WORD6_0 = 0; + + if (!paos->bo) + return; + + if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710)) + r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit); + else + r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit); + + if(0 == pStreamDesc->stride) + { + nVBsize = paos->count * pStreamDesc->size * getTypeSize(pStreamDesc->type); + } + else + { + nVBsize = paos->count * pStreamDesc->stride; + } + + uSQ_VTX_CONSTANT_WORD0_0 = paos->offset; + uSQ_VTX_CONSTANT_WORD1_0 = nVBsize - 1; + + SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */ + SETfield(uSQ_VTX_CONSTANT_WORD2_0, pStreamDesc->stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift, + SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask); + SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(pStreamDesc->type, pStreamDesc->size, NULL), + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift, + SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */ + SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); + SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit); + + SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask); + SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER, + SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, SQ_TEX_RESOURCE_WORD6_0__TYPE_mask); + + BEGIN_BATCH_NO_AUTOSTATE(9 + 2); + + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); + R600_OUT_BATCH((pStreamDesc->element + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD3_0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(0); + R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD6_0); + R600_OUT_BATCH_RELOC(uSQ_VTX_CONSTANT_WORD0_0, + paos->bo, + uSQ_VTX_CONSTANT_WORD0_0, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); + COMMIT_BATCH(); + +} + void r700SetupStreams(GLcontext *ctx) { context_t *context = R700_CONTEXT(ctx); @@ -256,14 +330,24 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom) COMMIT_BATCH(); for(i=0; imesa_program->Base.InputsRead & (1 << i)) { - /* currently aos are packed */ - r700SetupVTXConstants(ctx, - i, - (void*)(&context->radeon.tcl.aos[j]), - (unsigned int)context->radeon.tcl.aos[j].components, - (unsigned int)context->radeon.tcl.aos[j].stride * 4, - (unsigned int)context->radeon.tcl.aos[j].count); + if(vp->mesa_program->Base.InputsRead & (1 << i)) + { + if(1 == context->selected_vp->uiVersion) + { + /* currently aos are packed */ + r700SetupVTXConstants(ctx, + i, + (void*)(&context->radeon.tcl.aos[j]), + (unsigned int)context->radeon.tcl.aos[j].components, + (unsigned int)context->radeon.tcl.aos[j].stride * 4, + (unsigned int)context->radeon.tcl.aos[j].count); + } + else + { /* context->selected_vp->uiVersion == 2 : aos not always packed */ + r700SetupVTXConstants2(ctx, + (void*)(&context->radeon.tcl.aos[j]), + &(context->stream_desc[j])); + } j++; } } diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index b1c3648ca56..b58859b6ba3 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -43,6 +43,7 @@ #include "tnl/t_context.h" #include "tnl/t_vertex.h" #include "tnl/t_pipeline.h" +#include "vbo/vbo_context.h" #include "r600_context.h" #include "r600_cmdbuf.h" @@ -53,6 +54,7 @@ #include "r700_fragprog.h" #include "r700_state.h" +#include "radeon_buffer_objects.h" #include "radeon_common_context.h" void r700WaitForIdle(context_t *context); @@ -270,46 +272,82 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim if (type < 0 || num_indices <= 0) return; - total_emit = 3 /* VGT_PRIMITIVE_TYPE */ - + 2 /* VGT_INDEX_TYPE */ - + 2 /* NUM_INSTANCES */ - + num_indices + 3; /* DRAW_INDEX_IMMD */ + total_emit = 3 /* VGT_PRIMITIVE_TYPE */ + + 2 /* VGT_INDEX_TYPE */ + + 2 /* NUM_INSTANCES */ + + num_indices + 3; /* DRAW_INDEX_IMMD */ - BEGIN_BATCH_NO_AUTOSTATE(total_emit); + BEGIN_BATCH_NO_AUTOSTATE(total_emit); // prim - SETfield(vgt_primitive_type, type, - VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); - R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); - R600_OUT_BATCH(vgt_primitive_type); + SETfield(vgt_primitive_type, type, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); + R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); + R600_OUT_BATCH(vgt_primitive_type); // index type - SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); - R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); - R600_OUT_BATCH(vgt_index_type); + SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + R600_OUT_BATCH(vgt_index_type); // num instances R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); R600_OUT_BATCH(1); // draw packet - vgt_num_indices = num_indices; - SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + vgt_num_indices = num_indices; + SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); - R600_OUT_BATCH(vgt_num_indices); - R600_OUT_BATCH(vgt_draw_initiator); + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + if(NULL == context->ind_buf.bo) + { for (i = start; i < (start + num_indices); i++) { - if(vb->Elts) - R600_OUT_BATCH(vb->Elts[i]); - else - R600_OUT_BATCH(i); + if(vb->Elts) + { + R600_OUT_BATCH(vb->Elts[i]); + } + else + R600_OUT_BATCH(i); } - END_BATCH(); - COMMIT_BATCH(); + } + else + { + if(GL_TRUE == context->ind_buf.bHostIb) + { + if(GL_TRUE != context->ind_buf.is_32bit) + { + GLushort * pIndex = (GLushort*)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + pIndex += start; + for (i = 0; i < num_indices; i++) + { + R600_OUT_BATCH(*pIndex); + pIndex++; + } + } + else + { + GLuint * pIndex = (GLuint*)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + pIndex += start; + + for (i = 0; i < num_indices; i++) + { + R600_OUT_BATCH(*pIndex); + pIndex++; + } + } + } + else + { + /* TODO : hw ib draw */ + } + } + END_BATCH(); + COMMIT_BATCH(); } /* start 3d, idle, cb/db flush */ @@ -477,4 +515,585 @@ const struct tnl_pipeline_stage *r700_pipeline[] = 0, }; +#define CONVERT( TYPE, MACRO ) do { \ + GLuint i, j, sz; \ + sz = input->Size; \ + if (input->Normalized) { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = MACRO(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } else { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = (GLfloat)(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } \ +} while (0) + +/** + * Convert attribute data type to float + * If the attribute uses named buffer object replace the bo with newly allocated bo + */ +static void r700ConvertAttrib(GLcontext *ctx, int count, + const struct gl_client_array *input, + struct StreamDesc *attr) +{ + context_t *context = R700_CONTEXT(ctx); + const GLvoid *src_ptr; + GLboolean mapped_named_bo = GL_FALSE; + GLfloat *dst_ptr; + GLuint stride; + + stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB; + + /* Convert value for first element only */ + if (input->StrideB == 0) + { + count = 1; + } + + if (input->BufferObj->Name) + { + if (!input->BufferObj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + } + else + { + src_ptr = input->Ptr; + } + + radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, + sizeof(GLfloat) * input->Size * count, 32); + dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + + assert(src_ptr != NULL); + + switch (input->Type) + { + case GL_DOUBLE: + CONVERT(GLdouble, (GLfloat)); + break; + case GL_UNSIGNED_INT: + CONVERT(GLuint, UINT_TO_FLOAT); + break; + case GL_INT: + CONVERT(GLint, INT_TO_FLOAT); + break; + case GL_UNSIGNED_SHORT: + CONVERT(GLushort, USHORT_TO_FLOAT); + break; + case GL_SHORT: + CONVERT(GLshort, SHORT_TO_FLOAT); + break; + case GL_UNSIGNED_BYTE: + assert(input->Format != GL_BGRA); + CONVERT(GLubyte, UBYTE_TO_FLOAT); + break; + case GL_BYTE: + CONVERT(GLbyte, BYTE_TO_FLOAT); + break; + default: + assert(0); + break; + } + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } +} + +static void r700AlignDataToDword(GLcontext *ctx, + const struct gl_client_array *input, + int count, + struct StreamDesc *attr) +{ + context_t *context = R700_CONTEXT(ctx); + const int dst_stride = (input->StrideB + 3) & ~3; + const int size = getTypeSize(input->Type) * input->Size * count; + GLboolean mapped_named_bo = GL_FALSE; + + radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32); + + if (!input->BufferObj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + { + GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + int i; + + for (i = 0; i < count; ++i) + { + _mesa_memcpy(dst_ptr, src_ptr, input->StrideB); + src_ptr += input->StrideB; + dst_ptr += dst_stride; + } + } + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } + + attr->stride = dst_stride; +} + +static void r700SetupStreams2(GLcontext *ctx, const struct gl_client_array *input[], int count) +{ + context_t *context = R700_CONTEXT(ctx); + GLuint stride; + int ret; + int i, index; + + R600_STATECHANGE(context, vtx); + + for(index = 0; index < context->nNumActiveAos; index++) + { + struct radeon_aos *aos = &context->radeon.tcl.aos[index]; + i = context->stream_desc[index].element; + + stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB; + + if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input[i]->Type) != 4 || +#endif + stride < 4) + { + r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]); + } + else + { + if (input[i]->BufferObj->Name) + { + if (stride % 4 != 0) + { + assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0); + r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]); + context->stream_desc[index].is_named_bo = GL_FALSE; + } + else + { + context->stream_desc[index].stride = input[i]->StrideB; + context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr; + context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; + context->stream_desc[index].is_named_bo = GL_TRUE; + } + } + else + { + int size; + int local_count = count; + uint32_t *dst; + + if (input[i]->StrideB == 0) + { + size = getTypeSize(input[i]->Type) * input[i]->Size; + local_count = 1; + } + else + { + size = getTypeSize(input[i]->Type) * input[i]->Size * local_count; + } + + radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo, + &context->stream_desc[index].bo_offset, size, 32); + assert(context->stream_desc[index].bo->ptr != NULL); + dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr, + context->stream_desc[index].bo_offset); + + switch (context->stream_desc[index].dwords) + { + case 1: + radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); + context->stream_desc[index].stride = 4; + break; + case 2: + radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); + context->stream_desc[index].stride = 8; + break; + case 3: + radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count); + context->stream_desc[index].stride = 12; + break; + case 4: + radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); + context->stream_desc[index].stride = 16; + break; + default: + assert(0); + break; + } + } + } + + aos->count = context->stream_desc[index].stride == 0 ? 1 : count; + aos->stride = context->stream_desc[index].stride / sizeof(float); + aos->components = context->stream_desc[index].dwords; + aos->bo = context->stream_desc[index].bo; + aos->offset = context->stream_desc[index].bo_offset; + + if(context->stream_desc[index].is_named_bo) + { + radeon_cs_space_add_persistent_bo(context->radeon.cmdbuf.cs, + context->stream_desc[index].bo, + RADEON_GEM_DOMAIN_GTT, 0); + } + } + + context->radeon.tcl.aos_count = context->nNumActiveAos; + ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs, + first_elem(&context->radeon.dma.reserved)->bo, + RADEON_GEM_DOMAIN_GTT, 0); +} + +static void r700FreeData(GLcontext *ctx) +{ + /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo + * to prevent double unref in radeonReleaseArrays + * called during context destroy + */ + context_t *context = R700_CONTEXT(ctx); + + int i; + + for (i = 0; i < context->nNumActiveAos; i++) + { + if (!context->stream_desc[i].is_named_bo) + { + radeon_bo_unref(context->stream_desc[i].bo); + } + context->radeon.tcl.aos[i].bo = NULL; + } + + if (context->ind_buf.bo != NULL) + { + if(context->ind_buf.bHostIb != GL_TRUE) + { + radeon_bo_unref(context->ind_buf.bo); + } + else + { + FREE(context->ind_buf.bo->ptr); + FREE(context->ind_buf.bo); + context->ind_buf.bo = NULL; + } + } +} + +static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +{ + context_t *context = R700_CONTEXT(ctx); + GLvoid *src_ptr; + GLuint *out; + int i; + GLboolean mapped_named_bo = GL_FALSE; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + mapped_named_bo = GL_TRUE; + assert(mesa_ind_buf->obj->Pointer != NULL); + } + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) + { + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + GLubyte *in = (GLubyte *)src_ptr; + + if(context->ind_buf.bHostIb != GL_TRUE) + { + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + + assert(context->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + } + else + { + context->ind_buf.bo = MALLOC_STRUCT(radeon_bo); + context->ind_buf.bo->ptr = ALIGN_MALLOC(size, 4); + context->ind_buf.bo_offset = 0; + out = (GLuint *)context->ind_buf.bo->ptr; + } + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) + { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) + { + *out++ = in[i]; + } + +#if MESA_BIG_ENDIAN + } + else + { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */ + GLushort *in = (GLushort *)src_ptr; + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + + if(context->ind_buf.bHostIb != GL_TRUE) + { + radeonAllocDmaRegion(&context->radeon, &r300->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + + assert(context->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + } + else + { + context->ind_buf.bo = MALLOC_STRUCT(radeon_bo); + context->ind_buf.bo->ptr = ALIGN_MALLOC(size, 4); + context->ind_buf.bo_offset = 0; + out = (GLuint *)context->ind_buf.bo->ptr; + } + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) + { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) + { + *out++ = in[i]; + } +#endif + } + + context->ind_buf.is_32bit = GL_FALSE; + context->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } +} + +static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +{ + context_t *context = R700_CONTEXT(ctx); + + if (!mesa_ind_buf) { + context->ind_buf.bo = NULL; + return; + } + + context->ind_buf.bHostIb = GL_TRUE; + +#if MESA_BIG_ENDIAN + if (mesa_ind_buf->type == GL_UNSIGNED_INT) + { +#else + if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) + { +#endif + const GLvoid *src_ptr; + GLvoid *dst_ptr; + GLboolean mapped_named_bo = GL_FALSE; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + assert(mesa_ind_buf->obj->Pointer != NULL); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type); + + if(context->ind_buf.bHostIb != GL_TRUE) + { + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + assert(context->ind_buf.bo->ptr != NULL); + dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + } + else + { + context->ind_buf.bo = MALLOC_STRUCT(radeon_bo); + context->ind_buf.bo->ptr = ALIGN_MALLOC(size, 4); + context->ind_buf.bo_offset = 0; + dst_ptr = context->ind_buf.bo->ptr; + } + + _mesa_memcpy(dst_ptr, src_ptr, size); + + context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); + context->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } + } + else + { + r700FixupIndexBuffer(ctx, mesa_ind_buf); + } +} + +static GLboolean r700TryDrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index ) +{ + context_t *context = R700_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; + GLuint i, id = 0; + GLboolean bValidedbuffer; + struct radeon_renderbuffer *rrb; + + if (ctx->NewState) + { + _mesa_update_state( ctx ); + } + + bValidedbuffer = r600ValidateBuffers(ctx); + + /* always emit CB base to prevent + * lock ups on some chips. + */ + R600_STATECHANGE(context, cb_target); + /* mark vtx as dirty since it changes per-draw */ + R600_STATECHANGE(context, vtx); + + _tnl_UpdateFixedFunctionProgram(ctx); + r700SetVertexFormat(ctx, arrays, max_index + 1); + r700SetupStreams2(ctx, arrays, max_index + 1); + r700UpdateShaders2(ctx); + + r700SetScissor(context); + + r700SetupVertexProgram(ctx); + + r700SetupFragmentProgram(ctx); + + r600UpdateTextureState(ctx); + + GLuint emit_end = r700PredictRenderSize(ctx) + + context->radeon.cmdbuf.cs->cdw; + + r700SetupIndexBuffer(ctx, ib); + + radeonEmitState(radeon); + + for (i = 0; i < nr_prims; ++i) + { + r700RunRenderPrimitive(ctx, + prim[i].start, + prim[i].start + prim[i].count, + prim[i].mode); + } + + /* Flush render op cached for last several quads. */ + r700WaitForIdleClean(context); + + rrb = radeon_get_colorbuffer(&context->radeon); + if (rrb && rrb->bo) + r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, + CB_ACTION_ENA_bit | (1 << (id + 6))); + + rrb = radeon_get_depthbuffer(&context->radeon); + if (rrb && rrb->bo) + r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, + DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit); + + r700FreeData(ctx); + + if (emit_end < context->radeon.cmdbuf.cs->cdw) + { + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end); + } + + return GL_TRUE; +} + +static void r700DrawPrimsRe(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) +{ + GLboolean retval = GL_FALSE; + + /* This check should get folded into just the places that + * min/max index are really needed. + */ + if (!index_bounds_valid) { + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + } + + if (min_index) { + vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrimsRe ); + return; + } + + /* Make an attempt at drawing */ + retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + + /* If failed run tnl pipeline - it should take care of fallbacks */ + if (!retval) + _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); +} + +static void r700DrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) +{ + context_t *context = R700_CONTEXT(ctx); + + /* For non indexed drawing, using tnl pipe. */ + if(!ib) + { + context->ind_buf.bo = NULL; + + _tnl_vbo_draw_prims(ctx, arrays, prim, nr_prims, ib, + index_bounds_valid, min_index, max_index); + return; + } + + r700DrawPrimsRe(ctx, arrays, prim, nr_prims, ib, index_bounds_valid, min_index, max_index); +} + +void r700InitDraw(GLcontext *ctx) +{ + struct vbo_context *vbo = vbo_context(ctx); + + vbo->draw_prims = r700DrawPrims; +} + diff --git a/src/mesa/drivers/dri/r600/r700_shader.c b/src/mesa/drivers/dri/r600/r700_shader.c index b4fd51c1370..955ea4e4e1d 100644 --- a/src/mesa/drivers/dri/r600/r700_shader.c +++ b/src/mesa/drivers/dri/r600/r700_shader.c @@ -60,6 +60,55 @@ void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * plstCFInstructions->uNumOfNode++; } +void TakeInstOutFromList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst) +{ + GLuint ulIndex = 0; + GLboolean bFound = GL_FALSE; + R700ShaderInstruction * pPrevInst = NULL; + R700ShaderInstruction * pCurInst = plstCFInstructions->pHead; + + /* Need go thro list to make sure pInst is there. */ + while(NULL != pCurInst) + { + if(pCurInst == pInst) + { + bFound = GL_TRUE; + break; + } + + pPrevInst = pCurInst; + pCurInst = pCurInst->pNextInst; + } + if(GL_TRUE == bFound) + { + plstCFInstructions->uNumOfNode--; + + pCurInst = pInst->pNextInst; + ulIndex = pInst->m_uIndex; + while(NULL != pCurInst) + { + pCurInst->m_uIndex = ulIndex; + ulIndex++; + pCurInst = pCurInst->pNextInst; + } + + if(plstCFInstructions->pHead == pInst) + { + plstCFInstructions->pHead = pInst->pNextInst; + } + if(plstCFInstructions->pTail == pInst) + { + plstCFInstructions->pTail = pPrevInst; + } + if(NULL != pPrevInst) + { + pPrevInst->pNextInst = pInst->pNextInst; + } + + FREE(pInst); + } +} + void Init_R700_Shader(R700_Shader * pShader) { pShader->Type = R700_SHADER_INVALID; @@ -488,6 +537,47 @@ void DebugPrint(void) { } +void cleanup_vfetch_shaderinst(R700_Shader *pShader) +{ + R700ShaderInstruction *pInst; + R700ShaderInstruction *pInstToFree; + R700VertexInstruction *pVTXInst; + R700ControlFlowInstruction *pCFInst; + + pInst = pShader->lstVTXInstructions.pHead; + while(NULL != pInst) + { + pVTXInst = (R700VertexInstruction *)pInst; + pShader->uShaderBinaryDWORDSize -= GetInstructionSize(pVTXInst->m_ShaderInstType); + + if(NULL != pVTXInst->m_pLinkedGenericClause) + { + pCFInst = (R700ControlFlowInstruction*)(pVTXInst->m_pLinkedGenericClause); + + TakeInstOutFromList(&(pShader->lstCFInstructions), + (R700ShaderInstruction*)pCFInst); + + pShader->uShaderBinaryDWORDSize -= GetInstructionSize(pCFInst->m_ShaderInstType); + } + + pInst = pInst->pNextInst; + }; + + //destroy each item in pShader->lstVTXInstructions; + pInst = pShader->lstVTXInstructions.pHead; + while(NULL != pInst) + { + pInstToFree = pInst; + pInst = pInst->pNextInst; + FREE(pInstToFree); + }; + + //set NULL pShader->lstVTXInstructions + pShader->lstVTXInstructions.pHead=NULL; + pShader->lstVTXInstructions.pTail=NULL; + pShader->lstVTXInstructions.uNumOfNode=0; +} + void Clean_Up_Shader(R700_Shader *pShader) { FREE(pShader->pProgram); diff --git a/src/mesa/drivers/dri/r600/r700_shader.h b/src/mesa/drivers/dri/r600/r700_shader.h index bfd01e1a93a..997cb05aaf8 100644 --- a/src/mesa/drivers/dri/r600/r700_shader.h +++ b/src/mesa/drivers/dri/r600/r700_shader.h @@ -143,6 +143,7 @@ void LoadProgram(R700_Shader *pShader); void UpdateShaderRegisters(R700_Shader *pShader); void DeleteInstructions(R700_Shader *pShader); void DebugPrint(void); +void cleanup_vfetch_shaderinst(R700_Shader *pShader); void Clean_Up_Shader(R700_Shader *pShader); diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index fc0b5116843..1043eabb149 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -92,7 +92,25 @@ void r700UpdateShaders (GLcontext * ctx) //---------------------------------- } } - r700SelectVertexShader(ctx); + r700SelectVertexShader(ctx, 1); + r700UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + context->radeon.NewGLState = 0; +} + +void r700UpdateShaders2(GLcontext * ctx) +{ + context_t *context = R700_CONTEXT(ctx); + + /* should only happenen once, just after context is created */ + /* TODO: shouldn't we fallback to sw here? */ + if (!ctx->FragmentProgram._Current) { + _mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + return; + } + + r700SelectFragmentShader(ctx); + + r700SelectVertexShader(ctx, 2); r700UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); context->radeon.NewGLState = 0; } diff --git a/src/mesa/drivers/dri/r600/r700_state.h b/src/mesa/drivers/dri/r600/r700_state.h index 0f53d5b4c59..209189d8d72 100644 --- a/src/mesa/drivers/dri/r600/r700_state.h +++ b/src/mesa/drivers/dri/r600/r700_state.h @@ -35,6 +35,7 @@ extern void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state); extern void r700UpdateShaders (GLcontext * ctx); +extern void r700UpdateShaders2(GLcontext * ctx); extern void r700UpdateViewportOffset(GLcontext * ctx); diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 9ee26286d9b..e7a209be9d8 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -159,7 +159,35 @@ GLboolean Process_Vertex_Program_Vfetch_Instructions( return GL_TRUE; } -void Map_Vertex_Program(struct r700_vertex_program *vp, +GLboolean Process_Vertex_Program_Vfetch_Instructions2( + GLcontext *ctx, + struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp) +{ + int i; + context_t *context = R700_CONTEXT(ctx); + + VTX_FETCH_METHOD vtxFetchMethod; + vtxFetchMethod.bEnableMini = GL_FALSE; + vtxFetchMethod.mega_fetch_remainder = 0; + + for(i=0; inNumActiveAos; i++) + { + assemble_vfetch_instruction2(&vp->r700AsmCode, + vp->r700AsmCode.ucVP_AttributeMap[context->stream_desc[i].element], + context->stream_desc[i].type, + context->stream_desc[i].size, + context->stream_desc[i].element, + context->stream_desc[i]._signed, + context->stream_desc[i].normalize, + &vtxFetchMethod); + } + + return GL_TRUE; +} + +void Map_Vertex_Program(GLcontext *ctx, + struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp) { GLuint ui; @@ -175,11 +203,22 @@ void Map_Vertex_Program(struct r700_vertex_program *vp, pAsm->number_used_registers += num_inputs; // Create VFETCH instructions for inputs - if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) ) - { - radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions return error. \n"); - return; //error - } + if(1 == vp->uiVersion) + { + if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) ) + { + radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions return error. \n"); + return; + } + } + else + { + if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) ) + { + radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n"); + return; + } + } // Map Outputs pAsm->number_of_exports = Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers); @@ -261,7 +300,8 @@ GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, } struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, - struct gl_vertex_program *mesa_vp) + struct gl_vertex_program *mesa_vp, + GLint nVer) { context_t *context = R700_CONTEXT(ctx); struct r700_vertex_program *vp; @@ -271,6 +311,7 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, unsigned int i; vp = _mesa_calloc(sizeof(*vp)); + vp->uiVersion = nVer; vp->mesa_program = (struct gl_vertex_program *)_mesa_clone_program(ctx, &mesa_vp->Base); if (mesa_vp->IsPositionInvariant) @@ -296,7 +337,7 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, //Init_Program Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) ); - Map_Vertex_Program( vp, vp->mesa_program ); + Map_Vertex_Program(ctx, vp, vp->mesa_program ); if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program)) { @@ -325,7 +366,7 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, return vp; } -void r700SelectVertexShader(GLcontext *ctx) +void r700SelectVertexShader(GLcontext *ctx, GLint nVersion) { context_t *context = R700_CONTEXT(ctx); struct r700_vertex_program_cont *vpc; @@ -365,7 +406,7 @@ void r700SelectVertexShader(GLcontext *ctx) } } - vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program) ); + vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program), nVersion); if(!vp) { radeon_error("Failed to translate vertex shader. \n"); @@ -377,6 +418,140 @@ void r700SelectVertexShader(GLcontext *ctx) return; } +int getTypeSize(GLenum type) +{ + switch (type) + { + case GL_DOUBLE: + return sizeof(GLdouble); + case GL_FLOAT: + return sizeof(GLfloat); + case GL_INT: + return sizeof(GLint); + case GL_UNSIGNED_INT: + return sizeof(GLuint); + case GL_SHORT: + return sizeof(GLshort); + case GL_UNSIGNED_SHORT: + return sizeof(GLushort); + case GL_BYTE: + return sizeof(GLbyte); + case GL_UNSIGNED_BYTE: + return sizeof(GLubyte); + default: + assert(0); + return 0; + } +} + +static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const struct gl_client_array *input) +{ + context_t *context = R700_CONTEXT(ctx); + + StreamDesc * pStreamDesc = &(context->stream_desc[context->nNumActiveAos]); + + GLuint stride; + + stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size + : input->StrideB; + + if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input->Type) != 4 || +#endif + stride < 4) + { + pStreamDesc->type = GL_FLOAT; + + if (input->StrideB == 0) + { + pStreamDesc->stride = 0; + } + else + { + pStreamDesc->stride = sizeof(GLfloat) * input->Size; + } + pStreamDesc->dwords = input->Size; + pStreamDesc->is_named_bo = GL_FALSE; + } + else + { + pStreamDesc->type = input->Type; + pStreamDesc->dwords = (getTypeSize(input->Type) * input->Size + 3)/ 4; + if (!input->BufferObj->Name) + { + if (input->StrideB == 0) + { + pStreamDesc->stride = 0; + } + else + { + pStreamDesc->stride = (getTypeSize(pStreamDesc->type) * input->Size + 3) & ~3; + } + + pStreamDesc->is_named_bo = GL_FALSE; + } + } + + pStreamDesc->size = input->Size; + pStreamDesc->dst_loc = context->nNumActiveAos; + pStreamDesc->element = unLoc; + + switch (pStreamDesc->type) + { //GetSurfaceFormat + case GL_FLOAT: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = GL_FALSE; + break; + case GL_SHORT: + pStreamDesc->_signed = 1; + pStreamDesc->normalize = input->Normalized; + break; + case GL_BYTE: + pStreamDesc->_signed = 1; + pStreamDesc->normalize = input->Normalized; + break; + case GL_UNSIGNED_SHORT: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = input->Normalized; + break; + case GL_UNSIGNED_BYTE: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = input->Normalized; + break; + default: + case GL_INT: + case GL_UNSIGNED_INT: + case GL_DOUBLE: + assert(0); + break; + } + context->nNumActiveAos++; +} + +void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count) +{ + context_t *context = R700_CONTEXT(ctx); + struct r700_vertex_program *vpc + = (struct r700_vertex_program *)ctx->VertexProgram._Current; + + struct gl_vertex_program * mesa_vp = (struct gl_vertex_program *)&(vpc->mesa_program); + unsigned int unLoc = 0; + unsigned int unBit = mesa_vp->Base.InputsRead; + context->nNumActiveAos = 0; + + while(unBit) + { + if(unBit & 1) + { + r700TranslateAttrib(ctx, unLoc, count, arrays[unLoc]); + } + + unBit >>= 1; + ++unLoc; + } +} + void * r700GetActiveVpShaderBo(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.h b/src/mesa/drivers/dri/r600/r700_vertprog.h index c48764c43ba..f9a3e395ee9 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.h +++ b/src/mesa/drivers/dri/r600/r700_vertprog.h @@ -52,7 +52,7 @@ struct r700_vertex_program GLboolean translated; GLboolean loaded; - GLboolean needUpdateVF; + GLint uiVersion; void * shaderbo; @@ -76,19 +76,28 @@ unsigned int Map_Vertex_Input(r700_AssemblerBase *pAsm, GLboolean Process_Vertex_Program_Vfetch_Instructions( struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp); -void Map_Vertex_Program(struct r700_vertex_program *vp, +GLboolean Process_Vertex_Program_Vfetch_Instructions2( + GLcontext *ctx, + struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp); +void Map_Vertex_Program(GLcontext *ctx, + struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp); GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp); struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, - struct gl_vertex_program *mesa_vp); + struct gl_vertex_program *mesa_vp, + GLint nVer); /* Interface */ -extern void r700SelectVertexShader(GLcontext *ctx); +extern void r700SelectVertexShader(GLcontext *ctx, GLint nVersion); +extern void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count); extern GLboolean r700SetupVertexProgram(GLcontext * ctx); extern void * r700GetActiveVpShaderBo(GLcontext * ctx); +extern int getTypeSize(GLenum type); + #endif /* _R700_VERTPROG_H_ */ diff --git a/src/mesa/drivers/dri/r600/radeon_buffer_objects.c b/src/mesa/drivers/dri/r600/radeon_buffer_objects.c new file mode 120000 index 00000000000..f6a5f664701 --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_buffer_objects.c @@ -0,0 +1 @@ +../radeon/radeon_buffer_objects.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/r600/radeon_buffer_objects.h b/src/mesa/drivers/dri/r600/radeon_buffer_objects.h new file mode 120000 index 00000000000..2f134fd17b8 --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_buffer_objects.h @@ -0,0 +1 @@ +../radeon/radeon_buffer_objects.h \ No newline at end of file -- cgit v1.2.3