Diffstat (limited to 'src/gallium/auxiliary/translate/translate_generic.c')
-rw-r--r-- | src/gallium/auxiliary/translate/translate_generic.c | 213 |
1 files changed, 118 insertions, 95 deletions
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 42cfd763e9c..ad809db720d 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -64,6 +64,14 @@ struct translate_generic {
       unsigned input_stride;
       unsigned max_index;
 
+      /* this value is set to -1 if this is a normal element with output_format != input_format:
+       * in this case, u_format is used to do a full conversion
+       *
+       * this value is set to the format size in bytes if output_format == input_format or for 32-bit instance ids:
+       * in this case, memcpy is used to copy this amount of bytes
+       */
+      int copy_size;
+
    } attrib[PIPE_MAX_ATTRIBS];
 
    unsigned nr_attrib;
@@ -354,7 +362,65 @@ static emit_func get_emit_func( enum pipe_format format )
    }
 }
 
 
+static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic *tg,
+                                                      unsigned elt,
+                                                      unsigned instance_id,
+                                                      void *vert )
+{
+   unsigned nr_attrs = tg->nr_attrib;
+   unsigned attr;
+
+   for (attr = 0; attr < nr_attrs; attr++) {
+      float data[4];
+      uint8_t *dst = (uint8_t *)vert + tg->attrib[attr].output_offset;
+
+      if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
+         const uint8_t *src;
+         unsigned index;
+         int copy_size;
+
+         if (tg->attrib[attr].instance_divisor) {
+            index = instance_id / tg->attrib[attr].instance_divisor;
+         }
+         else {
+            index = elt;
+         }
+
+         /* clamp to void going out of bounds */
+         index = MIN2(index, tg->attrib[attr].max_index);
+         src = tg->attrib[attr].input_ptr +
+               tg->attrib[attr].input_stride * index;
+
+         copy_size = tg->attrib[attr].copy_size;
+         if(likely(copy_size >= 0))
+            memcpy(dst, src, copy_size);
+         else
+         {
+            tg->attrib[attr].fetch( data, src, 0, 0 );
+
+            if (0)
+               debug_printf("Fetch linear attr %d from %p stride %d index %d: "
+                            " %f, %f, %f, %f \n",
+                            attr,
+                            tg->attrib[attr].input_ptr,
+                            tg->attrib[attr].input_stride,
+                            index,
+                            data[0], data[1],data[2], data[3]);
+
+            tg->attrib[attr].emit( data, dst );
+         }
+      } else {
+         if(likely(tg->attrib[attr].copy_size >= 0))
+            memcpy(dst, &instance_id, 4);
+         else
+         {
+            data[0] = (float)instance_id;
+            tg->attrib[attr].emit( data, dst );
+         }
+      }
+   }
+}
 
 /**
  * Fetch vertex attributes for 'count' vertices.
@@ -367,62 +433,45 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
 {
    struct translate_generic *tg = translate_generic(translate);
    char *vert = output_buffer;
-   unsigned nr_attrs = tg->nr_attrib;
-   unsigned attr;
    unsigned i;
 
-   /* loop over vertex attributes (vertex shader inputs)
-    */
    for (i = 0; i < count; i++) {
-      const unsigned elt = *elts++;
-
-      for (attr = 0; attr < nr_attrs; attr++) {
-         float data[4];
-         char *dst = vert + tg->attrib[attr].output_offset;
-
-         if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
-            const uint8_t *src;
-            unsigned index;
-
-            if (tg->attrib[attr].instance_divisor) {
-               index = instance_id / tg->attrib[attr].instance_divisor;
-            } else {
-               index = elt;
-            }
-
-            /* clamp to void going out of bounds */
-            index = MIN2(index, tg->attrib[attr].max_index);
-
-            src = tg->attrib[attr].input_ptr +
-                  tg->attrib[attr].input_stride * index;
-
-            tg->attrib[attr].fetch( data, src, 0, 0 );
-
-            if (0)
-               debug_printf("Fetch elt attr %d from %p stride %d div %u max %u index %d: "
-                            " %f, %f, %f, %f \n",
-                            attr,
-                            tg->attrib[attr].input_ptr,
-                            tg->attrib[attr].input_stride,
-                            tg->attrib[attr].instance_divisor,
-                            tg->attrib[attr].max_index,
-                            index,
-                            data[0], data[1],data[2], data[3]);
-         } else {
-            data[0] = (float)instance_id;
-         }
+      generic_run_one(tg, *elts++, instance_id, vert);
+      vert += tg->translate.key.output_stride;
+   }
+}
 
-         if (0)
-            debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
-                         i, elt, attr, data[0], data[1], data[2], data[3]);
+static void PIPE_CDECL generic_run_elts16( struct translate *translate,
+                                           const uint16_t *elts,
+                                           unsigned count,
+                                           unsigned instance_id,
+                                           void *output_buffer )
+{
+   struct translate_generic *tg = translate_generic(translate);
+   char *vert = output_buffer;
+   unsigned i;
 
-         tg->attrib[attr].emit( data, dst );
-      }
+   for (i = 0; i < count; i++) {
+      generic_run_one(tg, *elts++, instance_id, vert);
       vert += tg->translate.key.output_stride;
    }
 }
 
+static void PIPE_CDECL generic_run_elts8( struct translate *translate,
+                                          const uint8_t *elts,
+                                          unsigned count,
+                                          unsigned instance_id,
+                                          void *output_buffer )
+{
+   struct translate_generic *tg = translate_generic(translate);
+   char *vert = output_buffer;
+   unsigned i;
+
+   for (i = 0; i < count; i++) {
+      generic_run_one(tg, *elts++, instance_id, vert);
+      vert += tg->translate.key.output_stride;
+   }
+}
 
 static void PIPE_CDECL generic_run( struct translate *translate,
                                     unsigned start,
@@ -432,57 +481,10 @@ static void PIPE_CDECL generic_run( struct translate *translate,
 {
    struct translate_generic *tg = translate_generic(translate);
    char *vert = output_buffer;
-   unsigned nr_attrs = tg->nr_attrib;
-   unsigned attr;
    unsigned i;
 
-   /* loop over vertex attributes (vertex shader inputs)
-    */
    for (i = 0; i < count; i++) {
-      unsigned elt = start + i;
-
-      for (attr = 0; attr < nr_attrs; attr++) {
-         float data[4];
-         char *dst = vert + tg->attrib[attr].output_offset;
-
-         if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
-            const uint8_t *src;
-            unsigned index;
-
-            if (tg->attrib[attr].instance_divisor) {
-               index = instance_id / tg->attrib[attr].instance_divisor;
-            }
-            else {
-               index = elt;
-            }
-
-            /* clamp to void going out of bounds */
-            index = MIN2(index, tg->attrib[attr].max_index);
-
-            src = tg->attrib[attr].input_ptr +
-                  tg->attrib[attr].input_stride * index;
-
-            tg->attrib[attr].fetch( data, src, 0, 0 );
-
-            if (0)
-               debug_printf("Fetch linear attr %d from %p stride %d index %d: "
-                            " %f, %f, %f, %f \n",
-                            attr,
-                            tg->attrib[attr].input_ptr,
-                            tg->attrib[attr].input_stride,
-                            index,
-                            data[0], data[1],data[2], data[3]);
-         } else {
-            data[0] = (float)instance_id;
-         }
-
-         if (0)
-            debug_printf("vert %d attr %d: %f %f %f %f\n",
-                         i, attr, data[0], data[1], data[2], data[3]);
-
-         tg->attrib[attr].emit( data, dst );
-      }
-
+      generic_run_one(tg, start + i, instance_id, vert);
       vert += tg->translate.key.output_stride;
    }
 }
@@ -528,6 +530,8 @@ struct translate *translate_generic_create( const struct translate_key *key )
    tg->translate.release = generic_release;
    tg->translate.set_buffer = generic_set_buffer;
    tg->translate.run_elts = generic_run_elts;
+   tg->translate.run_elts16 = generic_run_elts16;
+   tg->translate.run_elts8 = generic_run_elts8;
    tg->translate.run = generic_run;
 
    for (i = 0; i < key->nr_elements; i++) {
@@ -544,9 +548,28 @@ struct translate *translate_generic_create( const struct translate_key *key )
       tg->attrib[i].input_offset = key->element[i].input_offset;
       tg->attrib[i].instance_divisor = key->element[i].instance_divisor;
 
-      tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
       tg->attrib[i].output_offset = key->element[i].output_offset;
 
+      tg->attrib[i].copy_size = -1;
+      if (tg->attrib[i].type == TRANSLATE_ELEMENT_INSTANCE_ID)
+      {
+         if(key->element[i].output_format == PIPE_FORMAT_R32_USCALED
+               || key->element[i].output_format == PIPE_FORMAT_R32_SSCALED)
+            tg->attrib[i].copy_size = 4;
+      }
+      else
+      {
+         if(key->element[i].input_format == key->element[i].output_format
+               && format_desc->block.width == 1
+               && format_desc->block.height == 1
+               && !(format_desc->block.bits & 7))
+            tg->attrib[i].copy_size = format_desc->block.bits >> 3;
+      }
+
+      if(tg->attrib[i].copy_size < 0)
+         tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
+      else
+         tg->attrib[i].emit = NULL;
    }
 
    tg->nr_attrib = key->nr_elements;
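The core of the change is the copy_size decision in translate_generic_create(): when an element's input and output formats are identical and the format occupies a whole number of bytes per block, the per-attribute fetch()/emit() conversion through u_format is replaced by a plain memcpy of that many bytes, and emit is left NULL. Below is a minimal, self-contained sketch of that decision; the struct and function names are simplified stand-ins for illustration, not the gallium API.

/* Sketch only: mirrors the copy_size selection logic from the patch,
 * using hypothetical stand-in types instead of the gallium ones. */
#include <stdio.h>

struct fake_element {
   int input_format;      /* stand-in for enum pipe_format */
   int output_format;
   unsigned block_width;
   unsigned block_height;
   unsigned block_bits;   /* bits per block, e.g. 96 for three 32-bit floats */
};

/* Returns the number of bytes to memcpy per vertex, or -1 if a full
 * fetch()/emit() format conversion is still required. */
static int pick_copy_size(const struct fake_element *e)
{
   if (e->input_format == e->output_format &&
       e->block_width == 1 && e->block_height == 1 &&
       !(e->block_bits & 7))
      return e->block_bits >> 3;
   return -1;
}

int main(void)
{
   struct fake_element same   = { 10, 10, 1, 1, 96 }; /* identical formats: memcpy path */
   struct fake_element differ = { 10, 11, 1, 1, 96 }; /* mismatch: conversion path */

   printf("same formats:      copy_size = %d\n", pick_copy_size(&same));   /* prints 12 */
   printf("different formats: copy_size = %d\n", pick_copy_size(&differ)); /* prints -1 */
   return 0;
}

In generic_run_one() a non-negative copy_size then selects the memcpy branch, so the common case of matching vertex formats skips the float fetch/emit conversion entirely.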