diff options
-rw-r--r-- | src/gallium/drivers/panfrost/pan_varyings.c | 254 |
1 files changed, 217 insertions, 37 deletions
diff --git a/src/gallium/drivers/panfrost/pan_varyings.c b/src/gallium/drivers/panfrost/pan_varyings.c index 40d7d98bf65..69e9e6d036d 100644 --- a/src/gallium/drivers/panfrost/pan_varyings.c +++ b/src/gallium/drivers/panfrost/pan_varyings.c @@ -24,6 +24,7 @@ */ #include "pan_context.h" +#include "util/u_prim.h" static mali_ptr panfrost_emit_varyings( @@ -46,6 +47,33 @@ panfrost_emit_varyings( } static void +panfrost_emit_streamout( + struct panfrost_context *ctx, + union mali_attr *slot, + unsigned stride, + unsigned offset, + unsigned count, + struct pipe_stream_output_target *target) +{ + /* Describe one stream out target in the attribute record slot. Fill out the descriptor: the byte stride is the incoming stride times 4 (presumably dwords to bytes, TODO confirm), shift and extra_flags are zeroed, and the size is stride times count clamped to target->buffer_size. NOTE(review): the clamp does not account for the offset times stride bytes skipped when the address is computed below; confirm the write window cannot run past the target. */ + slot->stride = stride * 4; + slot->shift = slot->extra_flags = 0; + + unsigned max_size = target->buffer_size; + unsigned expected_size = slot->stride * count; + + slot->size = MIN2(max_size, expected_size); + + /* Grab the BO and bind it to the batch returned by panfrost_get_job_for_fbo. The slot then points at bo->gpu plus target->buffer_offset plus offset times slot->stride, so offset is counted in units of the byte stride. */ + struct panfrost_job *batch = panfrost_get_job_for_fbo(ctx); + struct panfrost_bo *bo = pan_resource(target->buffer)->bo; + panfrost_job_add_bo(batch, bo); + + mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride); + slot->elements = addr; +} + +static void panfrost_emit_point_coord(union mali_attr *slot) { slot->elements = MALI_VARYING_POINT_COORD | MALI_ATTR_LINEAR; @@ -110,6 +138,44 @@ panfrost_emit_varying_meta( } } +static bool +has_point_coord(unsigned mask, gl_varying_slot loc) +{ /* Test the bit of mask that corresponds to loc: bits 0 to 7 for VARYING_SLOT_TEX0 through VARYING_SLOT_TEX7 (bit index is loc minus VARYING_SLOT_TEX0) and bit 8 for VARYING_SLOT_PNTC. Any other slot yields false. */ + if ((loc >= VARYING_SLOT_TEX0) && (loc <= VARYING_SLOT_TEX7)) + return (mask & (1 << (loc - VARYING_SLOT_TEX0))); + else if (loc == VARYING_SLOT_PNTC) + return (mask & (1 << 8)); + else + return false; +} + +/* Helpers for manipulating stream out information so we can pack varyings + * accordingly. 
Compute the src_offset for a given captured varying */ +/* Return a copy of the stream output entry whose register_index equals loc, scanning info.output from index 0 up to num_outputs; hits unreachable() if loc is not captured. Note that info is passed by value, so the whole struct is copied on every call. */ +static struct pipe_stream_output +pan_get_so(struct pipe_stream_output_info info, gl_varying_slot loc) +{ + for (unsigned i = 0; i < info.num_outputs; ++i) { + if (info.output[i].register_index == loc) + return info.output[i]; + } + + unreachable("Varying not captured"); +} + +/* Map a component count of 1 to 4 onto the 32-bit float Mali format of matching width (R32F, RG32F, RGB32F, RGBA32F); any other count hits unreachable(). TODO: Integers */ +static enum mali_format +pan_xfb_format(unsigned nr_components) +{ + switch (nr_components) { + case 1: return MALI_R32F; + case 2: return MALI_RG32F; + case 3: return MALI_RGB32F; + case 4: return MALI_RGBA32F; + default: unreachable("Invalid format"); + } +} + void panfrost_emit_varying_descriptor( struct panfrost_context *ctx, @@ -129,53 +195,55 @@ panfrost_emit_varying_descriptor( struct panfrost_transfer trans = panfrost_allocate_transient(ctx, vs_size + fs_size); - for (unsigned i = 0; i < vs->tripipe->varying_count; i++) { - if (!is_special_varying(vs->varyings_loc[i])) - vs->varyings[i].src_offset = 16 * (num_gen_varyings++); - } - - for (unsigned i = 0; i < fs->tripipe->varying_count; i++) { - unsigned j; + struct pipe_stream_output_info so = vs->stream_output; - /* If we have a point sprite replacement, handle that here. We - * have to translate location first. TODO: Flip y in shader. - * We're already keying ... just time crunch .. */ + /* Check if this varying is linked by us. This is the case for + * general-purpose, non-captured varyings. If it is, link it. If it's + * not, use the provided stream out information to determine the + * offset, since it was already linked for us. */ - unsigned loc = fs->varyings_loc[i]; - unsigned pnt_loc = - (loc >= VARYING_SLOT_TEX0) ? (loc - VARYING_SLOT_TEX0) : - (loc == VARYING_SLOT_PNTC) ? 
8 : - ~0; + for (unsigned i = 0; i < vs->tripipe->varying_count; i++) { + gl_varying_slot loc = vs->varyings_loc[i]; - if (~pnt_loc && fs->point_sprite_mask & (1 << pnt_loc)) { - /* gl_PointCoord index by convention */ - fs->varyings[i].index = 3; - fs->reads_point_coord = true; + bool special = is_special_varying(loc); + bool captured = ((vs->so_mask & (1ll << loc)) ? true : false); - /* Swizzle out the z/w to 0/1 */ - fs->varyings[i].format = MALI_RG16F; - fs->varyings[i].swizzle = - panfrost_get_default_swizzle(2); + if (captured) { + struct pipe_stream_output o = pan_get_so(so, loc); - continue; + unsigned dst_offset = o.dst_offset * 4; /* dwords */ + vs->varyings[i].src_offset = dst_offset; + } else if (!special) { + vs->varyings[i].src_offset = 16 * (num_gen_varyings++); } + } - if (fs->varyings[i].index) - continue; + /* Conversely, we need to set src_offset for the captured varyings. + * Here, the layout is defined by the stream out info, not us */ + + /* Link up with fragment varyings */ + bool reads_point_coord = fs->reads_point_coord; + + for (unsigned i = 0; i < fs->tripipe->varying_count; i++) { + gl_varying_slot loc = fs->varyings_loc[i]; + signed vs_idx = -1; - /* - * Re-use the VS general purpose varying pos if it exists, - * create a new one otherwise. 
- */ - for (j = 0; j < vs->tripipe->varying_count; j++) { - if (fs->varyings_loc[i] == vs->varyings_loc[j]) + /* Link up */ + for (unsigned j = 0; j < vs->tripipe->varying_count; ++j) { + if (vs->varyings_loc[j] == loc) { + vs_idx = j; break; + } } - if (j < vs->tripipe->varying_count) - fs->varyings[i].src_offset = vs->varyings[j].src_offset; + /* Either assign or reuse */ + if (vs_idx >= 0) + fs->varyings[i].src_offset = vs->varyings[vs_idx].src_offset; else fs->varyings[i].src_offset = 16 * (num_gen_varyings++); + + if (has_point_coord(fs->point_sprite_mask, loc)) + reads_point_coord |= true; } memcpy(trans.cpu, vs->varyings, vs_size); @@ -183,13 +251,45 @@ panfrost_emit_varying_descriptor( union mali_attr varyings[PIPE_MAX_ATTRIBS]; - unsigned idx = 0; + /* Figure out how many streamout buffers could be bound */ + unsigned so_count = ctx->streamout.num_targets; + for (unsigned i = 0; i < vs->tripipe->varying_count; i++) { + gl_varying_slot loc = vs->varyings_loc[i]; + + bool captured = ((vs->so_mask & (1ll << loc)) ? true : false); + if (!captured) continue; + + struct pipe_stream_output o = pan_get_so(so, loc); + so_count = MAX2(so_count, o.output_buffer + 1); + } + + signed idx = so_count; signed general = idx++; signed gl_Position = idx++; signed gl_PointSize = vs->writes_point_size ? (idx++) : -1; - signed gl_PointCoord = fs->reads_point_coord ? (idx++) : -1; + signed gl_PointCoord = reads_point_coord ? (idx++) : -1; signed gl_FrontFacing = fs->reads_face ? (idx++) : -1; + /* Emit the stream out buffers */ + + unsigned output_count = u_stream_outputs_for_vertices( + ctx->active_prim, ctx->vertex_count); + + for (unsigned i = 0; i < so_count; ++i) { + struct pipe_stream_output_target *target = + (i < ctx->streamout.num_targets) ? 
ctx->streamout.targets[i] : NULL; + + if (target) { + panfrost_emit_streamout(ctx, &varyings[i], so.stride[i], ctx->streamout.offsets[i], output_count, target); + } else { + /* Emit a dummy buffer */ + panfrost_emit_varyings(ctx, &varyings[i], so.stride[i] * 4, output_count); + + /* Clear the attribute type */ + varyings[i].elements &= ~0xF; + } + } + panfrost_emit_varyings(ctx, &varyings[general], num_gen_varyings * 16, vertex_count); @@ -204,7 +304,7 @@ panfrost_emit_varying_descriptor( panfrost_emit_varyings(ctx, &varyings[gl_PointSize], 2, vertex_count); - if (fs->reads_point_coord) + if (reads_point_coord) panfrost_emit_point_coord(&varyings[gl_PointCoord]); if (fs->reads_face) @@ -221,6 +321,86 @@ panfrost_emit_varying_descriptor( general, gl_Position, gl_PointSize, gl_PointCoord, gl_FrontFacing); + /* Replace streamout */ + + struct mali_attr_meta *ovs = (struct mali_attr_meta *) (trans.cpu); + struct mali_attr_meta *ofs = (struct mali_attr_meta *) (trans.cpu + vs_size); + + for (unsigned i = 0; i < vs->tripipe->varying_count; i++) { + gl_varying_slot loc = vs->varyings_loc[i]; + + bool captured = ((vs->so_mask & (1ll << loc)) ? true : false); + if (!captured) continue; + + struct pipe_stream_output o = pan_get_so(so, loc); + ovs[i].index = o.output_buffer; + + /* Set the type appropriately. TODO: Integer varyings XXX */ + assert(o.stream == 0); + ovs[i].format = pan_xfb_format(o.num_components); + ovs[i].swizzle = panfrost_get_default_swizzle(o.num_components); + + /* Link to the fragment */ + signed fs_idx = -1; + + /* Link up */ + for (unsigned j = 0; j < fs->tripipe->varying_count; ++j) { + if (fs->varyings_loc[j] == loc) { + fs_idx = j; + break; + } + } + + if (fs_idx >= 0) { + ofs[fs_idx].index = ovs[i].index; + ofs[fs_idx].format = ovs[i].format; + ofs[fs_idx].swizzle = ovs[i].swizzle; + } + } + + /* Replace point sprite */ + for (unsigned i = 0; i < fs->tripipe->varying_count; i++) { + /* If we have a point sprite replacement, handle that here. 
We + * have to translate location first. TODO: Flip y in shader. + * We're already keying ... just time crunch .. */ + + if (has_point_coord(fs->point_sprite_mask, fs->varyings_loc[i])) { + ofs[i].index = gl_PointCoord; + + /* Swizzle out the z/w to 0/1 */ + ofs[i].format = MALI_RG16F; + ofs[i].swizzle = + panfrost_get_default_swizzle(2); + } + } + + /* Fix up unaligned addresses */ + for (unsigned i = 0; i < so_count; ++i) { + unsigned align = (varyings[i].elements & 63); + + /* While we're at it, the SO buffers are linear */ + + if (!align) { + varyings[i].elements |= MALI_ATTR_LINEAR; + continue; + } + + /* We need to adjust alignment */ + varyings[i].elements &= ~63; + varyings[i].elements |= MALI_ATTR_LINEAR; + varyings[i].size += align; + + for (unsigned v = 0; v < vs->tripipe->varying_count; ++v) { + if (ovs[v].index == i) + ovs[v].src_offset = vs->varyings[v].src_offset + align; + } + + for (unsigned f = 0; f < fs->tripipe->varying_count; ++f) { + if (ofs[f].index == i) + ofs[f].src_offset = fs->varyings[f].src_offset + align; + } + } + mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, idx * sizeof(union mali_attr)); ctx->payloads[PIPE_SHADER_VERTEX].postfix.varyings = varyings_p; ctx->payloads[PIPE_SHADER_FRAGMENT].postfix.varyings = varyings_p; |