summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2015-07-09 22:51:06 -0700
committer: Eric Anholt <[email protected]> 2015-07-14 11:31:57 -0700
commit: 7432017f65174e82a3de7afef3e4e6f60932356c (patch)
tree: 4777351f67cfadb00379723d7b918903b08b6353
parent: a0d3915663fb7cbd3c1a5561450e256e00ecf11b (diff)
vc4: Rework cl handling to be friendlier to the compiler.
Drops 680 bytes of code, from avoiding a bunch of extra updates to the next pointer in the struct.
-rw-r--r-- src/gallium/drivers/vc4/vc4_cl.c 11
-rw-r--r-- src/gallium/drivers/vc4/vc4_cl.h 113
-rw-r--r-- src/gallium/drivers/vc4/vc4_context.c 6
-rw-r--r-- src/gallium/drivers/vc4/vc4_draw.c 109
-rw-r--r-- src/gallium/drivers/vc4/vc4_emit.c 57
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c 59
6 files changed, 203 insertions, 152 deletions
diff --git a/src/gallium/drivers/vc4/vc4_cl.c b/src/gallium/drivers/vc4/vc4_cl.c
index 97f6b89024c..ced4f2dfa86 100644
--- a/src/gallium/drivers/vc4/vc4_cl.c
+++ b/src/gallium/drivers/vc4/vc4_cl.c
@@ -66,8 +66,15 @@ vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo)
return hindex;
}
- cl_u32(&vc4->bo_handles, bo->handle);
- cl_ptr(&vc4->bo_pointers, vc4_bo_reference(bo));
+ struct vc4_cl_out *out;
+
+ out = cl_start(&vc4->bo_handles);
+ cl_u32(&out, bo->handle);
+ cl_end(&vc4->bo_handles, out);
+
+ out = cl_start(&vc4->bo_pointers);
+ cl_ptr(&out, vc4_bo_reference(bo));
+ cl_end(&vc4->bo_pointers, out);
return hindex;
}
diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h
index b914745ed4f..95f1a531d34 100644
--- a/src/gallium/drivers/vc4/vc4_cl.h
+++ b/src/gallium/drivers/vc4/vc4_cl.h
@@ -33,10 +33,16 @@
struct vc4_bo;
+/**
+ * Undefined structure, used for typechecking that you're passing the pointers
+ * to these functions correctly.
+ */
+struct vc4_cl_out;
+
struct vc4_cl {
void *base;
- void *next;
- void *reloc_next;
+ struct vc4_cl_out *next;
+ struct vc4_cl_out *reloc_next;
uint32_t size;
uint32_t reloc_count;
};
@@ -55,122 +61,135 @@ static inline uint32_t cl_offset(struct vc4_cl *cl)
}
static inline void
-put_unaligned_32(void *ptr, uint32_t val)
+cl_advance(struct vc4_cl_out **cl, uint32_t n)
{
- struct unaligned_32 *p = ptr;
- p->x = val;
+ (*cl) = (struct vc4_cl_out *)((char *)(*cl) + n);
}
-static inline void
-put_unaligned_16(void *ptr, uint16_t val)
+static inline struct vc4_cl_out *
+cl_start(struct vc4_cl *cl)
{
- struct unaligned_16 *p = ptr;
- p->x = val;
+ return cl->next;
}
static inline void
-cl_u8(struct vc4_cl *cl, uint8_t n)
+cl_end(struct vc4_cl *cl, struct vc4_cl_out *next)
{
- assert(cl_offset(cl) + 1 <= cl->size);
-
- *(uint8_t *)cl->next = n;
- cl->next++;
+ cl->next = next;
+ assert(cl_offset(cl) <= cl->size);
}
+
static inline void
-cl_u16(struct vc4_cl *cl, uint16_t n)
+put_unaligned_32(struct vc4_cl_out *ptr, uint32_t val)
{
- assert(cl_offset(cl) + 2 <= cl->size);
-
- put_unaligned_16(cl->next, n);
- cl->next += 2;
+ struct unaligned_32 *p = (void *)ptr;
+ p->x = val;
}
static inline void
-cl_u32(struct vc4_cl *cl, uint32_t n)
+put_unaligned_16(struct vc4_cl_out *ptr, uint16_t val)
{
- assert(cl_offset(cl) + 4 <= cl->size);
+ struct unaligned_16 *p = (void *)ptr;
+ p->x = val;
+}
- put_unaligned_32(cl->next, n);
- cl->next += 4;
+static inline void
+cl_u8(struct vc4_cl_out **cl, uint8_t n)
+{
+ *(uint8_t *)(*cl) = n;
+ cl_advance(cl, 1);
}
static inline void
-cl_aligned_u32(struct vc4_cl *cl, uint32_t n)
+cl_u16(struct vc4_cl_out **cl, uint16_t n)
{
- assert(cl_offset(cl) + 4 <= cl->size);
+ put_unaligned_16(*cl, n);
+ cl_advance(cl, 2);
+}
- *(uint32_t *)cl->next = n;
- cl->next += 4;
+static inline void
+cl_u32(struct vc4_cl_out **cl, uint32_t n)
+{
+ put_unaligned_32(*cl, n);
+ cl_advance(cl, 4);
}
static inline void
-cl_ptr(struct vc4_cl *cl, void *ptr)
+cl_aligned_u32(struct vc4_cl_out **cl, uint32_t n)
{
- assert(cl_offset(cl) + sizeof(void *) <= cl->size);
+ *(uint32_t *)(*cl) = n;
+ cl_advance(cl, 4);
+}
- *(void **)cl->next = ptr;
- cl->next += sizeof(void *);
+static inline void
+cl_ptr(struct vc4_cl_out **cl, void *ptr)
+{
+ *(struct vc4_cl_out **)(*cl) = ptr;
+ cl_advance(cl, sizeof(void *));
}
static inline void
-cl_f(struct vc4_cl *cl, float f)
+cl_f(struct vc4_cl_out **cl, float f)
{
cl_u32(cl, fui(f));
}
static inline void
-cl_aligned_f(struct vc4_cl *cl, float f)
+cl_aligned_f(struct vc4_cl_out **cl, float f)
{
cl_aligned_u32(cl, fui(f));
}
static inline void
-cl_start_reloc(struct vc4_cl *cl, uint32_t n)
+cl_start_reloc(struct vc4_cl *cl, struct vc4_cl_out **out, uint32_t n)
{
assert(n == 1 || n == 2);
assert(cl->reloc_count == 0);
cl->reloc_count = n;
- cl_u8(cl, VC4_PACKET_GEM_HANDLES);
- cl->reloc_next = cl->next;
- cl_u32(cl, 0); /* Space where hindex will be written. */
- cl_u32(cl, 0); /* Space where hindex will be written. */
+ cl_u8(out, VC4_PACKET_GEM_HANDLES);
+ cl->reloc_next = *out;
+ cl_u32(out, 0); /* Space where hindex will be written. */
+ cl_u32(out, 0); /* Space where hindex will be written. */
}
-static inline void
+static inline struct vc4_cl_out *
cl_start_shader_reloc(struct vc4_cl *cl, uint32_t n)
{
assert(cl->reloc_count == 0);
cl->reloc_count = n;
cl->reloc_next = cl->next;
- /* Space where hindex will be written. */
- cl->next += n * 4;
+ /* Reserve the space where hindex will be written. */
+ cl_advance(&cl->next, n * 4);
+
+ return cl->next;
}
static inline void
-cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
+cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl, struct vc4_cl_out **cl_out,
struct vc4_bo *bo, uint32_t offset)
{
*(uint32_t *)cl->reloc_next = vc4_gem_hindex(vc4, bo);
- cl->reloc_next += 4;
+ cl_advance(&cl->reloc_next, 4);
cl->reloc_count--;
- cl_u32(cl, offset);
+ cl_u32(cl_out, offset);
}
static inline void
cl_aligned_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
- struct vc4_bo *bo, uint32_t offset)
+ struct vc4_cl_out **cl_out,
+ struct vc4_bo *bo, uint32_t offset)
{
*(uint32_t *)cl->reloc_next = vc4_gem_hindex(vc4, bo);
- cl->reloc_next += 4;
+ cl_advance(&cl->reloc_next, 4);
cl->reloc_count--;
- cl_aligned_u32(cl, offset);
+ cl_aligned_u32(cl_out, offset);
}
void cl_ensure_space(struct vc4_cl *cl, uint32_t size);
diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c
index 60da218e59e..fff63158c9d 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -61,9 +61,11 @@ vc4_flush(struct pipe_context *pctx)
* FLUSH completes.
*/
cl_ensure_space(&vc4->bcl, 8);
- cl_u8(&vc4->bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
+ struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
+ cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
/* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
- cl_u8(&vc4->bcl, VC4_PACKET_FLUSH);
+ cl_u8(&bcl, VC4_PACKET_FLUSH);
+ cl_end(&vc4->bcl, bcl);
if (cbuf && (vc4->resolve & PIPE_CLEAR_COLOR0)) {
pipe_surface_reference(&vc4->color_write, cbuf);
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index 5e6d70d6f33..fc3c2321abb 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -71,37 +71,40 @@ vc4_start_draw(struct vc4_context *vc4)
uint32_t height = vc4->framebuffer.height;
uint32_t tilew = align(width, 64) / 64;
uint32_t tileh = align(height, 64) / 64;
+ struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
// Tile state data is 48 bytes per tile, I think it can be thrown away
// as soon as binning is finished.
- cl_u8(&vc4->bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
- cl_u32(&vc4->bcl, 0); /* tile alloc addr, filled by kernel */
- cl_u32(&vc4->bcl, 0); /* tile alloc size, filled by kernel */
- cl_u32(&vc4->bcl, 0); /* tile state addr, filled by kernel */
- cl_u8(&vc4->bcl, tilew);
- cl_u8(&vc4->bcl, tileh);
- cl_u8(&vc4->bcl, 0); /* flags, filled by kernel. */
+ cl_u8(&bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
+ cl_u32(&bcl, 0); /* tile alloc addr, filled by kernel */
+ cl_u32(&bcl, 0); /* tile alloc size, filled by kernel */
+ cl_u32(&bcl, 0); /* tile state addr, filled by kernel */
+ cl_u8(&bcl, tilew);
+ cl_u8(&bcl, tileh);
+ cl_u8(&bcl, 0); /* flags, filled by kernel. */
/* START_TILE_BINNING resets the statechange counters in the hardware,
* which are what is used when a primitive is binned to a tile to
* figure out what new state packets need to be written to that tile's
* command list.
*/
- cl_u8(&vc4->bcl, VC4_PACKET_START_TILE_BINNING);
+ cl_u8(&bcl, VC4_PACKET_START_TILE_BINNING);
/* Reset the current compressed primitives format. This gets modified
* by VC4_PACKET_GL_INDEXED_PRIMITIVE and
* VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
* of every tile.
*/
- cl_u8(&vc4->bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
- cl_u8(&vc4->bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
- VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
+ cl_u8(&bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
+ cl_u8(&bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
+ VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
vc4->needs_flush = true;
vc4->draw_call_queued = true;
vc4->draw_width = width;
vc4->draw_height = height;
+
+ cl_end(&vc4->bcl, bcl);
}
static void
@@ -167,28 +170,29 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
*/
uint32_t num_elements_emit = MAX2(vtx->num_elements, 1);
/* Emit the shader record. */
- cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
- cl_u16(&vc4->shader_rec,
+ struct vc4_cl_out *shader_rec =
+ cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
+ cl_u16(&shader_rec,
VC4_SHADER_FLAG_ENABLE_CLIPPING |
((info->mode == PIPE_PRIM_POINTS &&
vc4->rasterizer->base.point_size_per_vertex) ?
VC4_SHADER_FLAG_VS_POINT_SIZE : 0));
- cl_u8(&vc4->shader_rec, 0); /* fs num uniforms (unused) */
- cl_u8(&vc4->shader_rec, vc4->prog.fs->num_inputs);
- cl_reloc(vc4, &vc4->shader_rec, vc4->prog.fs->bo, 0);
- cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
-
- cl_u16(&vc4->shader_rec, 0); /* vs num uniforms */
- cl_u8(&vc4->shader_rec, vc4->prog.vs->vattrs_live);
- cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[8]);
- cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, 0);
- cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
-
- cl_u16(&vc4->shader_rec, 0); /* cs num uniforms */
- cl_u8(&vc4->shader_rec, vc4->prog.cs->vattrs_live);
- cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[8]);
- cl_reloc(vc4, &vc4->shader_rec, vc4->prog.cs->bo, 0);
- cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
+ cl_u8(&shader_rec, 0); /* fs num uniforms (unused) */
+ cl_u8(&shader_rec, vc4->prog.fs->num_inputs);
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.fs->bo, 0);
+ cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
+
+ cl_u16(&shader_rec, 0); /* vs num uniforms */
+ cl_u8(&shader_rec, vc4->prog.vs->vattrs_live);
+ cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[8]);
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.vs->bo, 0);
+ cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
+
+ cl_u16(&shader_rec, 0); /* cs num uniforms */
+ cl_u8(&shader_rec, vc4->prog.cs->vattrs_live);
+ cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[8]);
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.cs->bo, 0);
+ cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
uint32_t max_index = 0xffff;
uint32_t vpm_offset = 0;
@@ -202,11 +206,11 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
uint32_t elem_size =
util_format_get_blocksize(elem->src_format);
- cl_reloc(vc4, &vc4->shader_rec, rsc->bo, offset);
- cl_u8(&vc4->shader_rec, elem_size - 1);
- cl_u8(&vc4->shader_rec, vb->stride);
- cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[i]);
- cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[i]);
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, rsc->bo, offset);
+ cl_u8(&shader_rec, elem_size - 1);
+ cl_u8(&shader_rec, vb->stride);
+ cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[i]);
+ cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[i]);
vpm_offset += align(elem_size, 4);
@@ -219,21 +223,23 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
if (vtx->num_elements == 0) {
assert(num_elements_emit == 1);
struct vc4_bo *bo = vc4_bo_alloc(vc4->screen, 4096, "scratch VBO");
- cl_reloc(vc4, &vc4->shader_rec, bo, 0);
- cl_u8(&vc4->shader_rec, 16 - 1); /* element size */
- cl_u8(&vc4->shader_rec, 0); /* stride */
- cl_u8(&vc4->shader_rec, 0); /* VS VPM offset */
- cl_u8(&vc4->shader_rec, 0); /* CS VPM offset */
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, bo, 0);
+ cl_u8(&shader_rec, 16 - 1); /* element size */
+ cl_u8(&shader_rec, 0); /* stride */
+ cl_u8(&shader_rec, 0); /* VS VPM offset */
+ cl_u8(&shader_rec, 0); /* CS VPM offset */
vc4_bo_unreference(&bo);
}
+ cl_end(&vc4->shader_rec, shader_rec);
+ struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
/* the actual draw call. */
- cl_u8(&vc4->bcl, VC4_PACKET_GL_SHADER_STATE);
+ cl_u8(&bcl, VC4_PACKET_GL_SHADER_STATE);
assert(vtx->num_elements <= 8);
/* Note that number of attributes == 0 in the packet means 8
* attributes. This field also contains the offset into shader_rec.
*/
- cl_u32(&vc4->bcl, num_elements_emit & 0x7);
+ cl_u32(&bcl, num_elements_emit & 0x7);
/* Note that the primitive type fields match with OpenGL/gallium
* definitions, up to but not including QUADS.
@@ -251,25 +257,26 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
}
struct vc4_resource *rsc = vc4_resource(prsc);
- cl_start_reloc(&vc4->bcl, 1);
- cl_u8(&vc4->bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
- cl_u8(&vc4->bcl,
+ cl_start_reloc(&vc4->bcl, &bcl, 1);
+ cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
+ cl_u8(&bcl,
info->mode |
(index_size == 2 ?
VC4_INDEX_BUFFER_U16:
VC4_INDEX_BUFFER_U8));
- cl_u32(&vc4->bcl, info->count);
- cl_reloc(vc4, &vc4->bcl, rsc->bo, offset);
- cl_u32(&vc4->bcl, max_index);
+ cl_u32(&bcl, info->count);
+ cl_reloc(vc4, &vc4->bcl, &bcl, rsc->bo, offset);
+ cl_u32(&bcl, max_index);
if (vc4->indexbuf.index_size == 4)
pipe_resource_reference(&prsc, NULL);
} else {
- cl_u8(&vc4->bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
- cl_u8(&vc4->bcl, info->mode);
- cl_u32(&vc4->bcl, info->count);
- cl_u32(&vc4->bcl, info->start);
+ cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
+ cl_u8(&bcl, info->mode);
+ cl_u32(&bcl, info->count);
+ cl_u32(&bcl, info->start);
}
+ cl_end(&vc4->bcl, bcl);
if (vc4->zsa && vc4->zsa->base.depth.enabled) {
vc4->resolve |= PIPE_CLEAR_DEPTH;
diff --git a/src/gallium/drivers/vc4/vc4_emit.c b/src/gallium/drivers/vc4/vc4_emit.c
index d2b54fccf91..f5925734415 100644
--- a/src/gallium/drivers/vc4/vc4_emit.c
+++ b/src/gallium/drivers/vc4/vc4_emit.c
@@ -28,6 +28,7 @@ vc4_emit_state(struct pipe_context *pctx)
{
struct vc4_context *vc4 = vc4_context(pctx);
+ struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
if (vc4->dirty & (VC4_DIRTY_SCISSOR | VC4_DIRTY_VIEWPORT)) {
float *vpscale = vc4->viewport.scale;
float *vptranslate = vc4->viewport.translate;
@@ -40,11 +41,11 @@ vc4_emit_state(struct pipe_context *pctx)
uint32_t maxx = MIN2(vc4->scissor.maxx, vp_maxx);
uint32_t maxy = MIN2(vc4->scissor.maxy, vp_maxy);
- cl_u8(&vc4->bcl, VC4_PACKET_CLIP_WINDOW);
- cl_u16(&vc4->bcl, minx);
- cl_u16(&vc4->bcl, miny);
- cl_u16(&vc4->bcl, maxx - minx);
- cl_u16(&vc4->bcl, maxy - miny);
+ cl_u8(&bcl, VC4_PACKET_CLIP_WINDOW);
+ cl_u16(&bcl, minx);
+ cl_u16(&bcl, miny);
+ cl_u16(&bcl, maxx - minx);
+ cl_u16(&bcl, maxy - miny);
vc4->draw_min_x = MIN2(vc4->draw_min_x, minx);
vc4->draw_min_y = MIN2(vc4->draw_min_y, miny);
@@ -53,47 +54,49 @@ vc4_emit_state(struct pipe_context *pctx)
}
if (vc4->dirty & (VC4_DIRTY_RASTERIZER | VC4_DIRTY_ZSA)) {
- cl_u8(&vc4->bcl, VC4_PACKET_CONFIGURATION_BITS);
- cl_u8(&vc4->bcl,
+ cl_u8(&bcl, VC4_PACKET_CONFIGURATION_BITS);
+ cl_u8(&bcl,
vc4->rasterizer->config_bits[0] |
vc4->zsa->config_bits[0]);
- cl_u8(&vc4->bcl,
+ cl_u8(&bcl,
vc4->rasterizer->config_bits[1] |
vc4->zsa->config_bits[1]);
- cl_u8(&vc4->bcl,
+ cl_u8(&bcl,
vc4->rasterizer->config_bits[2] |
vc4->zsa->config_bits[2]);
}
if (vc4->dirty & VC4_DIRTY_RASTERIZER) {
- cl_u8(&vc4->bcl, VC4_PACKET_DEPTH_OFFSET);
- cl_u16(&vc4->bcl, vc4->rasterizer->offset_factor);
- cl_u16(&vc4->bcl, vc4->rasterizer->offset_units);
+ cl_u8(&bcl, VC4_PACKET_DEPTH_OFFSET);
+ cl_u16(&bcl, vc4->rasterizer->offset_factor);
+ cl_u16(&bcl, vc4->rasterizer->offset_units);
- cl_u8(&vc4->bcl, VC4_PACKET_POINT_SIZE);
- cl_f(&vc4->bcl, vc4->rasterizer->point_size);
+ cl_u8(&bcl, VC4_PACKET_POINT_SIZE);
+ cl_f(&bcl, vc4->rasterizer->point_size);
- cl_u8(&vc4->bcl, VC4_PACKET_LINE_WIDTH);
- cl_f(&vc4->bcl, vc4->rasterizer->base.line_width);
+ cl_u8(&bcl, VC4_PACKET_LINE_WIDTH);
+ cl_f(&bcl, vc4->rasterizer->base.line_width);
}
if (vc4->dirty & VC4_DIRTY_VIEWPORT) {
- cl_u8(&vc4->bcl, VC4_PACKET_CLIPPER_XY_SCALING);
- cl_f(&vc4->bcl, vc4->viewport.scale[0] * 16.0f);
- cl_f(&vc4->bcl, vc4->viewport.scale[1] * 16.0f);
+ cl_u8(&bcl, VC4_PACKET_CLIPPER_XY_SCALING);
+ cl_f(&bcl, vc4->viewport.scale[0] * 16.0f);
+ cl_f(&bcl, vc4->viewport.scale[1] * 16.0f);
- cl_u8(&vc4->bcl, VC4_PACKET_CLIPPER_Z_SCALING);
- cl_f(&vc4->bcl, vc4->viewport.translate[2]);
- cl_f(&vc4->bcl, vc4->viewport.scale[2]);
+ cl_u8(&bcl, VC4_PACKET_CLIPPER_Z_SCALING);
+ cl_f(&bcl, vc4->viewport.translate[2]);
+ cl_f(&bcl, vc4->viewport.scale[2]);
- cl_u8(&vc4->bcl, VC4_PACKET_VIEWPORT_OFFSET);
- cl_u16(&vc4->bcl, 16 * vc4->viewport.translate[0]);
- cl_u16(&vc4->bcl, 16 * vc4->viewport.translate[1]);
+ cl_u8(&bcl, VC4_PACKET_VIEWPORT_OFFSET);
+ cl_u16(&bcl, 16 * vc4->viewport.translate[0]);
+ cl_u16(&bcl, 16 * vc4->viewport.translate[1]);
}
if (vc4->dirty & VC4_DIRTY_FLAT_SHADE_FLAGS) {
- cl_u8(&vc4->bcl, VC4_PACKET_FLAT_SHADE_FLAGS);
- cl_u32(&vc4->bcl, vc4->rasterizer->base.flatshade ?
+ cl_u8(&bcl, VC4_PACKET_FLAT_SHADE_FLAGS);
+ cl_u32(&bcl, vc4->rasterizer->base.flatshade ?
vc4->prog.fs->color_inputs : 0);
}
+
+ cl_end(&vc4->bcl, bcl);
}
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index a7aa3172a75..e61ea2170ff 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2530,13 +2530,14 @@ static uint32_t translate_wrap(uint32_t p_wrap, bool using_nearest)
static void
write_texture_p0(struct vc4_context *vc4,
+ struct vc4_cl_out **uniforms,
struct vc4_texture_stateobj *texstate,
uint32_t unit)
{
struct pipe_sampler_view *texture = texstate->textures[unit];
struct vc4_resource *rsc = vc4_resource(texture->texture);
- cl_reloc(vc4, &vc4->uniforms, rsc->bo,
+ cl_reloc(vc4, &vc4->uniforms, uniforms, rsc->bo,
VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
VC4_SET_FIELD(texture->u.tex.last_level -
texture->u.tex.first_level, VC4_TEX_P0_MIPLVLS) |
@@ -2547,6 +2548,7 @@ write_texture_p0(struct vc4_context *vc4,
static void
write_texture_p1(struct vc4_context *vc4,
+ struct vc4_cl_out **uniforms,
struct vc4_texture_stateobj *texstate,
uint32_t unit)
{
@@ -2570,7 +2572,7 @@ write_texture_p1(struct vc4_context *vc4,
(sampler->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
sampler->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);
- cl_aligned_u32(&vc4->uniforms,
+ cl_aligned_u32(uniforms,
VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) |
VC4_SET_FIELD(texture->texture->height0 & 2047,
VC4_TEX_P1_HEIGHT) |
@@ -2589,6 +2591,7 @@ write_texture_p1(struct vc4_context *vc4,
static void
write_texture_p2(struct vc4_context *vc4,
+ struct vc4_cl_out **uniforms,
struct vc4_texture_stateobj *texstate,
uint32_t data)
{
@@ -2596,7 +2599,7 @@ write_texture_p2(struct vc4_context *vc4,
struct pipe_sampler_view *texture = texstate->textures[unit];
struct vc4_resource *rsc = vc4_resource(texture->texture);
- cl_aligned_u32(&vc4->uniforms,
+ cl_aligned_u32(uniforms,
VC4_SET_FIELD(VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE,
VC4_TEX_P2_PTYPE) |
VC4_SET_FIELD(rsc->cube_map_stride >> 12, VC4_TEX_P2_CMST) |
@@ -2613,6 +2616,7 @@ write_texture_p2(struct vc4_context *vc4,
static void
write_texture_border_color(struct vc4_context *vc4,
+ struct vc4_cl_out **uniforms,
struct vc4_texture_stateobj *texstate,
uint32_t unit)
{
@@ -2673,7 +2677,7 @@ write_texture_border_color(struct vc4_context *vc4,
}
}
- cl_aligned_u32(&vc4->uniforms, uc.ui[0]);
+ cl_aligned_u32(uniforms, uc.ui[0]);
}
static uint32_t
@@ -2693,7 +2697,8 @@ get_texrect_scale(struct vc4_texture_stateobj *texstate,
}
static struct vc4_bo *
-vc4_upload_ubo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
+vc4_upload_ubo(struct vc4_context *vc4,
+ struct vc4_compiled_shader *shader,
const uint32_t *gallium_uniforms)
{
if (!shader->ubo_size)
@@ -2722,72 +2727,78 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
cl_ensure_space(&vc4->uniforms, (uinfo->count +
uinfo->num_texture_samples) * 4);
- cl_start_shader_reloc(&vc4->uniforms, uinfo->num_texture_samples);
+ struct vc4_cl_out *uniforms =
+ cl_start_shader_reloc(&vc4->uniforms,
+ uinfo->num_texture_samples);
for (int i = 0; i < uinfo->count; i++) {
switch (uinfo->contents[i]) {
case QUNIFORM_CONSTANT:
- cl_aligned_u32(&vc4->uniforms, uinfo->data[i]);
+ cl_aligned_u32(&uniforms, uinfo->data[i]);
break;
case QUNIFORM_UNIFORM:
- cl_aligned_u32(&vc4->uniforms,
+ cl_aligned_u32(&uniforms,
gallium_uniforms[uinfo->data[i]]);
break;
case QUNIFORM_VIEWPORT_X_SCALE:
- cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[0] * 16.0f);
+ cl_aligned_f(&uniforms, vc4->viewport.scale[0] * 16.0f);
break;
case QUNIFORM_VIEWPORT_Y_SCALE:
- cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[1] * 16.0f);
+ cl_aligned_f(&uniforms, vc4->viewport.scale[1] * 16.0f);
break;
case QUNIFORM_VIEWPORT_Z_OFFSET:
- cl_aligned_f(&vc4->uniforms, vc4->viewport.translate[2]);
+ cl_aligned_f(&uniforms, vc4->viewport.translate[2]);
break;
case QUNIFORM_VIEWPORT_Z_SCALE:
- cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[2]);
+ cl_aligned_f(&uniforms, vc4->viewport.scale[2]);
break;
case QUNIFORM_USER_CLIP_PLANE:
- cl_aligned_f(&vc4->uniforms,
+ cl_aligned_f(&uniforms,
vc4->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
break;
case QUNIFORM_TEXTURE_CONFIG_P0:
- write_texture_p0(vc4, texstate, uinfo->data[i]);
+ write_texture_p0(vc4, &uniforms, texstate,
+ uinfo->data[i]);
break;
case QUNIFORM_TEXTURE_CONFIG_P1:
- write_texture_p1(vc4, texstate, uinfo->data[i]);
+ write_texture_p1(vc4, &uniforms, texstate,
+ uinfo->data[i]);
break;
case QUNIFORM_TEXTURE_CONFIG_P2:
- write_texture_p2(vc4, texstate, uinfo->data[i]);
+ write_texture_p2(vc4, &uniforms, texstate,
+ uinfo->data[i]);
break;
case QUNIFORM_UBO_ADDR:
- cl_aligned_reloc(vc4, &vc4->uniforms, ubo, 0);
+ cl_aligned_reloc(vc4, &vc4->uniforms, &uniforms, ubo, 0);
break;
case QUNIFORM_TEXTURE_BORDER_COLOR:
- write_texture_border_color(vc4, texstate, uinfo->data[i]);
+ write_texture_border_color(vc4, &uniforms,
+ texstate, uinfo->data[i]);
break;
case QUNIFORM_TEXRECT_SCALE_X:
case QUNIFORM_TEXRECT_SCALE_Y:
- cl_aligned_u32(&vc4->uniforms,
+ cl_aligned_u32(&uniforms,
get_texrect_scale(texstate,
uinfo->contents[i],
uinfo->data[i]));
break;
case QUNIFORM_BLEND_CONST_COLOR:
- cl_aligned_f(&vc4->uniforms,
+ cl_aligned_f(&uniforms,
CLAMP(vc4->blend_color.color[uinfo->data[i]], 0, 1));
break;
case QUNIFORM_STENCIL:
- cl_aligned_u32(&vc4->uniforms,
+ cl_aligned_u32(&uniforms,
vc4->zsa->stencil_uniforms[uinfo->data[i]] |
(uinfo->data[i] <= 1 ?
(vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
@@ -2795,16 +2806,18 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
break;
case QUNIFORM_ALPHA_REF:
- cl_aligned_f(&vc4->uniforms,
+ cl_aligned_f(&uniforms,
vc4->zsa->base.alpha.ref_value);
break;
}
#if 0
- uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
+ uint32_t written_val = *((uint32_t *)uniforms - 1);
fprintf(stderr, "%p: %d / 0x%08x (%f)\n",
shader, i, written_val, uif(written_val));
#endif
}
+
+ cl_end(&vc4->uniforms, uniforms);
}
static void