summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2016-10-26 12:46:58 -0700
committerEric Anholt <[email protected]>2017-06-30 12:25:45 -0700
commit4cef255872e8467aabce52938038a9d2bf27d9b2 (patch)
tree967db9b8853e6a5fb2b9609aafea981cc1b3cffc
parent7f80a9ff1312406dcffae88bf6dcaaf99ca9e3a1 (diff)
vc4: Start using the pack header.
This slightly inflates the size of the generated code, in exchange for getting us some convenient tools.

Object sizes as reported by `size` (columns: text, data, bss, dec, hex, filename):

before:
   4389       0       0    4389    1125 src/gallium/drivers/vc4/.libs/vc4_draw.o
    808       0       0     808     328 src/gallium/drivers/vc4/.libs/vc4_emit.o
after:
   4449       0       0    4449    1161 src/gallium/drivers/vc4/.libs/vc4_draw.o
    988       0       0     988     3dc src/gallium/drivers/vc4/.libs/vc4_emit.o
-rw-r--r--src/gallium/drivers/vc4/vc4_cl.h63
-rw-r--r--src/gallium/drivers/vc4/vc4_draw.c49
-rw-r--r--src/gallium/drivers/vc4/vc4_emit.c65
-rw-r--r--src/gallium/drivers/vc4/vc4_job.c4
4 files changed, 130 insertions, 51 deletions
diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h
index 74bf8cfcaaa..bec177cd03b 100644
--- a/src/gallium/drivers/vc4/vc4_cl.h
+++ b/src/gallium/drivers/vc4/vc4_cl.h
@@ -40,6 +40,27 @@ struct vc4_job;
*/
struct vc4_cl_out;
+/** A reference to a BO (buffer object) plus an offset; this is the address type handed to the CL packing functions */
+struct vc4_cl_reloc {
+ struct vc4_bo *bo; /* the buffer object being referenced */
+ uint32_t offset; /* read back through __gen_address_offset(); presumably a byte offset into bo -- TODO confirm */
+};
+
+/* Reloc hook installed as __gen_emit_reloc. We don't call anything that packs a reloc yet, so it is unimplemented and aborts if ever reached. */
+static inline void cl_pack_emit_reloc(void *cl, const struct vc4_cl_reloc *reloc)
+{
+ abort(); /* cl and reloc are ignored */
+}
+
+/* Configuration macros defined ahead of the pack header include. We don't use the data arg yet (cl_emit passes NULL), so it is plain void. */
+#define __gen_user_data void
+#define __gen_address_type struct vc4_cl_reloc
+#define __gen_address_offset(reloc) ((reloc)->offset)
+#define __gen_emit_reloc cl_pack_emit_reloc
+
+#include "kernel/vc4_packet.h"
+#include "broadcom/cle/v3d_packet_v21_pack.h"
+
struct vc4_cl {
void *base;
struct vc4_cl_out *next;
@@ -205,4 +226,46 @@ cl_aligned_reloc(struct vc4_job *job, struct vc4_cl *cl,
void cl_ensure_space(struct vc4_cl *cl, uint32_t size);
+#define cl_packet_header(packet) V3D21_ ## packet ## _header /* initializer contents for the unpacked struct */
+#define cl_packet_length(packet) V3D21_ ## packet ## _length /* size reserved in the CL for the packed packet */
+#define cl_packet_pack(packet) V3D21_ ## packet ## _pack /* function that packs the unpacked struct */
+#define cl_packet_struct(packet) V3D21_ ## packet /* struct tag of the unpacked packet */
+
+static inline void * /* returns the value *cl held on entry */
+cl_get_emit_space(struct vc4_cl_out **cl, size_t size)
+{
+ void *addr = *cl; /* remember where the caller's packet will start */
+ cl_advance(cl, size); /* presumably moves *cl forward by size bytes -- cl_advance() is defined outside this hunk */
+ return addr;
+}
+
+/* Macro for setting up an emit of a CL struct. A temporary unpacked struct
+ * named "name" is created, whose fields you set in the body, in the form:
+ *
+ * cl_emit(&bcl, FLAT_SHADE_FLAGS, flags) {
+ * flags.flat_shading_flags = 1 << 2;
+ * }
+ *
+ * or default values only can be emitted with just:
+ *
+ * cl_emit(&bcl, FLAT_SHADE_FLAGS, flags);
+ *
+ * The trick here is that we make a for loop that will execute the body
+ * (either the block or the ';' after the macro invocation) exactly once,
+ * then packs name into the CL. Also, *_dst is actually of the wrong type, it's
+ * the uint8_t[cl_packet_length()] in the CL, not a cl_packet_struct(packet).
+ */
+#define cl_emit(cl_out, packet, name) \
+ for (struct cl_packet_struct(packet) name = { \
+ cl_packet_header(packet) \
+ }, \
+ *_dst = cl_get_emit_space(cl_out, cl_packet_length(packet)); \
+ __builtin_expect(_dst != NULL, 1); \
+ ({ \
+ cl_packet_pack(packet)(NULL, (uint8_t *)_dst, &name); \
+ VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, \
+ cl_packet_length(packet))); \
+ _dst = NULL; \
+ })) \
+
#endif /* VC4_CL_H */
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index 0aee73ed10b..4b3fa8ab8ff 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -82,32 +82,28 @@ vc4_start_draw(struct vc4_context *vc4)
vc4_get_draw_cl_space(job, 0);
struct vc4_cl_out *bcl = cl_start(&job->bcl);
- // Tile state data is 48 bytes per tile, I think it can be thrown away
- // as soon as binning is finished.
- cl_u8(&bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
- cl_u32(&bcl, 0); /* tile alloc addr, filled by kernel */
- cl_u32(&bcl, 0); /* tile alloc size, filled by kernel */
- cl_u32(&bcl, 0); /* tile state addr, filled by kernel */
- cl_u8(&bcl, job->draw_tiles_x);
- cl_u8(&bcl, job->draw_tiles_y);
- /* Other flags are filled by kernel. */
- cl_u8(&bcl, job->msaa ? VC4_BIN_CONFIG_MS_MODE_4X : 0);
+ cl_emit(&bcl, TILE_BINNING_MODE_CONFIGURATION, bin) {
+ bin.width_in_tiles = job->draw_tiles_x;
+ bin.height_in_tiles = job->draw_tiles_y;
+ bin.multisample_mode_4x = job->msaa;
+ }
/* START_TILE_BINNING resets the statechange counters in the hardware,
* which are what is used when a primitive is binned to a tile to
* figure out what new state packets need to be written to that tile's
* command list.
*/
- cl_u8(&bcl, VC4_PACKET_START_TILE_BINNING);
+ cl_emit(&bcl, START_TILE_BINNING, start);
/* Reset the current compressed primitives format. This gets modified
* by VC4_PACKET_GL_INDEXED_PRIMITIVE and
* VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
* of every tile.
*/
- cl_u8(&bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
- cl_u8(&bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
- VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
+ cl_emit(&bcl, PRIMITIVE_LIST_FORMAT, list) {
+ list.data_type = _16_BIT_INDEX;
+ list.primitive_type = TRIANGLES_LIST;
+ }
job->needs_flush = true;
job->draw_width = vc4->framebuffer.width;
@@ -221,13 +217,15 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4,
cl_end(&job->shader_rec, shader_rec);
struct vc4_cl_out *bcl = cl_start(&job->bcl);
- /* the actual draw call. */
- cl_u8(&bcl, VC4_PACKET_GL_SHADER_STATE);
- assert(vtx->num_elements <= 8);
- /* Note that number of attributes == 0 in the packet means 8
- * attributes. This field also contains the offset into shader_rec.
- */
- cl_u32(&bcl, num_elements_emit & 0x7);
+ cl_emit(&bcl, GL_SHADER_STATE, shader_state) {
+ /* Note that number of attributes == 0 in the packet means 8
+ * attributes. This field also contains the offset into
+ * shader_rec.
+ */
+ assert(vtx->num_elements <= 8);
+ shader_state.number_of_attribute_arrays =
+ num_elements_emit & 0x7;
+ }
cl_end(&job->bcl, bcl);
vc4_write_uniforms(vc4, vc4->prog.fs,
@@ -436,10 +434,11 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
}
}
- cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
- cl_u8(&bcl, info->mode);
- cl_u32(&bcl, this_count);
- cl_u32(&bcl, start);
+ cl_emit(&bcl, VERTEX_ARRAY_PRIMITIVES, array) {
+ array.primitive_mode = info->mode;
+ array.length = this_count;
+ array.index_of_first_vertex = start;
+ }
job->draw_calls_queued++;
count -= step;
diff --git a/src/gallium/drivers/vc4/vc4_emit.c b/src/gallium/drivers/vc4/vc4_emit.c
index b48d89a0604..9fc266e5baa 100644
--- a/src/gallium/drivers/vc4/vc4_emit.c
+++ b/src/gallium/drivers/vc4/vc4_emit.c
@@ -60,11 +60,12 @@ vc4_emit_state(struct pipe_context *pctx)
maxy = MIN2(vp_maxy, vc4->scissor.maxy);
}
- cl_u8(&bcl, VC4_PACKET_CLIP_WINDOW);
- cl_u16(&bcl, minx);
- cl_u16(&bcl, miny);
- cl_u16(&bcl, maxx - minx);
- cl_u16(&bcl, maxy - miny);
+ cl_emit(&bcl, CLIP_WINDOW, clip) {
+ clip.clip_window_left_pixel_coordinate = minx;
+ clip.clip_window_bottom_pixel_coordinate = miny;
+ clip.clip_window_height_in_pixels = maxy - miny;
+ clip.clip_window_width_in_pixels = maxx - minx;
+ }
job->draw_min_x = MIN2(job->draw_min_x, minx);
job->draw_min_y = MIN2(job->draw_min_y, miny);
@@ -113,35 +114,51 @@ vc4_emit_state(struct pipe_context *pctx)
}
if (vc4->dirty & VC4_DIRTY_RASTERIZER) {
- cl_u8(&bcl, VC4_PACKET_DEPTH_OFFSET);
- cl_u16(&bcl, vc4->rasterizer->offset_factor);
- cl_u16(&bcl, vc4->rasterizer->offset_units);
+ cl_emit(&bcl, DEPTH_OFFSET, depth) {
+ depth.depth_offset_units =
+ vc4->rasterizer->offset_units;
+ depth.depth_offset_factor =
+ vc4->rasterizer->offset_factor;
+ }
- cl_u8(&bcl, VC4_PACKET_POINT_SIZE);
- cl_f(&bcl, vc4->rasterizer->point_size);
+ cl_emit(&bcl, POINT_SIZE, points) {
+ points.point_size = vc4->rasterizer->point_size;
+ }
- cl_u8(&bcl, VC4_PACKET_LINE_WIDTH);
- cl_f(&bcl, vc4->rasterizer->base.line_width);
+ cl_emit(&bcl, LINE_WIDTH, points) {
+ points.line_width = vc4->rasterizer->base.line_width;
+ }
}
if (vc4->dirty & VC4_DIRTY_VIEWPORT) {
- cl_u8(&bcl, VC4_PACKET_CLIPPER_XY_SCALING);
- cl_f(&bcl, vc4->viewport.scale[0] * 16.0f);
- cl_f(&bcl, vc4->viewport.scale[1] * 16.0f);
+ cl_emit(&bcl, CLIPPER_XY_SCALING, clip) {
+ clip.viewport_half_width_in_1_16th_of_pixel =
+ vc4->viewport.scale[0] * 16.0f;
+ clip.viewport_half_height_in_1_16th_of_pixel =
+ vc4->viewport.scale[1] * 16.0f;
+ }
- cl_u8(&bcl, VC4_PACKET_CLIPPER_Z_SCALING);
- cl_f(&bcl, vc4->viewport.translate[2]);
- cl_f(&bcl, vc4->viewport.scale[2]);
+ cl_emit(&bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
+ clip.viewport_z_offset_zc_to_zs =
+ vc4->viewport.translate[2];
+ clip.viewport_z_scale_zc_to_zs =
+ vc4->viewport.scale[2];
+ }
- cl_u8(&bcl, VC4_PACKET_VIEWPORT_OFFSET);
- cl_u16(&bcl, 16 * vc4->viewport.translate[0]);
- cl_u16(&bcl, 16 * vc4->viewport.translate[1]);
+ cl_emit(&bcl, VIEWPORT_OFFSET, vp) {
+ vp.viewport_centre_x_coordinate =
+ 16 * vc4->viewport.translate[0];
+ vp.viewport_centre_y_coordinate =
+ 16 * vc4->viewport.translate[1];
+ }
}
if (vc4->dirty & VC4_DIRTY_FLAT_SHADE_FLAGS) {
- cl_u8(&bcl, VC4_PACKET_FLAT_SHADE_FLAGS);
- cl_u32(&bcl, vc4->rasterizer->base.flatshade ?
- vc4->prog.fs->color_inputs : 0);
+ cl_emit(&bcl, FLAT_SHADE_FLAGS, flags) {
+ if (vc4->rasterizer->base.flatshade)
+ flags.flat_shading_flags =
+ vc4->prog.fs->color_inputs;
+ }
}
cl_end(&job->bcl, bcl);
diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c
index d39472ef131..afdac8c991d 100644
--- a/src/gallium/drivers/vc4/vc4_job.c
+++ b/src/gallium/drivers/vc4/vc4_job.c
@@ -378,11 +378,11 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
*/
cl_ensure_space(&job->bcl, 8);
struct vc4_cl_out *bcl = cl_start(&job->bcl);
- cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
+ cl_emit(&bcl, INCREMENT_SEMAPHORE, incr);
/* The FLUSH caps all of our bin lists with a
* VC4_PACKET_RETURN.
*/
- cl_u8(&bcl, VC4_PACKET_FLUSH);
+ cl_emit(&bcl, FLUSH, flush);
cl_end(&job->bcl, bcl);
}
struct drm_vc4_submit_cl submit = {