summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2014-12-21 13:10:25 -0800
committer: Eric Anholt <[email protected]> 2014-12-24 08:28:33 -1000
commit: 20e3a2430e0435b8ee4861553e5acd13c58cf90f (patch)
tree: e83995a22e06c3e8f7712780440b3374f6b0cfdd
parent: 4616b2ef850cb2b3e33005809ed77a697afc4186 (diff)
vc4: Avoid repeated hindex lookups in the loop over tiles.
Improves norast performance of a microbenchmark by 11.1865% +/- 2.37673% (n=20).
-rw-r--r-- src/gallium/drivers/vc4/vc4_cl.h 12
-rw-r--r-- src/gallium/drivers/vc4/vc4_context.c 27
2 files changed, 24 insertions, 15 deletions
diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h
index 634a4b0a421..86cd0c797a6 100644
--- a/src/gallium/drivers/vc4/vc4_cl.h
+++ b/src/gallium/drivers/vc4/vc4_cl.h
@@ -117,10 +117,9 @@ cl_start_shader_reloc(struct vc4_cl *cl, uint32_t n)
}
static inline void
-cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
- struct vc4_bo *bo, uint32_t offset)
+cl_reloc_hindex(struct vc4_cl *cl, uint32_t hindex, uint32_t offset)
{
- *(uint32_t *)(cl->base + cl->reloc_next) = vc4_gem_hindex(vc4, bo);
+ *(uint32_t *)(cl->base + cl->reloc_next) = hindex;
cl->reloc_next += 4;
cl->reloc_count--;
@@ -128,4 +127,11 @@ cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
cl_u32(cl, offset);
}
+static inline void
+cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
+ struct vc4_bo *bo, uint32_t offset)
+{
+ cl_reloc_hindex(cl, vc4_gem_hindex(vc4, bo), offset);
+}
+
#endif /* VC4_CL_H */
diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c
index e49d6549929..906af05b44b 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -119,7 +119,6 @@ vc4_setup_rcl(struct vc4_context *vc4)
*/
struct vc4_surface *render_surf = csurf ? csurf : zsurf;
struct vc4_resource *render_tex = vc4_resource(render_surf->base.texture);
-
cl_start_reloc(&vc4->rcl, 1);
cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset);
@@ -152,6 +151,10 @@ vc4_setup_rcl(struct vc4_context *vc4)
cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
}
+ uint32_t color_hindex = ctex ? vc4_gem_hindex(vc4, ctex->bo) : 0;
+ uint32_t depth_hindex = ztex ? vc4_gem_hindex(vc4, ztex->bo) : 0;
+ uint32_t tile_alloc_hindex = vc4_gem_hindex(vc4, vc4->tile_alloc);
+
for (int y = 0; y < ytiles; y++) {
for (int x = 0; x < xtiles; x++) {
bool end_of_frame = (x == xtiles - 1 &&
@@ -175,8 +178,8 @@ vc4_setup_rcl(struct vc4_context *vc4)
vc4_rt_format_is_565(csurf->base.format) ?
VC4_LOADSTORE_TILE_BUFFER_BGR565 :
VC4_LOADSTORE_TILE_BUFFER_RGBA8888);
- cl_reloc(vc4, &vc4->rcl, ctex->bo,
- csurf->offset);
+ cl_reloc_hindex(&vc4->rcl, color_hindex,
+ csurf->offset);
vc4_tile_coordinates(vc4, x, y, &coords_emitted);
}
@@ -191,8 +194,8 @@ vc4_setup_rcl(struct vc4_context *vc4)
(zsurf->tiling <<
VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
cl_u8(&vc4->rcl, 0);
- cl_reloc(vc4, &vc4->rcl, ztex->bo,
- zsurf->offset);
+ cl_reloc_hindex(&vc4->rcl, depth_hindex,
+ zsurf->offset);
vc4_tile_coordinates(vc4, x, y, &coords_emitted);
}
@@ -211,8 +214,8 @@ vc4_setup_rcl(struct vc4_context *vc4)
cl_start_reloc(&vc4->rcl, 1);
cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
- cl_reloc(vc4, &vc4->rcl, vc4->tile_alloc,
- (y * xtiles + x) * 32);
+ cl_reloc_hindex(&vc4->rcl, tile_alloc_hindex,
+ (y * xtiles + x) * 32);
if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
vc4_tile_coordinates(vc4, x, y, &coords_emitted);
@@ -225,11 +228,11 @@ vc4_setup_rcl(struct vc4_context *vc4)
VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
cl_u8(&vc4->rcl,
VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR);
- cl_reloc(vc4, &vc4->rcl, ztex->bo,
- zsurf->offset |
- ((end_of_frame &&
- !(vc4->resolve & PIPE_CLEAR_COLOR0)) ?
- VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
+ cl_reloc_hindex(&vc4->rcl, depth_hindex,
+ zsurf->offset |
+ ((end_of_frame &&
+ !(vc4->resolve & PIPE_CLEAR_COLOR0)) ?
+ VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
coords_emitted = false;
}