summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/vc4
diff options
context:
space:
mode:
author Eric Anholt <[email protected]> 2015-06-01 12:50:49 -0700
committer Eric Anholt <[email protected]> 2015-06-09 12:25:50 -0700
commit e67b12eaf89acc9c446de77b77120a2f6cdbbe12 (patch)
tree 9cafb89e5dc133cf5e8f1accce07d9a874e7ee92 /src/gallium/drivers/vc4
parent c5e11e5f7f67fe5a1d28b1446f87af7aa3ba68d8 (diff)
vc4: Update to current kernel validation code.
After profiling on real hardware, I found a few ways to cut down the kernel overhead.
Diffstat (limited to 'src/gallium/drivers/vc4')
-rw-r--r-- src/gallium/drivers/vc4/kernel/vc4_drv.h 3
-rw-r--r-- src/gallium/drivers/vc4/kernel/vc4_validate.c 68
-rw-r--r-- src/gallium/drivers/vc4/vc4_simulator_validate.h 1
3 files changed, 37 insertions, 35 deletions
diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h b/src/gallium/drivers/vc4/kernel/vc4_drv.h
index 325f944bf25..dede7162c42 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_drv.h
+++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h
@@ -89,7 +89,8 @@ struct vc4_exec_info {
bool found_wait_on_semaphore_packet;
uint8_t bin_tiles_x, bin_tiles_y;
uint32_t fb_width, fb_height;
- uint32_t tile_alloc_init_block_size;
+ uint32_t tile_alloc_init_block_mask;
+ uint32_t tile_alloc_init_block_last;
struct drm_gem_cma_object *tile_alloc_bo;
/**
diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c
index 2d04a4a7b9a..2b57ca0b4b0 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_validate.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c
@@ -156,24 +156,30 @@ check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
uint32_t utile_w = utile_width(cpp);
uint32_t utile_h = utile_height(cpp);
- /* The values are limited by the packet/texture parameter bitfields,
- * so we don't need to worry as much about integer overflow.
+ /* The shaded vertex format stores signed 12.4 fixed point
+ * (-2048,2047) offsets from the viewport center, so we should
+ * never have a render target larger than 4096. The texture
+ * unit can only sample from 2048x2048, so it's even more
+ * restricted. This lets us avoid worrying about overflow in
+ * our math.
*/
- BUG_ON(width > 65535);
- BUG_ON(height > 65535);
+ if (width > 4096 || height > 4096) {
+ DRM_ERROR("Surface dimensions (%d,%d) too large", width, height);
+ return false;
+ }
switch (tiling_format) {
case VC4_TILING_FORMAT_LINEAR:
- aligned_width = roundup(width, utile_w);
+ aligned_width = round_up(width, utile_w);
aligned_height = height;
break;
case VC4_TILING_FORMAT_T:
- aligned_width = roundup(width, utile_w * 8);
- aligned_height = roundup(height, utile_h * 8);
+ aligned_width = round_up(width, utile_w * 8);
+ aligned_height = round_up(height, utile_h * 8);
break;
case VC4_TILING_FORMAT_LT:
- aligned_width = roundup(width, utile_w);
- aligned_height = roundup(height, utile_h);
+ aligned_width = round_up(width, utile_w);
+ aligned_height = round_up(height, utile_h);
break;
default:
DRM_ERROR("buffer tiling %d unsupported\n", tiling_format);
@@ -181,13 +187,6 @@ check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
}
stride = aligned_width * cpp;
-
- if (INT_MAX / stride < aligned_height) {
- DRM_ERROR("Overflow in fbo size (%dx%d -> %dx%d)\n",
- width, height,
- aligned_width, aligned_height);
- return false;
- }
size = stride * aligned_height;
if (size + offset < size ||
@@ -269,14 +268,11 @@ validate_wait_on_semaphore(VALIDATE_ARGS)
static int
validate_branch_to_sublist(VALIDATE_ARGS)
{
- struct drm_gem_cma_object *target;
uint32_t offset;
- if (!vc4_use_handle(exec, 0, VC4_MODE_TILE_ALLOC, &target))
- return -EINVAL;
-
- if (target != exec->tile_alloc_bo) {
- DRM_ERROR("Jumping to BOs other than tile alloc unsupported\n");
+ if (!exec->tile_alloc_bo) {
+ DRM_ERROR("VC4_PACKET_BRANCH_TO_SUB_LIST seen before "
+ "binner setup\n");
return -EINVAL;
}
@@ -286,15 +282,14 @@ validate_branch_to_sublist(VALIDATE_ARGS)
}
offset = *(uint32_t *)(untrusted + 0);
- if (offset % exec->tile_alloc_init_block_size ||
- offset / exec->tile_alloc_init_block_size >=
- exec->bin_tiles_x * exec->bin_tiles_y) {
+ if (offset & exec->tile_alloc_init_block_mask ||
+ offset > exec->tile_alloc_init_block_last) {
DRM_ERROR("VC4_PACKET_BRANCH_TO_SUB_LIST must jump to initial "
"tile allocation space.\n");
return -EINVAL;
}
- *(uint32_t *)(validated + 0) = target->paddr + offset;
+ *(uint32_t *)(validated + 0) = exec->tile_alloc_bo->paddr + offset;
return 0;
}
@@ -496,6 +491,7 @@ validate_tile_binning_config(VALIDATE_ARGS)
struct drm_gem_cma_object *tile_state_data_array;
uint8_t flags;
uint32_t tile_allocation_size;
+ uint32_t tile_alloc_init_block_size;
if (!vc4_use_handle(exec, 0, VC4_MODE_TILE_ALLOC, &tile_allocation) ||
!vc4_use_handle(exec, 1, VC4_MODE_TSDA, &tile_state_data_array))
@@ -547,15 +543,19 @@ validate_tile_binning_config(VALIDATE_ARGS)
*(uint32_t *)validated = tile_allocation->paddr;
exec->tile_alloc_bo = tile_allocation;
- exec->tile_alloc_init_block_size = 1 << (5 + ((flags >> 5) & 3));
+ tile_alloc_init_block_size = 1 << (5 + ((flags >> 5) & 3));
if (exec->bin_tiles_x * exec->bin_tiles_y *
- exec->tile_alloc_init_block_size > tile_allocation_size) {
+ tile_alloc_init_block_size > tile_allocation_size) {
DRM_ERROR("tile init exceeds tile alloc size (%d vs %d)\n",
exec->bin_tiles_x * exec->bin_tiles_y *
- exec->tile_alloc_init_block_size,
+ tile_alloc_init_block_size,
tile_allocation_size);
return -EINVAL;
}
+ exec->tile_alloc_init_block_mask = tile_alloc_init_block_size - 1;
+ exec->tile_alloc_init_block_last = tile_alloc_init_block_size *
+ (exec->bin_tiles_x * exec->bin_tiles_y - 1);
+
if (*(uint32_t *)(untrusted + 8) != 0) {
DRM_ERROR("TSDA offset != 0 unsupported\n");
return -EINVAL;
@@ -927,15 +927,15 @@ reloc_tex(struct vc4_exec_info *exec,
switch (tiling_format) {
case VC4_TILING_FORMAT_T:
- aligned_width = roundup(level_width, utile_w * 8);
- aligned_height = roundup(level_height, utile_h * 8);
+ aligned_width = round_up(level_width, utile_w * 8);
+ aligned_height = round_up(level_height, utile_h * 8);
break;
case VC4_TILING_FORMAT_LT:
- aligned_width = roundup(level_width, utile_w);
- aligned_height = roundup(level_height, utile_h);
+ aligned_width = round_up(level_width, utile_w);
+ aligned_height = round_up(level_height, utile_h);
break;
default:
- aligned_width = roundup(level_width, utile_w);
+ aligned_width = round_up(level_width, utile_w);
aligned_height = level_height;
break;
}
diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.h b/src/gallium/drivers/vc4/vc4_simulator_validate.h
index 1f0c6b67c0f..a1903269a20 100644
--- a/src/gallium/drivers/vc4/vc4_simulator_validate.h
+++ b/src/gallium/drivers/vc4/vc4_simulator_validate.h
@@ -43,6 +43,7 @@ struct vc4_exec_info;
#define kfree(ptr) free(ptr)
#define krealloc(ptr, size, args) realloc(ptr, size)
#define roundup(x, y) align(x, y)
+#define round_up(x, y) align(x, y)
#define max(x, y) MAX2(x, y)
#define min(x, y) MIN2(x, y)
#define BUG_ON(condition) assert(!(condition))