summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2014-07-21 11:27:35 -0700
committerEric Anholt <[email protected]>2014-08-11 14:45:28 -0700
commita8f2bf0f51222a96a49dfb3d6f9b36d3e54d08cd (patch)
tree5f7ab98c9ba5a69d761b253560908ac9c2358a81 /src
parent6a5ece12aac0f079dff3aaf00b2ec5c420b9b1f8 (diff)
vc4: Rewrite the kernel ABI to support texture uniform relocation.
This required building a shader parser that would walk the program to find where the texturing-related uniforms are in the uniforms stream. Note that as of this commit, a new kernel is required for rendering on actual VC4 hardware (currently that commit is named "drm/vc4: Introduce shader validation and better command stream validation.", but is likely to be squashed as part of an eventual merge of the kernel driver).
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/vc4/Makefile.sources1
-rw-r--r--src/gallium/drivers/vc4/vc4_context.c3
-rw-r--r--src/gallium/drivers/vc4/vc4_context.h7
-rw-r--r--src/gallium/drivers/vc4/vc4_draw.c22
-rw-r--r--src/gallium/drivers/vc4/vc4_drm.h23
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c82
-rw-r--r--src/gallium/drivers/vc4/vc4_simulator.c37
-rw-r--r--src/gallium/drivers/vc4/vc4_simulator_validate.c133
-rw-r--r--src/gallium/drivers/vc4/vc4_simulator_validate.h63
-rw-r--r--src/gallium/drivers/vc4/vc4_simulator_validate_shaders.c334
10 files changed, 608 insertions, 97 deletions
diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources
index ee351835896..414a64ab472 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -17,5 +17,6 @@ C_SOURCES := \
vc4_screen.c \
vc4_simulator.c \
vc4_simulator_validate.c \
+ vc4_simulator_validate_shaders.c \
vc4_state.c \
$()
diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c
index a9fa7ef70f1..08e85ed6312 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -107,6 +107,8 @@ vc4_flush(struct pipe_context *pctx)
submit.shader_records = vc4->shader_rec.base;
submit.shader_record_len = vc4->shader_rec.next - vc4->shader_rec.base;
submit.shader_record_count = vc4->shader_rec_count;
+ submit.uniforms = vc4->uniforms.base;
+ submit.uniforms_len = vc4->uniforms.next - vc4->uniforms.base;
if (!(vc4_debug & VC4_DEBUG_NORAST)) {
int ret;
@@ -123,6 +125,7 @@ vc4_flush(struct pipe_context *pctx)
vc4_reset_cl(&vc4->bcl);
vc4_reset_cl(&vc4->rcl);
vc4_reset_cl(&vc4->shader_rec);
+ vc4_reset_cl(&vc4->uniforms);
vc4_reset_cl(&vc4->bo_handles);
#ifdef USE_VC4_SIMULATOR
vc4_reset_cl(&vc4->bo_pointers);
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index ee9ddcfd82b..010727ff4de 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -70,6 +70,7 @@ struct vc4_shader_uniform_info {
enum quniform_contents *contents;
uint32_t *data;
uint32_t count;
+ uint32_t num_texture_samples;
};
struct vc4_compiled_shader {
@@ -120,6 +121,7 @@ struct vc4_context {
struct vc4_cl bcl;
struct vc4_cl rcl;
struct vc4_cl shader_rec;
+ struct vc4_cl uniforms;
struct vc4_cl bo_handles;
#ifdef USE_VC4_SIMULATOR
struct vc4_cl bo_pointers;
@@ -195,12 +197,11 @@ int vc4_simulator_flush(struct vc4_context *vc4,
struct drm_vc4_submit_cl *args,
struct vc4_surface *color_surf);
-void vc4_get_uniform_bo(struct vc4_context *vc4,
+void vc4_write_uniforms(struct vc4_context *vc4,
struct vc4_compiled_shader *shader,
struct vc4_constbuf_stateobj *cb,
struct vc4_texture_stateobj *texstate,
- int shader_index, struct vc4_bo **out_bo,
- uint32_t *out_offset);
+ int shader_index);
void vc4_flush(struct pipe_context *pctx);
void vc4_emit_state(struct pipe_context *pctx);
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index d5628d0d3ca..8559bf3b2fe 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -162,40 +162,38 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
// Shader Record
- struct vc4_bo *fs_ubo, *vs_ubo, *cs_ubo;
- uint32_t fs_ubo_offset, vs_ubo_offset, cs_ubo_offset;
- vc4_get_uniform_bo(vc4, vc4->prog.fs,
+ vc4_write_uniforms(vc4, vc4->prog.fs,
&vc4->constbuf[PIPE_SHADER_FRAGMENT],
&vc4->fragtex,
- 0, &fs_ubo, &fs_ubo_offset);
- vc4_get_uniform_bo(vc4, vc4->prog.vs,
+ 0);
+ vc4_write_uniforms(vc4, vc4->prog.vs,
&vc4->constbuf[PIPE_SHADER_VERTEX],
&vc4->verttex,
- 0, &vs_ubo, &vs_ubo_offset);
- vc4_get_uniform_bo(vc4, vc4->prog.vs,
+ 0);
+ vc4_write_uniforms(vc4, vc4->prog.vs,
&vc4->constbuf[PIPE_SHADER_VERTEX],
&vc4->verttex,
- 1, &cs_ubo, &cs_ubo_offset);
+ 1);
- cl_start_shader_reloc(&vc4->shader_rec, 6 + vtx->num_elements);
+ cl_start_shader_reloc(&vc4->shader_rec, 3 + vtx->num_elements);
cl_u16(&vc4->shader_rec, VC4_SHADER_FLAG_ENABLE_CLIPPING);
cl_u8(&vc4->shader_rec, 0); /* fs num uniforms (unused) */
cl_u8(&vc4->shader_rec, vc4->prog.fs->num_inputs);
cl_reloc(vc4, &vc4->shader_rec, vc4->prog.fs->bo, 0);
- cl_reloc(vc4, &vc4->shader_rec, fs_ubo, fs_ubo_offset);
+ cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
cl_u16(&vc4->shader_rec, 0); /* vs num uniforms */
cl_u8(&vc4->shader_rec, (1 << vtx->num_elements) - 1); /* vs attribute array bitfield */
cl_u8(&vc4->shader_rec, 16 * vtx->num_elements); /* vs total attribute size */
cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, 0);
- cl_reloc(vc4, &vc4->shader_rec, vs_ubo, vs_ubo_offset);
+ cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
cl_u16(&vc4->shader_rec, 0); /* cs num uniforms */
cl_u8(&vc4->shader_rec, (1 << vtx->num_elements) - 1); /* cs attribute array bitfield */
cl_u8(&vc4->shader_rec, 16 * vtx->num_elements); /* vs total attribute size */
cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo,
vc4->prog.vs->coord_shader_offset);
- cl_reloc(vc4, &vc4->shader_rec, cs_ubo, cs_ubo_offset);
+ cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
for (int i = 0; i < vtx->num_elements; i++) {
struct pipe_vertex_element *elem = &vtx->pipe[i];
diff --git a/src/gallium/drivers/vc4/vc4_drm.h b/src/gallium/drivers/vc4/vc4_drm.h
index b958f1d03d0..cc4c735d881 100644
--- a/src/gallium/drivers/vc4/vc4_drm.h
+++ b/src/gallium/drivers/vc4/vc4_drm.h
@@ -74,6 +74,21 @@ struct drm_vc4_submit_cl {
*/
void __user *shader_records;
+ /* Pointer to uniform data and texture handles for the textures
+ * referenced by the shader.
+ *
+ * For each shader state record, there is a set of uniform data in the
+ * order referenced by the record (FS, VS, then CS). Each set of
+ * uniform data has a uint32_t index into bo_handles per texture
+ * sample operation, in the order the QPU_W_TMUn_S writes appear in
+ * the program. Following the texture BO handle indices is the actual
+ * uniform data.
+ *
+ * The individual uniform state blocks don't have sizes passed in,
+ * because the kernel has to determine the sizes anyway during shader
+ * code validation.
+ */
+ void __user *uniforms;
void __user *bo_handles;
/* Size in bytes of the binner command list. */
@@ -84,11 +99,13 @@ struct drm_vc4_submit_cl {
uint32_t shader_record_len;
/* Number of shader records.
*
- * This could just be computed from the contents of shader_records,
- * but it keeps the kernel from having to resize various allocations
- * it makes.
+ * This could just be computed from the contents of shader_records and
+ * the address bits of references to them from the bin CL, but it
+ * keeps the kernel from having to resize some allocations it makes.
*/
uint32_t shader_record_count;
+ /** Size in bytes of the uniform state. */
+ uint32_t uniforms_len;
/* Number of BO handles passed in (size is that times 4). */
uint32_t bo_handle_count;
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 15e1ff25b04..b7ed1bf60a0 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -57,6 +57,7 @@ struct tgsi_to_qir {
enum quniform_contents *uniform_contents;
uint32_t num_uniforms;
uint32_t num_outputs;
+ uint32_t num_texture_samples;
};
struct vc4_key {
@@ -332,6 +333,7 @@ tgsi_to_qir_tex(struct tgsi_to_qir *trans,
qir_TEX_S(c, s, sampler_p1);
}
+ trans->num_texture_samples++;
qir_emit(c, qir_inst(QOP_TEX_RESULT, c->undef, c->undef, c->undef));
for (int i = 0; i < 4; i++) {
@@ -938,6 +940,7 @@ copy_uniform_state_to_shader(struct vc4_compiled_shader *shader,
uinfo->contents = malloc(count * sizeof(*uinfo->contents));
memcpy(uinfo->contents, trans->uniform_contents,
count * sizeof(*uinfo->contents));
+ uinfo->num_texture_samples = trans->num_texture_samples;
}
static void
@@ -1141,26 +1144,23 @@ static uint32_t translate_wrap(uint32_t p_wrap)
}
}
-static uint32_t
-get_texture_p0(struct vc4_texture_stateobj *texstate,
- uint32_t tex_and_sampler)
+static void
+write_texture_p0(struct vc4_context *vc4,
+ struct vc4_texture_stateobj *texstate,
+ uint32_t tex_and_sampler)
{
uint32_t texi = (tex_and_sampler >> 0) & 0xff;
struct pipe_sampler_view *texture = texstate->textures[texi];
struct vc4_resource *rsc = vc4_resource(texture->texture);
- return (texture->u.tex.last_level |
-#if USE_VC4_SIMULATOR
- simpenrose_hw_addr(rsc->bo->map) /* XXX */
-#else
- 0 /* XXX */
-#endif
- /* XXX: data type */);
+ cl_reloc(vc4, &vc4->uniforms, rsc->bo,
+ texture->u.tex.last_level);
}
-static uint32_t
-get_texture_p1(struct vc4_texture_stateobj *texstate,
- uint32_t tex_and_sampler)
+static void
+write_texture_p1(struct vc4_context *vc4,
+ struct vc4_texture_stateobj *texstate,
+ uint32_t tex_and_sampler)
{
uint32_t texi = (tex_and_sampler >> 0) & 0xff;
uint32_t sampi = (tex_and_sampler >> 8) & 0xff;
@@ -1176,14 +1176,15 @@ get_texture_p1(struct vc4_texture_stateobj *texstate,
[PIPE_TEX_FILTER_LINEAR] = 0,
};
- return ((1 << 31) /* XXX: data type */|
- (texture->texture->height0 << 20) |
- (texture->texture->width0 << 8) |
- (imgfilter_map[sampler->mag_img_filter] << 7) |
- ((imgfilter_map[sampler->min_img_filter] +
- mipfilter_map[sampler->min_mip_filter]) << 4) |
- (translate_wrap(sampler->wrap_t) << 2) |
- (translate_wrap(sampler->wrap_s) << 0));
+ cl_u32(&vc4->uniforms,
+ (1 << 31) /* XXX: data type */|
+ (texture->texture->height0 << 20) |
+ (texture->texture->width0 << 8) |
+ (imgfilter_map[sampler->mag_img_filter] << 7) |
+ ((imgfilter_map[sampler->min_img_filter] +
+ mipfilter_map[sampler->min_mip_filter]) << 4) |
+ (translate_wrap(sampler->wrap_t) << 2) |
+ (translate_wrap(sampler->wrap_s) << 0));
}
static uint32_t
@@ -1203,56 +1204,57 @@ get_texrect_scale(struct vc4_texture_stateobj *texstate,
}
void
-vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
+vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
struct vc4_constbuf_stateobj *cb,
struct vc4_texture_stateobj *texstate,
- int shader_index, struct vc4_bo **out_bo,
- uint32_t *out_offset)
+ int shader_index)
{
struct vc4_shader_uniform_info *uinfo = &shader->uniforms[shader_index];
- struct vc4_bo *ubo = vc4_bo_alloc(vc4->screen,
- MAX2(1, uinfo->count * 4), "ubo");
- uint32_t *map = vc4_bo_map(ubo);
+ const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
+
+ cl_start_shader_reloc(&vc4->uniforms, uinfo->num_texture_samples);
for (int i = 0; i < uinfo->count; i++) {
switch (uinfo->contents[i]) {
case QUNIFORM_CONSTANT:
- map[i] = uinfo->data[i];
+ cl_u32(&vc4->uniforms, uinfo->data[i]);
break;
case QUNIFORM_UNIFORM:
- map[i] = ((uint32_t *)cb->cb[0].user_buffer)[uinfo->data[i]];
+ cl_u32(&vc4->uniforms,
+ gallium_uniforms[uinfo->data[i]]);
break;
case QUNIFORM_VIEWPORT_X_SCALE:
- map[i] = fui(vc4->framebuffer.width * 16.0f / 2.0f);
+ cl_u32(&vc4->uniforms, fui(vc4->framebuffer.width *
+ 16.0f / 2.0f));
break;
case QUNIFORM_VIEWPORT_Y_SCALE:
- map[i] = fui(vc4->framebuffer.height * -16.0f / 2.0f);
+ cl_u32(&vc4->uniforms, fui(vc4->framebuffer.height *
+ -16.0f / 2.0f));
break;
case QUNIFORM_TEXTURE_CONFIG_P0:
- map[i] = get_texture_p0(texstate, uinfo->data[i]);
+ write_texture_p0(vc4, texstate, uinfo->data[i]);
break;
case QUNIFORM_TEXTURE_CONFIG_P1:
- map[i] = get_texture_p1(texstate, uinfo->data[i]);
+ write_texture_p1(vc4, texstate, uinfo->data[i]);
break;
case QUNIFORM_TEXRECT_SCALE_X:
case QUNIFORM_TEXRECT_SCALE_Y:
- map[i] = get_texrect_scale(texstate,
- uinfo->contents[i],
- uinfo->data[i]);
+ cl_u32(&vc4->uniforms,
+ get_texrect_scale(texstate,
+ uinfo->contents[i],
+ uinfo->data[i]));
break;
}
#if 0
+ uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
fprintf(stderr, "%p/%d: %d: 0x%08x (%f)\n",
- shader, shader_index, i, map[i], uif(map[i]));
+ shader, shader_index, i, written_val, uif(written_val));
#endif
}
-
- *out_bo = ubo;
- *out_offset = 0;
}
static void
diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c
index 2b59aa53f5a..0dada687911 100644
--- a/src/gallium/drivers/vc4/vc4_simulator.c
+++ b/src/gallium/drivers/vc4/vc4_simulator.c
@@ -63,9 +63,9 @@ drm_gem_cma_create(struct drm_device *dev, size_t size)
}
static int
-vc4_simulator_pin_bos(struct drm_device *dev, struct drm_vc4_submit_cl *args,
- struct exec_info *exec)
+vc4_simulator_pin_bos(struct drm_device *dev, struct exec_info *exec)
{
+ struct drm_vc4_submit_cl *args = exec->args;
struct vc4_context *vc4 = dev->vc4;
struct vc4_bo **bos = vc4->bo_pointers.base;
@@ -84,8 +84,7 @@ vc4_simulator_pin_bos(struct drm_device *dev, struct drm_vc4_submit_cl *args,
}
static int
-vc4_simulator_unpin_bos(struct drm_vc4_submit_cl *args,
- struct exec_info *exec)
+vc4_simulator_unpin_bos(struct exec_info *exec)
{
for (int i = 0; i < exec->bo_count; i++) {
struct drm_gem_cma_object *obj = exec->bo[i];
@@ -102,9 +101,9 @@ vc4_simulator_unpin_bos(struct drm_vc4_submit_cl *args,
}
static int
-vc4_cl_validate(struct drm_device *dev, struct drm_vc4_submit_cl *args,
- struct exec_info *exec)
+vc4_cl_validate(struct drm_device *dev, struct exec_info *exec)
{
+ struct drm_vc4_submit_cl *args = exec->args;
void *temp = NULL;
void *bin, *render, *shader_rec;
int ret = 0;
@@ -112,12 +111,14 @@ vc4_cl_validate(struct drm_device *dev, struct drm_vc4_submit_cl *args,
uint32_t render_offset = bin_offset + args->bin_cl_len;
uint32_t shader_rec_offset = roundup(render_offset +
args->render_cl_len, 16);
- uint32_t exec_size = shader_rec_offset + args->shader_record_len;
+ uint32_t uniforms_offset = shader_rec_offset + args->shader_record_len;
+ uint32_t exec_size = uniforms_offset + args->uniforms_len;
uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
args->shader_record_count);
if (shader_rec_offset < render_offset ||
- exec_size < shader_rec_offset ||
+ uniforms_offset < shader_rec_offset ||
+ exec_size < uniforms_offset ||
args->shader_record_count >= (UINT_MAX /
sizeof(struct vc4_shader_state)) ||
temp_size < exec_size) {
@@ -142,6 +143,7 @@ vc4_cl_validate(struct drm_device *dev, struct drm_vc4_submit_cl *args,
bin = temp + bin_offset;
render = temp + render_offset;
shader_rec = temp + shader_rec_offset;
+ exec->uniforms_u = temp + uniforms_offset;
exec->shader_state = temp + exec_size;
exec->shader_state_size = args->shader_record_count;
@@ -164,6 +166,13 @@ vc4_cl_validate(struct drm_device *dev, struct drm_vc4_submit_cl *args,
goto fail;
}
+ ret = copy_from_user(exec->uniforms_u, args->uniforms,
+ args->uniforms_len);
+ if (ret) {
+ DRM_ERROR("Failed to copy in uniforms cl\n");
+ goto fail;
+ }
+
exec->exec_bo = drm_gem_cma_create(dev, exec_size);
#if 0
if (IS_ERR(exec->exec_bo)) {
@@ -180,6 +189,10 @@ vc4_cl_validate(struct drm_device *dev, struct drm_vc4_submit_cl *args,
exec->ct1ea = exec->ct1ca + args->render_cl_len;
exec->shader_paddr = exec->exec_bo->paddr + shader_rec_offset;
+ exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
+ exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
+ exec->uniforms_size = args->uniforms_len;
+
ret = vc4_validate_cl(dev,
exec->exec_bo->vaddr + bin_offset,
bin,
@@ -243,18 +256,20 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args,
}
}
- ret = vc4_simulator_pin_bos(dev, args, &exec);
+ exec.args = args;
+
+ ret = vc4_simulator_pin_bos(dev, &exec);
if (ret)
return ret;
- ret = vc4_cl_validate(dev, args, &exec);
+ ret = vc4_cl_validate(dev, &exec);
if (ret)
return ret;
simpenrose_do_binning(exec.ct0ca, exec.ct0ea);
simpenrose_do_rendering(exec.ct1ca, exec.ct1ea);
- ret = vc4_simulator_unpin_bos(args, &exec);
+ ret = vc4_simulator_unpin_bos(&exec);
if (ret)
return ret;
diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.c b/src/gallium/drivers/vc4/vc4_simulator_validate.c
index 14701b171c7..a67e2345b11 100644
--- a/src/gallium/drivers/vc4/vc4_simulator_validate.c
+++ b/src/gallium/drivers/vc4/vc4_simulator_validate.c
@@ -347,6 +347,30 @@ vc4_validate_cl(struct drm_device *dev,
return 0;
}
+/*
+ * Relocates texture config parameter 0 for a single texture sample.
+ *
+ * The p0 word is read from the unvalidated uniform stream at the offset
+ * recorded in sample->p_offset[0]; the paddr of the BO selected by
+ * texture_handle_index is added to it and the sum is stored at the same
+ * offset in the validated stream (exec->uniforms_v).
+ *
+ * Returns false, after logging, when texture_handle_index is not below
+ * exec->bo_count; returns true otherwise.
+ */
+static bool
+reloc_tex(struct exec_info *exec,
+          void *uniform_data_u,
+          struct vc4_texture_sample_info *sample,
+          uint32_t texture_handle_index)
+{
+        uint32_t p0_offset = sample->p_offset[0];
+        uint32_t p0_from_user = *(uint32_t *)(uniform_data_u + p0_offset);
+        uint32_t *p0_for_hw = exec->uniforms_v + p0_offset;
+        struct drm_gem_cma_object *texture_bo;
+
+        if (texture_handle_index >= exec->bo_count) {
+                DRM_ERROR("texture handle index %d >= %d\n",
+                          texture_handle_index, exec->bo_count);
+                return false;
+        }
+
+        texture_bo = exec->bo[texture_handle_index];
+        *p0_for_hw = texture_bo->paddr + p0_from_user;
+
+        return true;
+}
+
static int
validate_shader_rec(struct drm_device *dev,
struct exec_info *exec,
@@ -358,45 +382,54 @@ validate_shader_rec(struct drm_device *dev,
uint32_t *src_handles = unvalidated;
void *src_pkt;
void *dst_pkt = validated;
- static const int gl_bo_offsets[] = {
- 4, 8, /* fs code, ubo */
- 16, 20, /* vs code, ubo */
- 28, 32, /* cs code, ubo */
+ enum shader_rec_reloc_type {
+ RELOC_CODE,
+ RELOC_VBO,
+ };
+ struct shader_rec_reloc {
+ enum shader_rec_reloc_type type;
+ uint32_t offset;
+ };
+ static const struct shader_rec_reloc gl_relocs[] = {
+ { RELOC_CODE, 4 }, /* fs */
+ { RELOC_CODE, 16 }, /* vs */
+ { RELOC_CODE, 28 }, /* cs */
};
- static const int nv_bo_offsets[] = {
- 4, 8, /* fs code, ubo */
- 12, /* vbo */
+ static const struct shader_rec_reloc nv_relocs[] = {
+ { RELOC_CODE, 4 }, /* fs */
+ { RELOC_VBO, 12 }
};
- struct drm_gem_cma_object *bo[ARRAY_SIZE(gl_bo_offsets) + 8];
- const int *bo_offsets;
- uint32_t nr_attributes = 0, nr_bo, packet_size;
+ const struct shader_rec_reloc *relocs;
+ struct drm_gem_cma_object *bo[ARRAY_SIZE(gl_relocs) + 8];
+ uint32_t nr_attributes = 0, nr_relocs, packet_size;
int i;
+ struct vc4_validated_shader_info *validated_shader = NULL;
if (state->packet == VC4_PACKET_NV_SHADER_STATE) {
- bo_offsets = nv_bo_offsets;
- nr_bo = ARRAY_SIZE(nv_bo_offsets);
+ relocs = nv_relocs;
+ nr_relocs = ARRAY_SIZE(nv_relocs);
packet_size = 16;
} else {
- bo_offsets = gl_bo_offsets;
- nr_bo = ARRAY_SIZE(gl_bo_offsets);
+ relocs = gl_relocs;
+ nr_relocs = ARRAY_SIZE(gl_relocs);
nr_attributes = state->addr & 0x7;
if (nr_attributes == 0)
nr_attributes = 8;
packet_size = 36 + nr_attributes * 8;
}
- if ((nr_bo + nr_attributes) * 4 + packet_size > len) {
+ if ((nr_relocs + nr_attributes) * 4 + packet_size > len) {
DRM_ERROR("overflowed shader packet read "
"(handles %d, packet %d, len %d)\n",
- (nr_bo + nr_attributes) * 4, packet_size, len);
+ (nr_relocs + nr_attributes) * 4, packet_size, len);
return -EINVAL;
}
- src_pkt = unvalidated + 4 * (nr_bo + nr_attributes);
+ src_pkt = unvalidated + 4 * (nr_relocs + nr_attributes);
memcpy(dst_pkt, src_pkt, packet_size);
- for (i = 0; i < nr_bo + nr_attributes; i++) {
+ for (i = 0; i < nr_relocs + nr_attributes; i++) {
if (src_handles[i] >= exec->bo_count) {
DRM_ERROR("shader rec bo index %d > %d\n",
src_handles[i], exec->bo_count);
@@ -405,21 +438,73 @@ validate_shader_rec(struct drm_device *dev,
bo[i] = exec->bo[src_handles[i]];
}
- for (i = 0; i < nr_bo; i++) {
- /* XXX: validation */
- uint32_t o = bo_offsets[i];
- *(uint32_t *)(dst_pkt + o) =
- bo[i]->paddr + *(uint32_t *)(src_pkt + o);
+ for (i = 0; i < nr_relocs; i++) {
+ uint32_t o = relocs[i].offset;
+ uint32_t src_offset = *(uint32_t *)(src_pkt + o);
+ *(uint32_t *)(dst_pkt + o) = bo[i]->paddr + src_offset;
+ uint32_t *texture_handles_u;
+ void *uniform_data_u;
+ uint32_t tex;
+
+ switch (relocs[i].type) {
+ case RELOC_CODE:
+ kfree(validated_shader);
+ validated_shader = vc4_validate_shader(bo[i],
+ src_offset);
+ if (!validated_shader)
+ goto fail;
+
+ if (validated_shader->uniforms_src_size >
+ exec->uniforms_size) {
+ DRM_ERROR("Uniforms src buffer overflow\n");
+ goto fail;
+ }
+
+ texture_handles_u = exec->uniforms_u;
+ uniform_data_u = (texture_handles_u +
+ validated_shader->num_texture_samples);
+
+ memcpy(exec->uniforms_v, uniform_data_u,
+ validated_shader->uniforms_size);
+
+ for (tex = 0;
+ tex < validated_shader->num_texture_samples;
+ tex++) {
+ if (!reloc_tex(exec,
+ uniform_data_u,
+ &validated_shader->texture_samples[tex],
+ texture_handles_u[tex])) {
+ goto fail;
+ }
+ }
+
+ *(uint32_t *)(dst_pkt + o + 4) = exec->uniforms_p;
+
+ exec->uniforms_u += validated_shader->uniforms_src_size;
+ exec->uniforms_v += validated_shader->uniforms_size;
+ exec->uniforms_p += validated_shader->uniforms_size;
+
+ break;
+
+ case RELOC_VBO:
+ break;
+ }
}
for (i = 0; i < nr_attributes; i++) {
/* XXX: validation */
uint32_t o = 36 + i * 8;
*(uint32_t *)(dst_pkt + o) =
- bo[nr_bo + i]->paddr + *(uint32_t *)(src_pkt + o);
+ bo[nr_relocs + i]->paddr + *(uint32_t *)(src_pkt + o);
}
+ kfree(validated_shader);
+
return 0;
+
+fail:
+ kfree(validated_shader);
+ return -EINVAL;
}
int
diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.h b/src/gallium/drivers/vc4/vc4_simulator_validate.h
index 4a2a2181ab4..885a754a9d5 100644
--- a/src/gallium/drivers/vc4/vc4_simulator_validate.h
+++ b/src/gallium/drivers/vc4/vc4_simulator_validate.h
@@ -26,15 +26,20 @@
#include <stdbool.h>
#include <string.h>
+#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <errno.h>
+#include "vc4_context.h"
+#include "vc4_qpu_defines.h"
+
#define DRM_INFO(...) fprintf(stderr, __VA_ARGS__)
#define DRM_ERROR(...) fprintf(stderr, __VA_ARGS__)
#define kmalloc(size, arg) malloc(size)
+#define kcalloc(size, count, arg) calloc(size, count)
#define kfree(ptr) free(ptr)
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#define krealloc(ptr, size, args) realloc(ptr, size)
#define roundup(x, y) align(x, y)
static inline int
@@ -64,6 +69,9 @@ struct drm_gem_cma_object {
};
struct exec_info {
+ /* Kernel-space copy of the ioctl arguments */
+ struct drm_vc4_submit_cl *args;
+
/* This is the array of BOs that were looked up at the start of exec.
* Command validation will use indices into this array.
*/
@@ -79,9 +87,8 @@ struct exec_info {
uint32_t bo_index[2];
uint32_t max_width, max_height;
- /**
- * This is the BO where we store the validated command lists
- * and shader records.
+ /* This is the BO where we store the validated command lists, shader
+ * records, and uniforms.
*/
struct drm_gem_cma_object *exec_bo;
@@ -108,6 +115,50 @@ struct exec_info {
uint32_t ct0ca, ct0ea;
uint32_t ct1ca, ct1ea;
uint32_t shader_paddr;
+
+ /* Pointers to the uniform data. These pointers are incremented, and
+ * size decremented, as each batch of uniforms is uploaded.
+ */
+ void *uniforms_u;
+ void *uniforms_v;
+ uint32_t uniforms_p;
+ uint32_t uniforms_size;
+};
+
+/**
+ * struct vc4_texture_sample_info - saves the offsets into the UBO for texture
+ * setup parameters.
+ *
+ * This will be used at draw time to relocate the reference to the texture
+ * contents in p0, and validate that the offset combined with
+ * width/height/stride/etc. from p1 and p2/p3 doesn't sample outside the BO.
+ * Note that the hardware treats unprovided config parameters as 0, so not all
+ * of them need to be set up for every texture sample, and we'll store ~0 as
+ * the offset to mark the unused ones.
+ *
+ * See the VC4 3D architecture guide page 41 ("Texture and Memory Lookup Unit
+ * Setup") for definitions of the texture parameters.
+ */
+struct vc4_texture_sample_info {
+ uint32_t p_offset[4]; /* byte offsets of p0..p3 in the uniform data; ~0 = unused */
+};
+
+/**
+ * struct vc4_validated_shader_info - information about validated shaders that
+ * needs to be used from command list validation.
+ *
+ * For a given shader, each time a shader state record references it, we need
+ * to verify that the shader doesn't read more uniforms than the shader state
+ * record's uniform BO pointer can provide, and we need to apply relocations
+ * and validate the shader state record's uniforms that define the texture
+ * samples.
+ */
+struct vc4_validated_shader_info
+{
+ uint32_t uniforms_size; /* bytes of uniform data the shader consumes (4 per uniform read or TMU write) */
+ uint32_t uniforms_src_size; /* uniforms_size plus 4 bytes of BO handle index per texture sample */
+ uint32_t num_texture_samples; /* number of entries in texture_samples */
+ struct vc4_texture_sample_info *texture_samples; /* separately allocated array, grown with krealloc() */
+};
int vc4_validate_cl(struct drm_device *dev,
@@ -123,4 +174,8 @@ int vc4_validate_shader_recs(struct drm_device *dev,
uint32_t len,
struct exec_info *exec);
+struct vc4_validated_shader_info *
+vc4_validate_shader(struct drm_gem_cma_object *shader_obj,
+ uint32_t start_offset);
+
#endif /* VC4_SIMULATOR_VALIDATE_H */
diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate_shaders.c b/src/gallium/drivers/vc4/vc4_simulator_validate_shaders.c
new file mode 100644
index 00000000000..c02deb406c7
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_simulator_validate_shaders.c
@@ -0,0 +1,334 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * DOC: Shader validator for VC4.
+ *
+ * The VC4 has no IOMMU between it and system memory. So, a user with access
+ * to execute shaders could escalate privilege by overwriting system memory
+ * (using the VPM write address register in the general-purpose DMA mode) or
+ * reading system memory it shouldn't (reading it as a texture, or uniform
+ * data, or vertex data).
+ *
+ * This walks over a shader starting from some offset within a BO, ensuring
+ * that its accesses are appropriately bounded, and recording how many texture
+ * accesses are made and where so that we can do relocations for them in the
+ * uniform stream.
+ *
+ * The kernel API has shaders stored in user-mapped BOs. The BOs will be
+ * forcibly unmapped from the process before validation, and any cache of
+ * validated state will be flushed if the mapping is faulted back in.
+ *
+ * Storing the shaders in BOs means that the validation process will be slow
+ * due to uncached reads, but since shaders are long-lived and shader BOs are
+ * never actually modified, this shouldn't be a problem.
+ */
+
+#include "vc4_simulator_validate.h"
+#include "vc4_qpu.h"
+#include "vc4_qpu_defines.h"
+
+struct vc4_shader_validation_state {
+ struct vc4_texture_sample_info tmu_setup[2]; /* per TMU: uniform offsets of the parameters written so far */
+ int tmu_write_count[2]; /* per TMU: entries filled in tmu_setup; reset to 0 after a TMUn_S write */
+};
+
+/*
+ * Reports whether waddr is one of the TMU parameter registers, i.e. lies in
+ * the inclusive range QPU_W_TMU0_S..QPU_W_TMU1_B.
+ */
+static bool
+is_tmu_write(uint32_t waddr)
+{
+        if (waddr < QPU_W_TMU0_S)
+                return false;
+
+        return waddr <= QPU_W_TMU1_B;
+}
+
+/*
+ * Decides whether the shader is allowed to write the register at waddr.
+ *
+ * Returns false, after logging the reason, for the write addresses the
+ * validator refuses, and true for every other address.  is_b is accepted
+ * but not consulted.
+ */
+static bool
+check_register_write(uint32_t waddr, bool is_b)
+{
+        switch (waddr) {
+        /* ---- Refused ---- */
+
+        case QPU_W_VPM_ADDR:
+                DRM_ERROR("General VPM DMA unsupported\n");
+                return false;
+
+        case QPU_W_UNIFORMS_ADDRESS:
+                /* XXX: Probably needed for reladdr eventually, but it is
+                 * definitely security-related, so refuse it for now.
+                 */
+                DRM_ERROR("uniforms address load unsupported\n");
+                return false;
+
+        case QPU_W_MUTEX_RELEASE:
+        case QPU_W_TLB_ALPHA_MASK:
+        case QPU_W_TLB_STENCIL_SETUP:
+        case QPU_W_TMU_NOSWAP:
+        case QPU_W_HOST_INT:
+                /* XXX: These have not been thought through yet, so they
+                 * stay unsupported.
+                 */
+                DRM_ERROR("Unsupported waddr %d\n", waddr);
+                return false;
+
+        /* ---- Explicitly allowed ---- */
+
+        case QPU_W_TLB_Z:
+        case QPU_W_TLB_COLOR_ALL:
+        case QPU_W_TLB_COLOR_MS:
+                /* XXX: The buffers written by the shader should be tracked
+                 * so they can be checked against the config packets; passed
+                 * through for now to get things up and running.
+                 */
+                break;
+
+        case QPU_W_TMU0_S:
+        case QPU_W_TMU0_T:
+        case QPU_W_TMU0_R:
+        case QPU_W_TMU0_B:
+        case QPU_W_TMU1_S:
+        case QPU_W_TMU1_T:
+        case QPU_W_TMU1_R:
+        case QPU_W_TMU1_B:
+                /* Nothing to reject here; the uniform offsets for these
+                 * writes are recorded by check_tmu_writes().
+                 * XXX: the uniform contents themselves are not validated.
+                 */
+                break;
+
+        case QPU_W_VPMVCD_SETUP:
+        case QPU_W_VPM:
+                /* VPM setup is allowed in general, even VPM DMA
+                 * configuration, because the (unsafe) DMA can only be
+                 * triggered by QPU_W_VPM_ADDR writes.
+                 */
+                break;
+        }
+
+        return true;
+}
+
+/*
+ * Appends the pending parameter offsets of one TMU to the shader's list of
+ * texture samples.
+ *
+ * The texture_samples array is grown by one entry; the first
+ * tmu_write_count[tmu] offsets are copied from tmu_setup[tmu] and the
+ * remaining slots (up to 4) are set to ~0.  Returns false when the array
+ * cannot be grown, in which case validated_shader is left untouched.
+ */
+static bool
+record_validated_texture_sample(struct vc4_validated_shader_info *validated_shader,
+                                struct vc4_shader_validation_state *validation_state,
+                                int tmu)
+{
+        uint32_t old_count = validated_shader->num_texture_samples;
+        int params_written = validation_state->tmu_write_count[tmu];
+        struct vc4_texture_sample_info *pending =
+                &validation_state->tmu_setup[tmu];
+        struct vc4_texture_sample_info *grown;
+        int p;
+
+        grown = krealloc(validated_shader->texture_samples,
+                         (old_count + 1) * sizeof(*grown),
+                         GFP_KERNEL);
+        if (!grown)
+                return false;
+
+        for (p = 0; p < 4; p++) {
+                if (p < params_written)
+                        grown[old_count].p_offset[p] = pending->p_offset[p];
+                else
+                        grown[old_count].p_offset[p] = ~0;
+        }
+
+        validated_shader->texture_samples = grown;
+        validated_shader->num_texture_samples = old_count + 1;
+
+        return true;
+}
+
+/*
+ * Tracks a write to a TMU parameter register.
+ *
+ * Writes to non-TMU addresses are ignored (returns true).  For a TMU write,
+ * the current uniforms_size is stored as the uniform offset of that
+ * parameter and uniforms_size advances by 4.  A write to TMUn_S finishes the
+ * sample: the collected offsets are recorded on validated_shader and the
+ * per-TMU write count restarts at 0.
+ *
+ * Returns false when a TMU already holds 4 pending parameters, or when the
+ * sample cannot be recorded.  inst is not consulted.
+ */
+static bool
+check_tmu_writes(uint64_t inst,
+                 struct vc4_validated_shader_info *validated_shader,
+                 struct vc4_shader_validation_state *validation_state,
+                 uint32_t waddr)
+{
+        int tmu;
+        int *pending;
+
+        if (!is_tmu_write(waddr))
+                return true;
+
+        /* Addresses above QPU_W_TMU0_B are treated as TMU1. */
+        tmu = (waddr > QPU_W_TMU0_B) ? 1 : 0;
+        pending = &validation_state->tmu_write_count[tmu];
+
+        if (*pending >= 4) {
+                DRM_ERROR("TMU%d got too many parameters before dispatch\n",
+                          tmu);
+                return false;
+        }
+
+        validation_state->tmu_setup[tmu].p_offset[*pending] =
+                validated_shader->uniforms_size;
+        *pending += 1;
+        validated_shader->uniforms_size += 4;
+
+        if (waddr != QPU_W_TMU0_S && waddr != QPU_W_TMU1_S)
+                return true;
+
+        if (!record_validated_texture_sample(validated_shader,
+                                             validation_state, tmu))
+                return false;
+
+        *pending = 0;
+
+        return true;
+}
+
+/*
+ * Validates both write destinations (ADD and MUL) of one instruction.
+ *
+ * An instruction whose ADD and MUL results both target TMU registers is
+ * rejected.  Otherwise the TMU bookkeeping runs for the ADD destination,
+ * then the MUL destination, and finally each destination goes through
+ * check_register_write().  Returns true only when every step succeeds.
+ */
+static bool
+check_instruction_writes(uint64_t inst,
+                         struct vc4_validated_shader_info *validated_shader,
+                         struct vc4_shader_validation_state *validation_state)
+{
+        uint32_t dst_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+        uint32_t dst_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+        bool write_swap = (inst & QPU_WS) != 0;
+
+        if (is_tmu_write(dst_add) && is_tmu_write(dst_mul)) {
+                DRM_ERROR("ADD and MUL both set up textures\n");
+                return false;
+        }
+
+        if (!check_tmu_writes(inst, validated_shader, validation_state,
+                              dst_add))
+                return false;
+
+        if (!check_tmu_writes(inst, validated_shader, validation_state,
+                              dst_mul))
+                return false;
+
+        if (!check_register_write(dst_add, write_swap))
+                return false;
+
+        return check_register_write(dst_mul, !write_swap);
+}
+
+/*
+ * Accounts for a uniform read performed by one instruction.
+ *
+ * When either read address selects the uniform stream (QPU_R_UNIF),
+ * uniforms_size advances by 4 bytes.  An instruction that reads a uniform
+ * while also writing a TMU register is rejected.
+ *
+ * Returns false (after logging) for the rejected combination, true
+ * otherwise.
+ */
+static bool
+check_instruction_reads(uint64_t inst,
+                        struct vc4_validated_shader_info *validated_shader)
+{
+        uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+        uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+        uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+        uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+
+        if (raddr_a == QPU_R_UNIF ||
+            raddr_b == QPU_R_UNIF) {
+                if (is_tmu_write(waddr_add) || is_tmu_write(waddr_mul)) {
+                        /* Newline-terminated like every other message in
+                         * this file, so the next log line starts cleanly.
+                         */
+                        DRM_ERROR("uniform read in the same instruction as "
+                                  "texture setup\n");
+                        return false;
+                }
+
+                /* This can't overflow the uint32_t, because we're reading 8
+                 * bytes of instruction to increment by 4 here, so we'd
+                 * already be OOM.
+                 */
+                validated_shader->uniforms_size += 4;
+        }
+
+        return true;
+}
+
+/**
+ * vc4_validate_shader - walks a QPU program and collects uniform/texture info.
+ * @shader_obj: BO holding the shader code.
+ * @start_offset: byte offset of the first instruction within the BO.
+ *
+ * Checks each 64-bit instruction from start_offset through the two delay
+ * slots that follow the QPU_SIG_PROG_END signal.  Unsupported signals and
+ * rejected register writes fail validation, as does a program that does not
+ * terminate before the end of the BO.  Along the way the number of uniform
+ * bytes consumed and the texture samples set up are accumulated.
+ *
+ * Returns a newly allocated vc4_validated_shader_info, or NULL on any
+ * validation or allocation failure.  On success the struct and its
+ * texture_samples array are two separate allocations owned by the caller.
+ * NOTE(review): a caller that only kfree()s the struct leaks texture_samples.
+ */
+struct vc4_validated_shader_info *
+vc4_validate_shader(struct drm_gem_cma_object *shader_obj,
+                    uint32_t start_offset)
+{
+        bool found_shader_end = false;
+        int shader_end_ip = 0;
+        uint32_t ip, max_ip;
+        uint64_t *shader;
+        struct vc4_validated_shader_info *validated_shader;
+        struct vc4_shader_validation_state validation_state;
+
+        memset(&validation_state, 0, sizeof(validation_state));
+
+        /* Compare against the remaining space instead of adding to
+         * start_offset: the sum can wrap where size_t is 32 bits wide, which
+         * would let an out-of-range offset pass the check.
+         */
+        if (start_offset > shader_obj->base.size ||
+            shader_obj->base.size - start_offset < sizeof(uint64_t)) {
+                DRM_ERROR("shader starting at %d outside of BO sized %d\n",
+                          start_offset,
+                          shader_obj->base.size);
+                return NULL;
+        }
+        shader = shader_obj->vaddr + start_offset;
+        max_ip = (shader_obj->base.size - start_offset) / sizeof(uint64_t);
+
+        validated_shader = kcalloc(sizeof(*validated_shader), 1, GFP_KERNEL);
+        if (!validated_shader)
+                return NULL;
+
+        for (ip = 0; ip < max_ip; ip++) {
+                uint64_t inst = shader[ip];
+                uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+
+                switch (sig) {
+                case QPU_SIG_NONE:
+                case QPU_SIG_WAIT_FOR_SCOREBOARD:
+                case QPU_SIG_SCOREBOARD_UNLOCK:
+                case QPU_SIG_LOAD_TMU0:
+                case QPU_SIG_LOAD_TMU1:
+                        if (!check_instruction_writes(inst, validated_shader,
+                                                      &validation_state)) {
+                                DRM_ERROR("Bad write at ip %d\n", ip);
+                                goto fail;
+                        }
+
+                        if (!check_instruction_reads(inst, validated_shader))
+                                goto fail;
+
+                        break;
+
+                case QPU_SIG_LOAD_IMM:
+                        /* Only the writes are checked for load-immediate
+                         * instructions.
+                         */
+                        if (!check_instruction_writes(inst, validated_shader,
+                                                      &validation_state)) {
+                                DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
+                                goto fail;
+                        }
+                        break;
+
+                case QPU_SIG_PROG_END:
+                        found_shader_end = true;
+                        shader_end_ip = ip;
+                        break;
+
+                default:
+                        DRM_ERROR("Unsupported QPU signal %d at "
+                                  "instruction %d\n", sig, ip);
+                        goto fail;
+                }
+
+                /* There are two delay slots after program end is signaled
+                 * that are still executed, then we're finished.
+                 */
+                if (found_shader_end && ip == shader_end_ip + 2)
+                        break;
+        }
+
+        /* Falling out of the loop without hitting the break above means the
+         * program (or its delay slots) ran up to the end of the BO.
+         */
+        if (ip == max_ip) {
+                DRM_ERROR("shader starting at %d failed to terminate before "
+                          "shader BO end at %d\n",
+                          start_offset,
+                          shader_obj->base.size);
+                goto fail;
+        }
+
+        /* Again, no chance of integer overflow here because the worst case
+         * scenario is 8 bytes of uniforms plus handles per 8-byte
+         * instruction.
+         */
+        validated_shader->uniforms_src_size =
+                (validated_shader->uniforms_size +
+                 4 * validated_shader->num_texture_samples);
+
+        return validated_shader;
+
+fail:
+        /* texture_samples is a separate allocation grown while walking the
+         * program; release it too (kfree(NULL) is a no-op).
+         */
+        kfree(validated_shader->texture_samples);
+        kfree(validated_shader);
+        return NULL;
+}